diff --git a/docs/examples/tox21.nblink b/docs/examples/tox21.nblink
new file mode 100644
index 0000000..8dc72eb
--- /dev/null
+++ b/docs/examples/tox21.nblink
@@ -0,0 +1,3 @@
+{
+  "path": "../../examples/tox21.ipynb"
+}
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index d87d018..b430e03 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -83,6 +83,7 @@ arguments are mostly the same.
     examples/bace
     examples/pdbbind
     examples/rna
+    examples/tox21
 
 .. toctree::
     :maxdepth: 1
diff --git a/examples/bace.ipynb b/examples/bace.ipynb
index 77d1f66..f2ed9d9 100644
--- a/examples/bace.ipynb
+++ b/examples/bace.ipynb
@@ -115,7 +115,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## The output\n",
+    "### The output\n",
     "\n",
     "Finally, we inspect the e_split object as this holds all the assignments of the datapoints to the splits, for each run and each technique. First, the overall architecture is described, lastly we look at the first five assignments of the C1 run 0."
    ]
diff --git a/examples/pdbbind.ipynb b/examples/pdbbind.ipynb
index 775e32f..1079777 100644
--- a/examples/pdbbind.ipynb
+++ b/examples/pdbbind.ipynb
@@ -80,45 +80,6 @@
    "cell_type": "code",
    "execution_count": 3,
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[10:05:12] Explicit valence for atom # 50 C greater than permitted\n",
-      "[10:05:12] ERROR: Could not sanitize molecule ending on line 287\n",
-      "[10:05:12] ERROR: Explicit valence for atom # 50 C greater than permitted\n",
-      "[10:05:12] Explicit valence for atom # 0 C greater than permitted\n",
-      "[10:05:12] ERROR: Could not sanitize molecule ending on line 48\n",
-      "[10:05:12] ERROR: Explicit valence for atom # 0 C greater than permitted\n",
-      "[10:05:12] Explicit valence for atom # 26 C greater than permitted\n",
-      "[10:05:12] ERROR: Could not sanitize molecule ending on line 119\n",
-      "[10:05:12] ERROR: Explicit valence for atom # 26 C greater than permitted\n",
-      "[10:05:12] Explicit valence for atom # 26 C greater than permitted\n",
-      "[10:05:12] ERROR: Could not sanitize molecule ending on line 116\n",
-      "[10:05:12] ERROR: Explicit valence for atom # 26 C greater than permitted\n",
-      "[10:05:12] Explicit valence for atom # 4 C greater than permitted\n",
-      "[10:05:12] ERROR: Could not sanitize molecule ending on line 108\n",
-      "[10:05:12] ERROR: Explicit valence for atom # 4 C greater than permitted\n",
-      "[10:05:12] Explicit valence for atom # 23 C greater than permitted\n",
-      "[10:05:12] ERROR: Could not sanitize molecule ending on line 146\n",
-      "[10:05:12] ERROR: Explicit valence for atom # 23 C greater than permitted\n",
-      "[10:05:12] Explicit valence for atom # 2 C greater than permitted\n",
-      "[10:05:12] ERROR: Could not sanitize molecule ending on line 45\n",
-      "[10:05:12] ERROR: Explicit valence for atom # 2 C greater than permitted\n",
-      "[10:05:12] Explicit valence for atom # 42 C greater than permitted\n",
-      "[10:05:12] ERROR: Could not sanitize molecule ending on line 172\n",
-      "[10:05:12] ERROR: Explicit valence for atom # 42 C greater than permitted\n",
-      "[10:05:12] Explicit valence for atom # 6 C greater than permitted\n",
-      "[10:05:12] ERROR: Could not sanitize molecule ending on line 94\n",
-      "[10:05:12] ERROR: Explicit valence for atom # 6 C greater than permitted\n",
-      "[10:05:12] Explicit valence for atom # 28 C greater than permitted\n",
-      "[10:05:12] ERROR: Could not sanitize molecule ending on line 159\n",
-      "[10:05:12] ERROR: Explicit valence for atom # 28 C greater than permitted\n",
-      "[10:05:12] Can't kekulize mol.  Unkekulized atoms: 2 6 7 17 23 24 25\n",
-      "[10:05:12] ERROR: Could not sanitize molecule ending on line 114\n",
-      "[10:05:12] ERROR: Can't kekulize mol.  Unkekulized atoms: 2 6 7 17 23 24 25\n"
-     ]
-    },
     {
      "data": {
       "text/plain": "      ids                                             Ligand  \\\n0    2d3u  Cc1ccccc1S(=O)(=O)Nc1cc(-c2ccc(C#N)cc2)sc1C(=O...   \n1    3cyx  CC(C)(C)NC(=O)[C@@H]1C[C@@H]2CCCC[C@@H]2C[N@H+...   \n2    3uo4   O=C([O-])c1ccc(Nc2nccc(Nc3ccccc3-c3ccccc3)n2)cc1   \n3    1p1q             Cc1o[nH]c(=O)c1C[C@H]([NH3+])C(=O)[O-]   \n5    2wtv  O=C([O-])c1ccc(Nc2ncc3c(n2)-c2ccc(Cl)cc2C(c2c(...   \n..    ...                                                ...   \n188  2x0y               Cn1c(=O)c2c(ncn2C[C@H](O)CO)n(C)c1=O   \n189  3uex                         CCCCCCCCCCCCCCCCCC(=O)[O-]   \n190  2pq9  O=C([O-])C1=C[C@@H](OP(=O)([O-])[O-])[C@@H](O)...   \n191  1u1b  Cc1cn([C@H]2C[C@H](O[P@](=O)([O-])O[P@](=O)([O...   \n192  4gqq                        CCOC(=O)/C=C/c1ccc(O)c(O)c1   \n\n                                   Target         y  \n0    /tmp/v2013-core/2d3u/2d3u_pocket.pdb  0.268375  \n1    /tmp/v2013-core/3cyx/3cyx_pocket.pdb  0.749538  \n2    /tmp/v2013-core/3uo4/3uo4_pocket.pdb  0.090166  \n3    /tmp/v2013-core/1p1q/1p1q_pocket.pdb -0.636034  \n5    /tmp/v2013-core/2wtv/2wtv_pocket.pdb  1.079223  \n..                                    ...       ...  \n188  /tmp/v2013-core/2x0y/2x0y_pocket.pdb -0.765235  \n189  /tmp/v2013-core/3uex/3uex_pocket.pdb  0.268375  \n190  /tmp/v2013-core/2pq9/2pq9_pocket.pdb  0.798545  \n191  /tmp/v2013-core/1u1b/1u1b_pocket.pdb  0.660433  \n192  /tmp/v2013-core/4gqq/4gqq_pocket.pdb -1.527076  \n\n[182 rows x 4 columns]",
@@ -130,6 +91,9 @@
     }
    ],
    "source": [
+    "from rdkit import rdBase\n",
+    "blocker = rdBase.BlockLogs()\n",
+    "\n",
     "def sdf2smiles(x):\n",
     "    mols = Chem.SDMolSupplier(x)\n",
     "    if len(mols) != 1:\n",
@@ -227,7 +191,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## The output\n",
+    "### The output\n",
     "\n",
     "Finally, we inspect the returned split assignments as this holds all the assignments of the datapoints to the splits, for each run and each technique. First, the overall architecture is described, lastly we look at the first five assignments of the C1 run 0."
    ]
@@ -259,9 +223,9 @@
       "\tRun 2 - Type: <class 'dict'> - 182 assignments\n",
       "\tRun 3 - Type: <class 'dict'> - 182 assignments\n",
       "\n",
-      "ID: 2d3u - Split: val\n",
-      "ID: 3cyx - Split: val\n",
-      "ID: 3pww - Split: val\n",
+      "ID: 2d3u - Split: train\n",
+      "ID: 3cyx - Split: train\n",
+      "ID: 3pww - Split: train\n",
       "ID: 3uo4 - Split: train\n",
       "ID: 1p1q - Split: train\n"
      ]
@@ -302,11 +266,11 @@
       "\tRun 2 - Type: <class 'dict'> - 182 assignments\n",
       "\tRun 3 - Type: <class 'dict'> - 182 assignments\n",
       "\n",
-      "ID: 2d3u - Split: train\n",
-      "ID: 3cyx - Split: val\n",
-      "ID: 3uo4 - Split: train\n",
-      "ID: 1p1q - Split: train\n",
-      "ID: 2wtv - Split: train\n"
+      "ID: 2d3u - Split: val\n",
+      "ID: 3cyx - Split: train\n",
+      "ID: 3uo4 - Split: val\n",
+      "ID: 1p1q - Split: test\n",
+      "ID: 2wtv - Split: val\n"
      ]
     }
    ],
@@ -361,10 +325,10 @@
       "\tRun 3 - Type: <class 'dict'> - 182 assignments\n",
       "\n",
       "ID: ('2d3u', '2d3u') - Split: not selected\n",
-      "ID: ('3cyx', '3cyx') - Split: val\n",
-      "ID: ('3uo4', '3uo4') - Split: train\n",
-      "ID: ('1p1q', '1p1q') - Split: train\n",
-      "ID: ('2wtv', '2wtv') - Split: train\n"
+      "ID: ('3cyx', '3cyx') - Split: train\n",
+      "ID: ('3uo4', '3uo4') - Split: not selected\n",
+      "ID: ('1p1q', '1p1q') - Split: not selected\n",
+      "ID: ('2wtv', '2wtv') - Split: not selected\n"
      ]
     }
    ],
diff --git a/examples/qm9.ipynb b/examples/qm9.ipynb
index 72a485f..f5a46b2 100644
--- a/examples/qm9.ipynb
+++ b/examples/qm9.ipynb
@@ -35,295 +35,6 @@
    "execution_count": 2,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 2 4 5 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 2 4 5 6 7\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 3 4 6\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 6\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 6\n",
-      "[10:09:29] Can't kekulize mol.  Unkekulized atoms: 1 2 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 3 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 4 5\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 3 4 5\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 3 4 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 4 5 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 3 4 5\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 3 4 5\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 4 5 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 5 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 5 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 1 2 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 4 5 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 4 5 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 4 5 7 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 4 5 6 8\n",
-      "[10:09:30] Can't kekulize mol.  Unkekulized atoms: 2 4 5 6 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 2 4 5 6 7\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 2 3 4 7 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 2 3 4 7 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 2 3 4 7 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 2 3 4 7 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 7\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 7\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 2 3 5 7 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 2 3 5 6 7\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 2 3 5 6 7\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 2 3 5 6 7\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 3 4 5 7 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 3 4 5 7 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 3 4 5 7 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 3 5 6 7 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 3 5 6 7 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 3 5 6 7 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 3 5 6 7 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 1 5 6\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 3 7 8\n",
-      "[10:09:31] Can't kekulize mol.  Unkekulized atoms: 1 2 3\n",
-      "[10:09:32] Can't kekulize mol.  Unkekulized atoms: 3 4 8\n",
-      "[10:09:32] Can't kekulize mol.  Unkekulized atoms: 3 7 8\n",
-      "[10:09:32] Can't kekulize mol.  Unkekulized atoms: 3 4 8\n",
-      "[10:09:33] Can't kekulize mol.  Unkekulized atoms: 1 2 8\n",
-      "[10:09:33] Can't kekulize mol.  Unkekulized atoms: 1 2 6\n",
-      "[10:09:33] Can't kekulize mol.  Unkekulized atoms: 1 2 5 7\n",
-      "[10:09:33] Can't kekulize mol.  Unkekulized atoms: 4 5 7\n",
-      "[10:09:33] Can't kekulize mol.  Unkekulized atoms: 4 5 7\n",
-      "[10:09:34] Can't kekulize mol.  Unkekulized atoms: 2 3 4\n",
-      "[10:09:34] Can't kekulize mol.  Unkekulized atoms: 2 6 7\n",
-      "[10:09:34] Can't kekulize mol.  Unkekulized atoms: 2 6 7\n",
-      "[10:09:34] Can't kekulize mol.  Unkekulized atoms: 2 6 7\n",
-      "[10:09:34] Can't kekulize mol.  Unkekulized atoms: 2 6 7\n",
-      "[10:09:34] Can't kekulize mol.  Unkekulized atoms: 2 3 7\n",
-      "[10:09:34] Can't kekulize mol.  Unkekulized atoms: 2 3 7\n",
-      "[10:09:34] Can't kekulize mol.  Unkekulized atoms: 3 5 6\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 3 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 3 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 6 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 6 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 6 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 4 5 6\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 6 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 4 5 6\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 5 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 5 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 5 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 5 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 3 4 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 5 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 5 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 7 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 4 5 6 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 4 5 6 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 4 5 6 8\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 4 5 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7\n",
-      "[10:09:35] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7\n"
-     ]
-    },
     {
      "data": {
       "text/plain": "                ID                                             SMILES  \\\n0       Comp000001                                                  C   \n1       Comp000002                                                  N   \n2       Comp000003                                                  O   \n3       Comp000004                                                C#C   \n4       Comp000005                                                C#N   \n...            ...                                                ...   \n132475  Comp132193  C1[C@H]2[C@@H]3[C@H]2[N@H+]2[C@@H]4C[C@]12[C@H]34   \n132476  Comp132194  C1[C@H]2[C@@H]3[C@H]4[C@H]5O[C@@]13[C@@H]2[C@H]54   \n132477  Comp132195  C1[N@H+]2[C@@H]3[C@H]2[C@H]2[N@@H+]4C[C@]12[C@...   \n132478  Comp132196  C1[C@H]2[C@@H]3[C@H]2[C@H]2[N@@H+]4C[C@]12[C@H]34   \n132479  Comp132197  C1[N@@H+]2[C@H]3[C@@H]4[C@@H]5O[C@]13[C@H]2[C@...   \n\n              mu     alpha      homo      lumo       gap        r2      zpve  \\\n0      -1.695514 -5.140947 -5.551545  1.965764  4.744480 -3.370877 -2.278929   \n1      -0.560317 -5.574660 -0.343349  1.281473  1.451169 -3.414138 -2.606638   \n2      -0.402845 -5.938979 -1.769924  0.997352  1.882555 -3.447759 -3.016092   \n3      -1.695514 -4.785881 -1.439182  0.635197  1.355305 -3.257365 -2.843707   \n4       0.325228 -5.166392 -4.463681  0.004929  2.244040 -3.307999 -3.166654   \n...          ...       ...       ...       ...       ...       ...       ...   
\n132475 -0.706407 -0.695101 -3.100863 -0.711376  0.844033  0.469551 -1.241336   \n132476 -0.112342  0.703189 -0.144106 -0.931470 -0.857544  0.936507  0.150709   \n132477  0.450717  0.063607 -0.984909 -1.223594 -0.727729  0.617526 -0.169841   \n132478  0.707701  0.382820 -0.128167 -0.003074  0.061148  0.565689 -0.168043   \n132479  0.571597 -0.166550 -0.905212 -0.403245  0.053159  0.448744 -0.496793   \n\n              cv        u0      u298      h298      g298  \n0      -4.317699  6.607249  6.607212  6.607212  6.607387  \n1      -4.354192  6.229272  6.229231  6.229231  6.229431  \n2      -4.429086  5.761037  5.760991  5.760991  5.761238  \n3      -3.815621  5.739750  5.739706  5.739706  5.739920  \n4      -4.363256  5.360441  5.360385  5.360385  5.360611  \n...          ...       ...       ...       ...       ...  \n132475 -0.050394 -0.434861 -0.434856 -0.434856 -0.434869  \n132476  0.962825  0.294224  0.294260  0.294260  0.294169  \n132477  0.431887 -0.085259 -0.085238 -0.085238 -0.085293  \n132478  0.597179 -0.084153 -0.084130 -0.084130 -0.084187  \n132479  0.128255 -0.463497 -0.463485 -0.463485 -0.463519  \n\n[132197 rows x 14 columns]",
@@ -335,6 +46,9 @@
     }
    ],
    "source": [
+    "from rdkit import rdBase\n",
+    "blocker = rdBase.BlockLogs()\n",
+    "\n",
     "def mol2smiles(mol):\n",
     "    try:\n",
     "        return Chem.MolToSmiles(Chem.rdmolops.RemoveHs(mol))\n",
@@ -399,8 +113,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "\n",
-    "## The output\n",
+    "### The output\n",
     "\n",
     "Finally, we inspect the e_split object as this holds all the assignments of the datapoints to the splits, for each run and each technique. First, the overall architecture is described, lastly we look at the first five assignments of the C1 run 0."
    ]
@@ -420,9 +133,9 @@
       "\tRun 2 - Type: <class 'dict'> - 132197 assignments\n",
       "\tRun 3 - Type: <class 'dict'> - 132197 assignments\n",
       "\n",
-      "ID: Comp000001 - Split: train\n",
-      "ID: Comp000002 - Split: train\n",
-      "ID: Comp000003 - Split: train\n",
+      "ID: Comp000001 - Split: val\n",
+      "ID: Comp000002 - Split: val\n",
+      "ID: Comp000003 - Split: val\n",
       "ID: Comp000004 - Split: train\n",
       "ID: Comp000005 - Split: train\n"
      ]
diff --git a/examples/rna.ipynb b/examples/rna.ipynb
index 84ea2cd..4a10547 100644
--- a/examples/rna.ipynb
+++ b/examples/rna.ipynb
@@ -11,7 +11,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -44,7 +44,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 6,
    "metadata": {
     "scrolled": true
    },
@@ -58,7 +58,7 @@
       "Command: cd-hit-est -i ../cdhitest.fasta -o clusters -d 0 -T 1\n",
       "         -c 0.9 -n 10 -l 9\n",
       "\n",
-      "Started: Mon Mar 18 09:56:54 2024\n",
+      "Started: Tue Mar 26 16:30:55 2024\n",
       "================================================================\n",
       "                            Output                              \n",
       "----------------------------------------------------------------\n",
@@ -87,13 +87,13 @@
       "writing clustering information\n",
       "program completed !\n",
       "\n",
-      "Total CPU time 2.63\n",
+      "Total CPU time 2.49\n",
       "================================================================\n",
       "Program: CD-HIT, V4.8.1 (+OpenMP), May 15 2023, 22:49:31\n",
       "Command: cd-hit-est -i ../cdhitest.fasta -o clusters -d 0 -T 1\n",
       "         -c 0.8 -n 5 -l 4\n",
       "\n",
-      "Started: Mon Mar 18 09:56:57 2024\n",
+      "Started: Tue Mar 26 16:30:58 2024\n",
       "================================================================\n",
       "                            Output                              \n",
       "----------------------------------------------------------------\n",
@@ -122,113 +122,110 @@
       "writing clustering information\n",
       "program completed !\n",
       "\n",
-      "Total CPU time 93.34\n",
-      "2024-03-18 09:58:31,281 cdhit_est cannot optimally cluster the data. The minimal number of clusters is 6309.\n",
-      "(CVXPY) Mar 18 09:58:44 AM: Your problem has 18960 variables, 4 constraints, and 0 parameters.\n",
-      "(CVXPY) Mar 18 09:58:44 AM: It is compliant with the following grammars: DCP, DQCP\n",
-      "(CVXPY) Mar 18 09:58:44 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)\n",
-      "(CVXPY) Mar 18 09:58:44 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.\n",
-      "(CVXPY) Mar 18 09:58:44 AM: Your problem is compiled with the CPP canonicalization backend.\n",
-      "(CVXPY) Mar 18 09:58:44 AM: Compiling problem (target solver=SCIP).\n",
-      "(CVXPY) Mar 18 09:58:44 AM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> SCIP\n",
-      "(CVXPY) Mar 18 09:58:44 AM: Applying reduction Dcp2Cone\n",
-      "(CVXPY) Mar 18 09:58:45 AM: Applying reduction CvxAttr2Constr\n",
-      "(CVXPY) Mar 18 09:58:45 AM: Applying reduction ConeMatrixStuffing\n",
-      "(CVXPY) Mar 18 09:58:45 AM: Applying reduction SCIP\n",
-      "(CVXPY) Mar 18 09:58:45 AM: Finished problem compilation (took 6.328e-02 seconds).\n",
-      "(CVXPY) Mar 18 09:58:45 AM: Invoking solver SCIP  to obtain a solution.\n",
-      "(CVXPY) Mar 18 09:58:49 AM: Problem status: optimal\n",
-      "(CVXPY) Mar 18 09:58:49 AM: Optimal value: 1.000e+00\n",
-      "(CVXPY) Mar 18 09:58:49 AM: Compilation took 6.328e-02 seconds\n",
-      "(CVXPY) Mar 18 09:58:49 AM: Solver (including time spent in interface) took 4.609e+00 seconds\n",
-      "(CVXPY) Mar 18 09:58:50 AM: Your problem has 1375 variables, 1229 constraints, and 0 parameters.\n",
-      "(CVXPY) Mar 18 09:58:50 AM: It is compliant with the following grammars: DCP, DQCP\n",
-      "(CVXPY) Mar 18 09:58:50 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)\n",
-      "(CVXPY) Mar 18 09:58:50 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.\n",
-      "(CVXPY) Mar 18 09:58:50 AM: Your problem is compiled with the CPP canonicalization backend.\n",
-      "(CVXPY) Mar 18 09:58:50 AM: Compiling problem (target solver=SCIP).\n",
-      "(CVXPY) Mar 18 09:58:50 AM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> SCIP\n",
-      "(CVXPY) Mar 18 09:58:50 AM: Applying reduction Dcp2Cone\n",
-      "(CVXPY) Mar 18 09:58:50 AM: Applying reduction CvxAttr2Constr\n",
-      "(CVXPY) Mar 18 09:58:50 AM: Applying reduction ConeMatrixStuffing\n",
-      "(CVXPY) Mar 18 09:58:51 AM: Applying reduction SCIP\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Finished problem compilation (took 1.587e+00 seconds).\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Invoking solver SCIP  to obtain a solution.\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Problem status: infeasible\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Optimal value: inf\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Compilation took 1.587e+00 seconds\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Solver (including time spent in interface) took 3.343e-01 seconds\n",
-      "2024-03-18 09:58:52,340 SCIP cannot solve the problem. Please consider relaxing split restrictions, e.g., less splits, or a higher tolerance level for exceeding cluster limits.\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Your problem has 18960 variables, 4 constraints, and 0 parameters.\n",
-      "(CVXPY) Mar 18 09:58:52 AM: It is compliant with the following grammars: DCP, DQCP\n",
-      "(CVXPY) Mar 18 09:58:52 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)\n",
-      "(CVXPY) Mar 18 09:58:52 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Your problem is compiled with the CPP canonicalization backend.\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Compiling problem (target solver=SCIP).\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> SCIP\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Applying reduction Dcp2Cone\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Applying reduction CvxAttr2Constr\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Applying reduction ConeMatrixStuffing\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Applying reduction SCIP\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Finished problem compilation (took 2.134e-02 seconds).\n",
-      "(CVXPY) Mar 18 09:58:52 AM: Invoking solver SCIP  to obtain a solution.\n",
-      "(CVXPY) Mar 18 09:58:56 AM: Problem status: optimal\n",
-      "(CVXPY) Mar 18 09:58:56 AM: Optimal value: 1.000e+00\n",
-      "(CVXPY) Mar 18 09:58:56 AM: Compilation took 2.134e-02 seconds\n",
-      "(CVXPY) Mar 18 09:58:56 AM: Solver (including time spent in interface) took 4.310e+00 seconds\n",
-      "(CVXPY) Mar 18 09:58:57 AM: Your problem has 1375 variables, 1229 constraints, and 0 parameters.\n",
-      "(CVXPY) Mar 18 09:58:57 AM: It is compliant with the following grammars: DCP, DQCP\n",
-      "(CVXPY) Mar 18 09:58:57 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)\n",
-      "(CVXPY) Mar 18 09:58:57 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.\n",
-      "(CVXPY) Mar 18 09:58:57 AM: Your problem is compiled with the CPP canonicalization backend.\n",
-      "(CVXPY) Mar 18 09:58:57 AM: Compiling problem (target solver=SCIP).\n",
-      "(CVXPY) Mar 18 09:58:57 AM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> SCIP\n",
-      "(CVXPY) Mar 18 09:58:57 AM: Applying reduction Dcp2Cone\n",
-      "(CVXPY) Mar 18 09:58:57 AM: Applying reduction CvxAttr2Constr\n",
-      "(CVXPY) Mar 18 09:58:57 AM: Applying reduction ConeMatrixStuffing\n",
-      "(CVXPY) Mar 18 09:58:58 AM: Applying reduction SCIP\n",
-      "(CVXPY) Mar 18 09:58:58 AM: Finished problem compilation (took 1.540e+00 seconds).\n",
-      "(CVXPY) Mar 18 09:58:58 AM: Invoking solver SCIP  to obtain a solution.\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Problem status: infeasible\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Optimal value: inf\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Compilation took 1.540e+00 seconds\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Solver (including time spent in interface) took 2.767e-01 seconds\n",
-      "2024-03-18 09:58:59,197 SCIP cannot solve the problem. Please consider relaxing split restrictions, e.g., less splits, or a higher tolerance level for exceeding cluster limits.\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Your problem has 18960 variables, 4 constraints, and 0 parameters.\n",
-      "(CVXPY) Mar 18 09:58:59 AM: It is compliant with the following grammars: DCP, DQCP\n",
-      "(CVXPY) Mar 18 09:58:59 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)\n",
-      "(CVXPY) Mar 18 09:58:59 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Your problem is compiled with the CPP canonicalization backend.\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Compiling problem (target solver=SCIP).\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> SCIP\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Applying reduction Dcp2Cone\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Applying reduction CvxAttr2Constr\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Applying reduction ConeMatrixStuffing\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Applying reduction SCIP\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Finished problem compilation (took 2.118e-02 seconds).\n",
-      "(CVXPY) Mar 18 09:58:59 AM: Invoking solver SCIP  to obtain a solution.\n",
-      "(CVXPY) Mar 18 09:59:03 AM: Problem status: optimal\n",
-      "(CVXPY) Mar 18 09:59:03 AM: Optimal value: 1.000e+00\n",
-      "(CVXPY) Mar 18 09:59:03 AM: Compilation took 2.118e-02 seconds\n",
-      "(CVXPY) Mar 18 09:59:03 AM: Solver (including time spent in interface) took 4.260e+00 seconds\n",
-      "(CVXPY) Mar 18 09:59:04 AM: Your problem has 1375 variables, 1229 constraints, and 0 parameters.\n",
-      "(CVXPY) Mar 18 09:59:04 AM: It is compliant with the following grammars: DCP, DQCP\n",
-      "(CVXPY) Mar 18 09:59:04 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)\n",
-      "(CVXPY) Mar 18 09:59:04 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.\n",
-      "(CVXPY) Mar 18 09:59:04 AM: Your problem is compiled with the CPP canonicalization backend.\n",
-      "(CVXPY) Mar 18 09:59:04 AM: Compiling problem (target solver=SCIP).\n",
-      "(CVXPY) Mar 18 09:59:04 AM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> SCIP\n",
-      "(CVXPY) Mar 18 09:59:04 AM: Applying reduction Dcp2Cone\n",
-      "(CVXPY) Mar 18 09:59:04 AM: Applying reduction CvxAttr2Constr\n",
-      "(CVXPY) Mar 18 09:59:04 AM: Applying reduction ConeMatrixStuffing\n",
-      "(CVXPY) Mar 18 09:59:05 AM: Applying reduction SCIP\n",
-      "(CVXPY) Mar 18 09:59:05 AM: Finished problem compilation (took 1.617e+00 seconds).\n",
-      "(CVXPY) Mar 18 09:59:05 AM: Invoking solver SCIP  to obtain a solution.\n",
-      "(CVXPY) Mar 18 09:59:06 AM: Problem status: infeasible\n",
-      "(CVXPY) Mar 18 09:59:06 AM: Optimal value: inf\n",
-      "(CVXPY) Mar 18 09:59:06 AM: Compilation took 1.617e+00 seconds\n",
-      "(CVXPY) Mar 18 09:59:06 AM: Solver (including time spent in interface) took 3.111e-01 seconds\n",
-      "2024-03-18 09:59:06,083 SCIP cannot solve the problem. Please consider relaxing split restrictions, e.g., less splits, or a higher tolerance level for exceeding cluster limits.\n"
+      "Total CPU time 88.15\n",
+      "2024-03-26 16:32:26,501 cdhit_est cannot optimally cluster the data. The minimal number of clusters is 6309.\n",
+      "(CVXPY) Mar 26 04:32:39 PM: Your problem has 18960 variables, 4 constraints, and 0 parameters.\n",
+      "(CVXPY) Mar 26 04:32:39 PM: It is compliant with the following grammars: DCP, DQCP\n",
+      "(CVXPY) Mar 26 04:32:39 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)\n",
+      "(CVXPY) Mar 26 04:32:39 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.\n",
+      "(CVXPY) Mar 26 04:32:39 PM: Your problem is compiled with the CPP canonicalization backend.\n",
+      "(CVXPY) Mar 26 04:32:39 PM: Compiling problem (target solver=SCIP).\n",
+      "(CVXPY) Mar 26 04:32:39 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> SCIP\n",
+      "(CVXPY) Mar 26 04:32:39 PM: Applying reduction Dcp2Cone\n",
+      "(CVXPY) Mar 26 04:32:39 PM: Applying reduction CvxAttr2Constr\n",
+      "(CVXPY) Mar 26 04:32:39 PM: Applying reduction ConeMatrixStuffing\n",
+      "(CVXPY) Mar 26 04:32:39 PM: Applying reduction SCIP\n",
+      "(CVXPY) Mar 26 04:32:39 PM: Finished problem compilation (took 2.108e-02 seconds).\n",
+      "(CVXPY) Mar 26 04:32:39 PM: Invoking solver SCIP  to obtain a solution.\n",
+      "(CVXPY) Mar 26 04:32:43 PM: Problem status: optimal\n",
+      "(CVXPY) Mar 26 04:32:43 PM: Optimal value: 1.000e+00\n",
+      "(CVXPY) Mar 26 04:32:43 PM: Compilation took 2.108e-02 seconds\n",
+      "(CVXPY) Mar 26 04:32:43 PM: Solver (including time spent in interface) took 4.125e+00 seconds\n",
+      "(CVXPY) Mar 26 04:32:44 PM: Your problem has 1375 variables, 1229 constraints, and 0 parameters.\n",
+      "(CVXPY) Mar 26 04:32:44 PM: It is compliant with the following grammars: DCP, DQCP\n",
+      "(CVXPY) Mar 26 04:32:44 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)\n",
+      "(CVXPY) Mar 26 04:32:44 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.\n",
+      "(CVXPY) Mar 26 04:32:44 PM: Your problem is compiled with the CPP canonicalization backend.\n",
+      "(CVXPY) Mar 26 04:32:44 PM: Compiling problem (target solver=SCIP).\n",
+      "(CVXPY) Mar 26 04:32:44 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> SCIP\n",
+      "(CVXPY) Mar 26 04:32:44 PM: Applying reduction Dcp2Cone\n",
+      "(CVXPY) Mar 26 04:32:44 PM: Applying reduction CvxAttr2Constr\n",
+      "(CVXPY) Mar 26 04:32:44 PM: Applying reduction ConeMatrixStuffing\n",
+      "(CVXPY) Mar 26 04:32:45 PM: Applying reduction SCIP\n",
+      "(CVXPY) Mar 26 04:32:46 PM: Finished problem compilation (took 1.571e+00 seconds).\n",
+      "(CVXPY) Mar 26 04:32:46 PM: Invoking solver SCIP  to obtain a solution.\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Problem status: optimal\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Optimal value: 1.430e+02\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Compilation took 1.571e+00 seconds\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Solver (including time spent in interface) took 1.926e+01 seconds\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Your problem has 18960 variables, 4 constraints, and 0 parameters.\n",
+      "(CVXPY) Mar 26 04:33:05 PM: It is compliant with the following grammars: DCP, DQCP\n",
+      "(CVXPY) Mar 26 04:33:05 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)\n",
+      "(CVXPY) Mar 26 04:33:05 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Your problem is compiled with the CPP canonicalization backend.\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Compiling problem (target solver=SCIP).\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> SCIP\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Applying reduction Dcp2Cone\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Applying reduction CvxAttr2Constr\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Applying reduction ConeMatrixStuffing\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Applying reduction SCIP\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Finished problem compilation (took 2.043e-02 seconds).\n",
+      "(CVXPY) Mar 26 04:33:05 PM: Invoking solver SCIP  to obtain a solution.\n",
+      "(CVXPY) Mar 26 04:33:09 PM: Problem status: optimal\n",
+      "(CVXPY) Mar 26 04:33:09 PM: Optimal value: 1.000e+00\n",
+      "(CVXPY) Mar 26 04:33:09 PM: Compilation took 2.043e-02 seconds\n",
+      "(CVXPY) Mar 26 04:33:09 PM: Solver (including time spent in interface) took 4.122e+00 seconds\n",
+      "(CVXPY) Mar 26 04:33:09 PM: Your problem has 1375 variables, 1229 constraints, and 0 parameters.\n",
+      "(CVXPY) Mar 26 04:33:10 PM: It is compliant with the following grammars: DCP, DQCP\n",
+      "(CVXPY) Mar 26 04:33:10 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)\n",
+      "(CVXPY) Mar 26 04:33:10 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.\n",
+      "(CVXPY) Mar 26 04:33:10 PM: Your problem is compiled with the CPP canonicalization backend.\n",
+      "(CVXPY) Mar 26 04:33:10 PM: Compiling problem (target solver=SCIP).\n",
+      "(CVXPY) Mar 26 04:33:10 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> SCIP\n",
+      "(CVXPY) Mar 26 04:33:10 PM: Applying reduction Dcp2Cone\n",
+      "(CVXPY) Mar 26 04:33:10 PM: Applying reduction CvxAttr2Constr\n",
+      "(CVXPY) Mar 26 04:33:10 PM: Applying reduction ConeMatrixStuffing\n",
+      "(CVXPY) Mar 26 04:33:11 PM: Applying reduction SCIP\n",
+      "(CVXPY) Mar 26 04:33:11 PM: Finished problem compilation (took 1.505e+00 seconds).\n",
+      "(CVXPY) Mar 26 04:33:11 PM: Invoking solver SCIP  to obtain a solution.\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Problem status: optimal\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Optimal value: 1.430e+02\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Compilation took 1.505e+00 seconds\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Solver (including time spent in interface) took 1.905e+01 seconds\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Your problem has 18960 variables, 4 constraints, and 0 parameters.\n",
+      "(CVXPY) Mar 26 04:33:30 PM: It is compliant with the following grammars: DCP, DQCP\n",
+      "(CVXPY) Mar 26 04:33:30 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)\n",
+      "(CVXPY) Mar 26 04:33:30 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Your problem is compiled with the CPP canonicalization backend.\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Compiling problem (target solver=SCIP).\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> SCIP\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Applying reduction Dcp2Cone\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Applying reduction CvxAttr2Constr\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Applying reduction ConeMatrixStuffing\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Applying reduction SCIP\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Finished problem compilation (took 6.218e-02 seconds).\n",
+      "(CVXPY) Mar 26 04:33:30 PM: Invoking solver SCIP  to obtain a solution.\n",
+      "(CVXPY) Mar 26 04:33:34 PM: Problem status: optimal\n",
+      "(CVXPY) Mar 26 04:33:34 PM: Optimal value: 1.000e+00\n",
+      "(CVXPY) Mar 26 04:33:34 PM: Compilation took 6.218e-02 seconds\n",
+      "(CVXPY) Mar 26 04:33:34 PM: Solver (including time spent in interface) took 4.027e+00 seconds\n",
+      "(CVXPY) Mar 26 04:33:35 PM: Your problem has 1375 variables, 1229 constraints, and 0 parameters.\n",
+      "(CVXPY) Mar 26 04:33:35 PM: It is compliant with the following grammars: DCP, DQCP\n",
+      "(CVXPY) Mar 26 04:33:35 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)\n",
+      "(CVXPY) Mar 26 04:33:35 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.\n",
+      "(CVXPY) Mar 26 04:33:35 PM: Your problem is compiled with the CPP canonicalization backend.\n",
+      "(CVXPY) Mar 26 04:33:35 PM: Compiling problem (target solver=SCIP).\n",
+      "(CVXPY) Mar 26 04:33:35 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> SCIP\n",
+      "(CVXPY) Mar 26 04:33:35 PM: Applying reduction Dcp2Cone\n",
+      "(CVXPY) Mar 26 04:33:35 PM: Applying reduction CvxAttr2Constr\n",
+      "(CVXPY) Mar 26 04:33:35 PM: Applying reduction ConeMatrixStuffing\n",
+      "(CVXPY) Mar 26 04:33:36 PM: Applying reduction SCIP\n",
+      "(CVXPY) Mar 26 04:33:37 PM: Finished problem compilation (took 1.536e+00 seconds).\n",
+      "(CVXPY) Mar 26 04:33:37 PM: Invoking solver SCIP  to obtain a solution.\n",
+      "(CVXPY) Mar 26 04:33:56 PM: Problem status: optimal\n",
+      "(CVXPY) Mar 26 04:33:56 PM: Optimal value: 1.430e+02\n",
+      "(CVXPY) Mar 26 04:33:56 PM: Compilation took 1.536e+00 seconds\n",
+      "(CVXPY) Mar 26 04:33:56 PM: Solver (including time spent in interface) took 1.937e+01 seconds\n"
      ]
     }
    ],
@@ -238,7 +235,8 @@
     "    techniques=[\"I1e\", \"C1e\"],\n",
     "    splits=[7, 2, 1], \n",
     "    names=[\"train\",\"val\", \"test\"], \n",
-    "    runs=3, \n",
+    "    runs=3,\n",
+    "    epsilon=0.2,\n",
     "    solver=\"SCIP\",\n",
     "    e_type=\"G\",\n",
     "    e_data=\"dataset_Rfam_6320_13classes.fasta\",\n",
@@ -249,15 +247,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "\n",
-    "## The output\n",
+    "### The output\n",
     "\n",
     "Finally, we inspect the e_split object as this holds all the assignments of the datapoints to the splits, for each run and each technique. First, the overall architecture is described, lastly we look at the first five assignments of the C1 run 0."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -269,12 +266,16 @@
       "\tRun 1 - Type: <class 'dict'> - 6320 assignments\n",
       "\tRun 2 - Type: <class 'dict'> - 6320 assignments\n",
       "\tRun 3 - Type: <class 'dict'> - 6320 assignments\n",
+      "C1e - Type: <class 'list'> - Length: 3\n",
+      "\tRun 1 - Type: <class 'dict'> - 6320 assignments\n",
+      "\tRun 2 - Type: <class 'dict'> - 6320 assignments\n",
+      "\tRun 3 - Type: <class 'dict'> - 6320 assignments\n",
       "\n",
-      "ID: RF00001_AF095839_1_346-228_5S_rRNA - Split: val\n",
-      "ID: RF00001_AY245018_1_1-119_5S_rRNA - Split: val\n",
-      "ID: RF00001_X52048_1_2-120_5S_rRNA - Split: val\n",
-      "ID: RF00001_M28193_1_1-119_5S_rRNA - Split: val\n",
-      "ID: RF00001_X14816_1_860-978_5S_rRNA - Split: val\n"
+      "ID: RF00001_AF095839_1_346-228_5S_rRNA - Split: train\n",
+      "ID: RF00001_AY245018_1_1-119_5S_rRNA - Split: train\n",
+      "ID: RF00001_X52048_1_2-120_5S_rRNA - Split: train\n",
+      "ID: RF00001_M28193_1_1-119_5S_rRNA - Split: train\n",
+      "ID: RF00001_X14816_1_860-978_5S_rRNA - Split: test\n"
      ]
     }
    ],
diff --git a/examples/tox21.ipynb b/examples/tox21.ipynb
new file mode 100644
index 0000000..086c548
--- /dev/null
+++ b/examples/tox21.ipynb
@@ -0,0 +1,190 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "source": [
+    "# Split Tox21 with Stratification\n",
+    "\n",
+    "In this notebook, we will split the Tox21 dataset using stratification. We will use the `e_strat` keyword and split the dataset into 2 splits. Therefore, we first import all necessary tools."
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "outputs": [],
+   "source": [
+    "%%capture\n",
+    "import deepchem as dc\n",
+    "from datasail.sail import datasail"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Load Tox21 Dataset\n",
+    "\n",
+    "We will load the Tox21 dataset and convert it to a pandas dataframe. We will then rename the columns to match the sub-challenge names of Tox21 and reduce the dataframe to the one target we are interested in, which is the `SR-ARE` target."
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "              ID                                             SMILES  SR-ARE\n0     Comp000001                       CCOc1ccc2nc(S(N)(=O)=O)sc2c1     1.0\n1     Comp000002                          CCN1C(=O)NC(c2ccccc2)C1=O     0.0\n2     Comp000003  CC[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]...     0.0\n3     Comp000004                    CCCN(CC)C(CC)C(=O)Nc1c(C)cccc1C     0.0\n4     Comp000005                          CC(O)(P(=O)(O)O)P(=O)(O)O     0.0\n...          ...                                                ...     ...\n7826  Comp007827  CCOc1nc2cccc(C(=O)O)c2n1Cc1ccc(-c2ccccc2-c2nnn...     0.0\n7827  Comp007828  CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(...     0.0\n7828  Comp007829  C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C...     1.0\n7829  Comp007830  C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CC[C...     0.0\n7830  Comp007831            COc1ccc2c(c1OC)CN1CCc3cc4c(cc3C1C2)OCO4     0.0\n\n[7831 rows x 3 columns]",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>ID</th>\n      <th>SMILES</th>\n      <th>SR-ARE</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Comp000001</td>\n      <td>CCOc1ccc2nc(S(N)(=O)=O)sc2c1</td>\n      <td>1.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Comp000002</td>\n      <td>CCN1C(=O)NC(c2ccccc2)C1=O</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Comp000003</td>\n      <td>CC[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]...</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Comp000004</td>\n      <td>CCCN(CC)C(CC)C(=O)Nc1c(C)cccc1C</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Comp000005</td>\n      <td>CC(O)(P(=O)(O)O)P(=O)(O)O</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>7826</th>\n      <td>Comp007827</td>\n      <td>CCOc1nc2cccc(C(=O)O)c2n1Cc1ccc(-c2ccccc2-c2nnn...</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>7827</th>\n      <td>Comp007828</td>\n      <td>CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(...</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>7828</th>\n      <td>Comp007829</td>\n      <td>C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C...</td>\n      <td>1.0</td>\n    </tr>\n    <tr>\n      <th>7829</th>\n      <td>Comp007830</td>\n      <td>C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CC[C...</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>7830</th>\n      <td>Comp007831</td>\n      <td>COc1ccc2c(c1OC)CN1CCc3cc4c(cc3C1C2)OCO4</td>\n      
<td>0.0</td>\n    </tr>\n  </tbody>\n</table>\n<p>7831 rows × 3 columns</p>\n</div>"
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dataset = dc.molnet.load_tox21(featurizer=dc.feat.DummyFeaturizer(), splitter=None)[1][0]\n",
+    "df = dataset.to_dataframe()\n",
+    "name_map = dict([(f\"y{i + 1}\", task) for i, task in enumerate(dataset.tasks)] + [(\"y\", dataset.tasks[0]), (\"X\", \"SMILES\")])\n",
+    "df.rename(columns=name_map, inplace=True)\n",
+    "df.rename(columns=dict([(\"y\", dataset.tasks[0]), (\"X\", \"SMILES\")]), inplace=True)\n",
+    "df[\"ID\"] = [f\"Comp{i + 1:06d}\" for i in range(len(df))]\n",
+    "df = df[[\"ID\", \"SMILES\", \"SR-ARE\"]]\n",
+    "df"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Run DataSAIL\n",
+    "\n",
+    "Use DataSAIL to split the data into a cluster-based single cold split (C1e), stratified by the `SR-ARE` label. We define\n",
+    "  - the techniques as list: C1e\n",
+    "    The e at the end is important to split the e-data.\n",
+    "  - the splits as list. The values will be normalized to ratios.\n",
+    "  - the names as list. Similarly to the list of split sizes, DataSAIL needs names to name the splits.\n",
+    "  - the number of runs. This will determine how many different splits to compute per technique.\n",
+    "  - the solving algorithm for optimizing the final problem formulation.\n",
+    "  - the type of the dataset in the first axis.\n",
+    "  - the data as mapping from IDs to SMILES strings.\n",
+    "  - a mapping of sample names to the stratification target values.\n",
+    "\n",
+    "For an extensive description of the arguments, please refer to the corresponding pages of the documentation.\n",
+    "\n",
+    "Given there exist files storing the data and the stratification labels as described in the documentation, the corresponding call to DataSAIL on the command line would be:\n",
+    "```bash\n",
+    "$ datasail -t C1e -s 8 2 -n train test -r 3 --solver SCIP --e-type M --e-data <filepath> --e-strat <filepath>\n",
+    "```"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "outputs": [],
+   "source": [
+    "%%capture\n",
+    "e_splits, _, _ = datasail(\n",
+    "    techniques=[\"C1e\"],\n",
+    "    splits=[8, 2],\n",
+    "    names=[\"train\", \"test\"],\n",
+    "    runs=3,\n",
+    "    solver=\"SCIP\",\n",
+    "    e_type=\"M\",\n",
+    "    e_data=dict(df[[\"ID\", \"SMILES\"]].values.tolist()),\n",
+    "    e_strat=dict(df[[\"ID\", \"SR-ARE\"]].values.tolist()),\n",
+    ")"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### The output\n",
+    "\n",
+    "Finally, we inspect the `e_splits` object, as it holds all the assignments of the datapoints to the splits for each run and each technique. First, the overall structure is described; then we look at the first five assignments of the first C1e run."
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'dict'>\n",
+      "C1e - Type: <class 'list'> - Length: 3\n",
+      "\tRun 1 - Type: <class 'dict'> - 7827 assignments\n",
+      "\tRun 2 - Type: <class 'dict'> - 7827 assignments\n",
+      "\tRun 3 - Type: <class 'dict'> - 7827 assignments\n",
+      "\n",
+      "ID: Comp000001 - Split: train\n",
+      "ID: Comp000002 - Split: train\n",
+      "ID: Comp000003 - Split: test\n",
+      "ID: Comp000004 - Split: train\n",
+      "ID: Comp000005 - Split: test\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(type(e_splits))\n",
+    "for key in e_splits.keys():\n",
+    "    print(f\"{key} - Type: {type(e_splits[key])} - Length: {len(e_splits[key])}\")\n",
+    "    for run in range(len(e_splits[key])):\n",
+    "        print(f\"\\tRun {run + 1} - Type: {type(e_splits[key][run])} - {len(e_splits[key][run])} assignments\")\n",
+    "print(\"\\n\" + \"\\n\".join(f\"ID: {idx} - Split: {split}\" for idx, split in list(e_splits[key][0].items())[:5]))"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "outputs": [],
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   }
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}