functionality for custom losses, kernel_rel, sym_weights, PBM with dual batches, and others.
theislab · Jan 23, 2023 · 270dc2a · 270dc2a
1 parent dc8200f
commit 270dc2a
Show file tree

Hide file tree

Showing 8 changed files with 4,242 additions and 3,276 deletions.
diff --git a/mubind/models/models.py b/mubind/models/models.py
diff --git a/mubind/pl/__init__.py b/mubind/pl/__init__.py
@@ -6,7 +6,7 @@
     create_logo,
     kmer_enrichment,
     activities,
-    plot_loss,
+    loss,
     scatter,
     alignment_protein,
     R2_per_protein,

diff --git a/mubind/pl/plotting.py b/mubind/pl/plotting.py
@@ -484,7 +484,7 @@ def activities(model, n_rows=None, n_cols=None, batch_i=0, batch_names=None, fig
     plt.show()
 
 
-def plot_loss(model):
+def loss(model):
     h, c = model.loss_history, model.loss_color
     for i in range(len(h) - 2):
         plt.plot([i, i + 1], h[i : i + 2], c=c[i])

diff --git a/mubind/tl/aggregation.py b/mubind/tl/aggregation.py
@@ -165,15 +165,15 @@ def submatrix(m, start, length, flip, filter_neg_weights=True):
 
 
 # @jit
-def distances_dataframe(a, b, min_w_sum=0):
+def distances_dataframe(a, b, min_w_sum=0, **kwargs):
     d = []
     min_w = min(a.shape[-1], b.shape[-1])
     # k = min_w
     # lowest_d = -1, -1
     for k in np.arange(5, min_w):
         # print(k)
         for i in np.arange(0, a.shape[-1] - k + 1):
-            ai = submatrix(a, i, k, 0)
+            ai = submatrix(a, i, k, 0, **kwargs)
             ai_sum = ai.sum()
             if ai_sum < min_w_sum:
                 continue
@@ -182,7 +182,7 @@ def distances_dataframe(a, b, min_w_sum=0):
                 continue
             for j in np.arange(0, b.shape[-1] - k + 1):
                 # print(i, j)
-                bi = submatrix(b, j, k, 0)
+                bi = submatrix(b, j, k, 0, **kwargs)
                 bi_sum = bi.sum()
                 if bi_sum < min_w_sum:
                     continue
@@ -194,29 +194,29 @@ def distances_dataframe(a, b, min_w_sum=0):
                 # if lowest_d[-1] == -1 or d[-1] < lowest_d[-1] or d[-2] < lowest_d[-1]:
                 #     lowest_d = i, 0, d[-1]
 
-                bi_rev = submatrix(b, j, k, 1)
+                bi_rev = submatrix(b, j, k, 1, **kwargs)
                 # flipped version
                 d2 = ((bi_rev - ai) ** 2).sum() / bi.shape[-1]
                 d.append([i, j, k, ai.shape[-1], bi.shape[-1],
                           ai.sum(), bi.sum(), 1, d2])
                 # if lowest_d[-1] == -1 or d[-1] < lowest_d[-1] or d[-2] < lowest_d[-1]:
                 #     lowest_d = i, 1, d[-1]
 
-        res = pd.DataFrame(d, columns=['a_start', 'b_start', 'k', 'a_shape', 'b_shape',
-                                       'a_sum', 'b_sum', 'b_flip', 'distance']).sort_values('distance')
+    res = pd.DataFrame(d, columns=['a_start', 'b_start', 'k', 'a_shape', 'b_shape',
+                                   'a_sum', 'b_sum', 'b_flip', 'distance']).sort_values('distance')
     return res
 
-def calculate_distances(mono_list, full=False, best=False):
+def calculate_distances(mono_list, full=False, best=False, **kwargs):
     res = []
     for a, b in itertools.product(enumerate(mono_list), repeat=2):
         # print(a[0], b[0])
         if not full and a[0] > b[0]:
             continue
-        df2 = mb.tl.distances_dataframe(a[1], b[1])
+        df2 = mb.tl.distances_dataframe(a[1], b[1], **kwargs)
         df2['a'] = a[0]
         df2['b'] = b[0]
         res.append(df2)
-        df3 = mb.tl.distances_dataframe(b[1], a[1])
+        df3 = mb.tl.distances_dataframe(b[1], a[1], **kwargs)
         df3['a'] = b[0]
         df3['b'] = a[0]
         df3['id'] = df3['a'].astype(str) + '_' + df3['b'].astype(str)

diff --git a/mubind/tl/prediction.py b/mubind/tl/prediction.py
@@ -68,6 +68,7 @@ def test_network(model, dataloader, device):
             else:
                 inputs = {"mono": mononuc, "batch": b, "countsum": countsum}
 
+            inputs['scale_countsum'] = model.datatype == 'selex'
             output = model(**inputs)
 
             output = output.cpu().detach().numpy()
@@ -287,7 +288,6 @@ def scores(model, train, by=None, **kwargs):
 def kmer_enrichment(model, train, k=None, base_round=0, enr_round=-1, pseudo_count=1):
     # getting the targets and predictions from the model
     seqs, targets, pred = mb.tl.test_network(model, train, next(model.parameters()).device)
-
     counts = None
     target_labels = ["t" + str(i) for i in range(max(train.dataset.n_rounds))]
     pred_labels = ["p" + str(i) for i in range(max(train.dataset.n_rounds))]