Skip to content

Commit

Permalink
functionality for custom losses, kernel_rel, sym_weights, PBM with dual batches, and others.
Browse files Browse the repository at this point in the history
  • Loading branch information
ilibarra committed Jan 23, 2023
1 parent dc8200f commit 270dc2a
Show file tree
Hide file tree
Showing 8 changed files with 4,242 additions and 3,276 deletions.
323 changes: 246 additions & 77 deletions mubind/models/models.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion mubind/pl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
create_logo,
kmer_enrichment,
activities,
plot_loss,
loss,
scatter,
alignment_protein,
R2_per_protein,
Expand Down
2 changes: 1 addition & 1 deletion mubind/pl/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ def activities(model, n_rows=None, n_cols=None, batch_i=0, batch_names=None, fig
plt.show()


def plot_loss(model):
def loss(model):
h, c = model.loss_history, model.loss_color
for i in range(len(h) - 2):
plt.plot([i, i + 1], h[i : i + 2], c=c[i])
Expand Down
18 changes: 9 additions & 9 deletions mubind/tl/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,15 +165,15 @@ def submatrix(m, start, length, flip, filter_neg_weights=True):


# @jit
def distances_dataframe(a, b, min_w_sum=0):
def distances_dataframe(a, b, min_w_sum=0, **kwargs):
d = []
min_w = min(a.shape[-1], b.shape[-1])
# k = min_w
# lowest_d = -1, -1
for k in np.arange(5, min_w):
# print(k)
for i in np.arange(0, a.shape[-1] - k + 1):
ai = submatrix(a, i, k, 0)
ai = submatrix(a, i, k, 0, **kwargs)
ai_sum = ai.sum()
if ai_sum < min_w_sum:
continue
Expand All @@ -182,7 +182,7 @@ def distances_dataframe(a, b, min_w_sum=0):
continue
for j in np.arange(0, b.shape[-1] - k + 1):
# print(i, j)
bi = submatrix(b, j, k, 0)
bi = submatrix(b, j, k, 0, **kwargs)
bi_sum = bi.sum()
if bi_sum < min_w_sum:
continue
Expand All @@ -194,29 +194,29 @@ def distances_dataframe(a, b, min_w_sum=0):
# if lowest_d[-1] == -1 or d[-1] < lowest_d[-1] or d[-2] < lowest_d[-1]:
# lowest_d = i, 0, d[-1]

bi_rev = submatrix(b, j, k, 1)
bi_rev = submatrix(b, j, k, 1, **kwargs)
# flipped version
d2 = ((bi_rev - ai) ** 2).sum() / bi.shape[-1]
d.append([i, j, k, ai.shape[-1], bi.shape[-1],
ai.sum(), bi.sum(), 1, d2])
# if lowest_d[-1] == -1 or d[-1] < lowest_d[-1] or d[-2] < lowest_d[-1]:
# lowest_d = i, 1, d[-1]

res = pd.DataFrame(d, columns=['a_start', 'b_start', 'k', 'a_shape', 'b_shape',
'a_sum', 'b_sum', 'b_flip', 'distance']).sort_values('distance')
res = pd.DataFrame(d, columns=['a_start', 'b_start', 'k', 'a_shape', 'b_shape',
'a_sum', 'b_sum', 'b_flip', 'distance']).sort_values('distance')
return res

def calculate_distances(mono_list, full=False, best=False):
def calculate_distances(mono_list, full=False, best=False, **kwargs):
res = []
for a, b in itertools.product(enumerate(mono_list), repeat=2):
# print(a[0], b[0])
if not full and a[0] > b[0]:
continue
df2 = mb.tl.distances_dataframe(a[1], b[1])
df2 = mb.tl.distances_dataframe(a[1], b[1], **kwargs)
df2['a'] = a[0]
df2['b'] = b[0]
res.append(df2)
df3 = mb.tl.distances_dataframe(b[1], a[1])
df3 = mb.tl.distances_dataframe(b[1], a[1], **kwargs)
df3['a'] = b[0]
df3['b'] = a[0]
df3['id'] = df3['a'].astype(str) + '_' + df3['b'].astype(str)
Expand Down
2 changes: 1 addition & 1 deletion mubind/tl/prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def test_network(model, dataloader, device):
else:
inputs = {"mono": mononuc, "batch": b, "countsum": countsum}

inputs['scale_countsum'] = model.datatype == 'selex'
output = model(**inputs)

output = output.cpu().detach().numpy()
Expand Down Expand Up @@ -287,7 +288,6 @@ def scores(model, train, by=None, **kwargs):
def kmer_enrichment(model, train, k=None, base_round=0, enr_round=-1, pseudo_count=1):
# getting the targets and predictions from the model
seqs, targets, pred = mb.tl.test_network(model, train, next(model.parameters()).device)

counts = None
target_labels = ["t" + str(i) for i in range(max(train.dataset.n_rounds))]
pred_labels = ["p" + str(i) for i in range(max(train.dataset.n_rounds))]
Expand Down
Loading

0 comments on commit 270dc2a

Please sign in to comment.