Commit

Merge branch 'development' of https://github.com/theislab/multibind into development
ilibarra committed Aug 9, 2024
2 parents e29876a + 38d462c commit daa53ab
Showing 3 changed files with 137 additions and 25 deletions.
2 changes: 1 addition & 1 deletion mubind/tl/__init__.py
@@ -38,4 +38,4 @@
)
from .probound import load_probound

-from .graph import compute_contributions, metric_scramble_comparison, normalized_alignment_score
+from .graph import compute_contributions, metric_scramble_comparison
87 changes: 63 additions & 24 deletions mubind/tl/graph.py
@@ -2,38 +2,88 @@
import pandas as pd
import numpy as np

-'''
-We want to understand, how each filter contributes to the overall result together with C @ D := H.
-Since the i-th row of (A @ H), now denoted as (A @ H)_{i,:}, is nothing but (H.T @ A.T_{:,i}).T, (X_{:,i} is the i-th column of X) we want to compute the matrix vector product between H.T and the i-th column of A.T and find out how much it scales.
-We'll later normalize this with the maximum singular value of H.
-'''
-
-def compute_contributions(A, G, D, use_hadamard=True):
-    H = G * D if use_hadamard else G @ D
-    """
-    efficient implementation:
-    """
+def compute_contributions(A, G, D, use_hadamard=True):
+    """Computes contribution scores for activities linked to a filter.
+
+    Arguments:
+    ---------
+    A: : `torch.Tensor`
+        Activities matrix.
+    G: `torch.Tensor`
+        Graph matrix.
+    D: `torch.Tensor`
+        Graph scaling matrix.
+    use_hadamard: `bool` (default: `True`)
+        Use hadamard product instead of matrix multiplication.
+
+    Returns:
+    -------
+    indices: `torch.Tensor`
+        Indices of the contributions sorted by their absolute values descendingly
+    contributions: `torch.Tensor`
+        Contribution scores for each column of matrix A
+    max_singular_value: `float`
+        Maximum singular value of the matrix H.T with H = G * D or H = G @ D
+    """
+    # check if input matrices are torch tensors
+    if not isinstance(A, torch.Tensor) or not isinstance(G, torch.Tensor) or not isinstance(D, torch.Tensor):
+        raise TypeError("A, G, and D must be torch tensors")
+    # check if input matrices are not empty
+    if A.numel() == 0 or G.numel() == 0 or D.numel() == 0:
+        raise ValueError("A, G, and D must not be empty")
+    # check if dimensions are correct
+    if A.shape[1] != G.shape[0]:
+        raise ValueError("Dimension mismatch: number of columns of A must match the number of rows of G")
+    if not use_hadamard and G.shape[1] != D.shape[0]:
+        raise ValueError("Dimension mismatch: number of columns of G must match the number of rows of D")
+    if use_hadamard and G.shape != D.shape:
+        raise ValueError("Dimension mismatch: G and D must have the same shape when use_hadamard is True")
+
+    H = G * D if use_hadamard else G @ D
    contributions = torch.norm(A @ H, dim=1) / torch.norm(A, dim=1)

+    # this implementation for contributions is not efficient, but easier to interpret
    '''
-    this implementation for contributions is not efficient, but easier to interpret
    contributions = torch.zeros(A.T.shape[1])  # number of columns of A.T
    i = 0
    for column in torch.unbind(A.T, dim=1):
        contributions[i] = (torch.norm((H.T @ column)) / torch.norm(column)).item()
        i += 1
    max_singular_value = np.linalg.svd(H.to_dense().detach().numpy(), compute_uv=False)[0]  # torch.max(torch.abs(torch.linalg.eigvals(H.to_dense()))).item()
    '''

    max_singular_value = np.linalg.svd(H.T.to_dense().detach().numpy(), compute_uv=False)[0]
    # sort the contributions by their absolute values descendingly
    _, indices = torch.sort(contributions, descending=True)
    return indices, contributions, max_singular_value
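
As a quick numerical check of the reasoning in the removed comment block (each contribution equals the norm of H.T applied to a row of A divided by the norm of that row, so no score can exceed the largest singular value of H), here is a minimal sketch of calling the updated function; it assumes mubind is installed and uses arbitrary example matrices that are not part of the commit:

import torch
from mubind.tl.graph import compute_contributions

# arbitrary small example matrices (illustrative only)
A = torch.rand(4, 3)   # activities, one row per sample
G = torch.rand(3, 3)   # graph matrix
D = torch.rand(3, 3)   # graph scaling matrix

indices, contributions, max_sv = compute_contributions(A, G, D, use_hadamard=True)

# every score is bounded by the maximum singular value of H = G * D
assert bool(torch.all(contributions <= float(max_sv) + 1e-6))
# indices[0] is the row of A with the largest contribution
print(indices[0].item(), contributions[indices[0]].item(), max_sv)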


# evaluate_metric function, that compares the metric of the original matrix with the metric of the scrambled matrices
def metric_scramble_comparison(C,
                               D,
                               metric,
                               scramble_type,
                               n_scrambles=1000,
                               verbose=True):
+    """Comparing metric scores of the original matrix with metric scores of scrambled matrices
+
+    Arguments:
+    ---------
+    C: : `torch.Tensor`
+        Graph matrix.
+    D: `torch.Tensor`
+        Graph scaling matrix.
+    scramble_type: `str`
+        Type of scrambling: 'flat', 'row', or 'column'
+    n_scrambles: `int` (default: `1000`)
+        Number of scrambled matrices to compare
+    verbose: `bool` (default: `True`)
+        Print summary statistics of the scores of scrambled matrices and the score of the original matrix
+
+    Returns:
+    -------
+    scores_scrambled_df: `pandas.DataFrame`
+        Results of the metric scores of the scrambled matrices
+    """
    if C.is_sparse:
        C = C.to_dense()
    if D.is_sparse:
@@ -55,15 +105,4 @@ def metric_scramble_comparison(C,
        print(f"Summary statistics of the scores of scrambled matrices: \n{scores_scrambled_df.describe()} \n \n")
        print(f"This is the score of the original matrix: {score_D}")

    return scores_scrambled_df
-
-
-def normalized_alignment_score(C, D):
-    # Compute the element-wise product and then sum all elements
-    numerator = torch.sum(C * D)
-    # Compute the Frobenius norms of C and D
-    norm_C = torch.norm(C, p='fro')
-    norm_D = torch.norm(D, p='fro')
-    # Compute the normalized alignment score
-    score = numerator / (norm_C * norm_D)
-    return score.item()
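
The removed normalized_alignment_score is the Frobenius inner product of C and D divided by the product of their Frobenius norms, i.e. the cosine similarity between the flattened matrices, and the docstring above describes metric_scramble_comparison as comparing such a score against scores of scrambled copies of D. A minimal sketch of both ideas in plain torch; the alignment() helper and the 'flat' scramble loop are illustrative only, since the collapsed hunk does not show how the library invokes the metric argument:

import torch
import pandas as pd

# arbitrary example matrices (illustrative only)
C = torch.tensor([[1.0, 0.0], [0.0, 1.0]])
D = torch.tensor([[2.0, 0.0], [0.0, 2.0]])

def alignment(C, D):
    # Frobenius inner product normalized by the Frobenius norms,
    # i.e. cosine similarity of the flattened matrices
    return (torch.sum(C * D) / (torch.norm(C, p='fro') * torch.norm(D, p='fro'))).item()

score_original = alignment(C, D)
print(score_original)  # 1.0, since D is a positive multiple of C

# 'flat' scrambling: permute all entries of D, score each scrambled copy,
# and compare the original score against the distribution of scrambled scores
scores_scrambled = []
for _ in range(100):
    perm = torch.randperm(D.numel())
    scores_scrambled.append(alignment(C, D.flatten()[perm].reshape(D.shape)))
print(pd.Series(scores_scrambled).describe())
print(score_original)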
73 changes: 73 additions & 0 deletions tests/test_graph_contribution.py
@@ -0,0 +1,73 @@
import numpy as np
import pandas as pd
import torch
from mubind.tl.graph import compute_contributions
import pytest

def test_compute_contributions_hadamard():
    A = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
    G = torch.tensor([[1.0, 0.5], [0.5, 1.0]])
    D = torch.tensor([[0.5, 1.0], [1.0, 0.5]])

    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=True)

    assert indices.shape == contributions.shape
    assert len(contributions) == A.shape[0]
    assert max_singular_value >= 0

def test_compute_contributions_no_hadamard():
    A = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
    G = torch.tensor([[1.0, 0.5], [0.5, 1.0]])
    D = torch.tensor([[0.5, 1.0], [1.0, 0.5]])

    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=False)

    assert indices.shape == contributions.shape
    assert len(contributions) == A.shape[0]
    assert max_singular_value >= 0

def test_compute_contributions_empty():
    A = torch.tensor([[]])
    G = torch.tensor([[]])
    D = torch.tensor([[]])

    with pytest.raises(ValueError):
        compute_contributions(A, G, D)

def test_compute_contributions_single_value():
    A = torch.tensor([[1.0]])
    G = torch.tensor([[2.0]])
    D = torch.tensor([[3.0]])

    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=True)

    assert indices.shape == contributions.shape
    assert len(contributions) == A.shape[0]
    assert max_singular_value >= 0

def test_compute_contributions_large_matrix():
    A = torch.rand(100, 100)
    G = torch.rand(100, 100)
    D = torch.rand(100, 100)

    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=False)

    assert indices.shape == contributions.shape
    assert len(contributions) == A.shape[0]
    assert max_singular_value >= 0

def test_compute_contributions_different_dimensions_hadamard():
    A = torch.rand(2, 3)
    G = torch.rand(3, 2)
    D = torch.rand(2, 3)

    with pytest.raises(ValueError):
        compute_contributions(A, G, D, use_hadamard=True)

def test_compute_contributions_different_dimensions_no_hadamard():
    A = torch.rand(2, 4)
    G = torch.rand(3, 3)
    D = torch.rand(3, 3)

    with pytest.raises(ValueError):
        compute_contributions(A, G, D, use_hadamard=False)
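
The added tests are self-contained and can be run from the repository root with pytest, for example: pytest tests/test_graph_contribution.py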
