Commit

Merge branch 'development' of https://github.com/theislab/multibind into development
ilibarra committed Aug 9, 2024
2 parents e29876a + 38d462c commit daa53ab
Showing 3 changed files with 137 additions and 25 deletions.
2 changes: 1 addition & 1 deletion mubind/tl/__init__.py
@@ -38,4 +38,4 @@
)
from .probound import load_probound

-from .graph import compute_contributions, metric_scramble_comparison, normalized_alignment_score
+from .graph import compute_contributions, metric_scramble_comparison
87 changes: 63 additions & 24 deletions mubind/tl/graph.py
@@ -2,38 +2,88 @@
import pandas as pd
import numpy as np

-'''
-We want to understand, how each filter contributes to the overall result together with C @ D := H.
-Since the i-th row of (A @ H), now denoted as (A @ H)_{i,:}, is nothing but (H.T @ A.T_{:,i}).T, (X_{:,i} is the i-th column of X) we want to compute the matrix vector product between H.T and the i-th column of A.T and find out how much it scales.
-We'll later normalize this with the maximum singular value of H.
-'''
-
-def compute_contributions(A, G, D, use_hadamard=True):
-    H = G * D if use_hadamard else G @ D
-    """
-    efficient implementation:
-    """
+def compute_contributions(A, G, D, use_hadamard=True):
+    """Computes contribution scores for activities linked to a filter.
+
+    Arguments:
+    ---------
+    A: : `torch.Tensor`
+        Activities matrix.
+    G: `torch.Tensor`
+        Graph matrix.
+    D: `torch.Tensor`
+        Graph scaling matrix.
+    use_hadamard: `bool` (default: `True`)
+        Use hadamard product instead of matrix multiplication.
+
+    Returns:
+    -------
+    indices: `torch.Tensor`
+        Indices of the contributions sorted by their absolute values descendingly
+    contributions: `torch.Tensor`
+        Contribution scores for each column of matrix A
+    max_singular_value: `float`
+        Maximum singular value of the matrix H.T with H = G * D or H = G @ D
+    """
+    # check if input matrices are torch tensors
+    if not isinstance(A, torch.Tensor) or not isinstance(G, torch.Tensor) or not isinstance(D, torch.Tensor):
+        raise TypeError("A, G, and D must be torch tensors")
+    # check if input matrices are not empty
+    if A.numel() == 0 or G.numel() == 0 or D.numel() == 0:
+        raise ValueError("A, G, and D must not be empty")
+    # check if dimensions are correct
+    if A.shape[1] != G.shape[0]:
+        raise ValueError("Dimension mismatch: number of columns of A must match the number of rows of G")
+    if not use_hadamard and G.shape[1] != D.shape[0]:
+        raise ValueError("Dimension mismatch: number of columns of G must match the number of rows of D")
+    if use_hadamard and G.shape != D.shape:
+        raise ValueError("Dimension mismatch: G and D must have the same shape when use_hadamard is True")
+
+    H = G * D if use_hadamard else G @ D
    contributions = torch.norm(A @ H, dim=1) / torch.norm(A, dim=1)

+    # this implementation for contributions is not efficient, but easier to interpret
    '''
-    this implementation for contributions is not efficient, but easier to interpret
    contributions = torch.zeros(A.T.shape[1])  # number of columns of A.T
    i = 0
    for column in torch.unbind(A.T, dim=1):
        contributions[i] = (torch.norm((H.T @ column)) / torch.norm(column)).item()
        i += 1
    max_singular_value = np.linalg.svd(H.to_dense().detach().numpy(), compute_uv=False)[0]  # torch.max(torch.abs(torch.linalg.eigvals(H.to_dense()))).item()
    '''

    max_singular_value = np.linalg.svd(H.T.to_dense().detach().numpy(), compute_uv=False)[0]
    # sort the contributions by their absolute values descendingly
    _, indices = torch.sort(contributions, descending=True)
    return indices, contributions, max_singular_value
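
As a quick numerical check of the reasoning in the removed comment block (each contribution equals the norm of H.T applied to a row of A divided by the norm of that row, so no score can exceed the largest singular value of H), here is a minimal sketch of calling the updated function; it assumes mubind is installed and uses arbitrary example matrices that are not part of the commit:

import torch
from mubind.tl.graph import compute_contributions

# arbitrary small example matrices (illustrative only)
A = torch.rand(4, 3)   # activities, one row per sample
G = torch.rand(3, 3)   # graph matrix
D = torch.rand(3, 3)   # graph scaling matrix

indices, contributions, max_sv = compute_contributions(A, G, D, use_hadamard=True)

# every score is bounded by the maximum singular value of H = G * D
assert bool(torch.all(contributions <= float(max_sv) + 1e-6))
# indices[0] is the row of A with the largest contribution
print(indices[0].item(), contributions[indices[0]].item(), max_sv)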


# evaluate_metric function, that compares the metric of the original matrix with the metric of the scrambled matrices
def metric_scramble_comparison(C,
                               D,
                               metric,
                               scramble_type,
                               n_scrambles=1000,
                               verbose=True):
+    """Comparing metric scores of the original matrix with metric scores of scrambled matrices
+
+    Arguments:
+    ---------
+    C: : `torch.Tensor`
+        Graph matrix.
+    D: `torch.Tensor`
+        Graph scaling matrix.
+    scramble_type: `str`
+        Type of scrambling: 'flat', 'row', or 'column'
+    n_scrambles: `int` (default: `1000`)
+        Number of scrambled matrices to compare
+    verbose: `bool` (default: `True`)
+        Print summary statistics of the scores of scrambled matrices and the score of the original matrix
+
+    Returns:
+    -------
+    scores_scrambled_df: `pandas.DataFrame`
+        Results of the metric scores of the scrambled matrices
+    """
    if C.is_sparse:
        C = C.to_dense()
    if D.is_sparse:
@@ -55,15 +105,4 @@ def metric_scramble_comparison(C,
        print(f"Summary statistics of the scores of scrambled matrices: \n{scores_scrambled_df.describe()} \n \n")
        print(f"This is the score of the original matrix: {score_D}")

    return scores_scrambled_df
-
-
-def normalized_alignment_score(C, D):
-    # Compute the element-wise product and then sum all elements
-    numerator = torch.sum(C * D)
-    # Compute the Frobenius norms of C and D
-    norm_C = torch.norm(C, p='fro')
-    norm_D = torch.norm(D, p='fro')
-    # Compute the normalized alignment score
-    score = numerator / (norm_C * norm_D)
-    return score.item()
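
The removed normalized_alignment_score is the Frobenius inner product of C and D divided by the product of their Frobenius norms, i.e. the cosine similarity between the flattened matrices, and the docstring above describes metric_scramble_comparison as comparing such a score against scores of scrambled copies of D. A minimal sketch of both ideas in plain torch; the alignment() helper and the 'flat' scramble loop are illustrative only, since the collapsed hunk does not show how the library invokes the metric argument:

import torch
import pandas as pd

# arbitrary example matrices (illustrative only)
C = torch.tensor([[1.0, 0.0], [0.0, 1.0]])
D = torch.tensor([[2.0, 0.0], [0.0, 2.0]])

def alignment(C, D):
    # Frobenius inner product normalized by the Frobenius norms,
    # i.e. cosine similarity of the flattened matrices
    return (torch.sum(C * D) / (torch.norm(C, p='fro') * torch.norm(D, p='fro'))).item()

score_original = alignment(C, D)
print(score_original)  # 1.0, since D is a positive multiple of C

# 'flat' scrambling: permute all entries of D, score each scrambled copy,
# and compare the original score against the distribution of scrambled scores
scores_scrambled = []
for _ in range(100):
    perm = torch.randperm(D.numel())
    scores_scrambled.append(alignment(C, D.flatten()[perm].reshape(D.shape)))
print(pd.Series(scores_scrambled).describe())
print(score_original)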
73 changes: 73 additions & 0 deletions tests/test_graph_contribution.py
@@ -0,0 +1,73 @@
import numpy as np
import pandas as pd
import torch
from mubind.tl.graph import compute_contributions
import pytest

def test_compute_contributions_hadamard():
    A = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
    G = torch.tensor([[1.0, 0.5], [0.5, 1.0]])
    D = torch.tensor([[0.5, 1.0], [1.0, 0.5]])

    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=True)

    assert indices.shape == contributions.shape
    assert len(contributions) == A.shape[0]
    assert max_singular_value >= 0

def test_compute_contributions_no_hadamard():
    A = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
    G = torch.tensor([[1.0, 0.5], [0.5, 1.0]])
    D = torch.tensor([[0.5, 1.0], [1.0, 0.5]])

    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=False)

    assert indices.shape == contributions.shape
    assert len(contributions) == A.shape[0]
    assert max_singular_value >= 0

def test_compute_contributions_empty():
    A = torch.tensor([[]])
    G = torch.tensor([[]])
    D = torch.tensor([[]])

    with pytest.raises(ValueError):
        compute_contributions(A, G, D)

def test_compute_contributions_single_value():
    A = torch.tensor([[1.0]])
    G = torch.tensor([[2.0]])
    D = torch.tensor([[3.0]])

    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=True)

    assert indices.shape == contributions.shape
    assert len(contributions) == A.shape[0]
    assert max_singular_value >= 0

def test_compute_contributions_large_matrix():
    A = torch.rand(100, 100)
    G = torch.rand(100, 100)
    D = torch.rand(100, 100)

    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=False)

    assert indices.shape == contributions.shape
    assert len(contributions) == A.shape[0]
    assert max_singular_value >= 0

def test_compute_contributions_different_dimensions_hadamard():
    A = torch.rand(2, 3)
    G = torch.rand(3, 2)
    D = torch.rand(2, 3)

    with pytest.raises(ValueError):
        compute_contributions(A, G, D, use_hadamard=True)

def test_compute_contributions_different_dimensions_no_hadamard():
    A = torch.rand(2, 4)
    G = torch.rand(3, 3)
    D = torch.rand(3, 3)

    with pytest.raises(ValueError):
        compute_contributions(A, G, D, use_hadamard=False)
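
The added tests are self-contained and can be run from the repository root with pytest, for example: pytest tests/test_graph_contribution.py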
