From 27296f1c61918f1eeecd838947c5a6a3cb788b73 Mon Sep 17 00:00:00 2001
From: Lennart Redl <lennredl@gmail.com>
Date: Thu, 8 Aug 2024 11:01:18 +0200
Subject: [PATCH 1/4] Added tests for graph_contribution

---
 mubind/tl/graph.py               | 40 +++++++++--------
 tests/test_graph_contribution.py | 73 ++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 18 deletions(-)
 create mode 100644 tests/test_graph_contribution.py

diff --git a/mubind/tl/graph.py b/mubind/tl/graph.py
index a1e7b41..3baca18 100644
--- a/mubind/tl/graph.py
+++ b/mubind/tl/graph.py
@@ -10,24 +10,39 @@
 '''
 
 def compute_contributions(A, G, D, use_hadamard=True):
+    # check if input matrices are torch tensors
+    if not isinstance(A, torch.Tensor) or not isinstance(G, torch.Tensor) or not isinstance(D, torch.Tensor):
+        raise TypeError("A, G, and D must be torch tensors")
+    # check if input matrices are not empty
+    if A.numel() == 0 or G.numel() == 0 or D.numel() == 0:
+        raise ValueError("A, G, and D must not be empty")
+    # check if dimensions are correct
+    if A.shape[1] != G.shape[0]:
+        raise ValueError("Dimension mismatch: number of columns of A must match the number of rows of G")
+    if not use_hadamard and G.shape[1] != D.shape[0]:
+        raise ValueError("Dimension mismatch: number of columns of G must match the number of rows of D")
+    if use_hadamard and G.shape != D.shape:
+        raise ValueError("Dimension mismatch: G and D must have the same shape when use_hadamard is True")
+    
     H = G * D if use_hadamard else G @ D
-    """
-    efficient implementation:
     contributions = torch.norm(A @ H, dim=1) / torch.norm(A, dim=1)
-    """
-    # this implementation for contributions is not efficient, but easier to interpret
+
+    ''' 
+    this implementation for contributions is not efficient, but easier to interpret
     contributions = torch.zeros(A.T.shape[1]) # number of columns of A.T
     i = 0
     for column in torch.unbind(A.T, dim=1):
         contributions[i] = (torch.norm((H.T @ column)) / torch.norm(column)).item()
         i += 1
-    max_singular_value = np.linalg.svd(H.to_dense().detach().numpy(), compute_uv=False)[0]# torch.max(torch.abs(torch.linalg.eigvals(H.to_dense()))).item()
+    '''
+
+    max_singular_value = np.linalg.svd(H.T.to_dense().detach().numpy(), compute_uv=False)[0]
     # sort the contributions by their absolute values descendingly
     _, indices = torch.sort(contributions, descending=True)
     return indices, contributions, max_singular_value
 
 
-# evaluate_metric function, that compares the metric of the original matrix with the metric of the scrambled matrices
+# comparing the metric scores of the original matrix with metric scores of scrambled matrices
 def metric_scramble_comparison(C,
                                D,
                                metric,
@@ -55,15 +70,4 @@ def metric_scramble_comparison(C,
         print(f"Summary statistics of the scores of scrambled matrices: \n{scores_scrambled_df.describe()} \n \n")
         print(f"This is the score of the original matrix: {score_D}")
     
-    return scores_scrambled_df
-
-
-def normalized_alignment_score(C, D):
-    # Compute the element-wise product and then sum all elements
-    numerator = torch.sum(C * D)
-    # Compute the Frobenius norms of C and D
-    norm_C = torch.norm(C, p='fro')
-    norm_D = torch.norm(D, p='fro')
-    # Compute the normalized alignment score
-    score = numerator / (norm_C * norm_D)
-    return score.item()
\ No newline at end of file
+    return scores_scrambled_df
\ No newline at end of file
diff --git a/tests/test_graph_contribution.py b/tests/test_graph_contribution.py
new file mode 100644
index 0000000..312aaae
--- /dev/null
+++ b/tests/test_graph_contribution.py
@@ -0,0 +1,73 @@
+import numpy as np
+import pandas as pd
+import torch
+from mubind.tl.graph import compute_contributions
+import pytest
+
+def test_compute_contributions_hadamard():
+    A = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
+    G = torch.tensor([[1.0, 0.5], [0.5, 1.0]])
+    D = torch.tensor([[0.5, 1.0], [1.0, 0.5]])
+
+    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=True)
+    
+    assert indices.shape == contributions.shape
+    assert len(contributions) == A.shape[0]
+    assert max_singular_value >= 0
+
+def test_compute_contributions_no_hadamard():
+    A = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
+    G = torch.tensor([[1.0, 0.5], [0.5, 1.0]])
+    D = torch.tensor([[0.5, 1.0], [1.0, 0.5]])
+
+    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=False)
+    
+    assert indices.shape == contributions.shape
+    assert len(contributions) == A.shape[0]
+    assert max_singular_value >= 0
+
+def test_compute_contributions_empty():
+    A = torch.tensor([[]])
+    G = torch.tensor([[]])
+    D = torch.tensor([[]])
+
+    with pytest.raises(ValueError):
+        compute_contributions(A, G, D)
+
+def test_compute_contributions_single_value():
+    A = torch.tensor([[1.0]])
+    G = torch.tensor([[2.0]])
+    D = torch.tensor([[3.0]])
+
+    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=True)
+    
+    assert indices.shape == contributions.shape
+    assert len(contributions) == A.shape[0]
+    assert max_singular_value >= 0
+
+def test_compute_contributions_large_matrix():
+    A = torch.rand(100, 100)
+    G = torch.rand(100, 100)
+    D = torch.rand(100, 100)
+
+    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=False)
+    
+    assert indices.shape == contributions.shape
+    assert len(contributions) == A.shape[0]
+    assert max_singular_value >= 0
+
+def test_compute_contributions_different_dimensions_hadamard():
+    A = torch.rand(2, 3)
+    G = torch.rand(3, 2)
+    D = torch.rand(2, 3)
+
+    with pytest.raises(ValueError):
+        compute_contributions(A, G, D, use_hadamard=True)
+
+def test_compute_contributions_different_dimensions_no_hadamard():
+    A = torch.rand(2, 4)
+    G = torch.rand(3, 3)
+    D = torch.rand(3, 3)
+
+    with pytest.raises(ValueError):
+        compute_contributions(A, G, D, use_hadamard=False)
\ No newline at end of file

From e89c190578accb3f28847d7bc5ce9427198bb240 Mon Sep 17 00:00:00 2001
From: Lennart Redl <lennredl@gmail.com>
Date: Thu, 8 Aug 2024 11:14:03 +0200
Subject: [PATCH 2/4] Added docstrings for compute_contributions

---
 mubind/tl/graph.py | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/mubind/tl/graph.py b/mubind/tl/graph.py
index 3baca18..cca0774 100644
--- a/mubind/tl/graph.py
+++ b/mubind/tl/graph.py
@@ -2,14 +2,30 @@
 import pandas as pd
 import numpy as np
 
-'''
-We want to understand, how each filter contributes to the overall result together with C @ D := H.
+def compute_contributions(A, G, D, use_hadamard=True):
+    """Computes contribution scores for activities linked to a filter.
 
-Since the i-th row of (A @ H), now denoted as (A @ H)_{i,:}, is nothing but (H.T @ A.T_{:,i}).T, (X_{:,i} is the i-th column of X) we want to compute the matrix vector product between H.T and the i-th column of A.T and find out how much it scales.
-We'll later normalize this with the maximum singular value of H.
-'''
+    Arguments:
+    ---------
+    A: `torch.Tensor`
+        Activities matrix.
+    G: `torch.Tensor`
+        Graph matrix.
+    D: `torch.Tensor`
+        Graph scaling matrix.
+    use_hadamard: `bool` (default: `True`)
+        Use the Hadamard (element-wise) product G * D instead of the matrix product G @ D.
+
+    Returns:
+    -------
+    indices: `torch.Tensor`
+        Indices that sort the contribution scores in descending order
+    contributions: `torch.Tensor`
+        Contribution scores for each row of matrix A (i.e. each column of A.T)
+    max_singular_value: `float`
+        Maximum singular value of the matrix H.T with H = G * D or H = G @ D
+    """
 
-def compute_contributions(A, G, D, use_hadamard=True):
     # check if input matrices are torch tensors
     if not isinstance(A, torch.Tensor) or not isinstance(G, torch.Tensor) or not isinstance(D, torch.Tensor):
         raise TypeError("A, G, and D must be torch tensors")

From 3e064c355735db9b109e488b918b874e0f642672 Mon Sep 17 00:00:00 2001
From: Lennart Redl <lennredl@gmail.com>
Date: Thu, 8 Aug 2024 16:06:14 +0200
Subject: [PATCH 3/4] Added docstring for metric_scramble_comparison

---
 mubind/tl/graph.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/mubind/tl/graph.py b/mubind/tl/graph.py
index cca0774..f6e1921 100644
--- a/mubind/tl/graph.py
+++ b/mubind/tl/graph.py
@@ -58,13 +58,32 @@ def compute_contributions(A, G, D, use_hadamard=True):
     return indices, contributions, max_singular_value
 
 
-# comparing the metric scores of the original matrix with metric scores of scrambled matrices
 def metric_scramble_comparison(C,
                                D,
                                metric,
                                scramble_type,
                                n_scrambles=1000,
                                verbose=True):
+    """Compare the metric score of the original matrix with the metric scores of scrambled matrices.
+
+    Arguments:
+    ---------
+    C: `torch.Tensor`
+        Graph matrix.
+    D: `torch.Tensor`
+        Graph scaling matrix.
+    scramble_type: `str`
+        Type of scrambling: 'flat', 'row', or 'column'
+    n_scrambles: `int` (default: `1000`)
+        Number of scrambled matrices to compare
+    verbose: `bool` (default: `True`)
+        Print summary statistics of the scores of scrambled matrices and the score of the original matrix
+
+    Returns:
+    -------
+    scores_scrambled_df: `pandas.DataFrame`
+        Results of the metric scores of the scrambled matrices
+    """
     if C.is_sparse:
         C = C.to_dense()
     if D.is_sparse:

From bfcfc4ca74fd347f4ef0f9d8ef9a0cfbb578e516 Mon Sep 17 00:00:00 2001
From: Lennart Redl <lennredl@gmail.com>
Date: Thu, 8 Aug 2024 17:33:00 +0200
Subject: [PATCH 4/4] Omitted normalized_alignment_score in __init__

---
 mubind/tl/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mubind/tl/__init__.py b/mubind/tl/__init__.py
index f623fac..bb49117 100644
--- a/mubind/tl/__init__.py
+++ b/mubind/tl/__init__.py
@@ -38,4 +38,4 @@
 )
 from .probound import load_probound
 
-from .graph import compute_contributions, metric_scramble_comparison, normalized_alignment_score
\ No newline at end of file
+from .graph import compute_contributions, metric_scramble_comparison
\ No newline at end of file