From 27296f1c61918f1eeecd838947c5a6a3cb788b73 Mon Sep 17 00:00:00 2001 From: Lennart Redl Date: Thu, 8 Aug 2024 11:01:18 +0200 Subject: [PATCH 1/4] Added tests for graph_contribution --- mubind/tl/graph.py | 40 +++++++++-------- tests/test_graph_contribution.py | 73 ++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 18 deletions(-) create mode 100644 tests/test_graph_contribution.py diff --git a/mubind/tl/graph.py b/mubind/tl/graph.py index a1e7b41..3baca18 100644 --- a/mubind/tl/graph.py +++ b/mubind/tl/graph.py @@ -10,24 +10,39 @@ ''' def compute_contributions(A, G, D, use_hadamard=True): + # check if input matrices are torch tensors + if not isinstance(A, torch.Tensor) or not isinstance(G, torch.Tensor) or not isinstance(D, torch.Tensor): + raise TypeError("A, G, and D must be torch tensors") + # check if input matrices are not empty + if A.numel() == 0 or G.numel() == 0 or D.numel() == 0: + raise ValueError("A, G, and D must not be empty") + # check if dimensions are correct + if A.shape[1] != G.shape[0]: + raise ValueError("Dimension mismatch: number of columns of A must match the number of rows of G") + if not use_hadamard and G.shape[1] != D.shape[0]: + raise ValueError("Dimension mismatch: number of columns of G must match the number of rows of D") + if use_hadamard and G.shape != D.shape: + raise ValueError("Dimension mismatch: G and D must have the same shape when use_hadamard is True") + H = G * D if use_hadamard else G @ D - """ - efficient implementation: contributions = torch.norm(A @ H, dim=1) / torch.norm(A, dim=1) - """ - # this implementation for contributions is not efficient, but easier to interpret + + ''' + this implementation for contributions is not efficient, but easier to interpret contributions = torch.zeros(A.T.shape[1]) # number of columns of A.T i = 0 for column in torch.unbind(A.T, dim=1): contributions[i] = (torch.norm((H.T @ column)) / torch.norm(column)).item() i += 1 - max_singular_value = np.linalg.svd(H.to_dense().detach().numpy(), compute_uv=False)[0]# torch.max(torch.abs(torch.linalg.eigvals(H.to_dense()))).item() + ''' + + max_singular_value = np.linalg.svd(H.T.to_dense().detach().numpy(), compute_uv=False)[0] # sort the contributions by their absolute values descendingly _, indices = torch.sort(contributions, descending=True) return indices, contributions, max_singular_value -# evaluate_metric function, that compares the metric of the original matrix with the metric of the scrambled matrices +# comparing the metric scores of the original matrix with metric scores of scrambled matrices def metric_scramble_comparison(C, D, metric, @@ -55,15 +70,4 @@ def metric_scramble_comparison(C, print(f"Summary statistics of the scores of scrambled matrices: \n{scores_scrambled_df.describe()} \n \n") print(f"This is the score of the original matrix: {score_D}") - return scores_scrambled_df - - -def normalized_alignment_score(C, D): - # Compute the element-wise product and then sum all elements - numerator = torch.sum(C * D) - # Compute the Frobenius norms of C and D - norm_C = torch.norm(C, p='fro') - norm_D = torch.norm(D, p='fro') - # Compute the normalized alignment score - score = numerator / (norm_C * norm_D) - return score.item() \ No newline at end of file + return scores_scrambled_df \ No newline at end of file diff --git a/tests/test_graph_contribution.py b/tests/test_graph_contribution.py new file mode 100644 index 0000000..312aaae --- /dev/null +++ b/tests/test_graph_contribution.py @@ -0,0 +1,73 @@ +import numpy as np +import pandas as pd +import torch +from mubind.tl.graph import compute_contributions +import pytest + +def test_compute_contributions_hadamard(): + A = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) + G = torch.tensor([[1.0, 0.5], [0.5, 1.0]]) + D = torch.tensor([[0.5, 1.0], [1.0, 0.5]]) + + indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=True) + + assert indices.shape == contributions.shape + assert len(contributions) == A.shape[0] + assert max_singular_value >= 0 + +def test_compute_contributions_no_hadamard(): + A = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) + G = torch.tensor([[1.0, 0.5], [0.5, 1.0]]) + D = torch.tensor([[0.5, 1.0], [1.0, 0.5]]) + + indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=False) + + assert indices.shape == contributions.shape + assert len(contributions) == A.shape[0] + assert max_singular_value >= 0 + +def test_compute_contributions_empty(): + A = torch.tensor([[]]) + G = torch.tensor([[]]) + D = torch.tensor([[]]) + + with pytest.raises(ValueError): + compute_contributions(A, G, D) + +def test_compute_contributions_single_value(): + A = torch.tensor([[1.0]]) + G = torch.tensor([[2.0]]) + D = torch.tensor([[3.0]]) + + indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=True) + + assert indices.shape == contributions.shape + assert len(contributions) == A.shape[0] + assert max_singular_value >= 0 + +def test_compute_contributions_large_matrix(): + A = torch.rand(100, 100) + G = torch.rand(100, 100) + D = torch.rand(100, 100) + + indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=False) + + assert indices.shape == contributions.shape + assert len(contributions) == A.shape[0] + assert max_singular_value >= 0 + +def test_compute_contributions_different_dimensions_hadamard(): + A = torch.rand(2, 3) + G = torch.rand(3, 2) + D = torch.rand(2, 3) + + with pytest.raises(ValueError): + compute_contributions(A, G, D, use_hadamard=True) + +def test_compute_contributions_different_dimensions_no_hadamard(): + A = torch.rand(2, 4) + G = torch.rand(3, 3) + D = torch.rand(3, 3) + + with pytest.raises(ValueError): + compute_contributions(A, G, D, use_hadamard=False) \ No newline at end of file From e89c190578accb3f28847d7bc5ce9427198bb240 Mon Sep 17 00:00:00 2001 From: Lennart Redl Date: Thu, 8 Aug 2024 11:14:03 +0200 Subject: [PATCH 2/4] Added docstrings for compute_contributions --- mubind/tl/graph.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/mubind/tl/graph.py b/mubind/tl/graph.py index 3baca18..cca0774 100644 --- a/mubind/tl/graph.py +++ b/mubind/tl/graph.py @@ -2,14 +2,30 @@ import pandas as pd import numpy as np -''' -We want to understand, how each filter contributes to the overall result together with C @ D := H. +def compute_contributions(A, G, D, use_hadamard=True): + """Computes contribution scores for activities linked to a filter. -Since the i-th row of (A @ H), now denoted as (A @ H)_{i,:}, is nothing but (H.T @ A.T_{:,i}).T, (X_{:,i} is the i-th column of X) we want to compute the matrix vector product between H.T and the i-th column of A.T and find out how much it scales. -We'll later normalize this with the maximum singular value of H. -''' + Arguments: + --------- + A: : `torch.Tensor` + Activities matrix. + G: `torch.Tensor` + Graph matrix. + D: `torch.Tensor` + Graph scaling matrix. + use_hadamard: `bool` (default: `True`) + Use hadamard product instead of matrix multiplication. + + Returns: + ------- + indices: `torch.Tensor` + Indices of the contributions sorted by their absolute values descendingly + contributions: `torch.Tensor` + Contribution scores for each column of matrix A + max_singular_value: `float` + Maximum singular value of the matrix H.T with H = G * D or H = G @ D + """ -def compute_contributions(A, G, D, use_hadamard=True): # check if input matrices are torch tensors if not isinstance(A, torch.Tensor) or not isinstance(G, torch.Tensor) or not isinstance(D, torch.Tensor): raise TypeError("A, G, and D must be torch tensors") From 3e064c355735db9b109e488b918b874e0f642672 Mon Sep 17 00:00:00 2001 From: Lennart Redl Date: Thu, 8 Aug 2024 16:06:14 +0200 Subject: [PATCH 3/4] Added docstring for metric_scramble_comparison --- mubind/tl/graph.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/mubind/tl/graph.py b/mubind/tl/graph.py index cca0774..f6e1921 100644 --- a/mubind/tl/graph.py +++ b/mubind/tl/graph.py @@ -58,13 +58,32 @@ def compute_contributions(A, G, D, use_hadamard=True): return indices, contributions, max_singular_value -# comparing the metric scores of the original matrix with metric scores of scrambled matrices def metric_scramble_comparison(C, D, metric, scramble_type, n_scrambles=1000, verbose=True): + """Comparing metric scores of the original matrix with metric scores of scrambled matrices + + Arguments: + --------- + C: : `torch.Tensor` + Graph matrix. + D: `torch.Tensor` + Graph scaling matrix. + scramble_type: `str` + Type of scrambling: 'flat', 'row', or 'column' + n_scrambles: `int` (default: `1000`) + Number of scrambled matrices to compare + verbose: `bool` (default: `True`) + Print summary statistics of the scores of scrambled matrices and the score of the original matrix + + Returns: + ------- + scores_scrambled_df: `pandas.DataFrame` + Results of the metric scores of the scrambled matrices + """ if C.is_sparse: C = C.to_dense() if D.is_sparse: From bfcfc4ca74fd347f4ef0f9d8ef9a0cfbb578e516 Mon Sep 17 00:00:00 2001 From: Lennart Redl Date: Thu, 8 Aug 2024 17:33:00 +0200 Subject: [PATCH 4/4] Omitted normalized_alignment_score in __init__ --- mubind/tl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mubind/tl/__init__.py b/mubind/tl/__init__.py index f623fac..bb49117 100644 --- a/mubind/tl/__init__.py +++ b/mubind/tl/__init__.py @@ -38,4 +38,4 @@ ) from .probound import load_probound -from .graph import compute_contributions, metric_scramble_comparison, normalized_alignment_score \ No newline at end of file +from .graph import compute_contributions, metric_scramble_comparison \ No newline at end of file