diff --git a/mubind/tl/__init__.py b/mubind/tl/__init__.py
index f623fac..bb49117 100644
--- a/mubind/tl/__init__.py
+++ b/mubind/tl/__init__.py
@@ -38,4 +38,4 @@
 )
 from .probound import load_probound
 
-from .graph import compute_contributions, metric_scramble_comparison, normalized_alignment_score
\ No newline at end of file
+from .graph import compute_contributions, metric_scramble_comparison
\ No newline at end of file
diff --git a/mubind/tl/graph.py b/mubind/tl/graph.py
index a1e7b41..f6e1921 100644
--- a/mubind/tl/graph.py
+++ b/mubind/tl/graph.py
@@ -2,38 +2,88 @@
 import pandas as pd
 import numpy as np
 
-'''
-We want to understand, how each filter contributes to the overall result together with C @ D := H.
+def compute_contributions(A, G, D, use_hadamard=True):
+    """Computes a contribution score for each filter, i.e., how strongly each row of the activities matrix A is scaled by H.
 
-Since the i-th row of (A @ H), now denoted as (A @ H)_{i,:}, is nothing but (H.T @ A.T_{:,i}).T, (X_{:,i} is the i-th column of X) we want to compute the matrix vector product between H.T and the i-th column of A.T and find out how much it scales.
-We'll later normalize this with the maximum singular value of H.
-'''
+    Arguments:
+    ---------
+    A: `torch.Tensor`
+        Activities matrix.
+    G: `torch.Tensor`
+        Graph matrix.
+    D: `torch.Tensor`
+        Graph scaling matrix.
+    use_hadamard: `bool` (default: `True`)
+        Use the Hadamard product instead of matrix multiplication to form H.
 
-def compute_contributions(A, G, D, use_hadamard=True):
-    H = G * D if use_hadamard else G @ D
+    Returns:
+    -------
+    indices: `torch.Tensor`
+        Indices that sort the contributions in descending order.
+    contributions: `torch.Tensor`
+        Contribution scores, one per row of A (i.e., per column of A.T).
+    max_singular_value: `float`
+        Maximum singular value of H.T (identical to that of H), where H = G * D or H = G @ D.
     """
-    efficient implementation:
+
+    # check if input matrices are torch tensors
+    if not isinstance(A, torch.Tensor) or not isinstance(G, torch.Tensor) or not isinstance(D, torch.Tensor):
+        raise TypeError("A, G, and D must be torch tensors")
+    # check if input matrices are not empty
+    if A.numel() == 0 or G.numel() == 0 or D.numel() == 0:
+        raise ValueError("A, G, and D must not be empty")
+    # check if dimensions are correct
+    if A.shape[1] != G.shape[0]:
+        raise ValueError("Dimension mismatch: number of columns of A must match the number of rows of G")
+    if not use_hadamard and G.shape[1] != D.shape[0]:
+        raise ValueError("Dimension mismatch: number of columns of G must match the number of rows of D")
+    if use_hadamard and G.shape != D.shape:
+        raise ValueError("Dimension mismatch: G and D must have the same shape when use_hadamard is True")
+
+    H = G * D if use_hadamard else G @ D
     contributions = torch.norm(A @ H, dim=1) / torch.norm(A, dim=1)
-    """
-    # this implementation for contributions is not efficient, but easier to interpret
+
+    '''
+    this implementation for contributions is not efficient, but easier to interpret:
     contributions = torch.zeros(A.T.shape[1]) # number of columns of A.T
     i = 0
     for column in torch.unbind(A.T, dim=1):
         contributions[i] = (torch.norm((H.T @ column)) / torch.norm(column)).item()
         i += 1
-    max_singular_value = np.linalg.svd(H.to_dense().detach().numpy(), compute_uv=False)[0]# torch.max(torch.abs(torch.linalg.eigvals(H.to_dense()))).item()
+    '''
+
+    max_singular_value = np.linalg.svd(H.T.to_dense().detach().numpy(), compute_uv=False)[0]
     # sort the contributions by their absolute values descendingly
     _, indices = torch.sort(contributions, descending=True)
     return indices, contributions, max_singular_value
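+
+# Usage sketch (illustrative only; the shapes below are assumptions, not
+# requirements of mubind):
+#
+#   A = torch.rand(5, 3)   # activities: 5 rows -> 5 contribution scores
+#   G = torch.rand(3, 3)   # graph matrix
+#   D = torch.rand(3, 3)   # graph scaling matrix
+#   idx, scores, sigma_max = compute_contributions(A, G, D, use_hadamard=True)
+#
+# Each score is ||H.T @ a|| / ||a|| for a row a of A, so scores / sigma_max
+# lies in [0, 1] and can be compared across models.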
 
 
-# evaluate_metric function, that compares the metric of the original matrix with the metric of the scrambled matrices
 def metric_scramble_comparison(C, D, metric, scramble_type, n_scrambles=1000, verbose=True):
+    """Compares the metric score of the original matrix with the scores of scrambled matrices.
+
+    Arguments:
+    ---------
+    C: `torch.Tensor`
+        Graph matrix.
+    D: `torch.Tensor`
+        Graph scaling matrix.
+    metric: `callable`
+        Metric function used to score the matrices.
+    scramble_type: `str`
+        Type of scrambling: 'flat', 'row', or 'column'.
+    n_scrambles: `int` (default: `1000`)
+        Number of scrambled matrices to compare.
+    verbose: `bool` (default: `True`)
+        Print summary statistics of the scrambled scores and the score of the original matrix.
+
+    Returns:
+    -------
+    scores_scrambled_df: `pandas.DataFrame`
+        Metric scores of the scrambled matrices.
+    """
     if C.is_sparse:
         C = C.to_dense()
     if D.is_sparse:
@@ -55,15 +105,4 @@ def metric_scramble_comparison(C, D, metric, scramble_type, n_scrambles=1000, verbose=True):
         print(f"Summary statistics of the scores of scrambled matrices: \n{scores_scrambled_df.describe()} \n \n")
         print(f"This is the score of the original matrix: {score_D}")
 
-    return scores_scrambled_df
-
-
-def normalized_alignment_score(C, D):
-    # Compute the element-wise product and then sum all elements
-    numerator = torch.sum(C * D)
-    # Compute the Frobenius norms of C and D
-    norm_C = torch.norm(C, p='fro')
-    norm_D = torch.norm(D, p='fro')
-    # Compute the normalized alignment score
-    score = numerator / (norm_C * norm_D)
-    return score.item()
\ No newline at end of file
+    return scores_scrambled_df
\ No newline at end of file
diff --git a/tests/test_graph_contribution.py b/tests/test_graph_contribution.py
new file mode 100644
index 0000000..312aaae
--- /dev/null
+++ b/tests/test_graph_contribution.py
@@ -0,0 +1,73 @@
+import numpy as np
+import pandas as pd
+import torch
+from mubind.tl.graph import compute_contributions
+import pytest
+
+def test_compute_contributions_hadamard():
+    A = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
+    G = torch.tensor([[1.0, 0.5], [0.5, 1.0]])
+    D = torch.tensor([[0.5, 1.0], [1.0, 0.5]])
+
+    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=True)
+
+    assert indices.shape == contributions.shape
+    assert len(contributions) == A.shape[0]
+    assert max_singular_value >= 0
+
+def test_compute_contributions_no_hadamard():
+    A = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
+    G = torch.tensor([[1.0, 0.5], [0.5, 1.0]])
+    D = torch.tensor([[0.5, 1.0], [1.0, 0.5]])
+
+    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=False)
+
+    assert indices.shape == contributions.shape
+    assert len(contributions) == A.shape[0]
+    assert max_singular_value >= 0
+
+def test_compute_contributions_empty():
+    A = torch.tensor([[]])
+    G = torch.tensor([[]])
+    D = torch.tensor([[]])
+
+    with pytest.raises(ValueError):
+        compute_contributions(A, G, D)
+
+def test_compute_contributions_single_value():
+    A = torch.tensor([[1.0]])
+    G = torch.tensor([[2.0]])
+    D = torch.tensor([[3.0]])
+
+    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=True)
+
+    assert indices.shape == contributions.shape
+    assert len(contributions) == A.shape[0]
+    assert max_singular_value >= 0
+
+def test_compute_contributions_large_matrix():
+    A = torch.rand(100, 100)
+    G = torch.rand(100, 100)
+    D = torch.rand(100, 100)
+
+    indices, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=False)
+
+    assert indices.shape == contributions.shape
+    assert len(contributions) == A.shape[0]
+    assert max_singular_value >= 0
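+
+# Hedged sketch of an extra property check (an illustrative addition, not part of
+# the original suite): each contribution equals ||H.T @ a|| / ||a|| for a row a
+# of A, so it can never exceed the largest singular value of H.
+def test_compute_contributions_bounded_by_max_singular_value():
+    A = torch.rand(10, 6)
+    G = torch.rand(6, 6)
+    D = torch.rand(6, 6)
+
+    _, contributions, max_singular_value = compute_contributions(A, G, D, use_hadamard=True)
+
+    # small tolerance for floating-point error
+    assert torch.all(contributions <= max_singular_value + 1e-5)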
+
+def test_compute_contributions_different_dimensions_hadamard():
+    A = torch.rand(2, 3)
+    G = torch.rand(3, 2)
+    D = torch.rand(2, 3)
+
+    with pytest.raises(ValueError):
+        compute_contributions(A, G, D, use_hadamard=True)
+
+def test_compute_contributions_different_dimensions_no_hadamard():
+    A = torch.rand(2, 4)
+    G = torch.rand(3, 3)
+    D = torch.rand(3, 3)
+
+    with pytest.raises(ValueError):
+        compute_contributions(A, G, D, use_hadamard=False)
\ No newline at end of file