-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspk_veri_metric1.py
55 lines (45 loc) · 2.56 KB
/
spk_veri_metric1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import numpy as np
def length_norm(mat):
"""Normalize the embeddings to unit length (L2 normalization)."""
return mat / np.sqrt(np.sum(mat * mat, axis=1, keepdims=True))
def compute_cosine_similarity(embd1, embd2):
"""Compute cosine similarity between two L2-normalized embeddings."""
return np.dot(embd1, embd2) # Dot product after L2 normalization gives cosine similarity
def compute_llr(embd1, embd2):
"""Compute Log-Likelihood Ratio (LLR) based on cosine similarity."""
score = compute_cosine_similarity(embd1, embd2)
return score # Returning cosine similarity score for now (LLR could be modeled here)
class SVevaluation(object):
def __init__(self, trial_file, utt, embd=None):
"""Initialize the system with trial file and embeddings."""
# Update embeddings and trials
self.update_embd(embd)
self.utt_idx = {u: i for i, u in enumerate(utt)} # Map utterances to indices
self.update_trial(trial_file)
def update_trial(self, trial_file):
"""Update trial list from file containing enrollment and test utterance pairs."""
# No labels now, only pairs of enroll and test utterances
self.trial_idx = [[self.utt_idx.get(line.split()[1]), self.utt_idx.get(line.split()[2])]
for line in open(trial_file)]
bad_idx = [i for i, ti in enumerate(self.trial_idx) if None in ti]
for i in sorted(bad_idx, reverse=True):
del self.trial_idx[i]
if len(bad_idx):
print('Number of bad trials: %d' % len(bad_idx))
def update_cohort(self, cohort):
"""Update cohort with L2 normalization."""
cohort = length_norm(cohort)
self.score_cohort = self.embd @ cohort.T
self.idx_cohort = self.score_cohort.argsort()[:, ::-1]
def update_embd(self, embd):
"""Update embeddings with L2 normalization."""
self.embd = length_norm(embd) if embd is not None else None
def compute_llr_for_trials(self, output_file="llr_output.txt"):
"""Compute LLR for each pair of trials and save to a file."""
with open(output_file, 'w') as f_out:
for enroll_idx, test_idx in self.trial_idx:
llr = compute_llr(self.embd[enroll_idx], self.embd[test_idx])
enroll_utt = list(self.utt_idx.keys())[list(self.utt_idx.values()).index(enroll_idx)]
test_utt = list(self.utt_idx.keys())[list(self.utt_idx.values()).index(test_idx)]
f_out.write(f"{enroll_utt} {test_utt} {llr:.4f}\n")
print(f"LLR values have been written to {output_file}")