Skip to content

Commit

Permalink
handle empty strings more consistently
Browse files Browse the repository at this point in the history
  • Loading branch information
maxbachmann committed Apr 17, 2023
1 parent fd4ba10 commit c8be909
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 3 deletions.
2 changes: 1 addition & 1 deletion extern/rapidfuzz-cpp
3 changes: 3 additions & 0 deletions src/rapidfuzz/distance/Jaro_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ def similarity(
s1 = processor(s1)
s2 = processor(s2)

if not s1 and not s2:
return 1.0

if score_cutoff is None:
score_cutoff = 0

Expand Down
2 changes: 1 addition & 1 deletion tests/distance/test_Jaro.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_hash_special_case():


def test_edge_case_lengths():
- assert pytest.approx(Jaro.similarity("", "")) == 0
+ assert pytest.approx(Jaro.similarity("", "")) == 1
assert pytest.approx(Jaro.similarity("0", "0")) == 1
assert pytest.approx(Jaro.similarity("00", "00")) == 1
assert pytest.approx(Jaro.similarity("0", "00")) == 0.833333
Expand Down
2 changes: 1 addition & 1 deletion tests/distance/test_JaroWinkler.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_hash_special_case():


def test_edge_case_lengths():
- assert pytest.approx(JaroWinkler.similarity("", "")) == 0
+ assert pytest.approx(JaroWinkler.similarity("", "")) == 1.0
assert pytest.approx(JaroWinkler.similarity("0", "0")) == 1
assert pytest.approx(JaroWinkler.similarity("00", "00")) == 1
assert pytest.approx(JaroWinkler.similarity("0", "00")) == 0.85
Expand Down
9 changes: 9 additions & 0 deletions tests/distance/test_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,15 @@ def test_nan(scorer):
assert scorer.normalized_similarity("test", float("nan")) == 0.0


@pytest.mark.parametrize("scorer", all_scorer_modules)
def test_empty_strings(scorer):
"""
Test behavior when comparing two empty strings.

Runs once per entry of ``all_scorer_modules``. For each scorer, comparing
"" with "" must give a normalized distance of exactly 0.0 and a
normalized similarity of exactly 1.0.
"""
assert scorer.normalized_distance("", "") == 0.0
assert scorer.normalized_similarity("", "") == 1.0


@pytest.mark.parametrize("scorer", all_scorer_modules)
def test_similar_array(scorer):
"""
Expand Down
3 changes: 3 additions & 0 deletions tests/test_hypothesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ def normalize_distance(dist, s1, s2, weights=(1, 1, 1)):


def jaro_similarity(pattern, text):
if not pattern and not text:
return 1.0

P_flag = [0] * (len(pattern) + 1)
T_flag = [0] * (len(text) + 1)

Expand Down

0 comments on commit c8be909

Please sign in to comment.