From ddb3845b083d5cdd456e3c4e1ef170f080a70444 Mon Sep 17 00:00:00 2001
From: "g.casari" <giovanni.casari@ammagamma.com>
Date: Wed, 27 Sep 2023 11:39:33 +0200
Subject: [PATCH] Solved issue #40

---
 src/nervaluate/evaluate.py | 44 ++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/src/nervaluate/evaluate.py b/src/nervaluate/evaluate.py
index 290cc05..0468f7a 100644
--- a/src/nervaluate/evaluate.py
+++ b/src/nervaluate/evaluate.py
@@ -144,6 +144,10 @@ def compute_metrics(  # type: ignore
     true_named_entities = [clean_entities(ent) for ent in true_named_entities if ent["label"] in tags]
     pred_named_entities = [clean_entities(ent) for ent in pred_named_entities if ent["label"] in tags]
 
+    # Sort the lists to improve the speed of the overlap comparison
+    true_named_entities.sort(key=lambda x: x["start"])
+    pred_named_entities.sort(key=lambda x: x["end"])
+
     # go through each predicted named-entity
     for pred in pred_named_entities:
         found_overlap = False
@@ -169,6 +173,10 @@ def compute_metrics(  # type: ignore
         else:
             # check for overlaps with any of the true entities
             for true in true_named_entities:
+                # Only enter this block if an overlap is possible
+                if pred["end"] < true["start"]:
+                    break
+
                 # overlapping needs to take into account last token as well
                 pred_range = range(pred["start"], pred["end"] + 1)
                 true_range = range(true["start"], true["end"] + 1)
@@ -214,29 +222,27 @@ def compute_metrics(  # type: ignore
 
                         found_overlap = True
 
-                        break
+                    else:
+                        # Scenario VI: Entities overlap, but the entity type is
+                        # different.
 
-                    # Scenario VI: Entities overlap, but the entity type is
-                    # different.
-
-                    # overall results
-                    evaluation["strict"]["incorrect"] += 1
-                    evaluation["ent_type"]["incorrect"] += 1
-                    evaluation["partial"]["partial"] += 1
-                    evaluation["exact"]["incorrect"] += 1
+                        # overall results
+                        evaluation["strict"]["incorrect"] += 1
+                        evaluation["ent_type"]["incorrect"] += 1
+                        evaluation["partial"]["partial"] += 1
+                        evaluation["exact"]["incorrect"] += 1
 
-                    # aggregated by entity type results
-                    # Results against the true entity
+                        # aggregated by entity type results
+                        # Results against the true entity
 
-                    evaluation_agg_entities_type[true["label"]]["strict"]["incorrect"] += 1
-                    evaluation_agg_entities_type[true["label"]]["partial"]["partial"] += 1
-                    evaluation_agg_entities_type[true["label"]]["ent_type"]["incorrect"] += 1
-                    evaluation_agg_entities_type[true["label"]]["exact"]["incorrect"] += 1
+                        evaluation_agg_entities_type[true["label"]]["strict"]["incorrect"] += 1
+                        evaluation_agg_entities_type[true["label"]]["partial"]["partial"] += 1
+                        evaluation_agg_entities_type[true["label"]]["ent_type"]["incorrect"] += 1
+                        evaluation_agg_entities_type[true["label"]]["exact"]["incorrect"] += 1
 
-                    # Results against the predicted entity
-                    # evaluation_agg_entities_type[pred['label']]['strict']['spurious'] += 1
-                    found_overlap = True
-                    break
+                        # Results against the predicted entity
+                        # evaluation_agg_entities_type[pred['label']]['strict']['spurious'] += 1
+                        found_overlap = True
 
             # Scenario II: Entities are spurious (i.e., over-generated).
             if not found_overlap: