Now using different thresholds for sentences and scores

uhh-lt · Aug 24, 2018 · f8accc8 · f8accc8
1 parent f329b8e
commit f8accc8
Show file tree

Hide file tree

Showing 2 changed files with 23 additions and 16 deletions.
diff --git a/src/Backend/ml_approach/classify.py b/src/Backend/ml_approach/classify.py
@@ -11,7 +11,6 @@
 from cam_pretrained.model_util import load_model
 
 USE_HEURISTICS = True
-SENTENCE_THRESHOLD = 5
 
 
 def classify_sentences(sentences, model):
@@ -26,7 +25,7 @@ def classify_sentences(sentences, model):
     return df
 
 
-def find_threshold(prepared_sentences, classification_results, aspects):
+def count_confindences(prepared_sentences, classification_results, aspects):
     perfect = 0
     excellent = 0
     good = 0
@@ -55,14 +54,19 @@ def find_threshold(prepared_sentences, classification_results, aspects):
     medium += good
     ok += medium
 
+    return (excellent, good, medium, ok)
+
+
+def find_threshold(counted_confidences, sentence_threshold):
+
     threshold = 0
-    if perfect > SENTENCE_THRESHOLD:
+    if counted_confidences[0] > sentence_threshold:
         threshold = 0.8
-    elif excellent > SENTENCE_THRESHOLD:
+    elif counted_confidences[1] > sentence_threshold:
         threshold = 0.7
-    elif good > SENTENCE_THRESHOLD:
+    elif counted_confidences[2] > sentence_threshold:
         threshold = 0.6
-    elif medium > SENTENCE_THRESHOLD:
+    elif counted_confidences[3] > sentence_threshold:
         threshold = 0.5
 
     print('Uses threshold', threshold)
@@ -76,12 +80,13 @@ def evaluate(sentences, prepared_sentences, classification_results, obj_a, obj_b
 
     print(max_sentscore)
 
-    threshold = find_threshold(
+    counts = count_confindences(
         prepared_sentences, classification_results, aspects)
+    threshold_sentences = find_threshold(counts, 5)
+    threshold_score = find_threshold(counts, 3)
 
     for index, row in prepared_sentences.iterrows():
         label = classification_results['max'][index]
-        # if label == 'NONE' or classification_results[label][index] < threshold:
         if label == 'NONE':
             continue
 
@@ -97,10 +102,10 @@ def evaluate(sentences, prepared_sentences, classification_results, obj_a, obj_b
         contained_aspects = find_aspects(sentence.text, aspects)
         if (label == 'BETTER' and row['object_a'] == obj_a.name) or (label == 'WORSE' and row['object_b'] == obj_a.name):
             add_points(contained_aspects, obj_a, sentence,
-                       max_sentscore, classification_confidence, score_function, threshold)
+                       max_sentscore, classification_confidence, score_function, threshold_sentences, threshold_score)
         else:
             add_points(contained_aspects, obj_b, sentence,
-                       max_sentscore, classification_confidence, score_function, threshold)
+                       max_sentscore, classification_confidence, score_function, threshold_sentences, threshold_score)
 
     if USE_HEURISTICS:
         for aspect in aspects:
@@ -117,7 +122,7 @@ def score_function(sentence_score, max_sentscore, weight, confidence, threshold)
     if weight < 1:
         weight = 1
     # return (sentence_score + confidence * max_sentscore) * weight
-    
+
     score = 0
     if confidence > threshold:
         score += max_sentscore

diff --git a/src/Backend/utils/answer_preparation.py b/src/Backend/utils/answer_preparation.py
@@ -39,7 +39,7 @@ def sentences_to_JSON(sentences):
     return [sentence.__dict__ for sentence in sentences]
 
 
-def add_points(contained_aspects, winner, sentence, max_score, classification_score, score_function, threshold=0):
+def add_points(contained_aspects, winner, sentence, max_score, classification_score, score_function, threshold_sentences=0, threshold_score=0):
     '''
     Adds the points of the won sentence to the points of the winner.
 
@@ -71,19 +71,21 @@ def add_points(contained_aspects, winner, sentence, max_score, classification_sc
         if len(contained_aspects) == 1:
             aspect = contained_aspects[0]
             points = score_function(
-                sentence.score, max_score, aspect.weight, classification_score, threshold)
-            winner.add_points(aspect.name, points * document_occurences)
+                sentence.score, max_score, aspect.weight, classification_score, threshold_sentences)
+            if classification_score > threshold_score:
+                winner.add_points(aspect.name, points * document_occurences)
             winner.add_sentence([points, sentence])
         else:
             for aspect in contained_aspects:
                 points += score_function(sentence.score, max_score,
-                                         aspect.weight, classification_score, threshold)
+                                         aspect.weight, classification_score, threshold_sentences)
             winner.add_points('multiple', points * document_occurences)
             winner.add_sentence([points, sentence])
     else:
         # multiple markers, multiple points
         points = score_function(
-            sentence.score, max_score, 0, classification_score, threshold)
+            sentence.score, max_score, 0, classification_score, threshold_sentences)
+        # if classification_score > threshold_score:
         winner.add_points('none', points * document_occurences)
         winner.add_sentence([points, sentence])