dismissed threshold for sentences, but implemented it for sentence sorting
uhh-lt · Aug 24, 2018 · f329b8e · f329b8e
1 parent 8165ed9
commit f329b8e
Show file tree

Hide file tree

Showing 3 changed files with 70 additions and 14 deletions.
diff --git a/src/Backend/marker_approach/object_comparer.py b/src/Backend/marker_approach/object_comparer.py
@@ -43,7 +43,7 @@ def find_winner(sentences, obj_a, obj_b, aspects):
 
 
 
-def score_function(sentence_score, max_sentscore, weight, marker_count):
+def score_function(sentence_score, max_sentscore, weight, marker_count, threshold):
+    '''
+    Returns the sentence score normalised by max_sentscore and scaled by
+    the sum of weight and marker_count.
+
+    threshold is accepted but not used in this body.
+    '''
     return (sentence_score / max_sentscore) * (weight + marker_count)
 
 

diff --git a/src/Backend/ml_approach/classify.py b/src/Backend/ml_approach/classify.py
@@ -11,6 +11,8 @@
 from cam_pretrained.model_util import load_model
 
 USE_HEURISTICS = True
+SENTENCE_THRESHOLD = 5
+
 
 def classify_sentences(sentences, model):
     if model == 'infersent':
@@ -24,14 +26,63 @@ def classify_sentences(sentences, model):
     return df
 
 
+def find_threshold(prepared_sentences, classification_results, aspects):
+    '''
+    Picks the strictest confidence cut-off that is exceeded by more than
+    SENTENCE_THRESHOLD sentences.
+
+    Only rows whose sentence contains at least one of the given aspects
+    (according to find_aspects) and whose predicted label is not 'NONE'
+    are counted. The cut-offs 0.8, 0.7, 0.6 and 0.5 are tried from
+    strictest to most lenient; 0 is returned if none of them is exceeded
+    by enough sentences.
+
+    prepared_sentences:     iterated with iterrows(); every row must
+                            provide a 'sentence' entry
+    classification_results: indexed as [column][row index]; the 'max'
+                            column holds the predicted label and the
+                            column named after that label holds its
+                            confidence
+    aspects:                passed through to find_aspects
+    '''
+    confidences = []
+    for index, row in prepared_sentences.iterrows():
+        if len(find_aspects(row['sentence'], aspects)) == 0:
+            continue
+        label = classification_results['max'][index]
+        if label != 'NONE':
+            confidences.append(classification_results[label][index])
+
+    threshold = 0
+    # Counting every confidence above a cut-off yields the same numbers as
+    # summing per-range buckets cumulatively from the top.
+    for cutoff in (0.8, 0.7, 0.6, 0.5):
+        above = sum(1 for confidence in confidences if confidence > cutoff)
+        if above > SENTENCE_THRESHOLD:
+            threshold = cutoff
+            break
+
+    print('Uses threshold', threshold)
+    return threshold
+
+
 def evaluate(sentences, prepared_sentences, classification_results, obj_a, obj_b, aspects):
 
     if len(sentences) > 0:
         max_sentscore = max(sentence.score for sentence in sentences)
 
+    print(max_sentscore)
+
+    threshold = find_threshold(
+        prepared_sentences, classification_results, aspects)
+
     for index, row in prepared_sentences.iterrows():
         label = classification_results['max'][index]
-        if label == 'NONE' or classification_results[label][index] < 0.6:
+        # if label == 'NONE' or classification_results[label][index] < threshold:
+        if label == 'NONE':
             continue
 
         classification_confidence = classification_results[label][index]
@@ -42,35 +93,40 @@ def evaluate(sentences, prepared_sentences, classification_results, obj_a, obj_b
                 sentence = s
                 break
         sentences.remove(sentence)
-        
+
         contained_aspects = find_aspects(sentence.text, aspects)
         if (label == 'BETTER' and row['object_a'] == obj_a.name) or (label == 'WORSE' and row['object_b'] == obj_a.name):
             add_points(contained_aspects, obj_a, sentence,
-                       max_sentscore, classification_confidence, score_function)
+                       max_sentscore, classification_confidence, score_function, threshold)
         else:
             add_points(contained_aspects, obj_b, sentence,
-                       max_sentscore, classification_confidence, score_function)
+                       max_sentscore, classification_confidence, score_function, threshold)
 
     if USE_HEURISTICS:
         for aspect in aspects:
             negation_dissolve_heuristic(obj_a, obj_b, aspect.name, aspects)
             negation_dissolve_heuristic(obj_b, obj_a, aspect.name, aspects)
-
-
 
     obj_a.sentences = prepare_sentence_list(obj_a.sentences)
     obj_b.sentences = prepare_sentence_list(obj_b.sentences)
 
     return build_final_dict(obj_a, obj_b, sentences)
 
 
-def score_function(sentence_score, max_sentscore, weight, confidence):
+def score_function(sentence_score, max_sentscore, weight, confidence, threshold):
+    '''
+    Computes the points a classified sentence contributes.
+
+    The result is sentence_score plus max_sentscore times the weight, plus
+    an extra max_sentscore when confidence is greater than threshold.
+    '''
+    # A weight below 1 is raised to 1 before it is used as a multiplier.
     if weight < 1:
         weight = 1
     # return (sentence_score + confidence * max_sentscore) * weight
-    return sentence_score * weight
+
+    # Bonus of one max_sentscore for classifications above the threshold.
+    score = 0
+    if confidence > threshold:
+        score += max_sentscore
+
+    return score + sentence_score + max_sentscore * weight
+    # return sentence_score * weight
     # return confidence * weight
 
+
 def set_use_heuristics(use_heuristics):
+    '''
+    Overwrites the module-level USE_HEURISTICS flag, which evaluate checks
+    before running the negation dissolve heuristic.
+    '''
     global USE_HEURISTICS
-    USE_HEURISTICS=use_heuristics
+    USE_HEURISTICS = use_heuristics
diff --git a/src/Backend/utils/answer_preparation.py b/src/Backend/utils/answer_preparation.py
@@ -39,7 +39,7 @@ def sentences_to_JSON(sentences):
     return [sentence.__dict__ for sentence in sentences]
 
 
-def add_points(contained_aspects, winner, sentence, max_score, classification_score, score_function):
+def add_points(contained_aspects, winner, sentence, max_score, classification_score, score_function, threshold=0):
     '''
     Adds the points of the won sentence to the points of the winner.
 
@@ -71,19 +71,19 @@ def add_points(contained_aspects, winner, sentence, max_score, classification_sc
         if len(contained_aspects) == 1:
             aspect = contained_aspects[0]
             points = score_function(
-                sentence.score, max_score, aspect.weight, classification_score)
+                sentence.score, max_score, aspect.weight, classification_score, threshold)
             winner.add_points(aspect.name, points * document_occurences)
             winner.add_sentence([points, sentence])
         else:
             for aspect in contained_aspects:
                 points += score_function(sentence.score, max_score,
-                                         aspect.weight, classification_score)
+                                         aspect.weight, classification_score, threshold)
             winner.add_points('multiple', points * document_occurences)
             winner.add_sentence([points, sentence])
     else:
         # multiple markers, multiple points
         points = score_function(
-            sentence.score, max_score, 0, classification_score)
+            sentence.score, max_score, 0, classification_score, threshold)
         winner.add_points('none', points * document_occurences)
         winner.add_sentence([points, sentence])
Original file line number	Diff line number	Diff line change
Expand Up		@@ -43,7 +43,7 @@ def find_winner(sentences, obj_a, obj_b, aspects):



		def score_function(sentence_score, max_sentscore, weight, marker_count):
		def score_function(sentence_score, max_sentscore, weight, marker_count, threshold):
		return (sentence_score / max_sentscore) * (weight + marker_count)


Expand Down