Skip to content

Commit

Permalink
now using different thresholds for sentences and scores
Browse files (browse the repository at this point in the history)
  • Loading branch information
mschild committed Aug 24, 2018
1 parent f329b8e commit f8accc8
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 16 deletions.
27 changes: 16 additions & 11 deletions src/Backend/ml_approach/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from cam_pretrained.model_util import load_model

USE_HEURISTICS = True
SENTENCE_THRESHOLD = 5


def classify_sentences(sentences, model):
Expand All @@ -26,7 +25,7 @@ def classify_sentences(sentences, model):
return df


def find_threshold(prepared_sentences, classification_results, aspects):
def count_confindences(prepared_sentences, classification_results, aspects):
perfect = 0
excellent = 0
good = 0
Expand Down Expand Up @@ -55,14 +54,19 @@ def find_threshold(prepared_sentences, classification_results, aspects):
medium += good
ok += medium

return (excellent, good, medium, ok)


def find_threshold(counted_confidences, sentence_threshold):

threshold = 0
if perfect > SENTENCE_THRESHOLD:
if counted_confidences[0] > sentence_threshold:
threshold = 0.8
elif excellent > SENTENCE_THRESHOLD:
elif counted_confidences[1] > sentence_threshold:
threshold = 0.7
elif good > SENTENCE_THRESHOLD:
elif counted_confidences[2] > sentence_threshold:
threshold = 0.6
elif medium > SENTENCE_THRESHOLD:
elif counted_confidences[3] > sentence_threshold:
threshold = 0.5

print('Uses threshold', threshold)
Expand All @@ -76,12 +80,13 @@ def evaluate(sentences, prepared_sentences, classification_results, obj_a, obj_b

print(max_sentscore)

threshold = find_threshold(
counts = count_confindences(
prepared_sentences, classification_results, aspects)
threshold_sentences = find_threshold(counts, 5)
threshold_score = find_threshold(counts, 3)

for index, row in prepared_sentences.iterrows():
label = classification_results['max'][index]
# if label == 'NONE' or classification_results[label][index] < threshold:
if label == 'NONE':
continue

Expand All @@ -97,10 +102,10 @@ def evaluate(sentences, prepared_sentences, classification_results, obj_a, obj_b
contained_aspects = find_aspects(sentence.text, aspects)
if (label == 'BETTER' and row['object_a'] == obj_a.name) or (label == 'WORSE' and row['object_b'] == obj_a.name):
add_points(contained_aspects, obj_a, sentence,
max_sentscore, classification_confidence, score_function, threshold)
max_sentscore, classification_confidence, score_function, threshold_sentences, threshold_score)
else:
add_points(contained_aspects, obj_b, sentence,
max_sentscore, classification_confidence, score_function, threshold)
max_sentscore, classification_confidence, score_function, threshold_sentences, threshold_score)

if USE_HEURISTICS:
for aspect in aspects:
Expand All @@ -117,7 +122,7 @@ def score_function(sentence_score, max_sentscore, weight, confidence, threshold)
if weight < 1:
weight = 1
# return (sentence_score + confidence * max_sentscore) * weight

score = 0
if confidence > threshold:
score += max_sentscore
Expand Down
12 changes: 7 additions & 5 deletions src/Backend/utils/answer_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def sentences_to_JSON(sentences):
return [sentence.__dict__ for sentence in sentences]


def add_points(contained_aspects, winner, sentence, max_score, classification_score, score_function, threshold=0):
def add_points(contained_aspects, winner, sentence, max_score, classification_score, score_function, threshold_sentences=0, threshold_score=0):
'''
Adds the points of the won sentence to the points of the winner.
Expand Down Expand Up @@ -71,19 +71,21 @@ def add_points(contained_aspects, winner, sentence, max_score, classification_sc
if len(contained_aspects) == 1:
aspect = contained_aspects[0]
points = score_function(
sentence.score, max_score, aspect.weight, classification_score, threshold)
winner.add_points(aspect.name, points * document_occurences)
sentence.score, max_score, aspect.weight, classification_score, threshold_sentences)
if classification_score > threshold_score:
winner.add_points(aspect.name, points * document_occurences)
winner.add_sentence([points, sentence])
else:
for aspect in contained_aspects:
points += score_function(sentence.score, max_score,
aspect.weight, classification_score, threshold)
aspect.weight, classification_score, threshold_sentences)
winner.add_points('multiple', points * document_occurences)
winner.add_sentence([points, sentence])
else:
# multiple markers, multiple points
points = score_function(
sentence.score, max_score, 0, classification_score, threshold)
sentence.score, max_score, 0, classification_score, threshold_sentences)
# if classification_score > threshold_score:
winner.add_points('none', points * document_occurences)
winner.add_sentence([points, sentence])

Expand Down

0 comments on commit f8accc8

Please sign in to comment.