Skip to content

Commit

Permalink
dismissed threshold for sentences, but implemented it for sentence so…
Browse files Browse the repository at this point in the history
…rting
  • Loading branch information
mschild committed Aug 24, 2018
1 parent 8165ed9 commit f329b8e
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 14 deletions.
2 changes: 1 addition & 1 deletion src/Backend/marker_approach/object_comparer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def find_winner(sentences, obj_a, obj_b, aspects):



def score_function(sentence_score, max_sentscore, weight, marker_count, threshold):
    """Score a sentence for the marker approach.

    The sentence score is normalised by the highest sentence score and then
    scaled by the sum of the aspect weight and the marker count.

    Args:
        sentence_score: score of the sentence being rated.
        max_sentscore: highest score among all sentences; used as the
            normaliser, so it must not be zero.
        weight: weight of the aspect found in the sentence.
        marker_count: number of markers counted for the sentence.
        threshold: accepted only so this function has the same call
            signature as the other score functions; it is not used here.

    Returns:
        (sentence_score / max_sentscore) * (weight + marker_count)
    """
    return (sentence_score / max_sentscore) * (weight + marker_count)


Expand Down
74 changes: 65 additions & 9 deletions src/Backend/ml_approach/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from cam_pretrained.model_util import load_model

USE_HEURISTICS = True
SENTENCE_THRESHOLD = 5


def classify_sentences(sentences, model):
if model == 'infersent':
Expand All @@ -24,14 +26,63 @@ def classify_sentences(sentences, model):
return df


def find_threshold(prepared_sentences, classification_results, aspects):
    """Pick the strictest confidence level that still keeps enough sentences.

    Only sentences that contain at least one of the given aspects and whose
    predicted label is not 'NONE' are considered. The levels 0.8, 0.7, 0.6
    and 0.5 are tried from strictest to loosest; the first level that more
    than SENTENCE_THRESHOLD of those sentences exceed is chosen.

    Args:
        prepared_sentences: table iterated with ``iterrows()``; each row
            provides the sentence text under ``'sentence'``
            (presumably a pandas DataFrame -- confirm against the caller).
        classification_results: lookup indexed first by column (``'max'``
            for the predicted label, or the label itself for its
            confidence) and then by the row index of prepared_sentences.
        aspects: aspects handed to ``find_aspects`` to decide whether a
            sentence is relevant.

    Returns:
        The chosen confidence level, or 0 if no level is exceeded by more
        than SENTENCE_THRESHOLD sentences.
    """
    # Confidence of every aspect-bearing sentence with a real label.
    confidences = []
    for index, row in prepared_sentences.iterrows():
        if len(find_aspects(row['sentence'], aspects)) == 0:
            continue
        label = classification_results['max'][index]
        if label != 'NONE':
            confidences.append(classification_results[label][index])

    threshold = 0
    # Strictest level first: counting "confidence > level" directly is the
    # cumulative count the per-bucket counters used to be summed into.
    for level in (0.8, 0.7, 0.6, 0.5):
        matching = sum(1 for confidence in confidences if confidence > level)
        if matching > SENTENCE_THRESHOLD:
            threshold = level
            break

    print('Uses threshold', threshold)
    return threshold


def evaluate(sentences, prepared_sentences, classification_results, obj_a, obj_b, aspects):

if len(sentences) > 0:
max_sentscore = max(sentence.score for sentence in sentences)

print(max_sentscore)

threshold = find_threshold(
prepared_sentences, classification_results, aspects)

for index, row in prepared_sentences.iterrows():
label = classification_results['max'][index]
if label == 'NONE' or classification_results[label][index] < 0.6:
# if label == 'NONE' or classification_results[label][index] < threshold:
if label == 'NONE':
continue

classification_confidence = classification_results[label][index]
Expand All @@ -42,35 +93,40 @@ def evaluate(sentences, prepared_sentences, classification_results, obj_a, obj_b
sentence = s
break
sentences.remove(sentence)

contained_aspects = find_aspects(sentence.text, aspects)
if (label == 'BETTER' and row['object_a'] == obj_a.name) or (label == 'WORSE' and row['object_b'] == obj_a.name):
add_points(contained_aspects, obj_a, sentence,
max_sentscore, classification_confidence, score_function)
max_sentscore, classification_confidence, score_function, threshold)
else:
add_points(contained_aspects, obj_b, sentence,
max_sentscore, classification_confidence, score_function)
max_sentscore, classification_confidence, score_function, threshold)

if USE_HEURISTICS:
for aspect in aspects:
negation_dissolve_heuristic(obj_a, obj_b, aspect.name, aspects)
negation_dissolve_heuristic(obj_b, obj_a, aspect.name, aspects)



obj_a.sentences = prepare_sentence_list(obj_a.sentences)
obj_b.sentences = prepare_sentence_list(obj_b.sentences)

return build_final_dict(obj_a, obj_b, sentences)


def score_function(sentence_score, max_sentscore, weight, confidence, threshold):
    """Score a classified sentence for the machine-learning approach.

    Args:
        sentence_score: score of the sentence being rated.
        max_sentscore: highest score among all sentences.
        weight: weight of the aspect found in the sentence; values below 1
            are raised to 1.
        confidence: classifier confidence for the sentence's label.
        threshold: confidence level a sentence must exceed (strictly) to
            receive the bonus.

    Returns:
        sentence_score + max_sentscore * weight, plus an additional
        max_sentscore when confidence is greater than threshold.
    """
    if weight < 1:
        weight = 1

    # Sentences classified with high confidence get one extra max_sentscore,
    # which lifts them above every sentence at or below the threshold that
    # has the same weight.
    score = 0
    if confidence > threshold:
        score += max_sentscore

    return score + sentence_score + max_sentscore * weight


def set_use_heuristics(use_heuristics):
    """Set the module-level USE_HEURISTICS flag.

    Args:
        use_heuristics: new value stored in the global USE_HEURISTICS.
    """
    global USE_HEURISTICS
    USE_HEURISTICS = use_heuristics
8 changes: 4 additions & 4 deletions src/Backend/utils/answer_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def sentences_to_JSON(sentences):
return [sentence.__dict__ for sentence in sentences]


def add_points(contained_aspects, winner, sentence, max_score, classification_score, score_function):
def add_points(contained_aspects, winner, sentence, max_score, classification_score, score_function, threshold=0):
'''
Adds the points of the won sentence to the points of the winner.
Expand Down Expand Up @@ -71,19 +71,19 @@ def add_points(contained_aspects, winner, sentence, max_score, classification_sc
if len(contained_aspects) == 1:
aspect = contained_aspects[0]
points = score_function(
sentence.score, max_score, aspect.weight, classification_score)
sentence.score, max_score, aspect.weight, classification_score, threshold)
winner.add_points(aspect.name, points * document_occurences)
winner.add_sentence([points, sentence])
else:
for aspect in contained_aspects:
points += score_function(sentence.score, max_score,
aspect.weight, classification_score)
aspect.weight, classification_score, threshold)
winner.add_points('multiple', points * document_occurences)
winner.add_sentence([points, sentence])
else:
# multiple markers, multiple points
points = score_function(
sentence.score, max_score, 0, classification_score)
sentence.score, max_score, 0, classification_score, threshold)
winner.add_points('none', points * document_occurences)
winner.add_sentence([points, sentence])

Expand Down

0 comments on commit f329b8e

Please sign in to comment.