From bd374465e52d81fa4c0e786437de7a62807df602 Mon Sep 17 00:00:00 2001 From: mschild Date: Fri, 24 Aug 2018 16:32:23 +0200 Subject: [PATCH] fix for negation dissolving heuristic --- .../heuristics/negation_dissolve_heuristic.py | 39 ++++++++++++------- src/Backend/ml_approach/classify.py | 4 +- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/Backend/heuristics/negation_dissolve_heuristic.py b/src/Backend/heuristics/negation_dissolve_heuristic.py index a2fc751e..5451300d 100644 --- a/src/Backend/heuristics/negation_dissolve_heuristic.py +++ b/src/Backend/heuristics/negation_dissolve_heuristic.py @@ -42,7 +42,7 @@ } -def move_assignment(sentences_to_move, from_object, to_object, aspect, aspects): +def move_assignment(sentences_to_move, from_object, to_object, aspect, aspects, threshold_score): points_to_move = 0 points_for_multiple = 0 @@ -51,12 +51,16 @@ def move_assignment(sentences_to_move, from_object, to_object, aspect, aspects): 'sentences from', from_object.name, 'to', to_object.name, '.') print('----') for sentence in sentences_to_move: + + points = (sentence[0]) / 10 if sentence[1].confidence < threshold_score else sentence[0] + if len(find_aspects(sentence[1].text, aspects)) > 1: - points_for_multiple = points_for_multiple + sentence[0] + points_for_multiple = points_for_multiple + points else: - points_to_move = points_to_move + sentence[0] + points_to_move = points_to_move + points - print('-' + re.sub(' +', ' ', re.sub('[^a-zA-Z0-9 ]', ' ', sentence[1].text))) + print('-' + re.sub(' +', ' ', + re.sub('[^a-zA-Z0-9 ]', ' ', sentence[1].text))) from_object.sentences = [ sentence for sentence in from_object.sentences if sentence not in sentences_to_move] @@ -79,35 +83,42 @@ def move_assignment(sentences_to_move, from_object, to_object, aspect, aspects): print('----') -def negation_dissolve_heuristic(object_a, object_b, aspect, aspects): +def negation_dissolve_heuristic(object_a, object_b, aspect, aspects, threshold_score): markers = 
positive_contrary_comparatives - filtered_sentences = get_matching_sentences(object_a.name, object_b.name, aspect, object_a.sentences, markers, True) + filtered_sentences = get_matching_sentences( + object_a.name, object_b.name, aspect, object_a.sentences, markers, True) if len(filtered_sentences) > 0: for sentence in filtered_sentences: - filtered_contrary = [v for k, v in markers.items() if k in sentence[1].text] - filtered_contrary = [item for sublist in filtered_contrary for item in sublist] + filtered_contrary = [ + v for k, v in markers.items() if k in sentence[1].text] + filtered_contrary = [ + item for sublist in filtered_contrary for item in sublist] if len(filtered_contrary) > 0: same_meaning_sentences = get_matching_sentences( object_b.name, object_a.name, aspect, object_b.sentences, filtered_contrary, False) if len(same_meaning_sentences) > 0: move_assignment(same_meaning_sentences, - object_b, object_a, aspect, aspects) + object_b, object_a, aspect, aspects, threshold_score) def get_matching_sentences(object_a, object_b, aspect, sentences, markers, is_positive): - locked_out_markers = [] + locked_out_markers = [] if is_positive: - locked_out_markers = [item for sublist in list(positive_contrary_comparatives.values()) for item in sublist] + locked_out_markers = [item for sublist in list( + positive_contrary_comparatives.values()) for item in sublist] else: - locked_out_markers = [marker for marker in positive_contrary_comparatives] - re_locked_out_markers = '|'.join([re.escape(x) for x in locked_out_markers]) + locked_out_markers = [ + marker for marker in positive_contrary_comparatives] + re_locked_out_markers = '|'.join( + [re.escape(x) for x in locked_out_markers]) re_markers = '|'.join([re.escape(x) for x in markers]) regex = re.compile(r'(?=.*(?:\b' + re.escape(object_a) + r'\b.*\b' + re.escape(aspect) + r'\b.*\b' + re.escape(object_b) + r'\b))(?=.*(?:\b' + re_markers + r'\b))(?!.*(?:\b' + re_locked_out_markers + r'\b))', re.IGNORECASE) - 
filtered_sentences = [x for x in sentences if regex.search(x[1].text) != None] + filtered_sentences = [ + x for x in sentences if regex.search(x[1].text) != None] return filtered_sentences diff --git a/src/Backend/ml_approach/classify.py b/src/Backend/ml_approach/classify.py index e8a48d82..23441b6b 100644 --- a/src/Backend/ml_approach/classify.py +++ b/src/Backend/ml_approach/classify.py @@ -108,8 +108,8 @@ def evaluate(sentences, prepared_sentences, classification_results, obj_a, obj_b if USE_HEURISTICS: for aspect in aspects: - negation_dissolve_heuristic(obj_a, obj_b, aspect.name, aspects) - negation_dissolve_heuristic(obj_b, obj_a, aspect.name, aspects) + negation_dissolve_heuristic(obj_a, obj_b, aspect.name, aspects, threshold_score) + negation_dissolve_heuristic(obj_b, obj_a, aspect.name, aspects, threshold_score) obj_a.sentences = prepare_sentence_list(obj_a.sentences) obj_b.sentences = prepare_sentence_list(obj_b.sentences)