From e11deae7cd04b0bf5383b5619c4e8c9862284325 Mon Sep 17 00:00:00 2001 From: mschild Date: Sat, 25 Aug 2018 14:39:25 +0200 Subject: [PATCH] dismissed the special character removal (takes too long) --- src/Backend/utils/es_requester.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Backend/utils/es_requester.py b/src/Backend/utils/es_requester.py index 9d006852..8ce25d10 100644 --- a/src/Backend/utils/es_requester.py +++ b/src/Backend/utils/es_requester.py @@ -87,7 +87,8 @@ def extract_sentences(es_json, aggregate_duplicates=True): def prepare_sentence_comparison(sentence): - return re.sub('[^A-Za-z0-9]+', '', sentence).lower() + return sentence.lower() + # return re.sub('[^A-Za-z0-9]+', '', sentence).lower() # return ''.join(e for e in sentence if e.isalnum()).lower()