From 2f5c81156fcbd4048e98ac693d0dd49d27a55a22 Mon Sep 17 00:00:00 2001 From: John Hensley Date: Sat, 27 Jan 2018 17:43:51 -0500 Subject: [PATCH] Remove hyphen, quote from reserved characters Remove the hyphen and double quote from the list of reserved characters in Elastic searches, allowing more effective searches for hyphenated terms and quoted phrases. This introduces the possibility of Elastic rejecting malformed quoted queries, so additional exception handling is provided for that case. Actually presenting the more specific errors to users requires changes in edx-platform; installing this revision of edx-search without those changes will result in malformed queries producing the same "There was an error, try searching again." message any search error returns. --- .gitignore | 1 + search/api.py | 11 +++ search/elastic.py | 28 ++++--- search/tests/mock_search_engine.py | 28 +++---- search/tests/test_course_discovery.py | 60 ++++++++++++++- search/tests/test_course_discovery_views.py | 5 +- search/tests/test_views.py | 82 ++++++++++++++++++++- search/tests/utils.py | 10 ++- search/views.py | 12 ++- setup.py | 2 +- test_requirements.txt | 4 +- 11 files changed, 203 insertions(+), 40 deletions(-) diff --git a/.gitignore b/.gitignore index 06bad32c..d387259f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ .coverage .idea/ +.tox \ No newline at end of file diff --git a/search/api.py b/search/api.py index 3d085591..1065bce8 100644 --- a/search/api.py +++ b/search/api.py @@ -27,6 +27,17 @@ class NoSearchEngineError(Exception): pass +class QueryParseError(Exception): + """QueryParseError will be thrown if the query is malformed. + + If a query has mismatched quotes (e.g. '"some phrase'), raise a + more specific exception so the view can provide a more helpful + error message to the user. 
+ + """ + pass + + def perform_search( search_term, user=None, diff --git a/search/elastic.py b/search/elastic.py index 8aabcd0c..63d2921a 100644 --- a/search/elastic.py +++ b/search/elastic.py @@ -1,4 +1,4 @@ -""" Elatic Search implementation for courseware search index """ +""" Elastic Search implementation for courseware search index """ import copy import logging @@ -7,6 +7,7 @@ from elasticsearch import Elasticsearch, exceptions from elasticsearch.helpers import bulk, BulkIndexError +from search.api import QueryParseError from search.search_engine_base import SearchEngine from search.utils import ValueRange, _is_iterable @@ -17,7 +18,7 @@ # We _may_ want to use these for their special uses for certain queries, # but for analysed fields these kinds of characters are removed anyway, so # we can safely remove them from analysed matches -RESERVED_CHARACTERS = "+-=>=2.3.1,<3.0.0 - # edX libraries -e git+https://github.com/edx/event-tracking.git@0.1.0#egg=event-tracking