Skip to content

Commit

Permalink
Merge pull request #49 from open-craft/ciuin/reduce-elastic-reserved-…
Browse files Browse the repository at this point in the history
…characters

Reduce set of reserved characters in Elastic searches
  • Loading branch information
awaisdar001 authored Jun 28, 2018
2 parents fa6e0bb + 2f5c811 commit efe2243
Show file tree
Hide file tree
Showing 11 changed files with 203 additions and 40 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@

.coverage
.idea/
.tox
11 changes: 11 additions & 0 deletions search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ class NoSearchEngineError(Exception):
pass


class QueryParseError(Exception):
"""QueryParseError will be thrown if the query is malformed.
If a query has mismatched quotes (e.g. '"some phrase', return a
more specific exception so the view can provide a more helpful
error message to the user.
"""
pass


def perform_search(
search_term,
user=None,
Expand Down
28 changes: 17 additions & 11 deletions search/elastic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Elatic Search implementation for courseware search index """
""" Elastic Search implementation for courseware search index """
import copy
import logging

Expand All @@ -7,6 +7,7 @@
from elasticsearch import Elasticsearch, exceptions
from elasticsearch.helpers import bulk, BulkIndexError

from search.api import QueryParseError
from search.search_engine_base import SearchEngine
from search.utils import ValueRange, _is_iterable

Expand All @@ -17,7 +18,7 @@
# We _may_ want to use these for their special uses for certain queries,
# but for analysed fields these kinds of characters are removed anyway, so
# we can safely remove them from analysed matches
RESERVED_CHARACTERS = "+-=><!(){}[]^\"~*:\\/&|?"
RESERVED_CHARACTERS = "+=><!(){}[]^~*:\\/&|?"


def _translate_hits(es_response):
Expand Down Expand Up @@ -129,12 +130,12 @@ def filter_item(field):
}
]
}
else:
return {
"missing": {
"field": field
}

return {
"missing": {
"field": field
}
}

return [filter_item(field) for field in filter_dictionary]

Expand Down Expand Up @@ -433,7 +434,7 @@ def search(self,
facet_terms=None,
exclude_ids=None,
use_field_match=False,
**kwargs): # pylint: disable=too-many-arguments, too-many-locals, too-many-branches
**kwargs): # pylint: disable=too-many-arguments, too-many-locals, too-many-branches, arguments-differ
"""
Implements call to search the index for the desired content.
Expand Down Expand Up @@ -599,8 +600,13 @@ def search(self,
**kwargs
)
except exceptions.ElasticsearchException as ex:
# log information and re-raise
log.exception("error while searching index - %s", ex.message)
raise
message = unicode(ex)
if 'QueryParsingException' in message:
log.exception("Malformed search query: %s", message)
raise QueryParseError('Malformed search query.')
else:
# log information and re-raise
log.exception("error while searching index - %s", ex.message)
raise

return _translate_hits(es_response)
28 changes: 12 additions & 16 deletions search/tests/mock_search_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def _find_field(doc, field_name):

if isinstance(field_value, dict):
return _find_field(field_value, remaining_path)
else:
return field_value

return field_value


def _filter_intersection(documents_to_search, dictionary_object, include_blanks=False):
Expand All @@ -66,7 +66,7 @@ def value_matches(doc, field_name, field_value):
return include_blanks

# if we have a string that we are trying to process as a date object
if isinstance(field_value, DateRange) or isinstance(field_value, datetime):
if isinstance(field_value, (DateRange, datetime)):
if isinstance(compare_value, basestring):
compare_value = json_date_to_datetime(compare_value)

Expand Down Expand Up @@ -98,8 +98,7 @@ def value_matches(doc, field_name, field_value):
elif _is_iterable(compare_value) and _is_iterable(field_value):
return any((unicode(item) in field_value for item in compare_value))

else:
return compare_value == field_value
return compare_value == field_value

filtered_documents = documents_to_search
for field_name, field_value in dictionary_object.items():
Expand Down Expand Up @@ -321,20 +320,17 @@ def __init__(self, index=None):
super(MockSearchEngine, self).__init__(index)
MockSearchEngine.load_index(self.index_name)

def index(self, doc_type, sources):
def index(self, doc_type, sources): # pylint: disable=arguments-differ
""" Add/update documents of given type to the index """
if MockSearchEngine._disabled:
return None

doc_ids = [s["id"] for s in sources if "id" in s]
MockSearchEngine.remove_documents(self.index_name, doc_type, doc_ids)
MockSearchEngine.add_documents(self.index_name, doc_type, sources)
if not MockSearchEngine._disabled:
doc_ids = [s["id"] for s in sources if "id" in s]
MockSearchEngine.remove_documents(self.index_name, doc_type, doc_ids)
MockSearchEngine.add_documents(self.index_name, doc_type, sources)

def remove(self, doc_type, doc_ids):
def remove(self, doc_type, doc_ids): # pylint: disable=arguments-differ
""" Remove documents of type with given ids from the index """
if MockSearchEngine._disabled:
return None
MockSearchEngine.remove_documents(self.index_name, doc_type, doc_ids)
if not MockSearchEngine._disabled:
MockSearchEngine.remove_documents(self.index_name, doc_type, doc_ids)

def search(self,
query_string=None,
Expand Down
60 changes: 59 additions & 1 deletion search/tests/test_course_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
""" Tests for search functionalty """
import copy
from datetime import datetime
import ddt

from django.core.cache import cache
from django.test import TestCase
from django.test.utils import override_settings
from elasticsearch import Elasticsearch

from search.api import course_discovery_search, NoSearchEngineError
from search.elastic import ElasticSearchEngine
from search.elastic import ElasticSearchEngine, QueryParseError
from search.tests.utils import SearcherMixin, TEST_INDEX_NAME
from .mock_search_engine import MockSearchEngine

Expand Down Expand Up @@ -100,6 +102,9 @@ def setUp(self):
# Make sure that we are fresh
_elasticsearch.indices.delete(index=TEST_INDEX_NAME, ignore=[400, 404])

# remove cached property mappings (along with everything else)
cache.clear()

config_body = {}
# ignore unexpected-keyword-arg; ES python client documents that it can be used
_elasticsearch.indices.create(index=TEST_INDEX_NAME, ignore=400, body=config_body)
Expand Down Expand Up @@ -298,13 +303,66 @@ def test_faceting_override(self):


@override_settings(SEARCH_ENGINE="search.tests.utils.ForceRefreshElasticSearchEngine")
@ddt.ddt
class TestElasticCourseDiscoverySearch(TestMockCourseDiscoverySearch):
""" version of tests that use Elastic Backed index instead of mock """

def setUp(self):
super(TestElasticCourseDiscoverySearch, self).setUp()
self.searcher.index("doc_type_that_is_meaninless_to_bootstrap_index", [{"test_doc_type": "bootstrap"}])

def test_course_matching_empty_index(self):
""" Check for empty result count before indexing """
results = course_discovery_search("defensive")
self.assertEqual(results["total"], 0)

@ddt.data(
('defensive', 1),
('offensive', 2),
('move', 3),
('"offensive move"', 1),
('"offensive move"', 1),
('"highly-offensive"', 1),
('"highly-offensive teams"', 1),
)
@ddt.unpack
# pylint: disable=arguments-differ
def test_course_matching(self, term, result_count):
""" Make sure that matches within content can be located and processed """
DemoCourse.get_and_index(self.searcher, {
"content": {
"short_description": "This is a defensive move",
"overview": "Defensive teams often win"
}
})

DemoCourse.get_and_index(self.searcher, {
"content": {
"short_description": "This is an offensive move",
"overview": "Offensive teams often win"
}
})

DemoCourse.get_and_index(self.searcher, {
"content": {
"short_description": "This is a hyphenated move",
"overview": "Highly-offensive teams often win"
}
})

results = course_discovery_search()

self.assertEqual(results["total"], 3)

results = course_discovery_search(term)
self.assertEqual(results["total"], result_count)

def test_malformed_query_handling(self):
"""Make sure that mismatched quotes produce a specific exception. """

with self.assertRaises(QueryParseError):
course_discovery_search('"missing quote')


@override_settings(SEARCH_ENGINE=None)
class TestNone(TestCase):
Expand Down
5 changes: 1 addition & 4 deletions search/tests/test_course_discovery_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
from django.test.utils import override_settings

from search.tests.tests import TEST_INDEX_NAME
from search.tests.utils import post_discovery_request
from .test_views import MockSearchUrlTest
from .test_course_discovery import DemoCourse
from search.tests.utils import post_discovery_request

# Any class that inherits from TestCase will cause too-many-public-methods pylint error
# pylint: disable=too-many-public-methods
Expand Down Expand Up @@ -35,9 +35,6 @@ def setUp(self):
self.searcher, {"content": {"short_description": "Find this one somehow"}}
)

def tearDown(self):
super(DiscoveryUrlTest, self).tearDown()

def test_search_from_url(self):
""" test searching using the url """
code, results = post_discovery_request({})
Expand Down
82 changes: 81 additions & 1 deletion search/tests/test_views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" High-level view tests"""
from datetime import datetime
import ddt

from django.core.urlresolvers import Resolver404, resolve
from django.test import TestCase
Expand All @@ -9,7 +10,7 @@
from search.search_engine_base import SearchEngine
from search.tests.mock_search_engine import MockSearchEngine
from search.tests.tests import TEST_INDEX_NAME
from search.tests.utils import post_request, SearcherMixin
from search.tests.utils import post_discovery_request, post_request, SearcherMixin


# Any class that inherits from TestCase will cause too-many-public-methods pylint error
Expand Down Expand Up @@ -447,3 +448,82 @@ def test_search_from_url(self):
searcher = SearchEngine.get_search_engine(TEST_INDEX_NAME)
with self.assertRaises(StandardError):
searcher.index("courseware_content", [{"id": "FAKE_ID_3", "content": {"text": "Here comes the sun"}}])


@override_settings(SEARCH_ENGINE="search.tests.utils.ForceRefreshElasticSearchEngine")
@override_settings(ELASTIC_FIELD_MAPPINGS={"start_date": {"type": "date"}})
@override_settings(COURSEWARE_INDEX_NAME=TEST_INDEX_NAME)
@ddt.ddt
class ElasticSearchUrlTest(TestCase, SearcherMixin):
"""Elastic-specific tests"""
def setUp(self):
self.searcher.index(
"courseware_content",
[
{
"course": "ABC/DEF/GHI",
"id": "FAKE_ID_1",
"content": {
"text": "It seems like k-means clustering would work in this context."
},
"test_date": datetime(2015, 1, 1),
"test_string": "ABC, It's easy as 123"
}
]
)
self.searcher.index(
"courseware_content",
[
{
"course": "ABC/DEF/GHI",
"id": "FAKE_ID_2",
"content": {
"text": "It looks like k-means clustering could work in this context."
}
}
]
)
self.searcher.index(
"courseware_content",
[
{
"course": "ABC/DEF/GHI",
"id": "FAKE_ID_3",
"content": {
"text": "It looks like k means something different in this context."
}
}
]
)

@ddt.data(
# Quoted phrases
('"in this context"', None, 3),
('"in this context"', "ABC/DEF/GHI", 3),
('"looks like"', None, 2),
('"looks like"', "ABC/DEF/GHI", 2),
# Hyphenated phrases
('k-means', None, 3),
('k-means', "ABC/DEF/GHI", 3),
)
@ddt.unpack
def test_valid_search(self, query, course_id, result_count):
code, results = post_request({"search_string": query}, course_id)
self.assertTrue(199 < code < 300)
self.assertEqual(results["total"], result_count)

def test_malformed_query_handling(self):
# root
code, results = post_request({"search_string": "\"missing quote"})
self.assertGreater(code, 499)
self.assertEqual(results["error"], 'Your query seems malformed. Check for unmatched quotes.')

# course ID
code, results = post_request({"search_string": "\"missing quote"}, "ABC/DEF/GHI")
self.assertGreater(code, 499)
self.assertEqual(results["error"], 'Your query seems malformed. Check for unmatched quotes.')

# course discovery
code, results = post_discovery_request({"search_string": "\"missing quote"})
self.assertGreater(code, 499)
self.assertEqual(results["error"], 'Your query seems malformed. Check for unmatched quotes.')
10 changes: 7 additions & 3 deletions search/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,21 +63,25 @@ def remove(self, doc_type, doc_ids, **kwargs):
class ErroringSearchEngine(MockSearchEngine):
""" Override to generate search engine error to test """

def search(self, query_string=None, field_dictionary=None, filter_dictionary=None, **kwargs):
def search(self,
query_string=None,
field_dictionary=None,
filter_dictionary=None,
**kwargs): # pylint: disable=arguments-differ
raise StandardError("There is a problem here")


class ErroringIndexEngine(MockSearchEngine):
""" Override to generate search engine error to test """

def index(self, doc_type, sources, **kwargs): # pylint: disable=unused-argument
def index(self, doc_type, sources, **kwargs): # pylint: disable=unused-argument, arguments-differ
raise StandardError("There is a problem here")


class ErroringElasticImpl(Elasticsearch):
""" Elasticsearch implementation that throws exceptions"""

# pylint: disable=unused-argument
def search(self, **kwargs):
def search(self, **kwargs): # pylint: disable=arguments-differ
""" this will definitely fail """
raise exceptions.ElasticsearchException("This search operation failed")
Loading

0 comments on commit efe2243

Please sign in to comment.