mirror of
https://github.com/wagtail/wagtail.git
synced 2024-12-01 11:41:20 +01:00
Don't use edgengram as query analyser
When a field uses partial matching, the "edgengram_analyzer" is added to that field. This breaks the field data down into "ngrams" like so: Hello -> "H", "He", "Hel", "Hell", "Hello". This allows a user's query for "Hel" to match the above text. The issue that this commit solves is that this analyser was accidentally set as both the index analyser (as described above) and the query analyser. Setting it as the query analyser instructs Elasticsearch to perform the above transformation on the user's input in the search box as well. So if, for example, there was a document with the word "Horse" in it, a user's query for "Hello" would match it simply because both words start with the letter "H". The solution is to set the "index_analyzer" field instead of the "analyzer" field (which also sets the query analyser).
This commit is contained in:
parent
869a16f5fa
commit
7af321a2e9
@ -62,7 +62,7 @@ class ElasticSearchMapping(object):
|
||||
mapping['boost'] = field.boost
|
||||
|
||||
if field.partial_match:
|
||||
mapping['analyzer'] = 'edgengram_analyzer'
|
||||
mapping['index_analyzer'] = 'edgengram_analyzer'
|
||||
|
||||
mapping['include_in_all'] = True
|
||||
elif isinstance(field, FilterField):
|
||||
@ -80,7 +80,7 @@ class ElasticSearchMapping(object):
|
||||
fields = {
|
||||
'pk': dict(type='string', index='not_analyzed', store='yes', include_in_all=False),
|
||||
'content_type': dict(type='string', index='not_analyzed', include_in_all=False),
|
||||
'_partials': dict(type='string', analyzer='edgengram_analyzer', include_in_all=False),
|
||||
'_partials': dict(type='string', index_analyzer='edgengram_analyzer', include_in_all=False),
|
||||
}
|
||||
|
||||
fields.update(dict(
|
||||
|
@ -89,7 +89,7 @@ class TestElasticSearchBackend(BackendTests, TestCase):
|
||||
|
||||
# Add some test data
|
||||
obj = models.SearchTest()
|
||||
obj.title = "Ĥéỻø"
|
||||
obj.title = "Ĥéllø"
|
||||
obj.live = True
|
||||
obj.save()
|
||||
self.backend.add(obj)
|
||||
@ -103,6 +103,38 @@ class TestElasticSearchBackend(BackendTests, TestCase):
|
||||
self.assertEqual(len(results), 1)
|
||||
self.assertEqual(results[0].id, obj.id)
|
||||
|
||||
def test_query_analyser(self):
    """
    Check that a field indexed with edgengram_analyzer is not also queried with it.

    If the edgengram analyser were applied to the query as well, a search for
    "Horse" would match a document titled "Hello" merely because both start
    with the letter "H".
    """
    # Start from a clean index that knows about both test models
    self.backend.reset_index()
    for model in (models.SearchTest, models.SearchTestChild):
        self.backend.add_type(model)

    # Index a single document titled "Hello"
    greeting = models.SearchTest()
    greeting.title = "Hello"
    greeting.live = True
    greeting.save()
    self.backend.add(greeting)

    # Refresh the index before querying it
    self.backend.refresh_index()

    # Searching for the exact title must return the document
    hello_hits = self.backend.search("Hello", models.SearchTest.objects.all())
    self.assertEqual(len(hello_hits), 1)

    # "Horse" shares only its first letter with "Hello"; this must not count as a match
    horse_hits = self.backend.search("Horse", models.SearchTest.objects.all())
    self.assertEqual(len(horse_hits), 0)
|
||||
|
||||
|
||||
class TestElasticSearchQuery(TestCase):
|
||||
def assertDictEqual(self, a, b):
|
||||
|
Loading…
Reference in New Issue
Block a user