0
0
mirror of https://github.com/wagtail/wagtail.git synced 2024-12-01 11:41:20 +01:00

Don't set "index: not_analyzed" on filter fields in ES5

We now convey this information to Elasticsearch through the field type:
only the "text" type is analyzed, so any string field that we don't want
analyzed can use the "keyword" type instead.
This commit is contained in:
Karl Hobley 2016-11-08 10:08:28 +00:00 committed by Matt Westcott
parent abb993f901
commit 736138ae0e
3 changed files with 28 additions and 16 deletions

View File

@ -43,6 +43,7 @@ class ElasticsearchMapping(object):
keyword_type = 'string'
text_type = 'string'
set_index_not_analyzed_on_filter_fields = True
# Contains the configuration required to use the edgengram_analyzer
# on a field. It's different in Elasticsearch 2 so it's been put in
@ -100,7 +101,11 @@ class ElasticsearchMapping(object):
if mapping['type'] == 'string':
mapping['type'] = self.keyword_type
mapping['index'] = 'not_analyzed'
if self.set_index_not_analyzed_on_filter_fields:
# Not required on ES5 as that uses the "keyword" type for
# filtered string fields
mapping['index'] = 'not_analyzed'
mapping['include_in_all'] = False
if 'es_extra' in field.kwargs:
@ -112,12 +117,18 @@ class ElasticsearchMapping(object):
def get_mapping(self):
# Make field list
fields = {
'pk': dict(type=self.keyword_type, index='not_analyzed', store=True, include_in_all=False),
'content_type': dict(type=self.keyword_type, index='not_analyzed', include_in_all=False),
'pk': dict(type=self.keyword_type, store=True, include_in_all=False),
'content_type': dict(type=self.keyword_type, include_in_all=False),
'_partials': dict(type=self.text_type, include_in_all=False),
}
fields['_partials'].update(self.edgengram_analyzer_config)
if self.set_index_not_analyzed_on_filter_fields:
# Not required on ES5 as that uses the "keyword" type for
# filtered string fields
fields['pk']['index'] = 'not_analyzed'
fields['content_type']['index'] = 'not_analyzed'
fields.update(dict(
self.get_field_mapping(field) for field in self.model.get_search_fields()
))

View File

@ -7,6 +7,7 @@ from .elasticsearch2 import (
class Elasticsearch5Mapping(Elasticsearch2Mapping):
keyword_type = 'keyword'
text_type = 'text'
set_index_not_analyzed_on_filter_fields = False
class Elasticsearch5SearchQuery(Elasticsearch2SearchQuery):

View File

@ -753,20 +753,20 @@ class TestElasticsearch5Mapping(TestCase):
expected_result = {
'searchtests_searchtest': {
'properties': {
'pk': {'index': 'not_analyzed', 'type': 'keyword', 'store': True, 'include_in_all': False},
'content_type': {'index': 'not_analyzed', 'type': 'keyword', 'include_in_all': False},
'pk': {'type': 'keyword', 'store': True, 'include_in_all': False},
'content_type': {'type': 'keyword', 'include_in_all': False},
'_partials': {'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard', 'include_in_all': False, 'type': 'text'},
'live_filter': {'index': 'not_analyzed', 'type': 'boolean', 'include_in_all': False},
'published_date_filter': {'index': 'not_analyzed', 'type': 'date', 'include_in_all': False},
'live_filter': {'type': 'boolean', 'include_in_all': False},
'published_date_filter': {'type': 'date', 'include_in_all': False},
'title': {'type': 'text', 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_filter': {'index': 'not_analyzed', 'type': 'keyword', 'include_in_all': False},
'title_filter': {'type': 'keyword', 'include_in_all': False},
'content': {'type': 'text', 'include_in_all': True},
'callable_indexed_field': {'type': 'text', 'include_in_all': True},
'tags': {
'type': 'nested',
'properties': {
'name': {'type': 'text', 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'slug_filter': {'index': 'not_analyzed', 'type': 'keyword', 'include_in_all': False},
'slug_filter': {'type': 'keyword', 'include_in_all': False},
}
},
}
@ -843,25 +843,25 @@ class TestElasticsearch5MappingInheritance(TestCase):
'properties': {
'title': {'type': 'text', 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'search_description': {'type': 'text', 'include_in_all': True},
'live_filter': {'index': 'not_analyzed', 'type': 'boolean', 'include_in_all': False},
'live_filter': {'type': 'boolean', 'include_in_all': False},
}
},
# Inherited
'pk': {'index': 'not_analyzed', 'type': 'keyword', 'store': True, 'include_in_all': False},
'content_type': {'index': 'not_analyzed', 'type': 'keyword', 'include_in_all': False},
'pk': {'type': 'keyword', 'store': True, 'include_in_all': False},
'content_type': {'type': 'keyword', 'include_in_all': False},
'_partials': {'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard', 'include_in_all': False, 'type': 'text'},
'live_filter': {'index': 'not_analyzed', 'type': 'boolean', 'include_in_all': False},
'published_date_filter': {'index': 'not_analyzed', 'type': 'date', 'include_in_all': False},
'live_filter': {'type': 'boolean', 'include_in_all': False},
'published_date_filter': {'type': 'date', 'include_in_all': False},
'title': {'type': 'text', 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_filter': {'index': 'not_analyzed', 'type': 'keyword', 'include_in_all': False},
'title_filter': {'type': 'keyword', 'include_in_all': False},
'content': {'type': 'text', 'include_in_all': True},
'callable_indexed_field': {'type': 'text', 'include_in_all': True},
'tags': {
'type': 'nested',
'properties': {
'name': {'type': 'text', 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'slug_filter': {'index': 'not_analyzed', 'type': 'keyword', 'include_in_all': False},
'slug_filter': {'type': 'keyword', 'include_in_all': False},
}
},
}