mirror of
https://github.com/wagtail/wagtail.git
synced 2024-12-01 11:41:20 +01:00
Add Fuzzy() search query class (#8498)
This commit is contained in:
parent
417263aa85
commit
db7a0c96e3
@ -216,6 +216,24 @@ For example:
|
||||
|
||||
If you are looking to implement phrase queries using the double-quote syntax, see :ref:`wagtailsearch_query_string_parsing`.
|
||||
|
||||
Fuzzy matching
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
Fuzzy matching will return documents which contain terms similar to the search term, as measured by a `Levenshtein edit distance <https://en.wikipedia.org/wiki/Levenshtein_distance>`_.
|
||||
|
||||
A maximum of one edit (transposition, insertion, substitution, or removal of a character) is permitted for three to five letter terms, and two edits for longer terms; shorter terms must match exactly.
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> from wagtail.search.query import Fuzzy
|
||||
|
||||
>>> Page.objects.search(Fuzzy("Hallo"))
|
||||
[<Page: Hello World>]
|
||||
|
||||
Fuzzy matching is supported by the Elasticsearch search backend only.
|
||||
|
||||
|
||||
.. _wagtailsearch_complex_queries:
|
||||
|
||||
|
@ -24,7 +24,7 @@ from wagtail.search.index import (
|
||||
SearchField,
|
||||
class_is_indexed,
|
||||
)
|
||||
from wagtail.search.query import And, Boost, MatchAll, Not, Or, Phrase, PlainText
|
||||
from wagtail.search.query import And, Boost, Fuzzy, MatchAll, Not, Or, Phrase, PlainText
|
||||
from wagtail.utils.utils import deep_update
|
||||
|
||||
|
||||
@ -445,6 +445,26 @@ class Elasticsearch5SearchQueryCompiler(BaseSearchQueryCompiler):
|
||||
|
||||
return {"multi_match": match_query}
|
||||
|
||||
def _compile_fuzzy_query(self, query, fields):
    """
    Build the Elasticsearch ``match`` clause for a fuzzy query.

    ``query`` supplies the search text via its ``query_string`` attribute and
    ``fields`` is the list of field names to search. Exactly one field is
    supported; the clause is emitted with ``fuzziness`` set to ``"AUTO"``.

    Raises ``NotImplementedError`` when the compiler was created with
    ``partial_match`` enabled, or when more than one field is given.
    """
    # Reject unsupported combinations up front (partial matching is checked
    # first, then the field count).
    if self.partial_match:
        raise NotImplementedError(
            "Fuzzy search is not supported with partial matches. Pass "
            "partial_match=False into the search method."
        )

    if len(fields) > 1:
        raise NotImplementedError(
            "Fuzzy search on multiple fields is not supported by the "
            "Elasticsearch search backend."
        )

    target_field = fields[0]
    match_options = {
        "query": query.query_string,
        "fuzziness": "AUTO",
    }
    return {"match": {target_field: match_options}}
|
||||
|
||||
def _compile_phrase_query(self, query, fields):
|
||||
if len(fields) == 1:
|
||||
return {"match_phrase": {fields[0]: query.query_string}}
|
||||
@ -494,6 +514,9 @@ class Elasticsearch5SearchQueryCompiler(BaseSearchQueryCompiler):
|
||||
elif isinstance(query, PlainText):
|
||||
return self._compile_plaintext_query(query, [field], boost)
|
||||
|
||||
elif isinstance(query, Fuzzy):
|
||||
return self._compile_fuzzy_query(query, [field])
|
||||
|
||||
elif isinstance(query, Phrase):
|
||||
return self._compile_phrase_query(query, [field])
|
||||
|
||||
@ -530,6 +553,9 @@ class Elasticsearch5SearchQueryCompiler(BaseSearchQueryCompiler):
|
||||
elif isinstance(self.query, Phrase):
|
||||
return self._compile_phrase_query(self.query, fields)
|
||||
|
||||
elif isinstance(self.query, Fuzzy):
|
||||
return self._compile_fuzzy_query(self.query, fields)
|
||||
|
||||
else:
|
||||
if len(fields) == 1:
|
||||
return self._compile_query(self.query, fields[0])
|
||||
|
@ -51,6 +51,14 @@ class Phrase(SearchQuery):
|
||||
return "<Phrase {}>".format(repr(self.query_string))
|
||||
|
||||
|
||||
class Fuzzy(SearchQuery):
    """Search query node holding the raw text of a fuzzy search."""

    def __init__(self, query_string: str):
        # The text is stored unmodified.
        self.query_string = query_string

    def __repr__(self):
        return f"<Fuzzy {self.query_string!r}>"
|
||||
|
||||
|
||||
class MatchAll(SearchQuery):
|
||||
def __repr__(self):
|
||||
return "<MatchAll>"
|
||||
|
@ -8,7 +8,7 @@ from django.test import TestCase
|
||||
from elasticsearch.serializer import JSONSerializer
|
||||
|
||||
from wagtail.search.backends.elasticsearch5 import Elasticsearch5SearchBackend
|
||||
from wagtail.search.query import MATCH_ALL, Phrase
|
||||
from wagtail.search.query import MATCH_ALL, Fuzzy, Phrase
|
||||
from wagtail.test.search import models
|
||||
|
||||
from .elasticsearch_common_tests import ElasticsearchCommonSearchBackendTests
|
||||
@ -560,6 +560,61 @@ class TestElasticsearch5SearchQuery(TestCase):
|
||||
expected_result = {"match_phrase": {"title": "Hello world"}}
|
||||
self.assertDictEqual(query_compiler.get_inner_query(), expected_result)
|
||||
|
||||
def test_fuzzy_query(self):
    # Compile a fuzzy query without naming any fields
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        partial_match=False,
    )

    # With no explicit fields the match clause targets "_all"
    compiled = compiler.get_inner_query()
    self.assertDictEqual(
        compiled,
        {"match": {"_all": {"query": "Hello world", "fuzziness": "AUTO"}}},
    )
|
||||
|
||||
def test_fuzzy_query_single_field(self):
    # Compile a fuzzy query restricted to the "title" field
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["title"],
        partial_match=False,
    )

    # The match clause must be keyed on the requested field
    compiled = compiler.get_inner_query()
    self.assertDictEqual(
        compiled,
        {"match": {"title": {"query": "Hello world", "fuzziness": "AUTO"}}},
    )
|
||||
|
||||
def test_fuzzy_query_multiple_fields_disallowed(self):
    # Build a fuzzy query spanning two fields
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["title", "body"],
        partial_match=False,
    )

    # The error is expected from compiling the query, not from construction
    self.assertRaises(NotImplementedError, compiler.get_inner_query)
|
||||
|
||||
def test_fuzzy_query_partial_match_disallowed(self):
    # Build a fuzzy query with partial matching switched on
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["_all"],
        partial_match=True,
    )

    # The error is expected from compiling the query, not from construction
    self.assertRaises(NotImplementedError, compiler.get_inner_query)
|
||||
|
||||
def test_year_filter(self):
|
||||
# Create a query
|
||||
query_compiler = self.query_compiler_class(
|
||||
|
@ -8,7 +8,7 @@ from django.test import TestCase
|
||||
from elasticsearch.serializer import JSONSerializer
|
||||
|
||||
from wagtail.search.backends.elasticsearch6 import Elasticsearch6SearchBackend
|
||||
from wagtail.search.query import MATCH_ALL, Phrase
|
||||
from wagtail.search.query import MATCH_ALL, Fuzzy, Phrase
|
||||
from wagtail.test.search import models
|
||||
|
||||
from .elasticsearch_common_tests import ElasticsearchCommonSearchBackendTests
|
||||
@ -605,6 +605,61 @@ class TestElasticsearch6SearchQuery(TestCase):
|
||||
expected_result = {"match_phrase": {"title": "Hello world"}}
|
||||
self.assertDictEqual(query_compiler.get_inner_query(), expected_result)
|
||||
|
||||
def test_fuzzy_query(self):
    # Compile a fuzzy query without naming any fields
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        partial_match=False,
    )

    # With no explicit fields the match clause targets "_all_text"
    compiled = compiler.get_inner_query()
    self.assertDictEqual(
        compiled,
        {"match": {"_all_text": {"query": "Hello world", "fuzziness": "AUTO"}}},
    )
|
||||
|
||||
def test_fuzzy_query_single_field(self):
    # Compile a fuzzy query restricted to the "title" field
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["title"],
        partial_match=False,
    )

    # The match clause must be keyed on the requested field
    compiled = compiler.get_inner_query()
    self.assertDictEqual(
        compiled,
        {"match": {"title": {"query": "Hello world", "fuzziness": "AUTO"}}},
    )
|
||||
|
||||
def test_fuzzy_query_multiple_fields_disallowed(self):
    # Build a fuzzy query spanning two fields
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["title", "body"],
        partial_match=False,
    )

    # The error is expected from compiling the query, not from construction
    self.assertRaises(NotImplementedError, compiler.get_inner_query)
|
||||
|
||||
def test_fuzzy_query_partial_match_disallowed(self):
    # Build a fuzzy query with partial matching switched on
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["_all"],
        partial_match=True,
    )

    # The error is expected from compiling the query, not from construction
    self.assertRaises(NotImplementedError, compiler.get_inner_query)
|
||||
|
||||
def test_year_filter(self):
|
||||
# Create a query
|
||||
query_compiler = self.query_compiler_class(
|
||||
|
@ -8,7 +8,7 @@ from django.test import TestCase
|
||||
from elasticsearch.serializer import JSONSerializer
|
||||
|
||||
from wagtail.search.backends.elasticsearch7 import Elasticsearch7SearchBackend
|
||||
from wagtail.search.query import MATCH_ALL, Phrase
|
||||
from wagtail.search.query import MATCH_ALL, Fuzzy, Phrase
|
||||
from wagtail.test.search import models
|
||||
|
||||
from .elasticsearch_common_tests import ElasticsearchCommonSearchBackendTests
|
||||
@ -605,6 +605,61 @@ class TestElasticsearch7SearchQuery(TestCase):
|
||||
expected_result = {"match_phrase": {"title": "Hello world"}}
|
||||
self.assertDictEqual(query_compiler.get_inner_query(), expected_result)
|
||||
|
||||
def test_fuzzy_query(self):
    # Compile a fuzzy query without naming any fields
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        partial_match=False,
    )

    # With no explicit fields the match clause targets "_all_text"
    compiled = compiler.get_inner_query()
    self.assertDictEqual(
        compiled,
        {"match": {"_all_text": {"query": "Hello world", "fuzziness": "AUTO"}}},
    )
|
||||
|
||||
def test_fuzzy_query_single_field(self):
    # Compile a fuzzy query restricted to the "title" field
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["title"],
        partial_match=False,
    )

    # The match clause must be keyed on the requested field
    compiled = compiler.get_inner_query()
    self.assertDictEqual(
        compiled,
        {"match": {"title": {"query": "Hello world", "fuzziness": "AUTO"}}},
    )
|
||||
|
||||
def test_fuzzy_query_multiple_fields_disallowed(self):
    # Build a fuzzy query spanning two fields
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["title", "body"],
        partial_match=False,
    )

    # The error is expected from compiling the query, not from construction
    self.assertRaises(NotImplementedError, compiler.get_inner_query)
|
||||
|
||||
def test_fuzzy_query_partial_match_disallowed(self):
    # Build a fuzzy query with partial matching switched on
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["_all"],
        partial_match=True,
    )

    # The error is expected from compiling the query, not from construction
    self.assertRaises(NotImplementedError, compiler.get_inner_query)
|
||||
|
||||
def test_year_filter(self):
|
||||
# Create a query
|
||||
query_compiler = self.query_compiler_class(
|
||||
|
Loading…
Reference in New Issue
Block a user