0
0
mirror of https://github.com/wagtail/wagtail.git synced 2024-12-01 11:41:20 +01:00

Add Fuzzy() search query class (#8498)

This commit is contained in:
Nick Smith 2022-06-20 10:41:05 +01:00 committed by GitHub
parent 417263aa85
commit db7a0c96e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 221 additions and 4 deletions

View File

@ -216,6 +216,24 @@ For example:
If you are looking to implement phrase queries using the double-quote syntax, see :ref:`wagtailsearch_query_string_parsing`.
Fuzzy matching
^^^^^^^^^^^^^^
Fuzzy matching will return documents which contain terms similar to the search term, as measured by a `Levenshtein edit distance <https://en.wikipedia.org/wiki/Levenshtein_distance>`_.
A maximum of one edit (substitution, transposition, insertion, or removal of a character) is permitted for three to five letter terms, two edits for longer terms, and shorter terms must match exactly.
For example:
.. code-block:: python
>>> from wagtail.search.query import Fuzzy
>>> Page.objects.search(Fuzzy("Hallo"))
[<Page: Hello World>]
Fuzzy matching is supported by the Elasticsearch search backend only.
.. _wagtailsearch_complex_queries:

View File

@ -24,7 +24,7 @@ from wagtail.search.index import (
SearchField,
class_is_indexed,
)
from wagtail.search.query import And, Boost, MatchAll, Not, Or, Phrase, PlainText
from wagtail.search.query import And, Boost, Fuzzy, MatchAll, Not, Or, Phrase, PlainText
from wagtail.utils.utils import deep_update
@ -445,6 +445,26 @@ class Elasticsearch5SearchQueryCompiler(BaseSearchQueryCompiler):
return {"multi_match": match_query}
def _compile_fuzzy_query(self, query, fields):
if self.partial_match:
raise NotImplementedError(
"Fuzzy search is not supported with partial matches. Pass "
"partial_match=False into the search method."
)
elif len(fields) > 1:
raise NotImplementedError(
"Fuzzy search on multiple fields is not supported by the "
"Elasticsearch search backend."
)
return {
"match": {
fields[0]: {
"query": query.query_string,
"fuzziness": "AUTO",
}
}
}
def _compile_phrase_query(self, query, fields):
if len(fields) == 1:
return {"match_phrase": {fields[0]: query.query_string}}
@ -494,6 +514,9 @@ class Elasticsearch5SearchQueryCompiler(BaseSearchQueryCompiler):
elif isinstance(query, PlainText):
return self._compile_plaintext_query(query, [field], boost)
elif isinstance(query, Fuzzy):
return self._compile_fuzzy_query(query, [field])
elif isinstance(query, Phrase):
return self._compile_phrase_query(query, [field])
@ -530,6 +553,9 @@ class Elasticsearch5SearchQueryCompiler(BaseSearchQueryCompiler):
elif isinstance(self.query, Phrase):
return self._compile_phrase_query(self.query, fields)
elif isinstance(self.query, Fuzzy):
return self._compile_fuzzy_query(self.query, fields)
else:
if len(fields) == 1:
return self._compile_query(self.query, fields[0])

View File

@ -51,6 +51,14 @@ class Phrase(SearchQuery):
return "<Phrase {}>".format(repr(self.query_string))
class Fuzzy(SearchQuery):
    """Search query wrapping a single query string for fuzzy matching."""

    def __init__(self, query_string: str):
        """Store the text to be searched for."""
        self.query_string = query_string

    def __repr__(self):
        """Return a debug representation such as ``<Fuzzy 'Hallo'>``."""
        quoted = repr(self.query_string)
        return "<Fuzzy {}>".format(quoted)
class MatchAll(SearchQuery):
def __repr__(self):
return "<MatchAll>"

View File

@ -8,7 +8,7 @@ from django.test import TestCase
from elasticsearch.serializer import JSONSerializer
from wagtail.search.backends.elasticsearch5 import Elasticsearch5SearchBackend
from wagtail.search.query import MATCH_ALL, Phrase
from wagtail.search.query import MATCH_ALL, Fuzzy, Phrase
from wagtail.test.search import models
from .elasticsearch_common_tests import ElasticsearchCommonSearchBackendTests
@ -560,6 +560,61 @@ class TestElasticsearch5SearchQuery(TestCase):
expected_result = {"match_phrase": {"title": "Hello world"}}
self.assertDictEqual(query_compiler.get_inner_query(), expected_result)
def test_fuzzy_query(self):
    """A Fuzzy query without explicit fields compiles to a fuzzy match on ``_all``."""
    compiler = self.query_compiler_class(
        models.Book.objects.all(), Fuzzy("Hello world"), partial_match=False
    )

    fuzzy_clause = {"query": "Hello world", "fuzziness": "AUTO"}
    self.assertDictEqual(
        compiler.get_inner_query(), {"match": {"_all": fuzzy_clause}}
    )
def test_fuzzy_query_single_field(self):
    """A Fuzzy query restricted to ``title`` compiles to a fuzzy match on that field."""
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["title"],
        partial_match=False,
    )

    fuzzy_clause = {"query": "Hello world", "fuzziness": "AUTO"}
    self.assertDictEqual(
        compiler.get_inner_query(), {"match": {"title": fuzzy_clause}}
    )
def test_fuzzy_query_multiple_fields_disallowed(self):
    """Compiling a Fuzzy query over two fields raises NotImplementedError."""
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["title", "body"],
        partial_match=False,
    )

    # The error is expected at compile time, not at construction time.
    self.assertRaises(NotImplementedError, compiler.get_inner_query)
def test_fuzzy_query_partial_match_disallowed(self):
    """Compiling a Fuzzy query with ``partial_match=True`` raises NotImplementedError."""
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["_all"],
        partial_match=True,
    )

    # The error is expected at compile time, not at construction time.
    self.assertRaises(NotImplementedError, compiler.get_inner_query)
def test_year_filter(self):
# Create a query
query_compiler = self.query_compiler_class(

View File

@ -8,7 +8,7 @@ from django.test import TestCase
from elasticsearch.serializer import JSONSerializer
from wagtail.search.backends.elasticsearch6 import Elasticsearch6SearchBackend
from wagtail.search.query import MATCH_ALL, Phrase
from wagtail.search.query import MATCH_ALL, Fuzzy, Phrase
from wagtail.test.search import models
from .elasticsearch_common_tests import ElasticsearchCommonSearchBackendTests
@ -605,6 +605,61 @@ class TestElasticsearch6SearchQuery(TestCase):
expected_result = {"match_phrase": {"title": "Hello world"}}
self.assertDictEqual(query_compiler.get_inner_query(), expected_result)
def test_fuzzy_query(self):
    """A Fuzzy query without explicit fields compiles to a fuzzy match on ``_all_text``."""
    compiler = self.query_compiler_class(
        models.Book.objects.all(), Fuzzy("Hello world"), partial_match=False
    )

    fuzzy_clause = {"query": "Hello world", "fuzziness": "AUTO"}
    self.assertDictEqual(
        compiler.get_inner_query(), {"match": {"_all_text": fuzzy_clause}}
    )
def test_fuzzy_query_single_field(self):
    """A Fuzzy query restricted to ``title`` compiles to a fuzzy match on that field."""
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["title"],
        partial_match=False,
    )

    fuzzy_clause = {"query": "Hello world", "fuzziness": "AUTO"}
    self.assertDictEqual(
        compiler.get_inner_query(), {"match": {"title": fuzzy_clause}}
    )
def test_fuzzy_query_multiple_fields_disallowed(self):
    """Compiling a Fuzzy query over two fields raises NotImplementedError."""
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["title", "body"],
        partial_match=False,
    )

    # The error is expected at compile time, not at construction time.
    self.assertRaises(NotImplementedError, compiler.get_inner_query)
def test_fuzzy_query_partial_match_disallowed(self):
    """Compiling a Fuzzy query with ``partial_match=True`` raises NotImplementedError."""
    # NOTE(review): the sibling test in this class expects "_all_text" as the
    # default field, while "_all" is passed here -- confirm this is intended.
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["_all"],
        partial_match=True,
    )

    # The error is expected at compile time, not at construction time.
    self.assertRaises(NotImplementedError, compiler.get_inner_query)
def test_year_filter(self):
# Create a query
query_compiler = self.query_compiler_class(

View File

@ -8,7 +8,7 @@ from django.test import TestCase
from elasticsearch.serializer import JSONSerializer
from wagtail.search.backends.elasticsearch7 import Elasticsearch7SearchBackend
from wagtail.search.query import MATCH_ALL, Phrase
from wagtail.search.query import MATCH_ALL, Fuzzy, Phrase
from wagtail.test.search import models
from .elasticsearch_common_tests import ElasticsearchCommonSearchBackendTests
@ -605,6 +605,61 @@ class TestElasticsearch7SearchQuery(TestCase):
expected_result = {"match_phrase": {"title": "Hello world"}}
self.assertDictEqual(query_compiler.get_inner_query(), expected_result)
def test_fuzzy_query(self):
    """A Fuzzy query without explicit fields compiles to a fuzzy match on ``_all_text``."""
    compiler = self.query_compiler_class(
        models.Book.objects.all(), Fuzzy("Hello world"), partial_match=False
    )

    fuzzy_clause = {"query": "Hello world", "fuzziness": "AUTO"}
    self.assertDictEqual(
        compiler.get_inner_query(), {"match": {"_all_text": fuzzy_clause}}
    )
def test_fuzzy_query_single_field(self):
    """A Fuzzy query restricted to ``title`` compiles to a fuzzy match on that field."""
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["title"],
        partial_match=False,
    )

    fuzzy_clause = {"query": "Hello world", "fuzziness": "AUTO"}
    self.assertDictEqual(
        compiler.get_inner_query(), {"match": {"title": fuzzy_clause}}
    )
def test_fuzzy_query_multiple_fields_disallowed(self):
    """Compiling a Fuzzy query over two fields raises NotImplementedError."""
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["title", "body"],
        partial_match=False,
    )

    # The error is expected at compile time, not at construction time.
    self.assertRaises(NotImplementedError, compiler.get_inner_query)
def test_fuzzy_query_partial_match_disallowed(self):
    """Compiling a Fuzzy query with ``partial_match=True`` raises NotImplementedError."""
    # NOTE(review): the sibling test in this class expects "_all_text" as the
    # default field, while "_all" is passed here -- confirm this is intended.
    compiler = self.query_compiler_class(
        models.Book.objects.all(),
        Fuzzy("Hello world"),
        fields=["_all"],
        partial_match=True,
    )

    # The error is expected at compile time, not at construction time.
    self.assertRaises(NotImplementedError, compiler.get_inner_query)
def test_year_filter(self):
# Create a query
query_compiler = self.query_compiler_class(