From 437e3b9258c87229498c43a70b43ea8739eca988 Mon Sep 17 00:00:00 2001 From: Robbie Mackay Date: Mon, 20 Jul 2020 12:41:07 +1200 Subject: [PATCH] Replace unidecode with anyascii in wagtail.core.utils.string_to_ascii - Add anyascii to replace unidecode - Update wagtail.core.utils.string_to_ascii to use anyascii. - Anyascii has a similar but not exactly the same encoding - see updates to tests. Refs https://github.com/wagtail/wagtail/issues/3311 --- CHANGELOG.txt | 1 + CONTRIBUTORS.rst | 1 + docs/releases/2.11.rst | 1 + setup.py | 4 ++++ wagtail/core/tests/test_utils.py | 6 +++--- wagtail/core/utils.py | 7 ++----- 6 files changed, 12 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index d9b40f4c73..5e42048992 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -8,6 +8,7 @@ Changelog * Add `register_snippet_listing_buttons` and `construct_snippet_listing_buttons` hooks and documentation (Karl Hobley. Sponsored by the Mozilla Foundation) * Add `wagtail --version` to available Wagtail CLI commands (Kalob Taulien) * Add `hooks.register_temporarily` utility function for testing hooks (Karl Hobley. Sponsored by the Mozilla Foundation) + * Remove unidecode and use anyascii for Unicode to ASCII conversion (Robbie Mackay) * Fix: Make page-level actions accessible to keyboard users in page listing tables (Jesse Menn) * Fix: `WAGTAILFRONTENDCACHE_LANGUAGES` was being interpreted incorrectly.
It now accepts a list of strings, as documented (Karl Hobley) * Fix: Update oEmbed endpoints to use https where available (Matt Westcott) diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst index 4c14fd4e9f..ee83795b10 100644 --- a/CONTRIBUTORS.rst +++ b/CONTRIBUTORS.rst @@ -473,6 +473,7 @@ Contributors * Steven Wood * Gabriel Peracio * Jesse Menn +* Robbie Mackay Translators =========== diff --git a/docs/releases/2.11.rst b/docs/releases/2.11.rst index 5910db9625..2a4888a8a0 100644 --- a/docs/releases/2.11.rst +++ b/docs/releases/2.11.rst @@ -17,6 +17,7 @@ Other features * Add ``register_snippet_listing_buttons`` and ``construct_snippet_listing_buttons`` hooks and documentation (Karl Hobley. Sponsored by the Mozilla Foundation) * Add ``wagtail --version`` to available Wagtail CLI commands (Kalob Taulien) * Add ``hooks.register_temporarily`` utility function for testing hooks (Karl Hobley. Sponsored by the Mozilla Foundation) + * Remove unidecode and use anyascii for Unicode to ASCII conversion (Robbie Mackay) Bug fixes diff --git a/setup.py b/setup.py index 0398eb7cdc..4ae58a6499 100755 --- a/setup.py +++ b/setup.py @@ -31,12 +31,16 @@ install_requires = [ "Pillow>=4.0.0,<8.0.0", "beautifulsoup4>=4.8,<4.9", "html5lib>=0.999,<2", + # RemovedInWagtail212Warning: unidecode is only used by _migrate_legacy_clean_name in wagtail.contrib.forms + # and will be made a non-default dependency once enough time has passed from the 2.10 release to allow old + # data to be migrated.
"Unidecode>=0.04.14,<2.0", "Willow>=1.4,<1.5", "requests>=2.11.1,<3.0", "l18n>=2018.5", "xlsxwriter>=1.2.8,<2.0", "tablib[xls,xlsx]>=0.14.0", + "anyascii>=0.1.5", ] # Testing dependencies diff --git a/wagtail/core/tests/test_utils.py b/wagtail/core/tests/test_utils.py index 630ade2b75..2cf185beb2 100644 --- a/wagtail/core/tests/test_utils.py +++ b/wagtail/core/tests/test_utils.py @@ -24,14 +24,14 @@ class TestStringToAscii(TestCase): def test_string_to_ascii(self): test_cases = [ (u'30 \U0001d5c4\U0001d5c6/\U0001d5c1', '30 km/h'), - (u'\u5317\u4EB0', 'Bei Jing '), + (u'\u5317\u4EB0', 'BeiJing'), ('ぁ あ ぃ い ぅ う ぇ', 'a a i i u u e'), - ('Ա Բ Գ Դ Ե Զ Է Ը Թ Ժ Ի Լ Խ Ծ Կ Հ Ձ Ղ Ճ Մ Յ Ն', 'A B G D E Z E E T` Zh I L Kh Ts K H Dz Gh Ch M Y N'), + ('Ա Բ Գ Դ Ե Զ Է Ը Թ Ժ Ի Լ Խ Ծ Կ Հ Ձ Ղ Ճ Մ Յ Ն', 'A B G D E Z E Y T\' Zh I L Kh Ts K H Dz Gh Ch M Y N'), ('Спорт!', 'Sport!'), ('Straßenbahn', 'Strassenbahn'), ('Hello world', 'Hello world'), ('Ā ā Ă ă Ą ą Ć ć Ĉ ĉ Ċ ċ Č č Ď ď Đ', 'A a A a A a C c C c C c C c D d D'), - ('〔山脈〕', '[Shan Mo ] '), + ('〔山脈〕', '[ShanMai]'), ] for (original, expected_result) in test_cases: diff --git a/wagtail/core/utils.py b/wagtail/core/utils.py index d0227b05e3..125a56469d 100644 --- a/wagtail/core/utils.py +++ b/wagtail/core/utils.py @@ -1,6 +1,7 @@ import inspect import re import unicodedata +from anyascii import anyascii from django.apps import apps from django.conf import settings @@ -8,8 +9,6 @@ from django.db.models import Model from django.utils.encoding import force_str from django.utils.text import slugify -from unidecode import unidecode - WAGTAIL_APPEND_SLASH = getattr(settings, 'WAGTAIL_APPEND_SLASH', True) @@ -21,11 +20,9 @@ def camelcase_to_underscore(str): def string_to_ascii(value): """ Convert a string to ascii. - Note: Conversion relies on unidecode, to be replaced in a future release. - Important: Consider AbstractFormField _migrate_legacy_clean_name before replcaing unidecode. 
""" - return str(unidecode(value)) + return str(anyascii(value)) def get_model_string(model):