diff --git a/django/db/backends/mysql/base.py b/django/db/backends/mysql/base.py index b8d2d09c94..eb0e361d8d 100644 --- a/django/db/backends/mysql/base.py +++ b/django/db/backends/mysql/base.py @@ -215,7 +215,7 @@ class DatabaseWrapper(BaseDatabaseWrapper): def get_connection_params(self): kwargs = { "conv": django_conversions, - "charset": "utf8", + "charset": "utf8mb4", } settings_dict = self.settings_dict if settings_dict["USER"]: diff --git a/django/db/backends/mysql/features.py b/django/db/backends/mysql/features.py index 21088544ac..414f552d94 100644 --- a/django/db/backends/mysql/features.py +++ b/django/db/backends/mysql/features.py @@ -71,21 +71,11 @@ class DatabaseFeatures(BaseDatabaseFeatures): @cached_property def test_collations(self): - charset = "utf8" - if ( - self.connection.mysql_is_mariadb - and self.connection.mysql_version >= (10, 6) - ) or ( - not self.connection.mysql_is_mariadb - and self.connection.mysql_version >= (8, 0, 30) - ): - # utf8 is an alias for utf8mb3 in MariaDB 10.6+ and MySQL 8.0.30+. - charset = "utf8mb3" return { - "ci": f"{charset}_general_ci", - "non_default": f"{charset}_esperanto_ci", - "swedish_ci": f"{charset}_swedish_ci", - "virtual": f"{charset}_esperanto_ci", + "ci": "utf8mb4_general_ci", + "non_default": "utf8mb4_esperanto_ci", + "swedish_ci": "utf8mb4_swedish_ci", + "virtual": "utf8mb4_esperanto_ci", } test_now_utc_template = "UTC_TIMESTAMP(6)" @@ -99,10 +89,6 @@ class DatabaseFeatures(BaseDatabaseFeatures): "db_functions.comparison.test_least.LeastTests." 
"test_coalesce_workaround", }, - "Running on MySQL requires utf8mb4 encoding (#18392).": { - "model_fields.test_textfield.TextFieldTests.test_emoji", - "model_fields.test_charfield.TestCharField.test_emoji", - }, "MySQL doesn't support functional indexes on a function that " "returns JSON": { "schema.tests.SchemaTests.test_func_index_json_key_transform", diff --git a/docs/ref/contrib/auth.txt b/docs/ref/contrib/auth.txt index 103aff8e0b..3e1cdfd978 100644 --- a/docs/ref/contrib/auth.txt +++ b/docs/ref/contrib/auth.txt @@ -30,10 +30,7 @@ Fields The ``max_length`` should be sufficient for many use cases. If you need a longer length, please use a :ref:`custom user model - <specifying-custom-user-model>`. If you use MySQL with the ``utf8mb4`` - encoding (recommended for proper Unicode support), specify at most - ``max_length=191`` because MySQL can only create unique indexes with - 191 characters in that case by default. + <specifying-custom-user-model>`. .. attribute:: first_name diff --git a/docs/ref/databases.txt b/docs/ref/databases.txt index 73a67475fe..57e94140c2 100644 --- a/docs/ref/databases.txt +++ b/docs/ref/databases.txt @@ -517,7 +517,7 @@ You can `create your database`_ using the command-line tools and this SQL: .. code-block:: sql - CREATE DATABASE <dbname> CHARACTER SET utf8; + CREATE DATABASE <dbname> CHARACTER SET utf8mb4; This ensures all tables and columns will use UTF-8 by default. @@ -542,21 +542,21 @@ Django doesn't provide an API to change them. .. _documented thoroughly: https://dev.mysql.com/doc/refman/en/charset.html By default, with a UTF-8 database, MySQL will use the -``utf8_general_ci`` collation. This results in all string equality +``utf8mb4_0900_ai_ci`` collation. This results in all string equality comparisons being done in a *case-insensitive* manner. That is, ``"Fred"`` and ``"freD"`` are considered equal at the database level.
If you have a unique constraint on a field, it would be illegal to try to insert both ``"aa"`` and ``"AA"`` into the same column, since they compare as equal (and, hence, non-unique) with the default collation. If you want case-sensitive comparisons on a particular column or table, change the column or table to use the -``utf8_bin`` collation. +``utf8mb4_0900_as_cs`` collation. Please note that according to `MySQL Unicode Character Sets`_, comparisons for -the ``utf8_general_ci`` collation are faster, but slightly less correct, than -comparisons for ``utf8_unicode_ci``. If this is acceptable for your application, -you should use ``utf8_general_ci`` because it is faster. If this is not acceptable -(for example, if you require German dictionary order), use ``utf8_unicode_ci`` -because it is more accurate. +the ``utf8mb4_general_ci`` collation are faster, but slightly less correct, +than comparisons for ``utf8mb4_unicode_ci``. If this is acceptable for your +application, you should use ``utf8mb4_general_ci`` because it is faster. If +this is not acceptable (for example, if you require German dictionary order), +use ``utf8mb4_unicode_ci`` because it is more accurate. .. _MySQL Unicode Character Sets: https://dev.mysql.com/doc/refman/en/charset-unicode-sets.html @@ -602,7 +602,7 @@ Here's a sample configuration which uses a MySQL option file:: database = NAME user = USER password = PASSWORD - default-character-set = utf8 + default-character-set = utf8mb4 Several other `MySQLdb connection options`_ may be useful, such as ``ssl``, ``init_command``, and ``sql_mode``. diff --git a/docs/releases/5.2.txt b/docs/releases/5.2.txt index a977e6c803..3cc71b7f68 100644 --- a/docs/releases/5.2.txt +++ b/docs/releases/5.2.txt @@ -174,7 +174,9 @@ CSRF Database backends ~~~~~~~~~~~~~~~~~ -* ... +* MySQL connections now default to using the ``utf8mb4`` character set, + instead of ``utf8``, which is an alias for the deprecated character set + ``utf8mb3``. 
Decorators ~~~~~~~~~~ @@ -385,6 +387,14 @@ Dropped support for PostgreSQL 13 Upstream support for PostgreSQL 13 ends in November 2025. Django 5.2 supports PostgreSQL 14 and higher. +Changed MySQL connection character set default +---------------------------------------------- + +MySQL connections now default to using the ``utf8mb4`` character set, instead +of ``utf8``, which is an alias for the deprecated character set ``utf8mb3``. +``utf8mb3`` can be specified in the ``OPTIONS`` part of the ``DATABASES`` +setting, if needed for legacy databases. + Miscellaneous ------------- diff --git a/tests/dbshell/test_mysql.py b/tests/dbshell/test_mysql.py index 13007ec037..6088a8b61a 100644 --- a/tests/dbshell/test_mysql.py +++ b/tests/dbshell/test_mysql.py @@ -112,7 +112,7 @@ class MySqlDbshellCommandTestCase(SimpleTestCase): "--user=someuser", "--host=somehost", "--port=444", - "--default-character-set=utf8", + "--default-character-set=utf8mb4", "somedbname", ] expected_env = {"MYSQL_PWD": "somepassword"} @@ -124,7 +124,7 @@ class MySqlDbshellCommandTestCase(SimpleTestCase): "PASSWORD": "somepassword", "HOST": "somehost", "PORT": 444, - "OPTIONS": {"charset": "utf8"}, + "OPTIONS": {"charset": "utf8mb4"}, } ), (expected_args, expected_env),