Refs #35581 -- Updated implementation-dependent mail tests.

- Updated tests that depended on specific legacy email APIs or legacy behavior to be more implementation-agnostic.
- Added comments to identify other things that are legacy-specific and can be expected to change if using Python's modern email API.
2024-11-24 20:07:01 +01:00 · 2024-08-24 14:23:54 -07:00 · 2024-08-24 14:23:54 -07:00 · 003bec266a
commit 003bec266a
parent 0f2492066a
1 changed files with 179 additions and 39 deletions
--- a/tests/mail/tests.py
+++ b/tests/mail/tests.py
@ -1,5 +1,6 @@
 import mimetypes
 import os
+import re
 import shutil
 import socket
 import sys
@ -7,6 +8,7 @@ import tempfile
 from email import charset, message_from_binary_file
 from email import message_from_bytes as _message_from_bytes
 from email import policy
+from email.headerregistry import Address
 from email.message import EmailMessage as PyEmailMessage
 from email.message import Message as PyMessage
 from email.mime.image import MIMEImage
@ -176,9 +178,12 @@ class MailTests(MailTestsMixin, SimpleTestCase):
        same error handling strategy to avoid errors such as:

        UnicodeEncodeError: 'utf-8' codec can't encode <...>: surrogates not allowed
-
        """

+        # This test is specific to Python's legacy MIMEText, and can be safely removed
+        # if EmailMessage.message() switches to Python's modern email API.
+        # Using surrogateescape for non-utf8 is already covered in test_encoding().
+
        def simplified_set_payload(instance, payload, charset):
            instance._payload = payload

@ -402,16 +407,17 @@ class MailTests(MailTestsMixin, SimpleTestCase):
        """
        email = EmailMessage(
            "Long subject lines that get wrapped should contain a space continuation "
-            "character to get expected behavior in Outlook and Thunderbird",
-            "Content",
-            "from@example.com",
-            ["to@example.com"],
+            "character to comply with RFC 822",
        )
        message = email.message()
-        self.assertEqual(
-            message["Subject"].encode(),
-            b"Long subject lines that get wrapped should contain a space continuation\n"
-            b" character to get expected behavior in Outlook and Thunderbird",
+        msg_bytes = message.as_bytes()
+        # Python's legacy email API wraps this more than strictly necessary
+        # (but uses FWS properly at each wrap). The modern email API wraps it better.
+        self.assertIn(
+            b"Subject: Long subject lines that get wrapped should contain a space\n"
+            b" continuation\n"
+            b" character to comply with RFC 822",
+            msg_bytes,
        )

    def test_message_header_overrides(self):
@ -515,17 +521,25 @@ class MailTests(MailTestsMixin, SimpleTestCase):
        email = EmailMessage(
            to=['"Firstname Sürname" <to@example.com>', "other@example.com"],
        )
+        reparsed = message_from_bytes(email.message().as_bytes())
        self.assertEqual(
-            email.message()["To"],
-            "=?utf-8?q?Firstname_S=C3=BCrname?= <to@example.com>, other@example.com",
+            reparsed["To"].addresses,
+            (
+                Address(display_name="Firstname Sürname", addr_spec="to@example.com"),
+                Address(addr_spec="other@example.com"),
+            ),
        )

        email = EmailMessage(
            to=['"Sürname, Firstname" <to@example.com>', "other@example.com"],
        )
+        reparsed = message_from_bytes(email.message().as_bytes())
        self.assertEqual(
-            email.message()["To"],
-            "=?utf-8?q?S=C3=BCrname=2C_Firstname?= <to@example.com>, other@example.com",
+            reparsed["To"].addresses,
+            (
+                Address(display_name="Sürname, Firstname", addr_spec="to@example.com"),
+                Address(addr_spec="other@example.com"),
+            ),
        )

    def test_unicode_headers(self):
@ -538,13 +552,24 @@ class MailTests(MailTestsMixin, SimpleTestCase):
            },
        )
        message = email.message()
-        self.assertEqual(message["Subject"], "=?utf-8?b?R8W8ZWfFvMOzxYJrYQ==?=")
-        self.assertEqual(
-            message["Sender"], "=?utf-8?q?Firstname_S=C3=BCrname?= <sender@example.com>"
+
+        # Verify sent headers use RFC 2047 encoded-words.
+        msg_bytes = message.as_bytes()
+        self.assertIn(b"Subject: =?utf-8?b?R8W8ZWfFvMOzxYJrYQ==?=", msg_bytes)
+        self.assertIn(
+            b"Sender: =?utf-8?q?Firstname_S=C3=BCrname?= <sender@example.com>",
+            msg_bytes,
        )
+        self.assertIn(b"Comments: =?utf-8?q?My_S=C3=BCrname_is_non-ASCII?=", msg_bytes)
+
+        # Verify sent headers parse to original values.
+        reparsed = message_from_bytes(msg_bytes)
+        self.assertEqual(reparsed["Subject"], "Gżegżółka")
        self.assertEqual(
-            message["Comments"], "=?utf-8?q?My_S=C3=BCrname_is_non-ASCII?="
+            reparsed["Sender"].address,
+            Address(display_name="Firstname Sürname", addr_spec="sender@example.com"),
        )
+        self.assertEqual(reparsed["Comments"], "My Sürname is non-ASCII")

    def test_non_utf8_headers_multipart(self):
        """
@ -556,22 +581,32 @@ class MailTests(MailTestsMixin, SimpleTestCase):
        to = '"Sürname, Firstname" <to@example.com>'
        text_content = "This is an important message."
        html_content = "<p>This is an <strong>important</strong> message.</p>"
-        msg = EmailMultiAlternatives(
+        email = EmailMultiAlternatives(
            "Message from Firstname Sürname",
            text_content,
            from_email,
            [to],
            headers=headers,
        )
-        msg.attach_alternative(html_content, "text/html")
-        msg.encoding = "iso-8859-1"
-        self.assertEqual(
-            msg.message()["To"],
-            "=?iso-8859-1?q?S=FCrname=2C_Firstname?= <to@example.com>",
+        email.attach_alternative(html_content, "text/html")
+        email.encoding = "iso-8859-1"
+        message = email.message()
+
+        # Verify sent headers use RFC 2047 encoded-words.
+        msg_bytes = message.as_bytes()
+        self.assertIn(
+            b"To: =?iso-8859-1?q?S=FCrname=2C_Firstname?= <to@example.com>", msg_bytes
        )
+        self.assertIn(
+            b"Subject: =?iso-8859-1?q?Message_from_Firstname_S=FCrname?=", msg_bytes
+        )
+
+        # Verify sent headers parse to original values.
+        reparsed = message_from_bytes(msg_bytes)
+        self.assertEqual(reparsed["Subject"], "Message from Firstname Sürname")
        self.assertEqual(
-            msg.message()["Subject"],
-            "=?iso-8859-1?q?Message_from_Firstname_S=FCrname?=",
+            reparsed["To"].addresses,
+            (Address(display_name="Sürname, Firstname", addr_spec="to@example.com"),),
        )

    def test_multipart_with_attachments(self):
@ -680,9 +715,6 @@ class MailTests(MailTestsMixin, SimpleTestCase):
        self.assertMessageHasHeaders(
            payload0,
            {
-                # (The MIME-Version header is neither required nor meaningful
-                # in a subpart, and this check for it can be safely removed.)
-                ("MIME-Version", "1.0"),
                ("Content-Type", 'text/plain; charset="iso-8859-1"'),
                ("Content-Transfer-Encoding", "quoted-printable"),
            },
@ -695,9 +727,6 @@ class MailTests(MailTestsMixin, SimpleTestCase):
        self.assertMessageHasHeaders(
            payload1,
            {
-                # (The MIME-Version header is neither required nor meaningful
-                # in a subpart, and this check for it can be safely removed.)
-                ("MIME-Version", "1.0"),
                ("Content-Type", 'text/html; charset="iso-8859-1"'),
                ("Content-Transfer-Encoding", "quoted-printable"),
            },
@ -1128,11 +1157,6 @@ class MailTests(MailTestsMixin, SimpleTestCase):
        msg = EmailMessage(body="Body with latin characters: àáä.")
        s = msg.message().as_bytes()
        self.assertIn(b"Content-Transfer-Encoding: 8bit", s)
-        # The following test is left over from Python 2 and can be safely removed.
-        # 8bit CTE within a Unicode str is not meaningful, and Python's modern
-        # email api won't generate it. (The test still works with the legacy api.)
-        s = msg.message().as_string()
-        self.assertIn("Content-Transfer-Encoding: 8bit", s)

        # Long body lines that require folding should use quoted-printable or base64,
        # whichever is shorter. However, Python's legacy email API avoids re-folding
@ -1147,9 +1171,6 @@ class MailTests(MailTestsMixin, SimpleTestCase):
        )
        s = msg.message().as_bytes()
        self.assertIn(b"Content-Transfer-Encoding: 8bit", s)
-        # The following test is left over from Python 2.
-        s = msg.message().as_string()
-        self.assertIn("Content-Transfer-Encoding: 8bit", s)

    # (test_dont_base64_encode_message_rfc822() is now covered
    # as part of test_attach_rfc822_message() above.)
@ -1174,6 +1195,9 @@ class MailTests(MailTestsMixin, SimpleTestCase):

    def test_sanitize_address(self):
        """Email addresses are properly sanitized."""
+        # This is a unit test for the internal sanitize_address() function.
+        # Many of these cases are now duplicated in test_address_header_encoding(),
+        # which verifies headers in the generated message.
        for email_address, encoding, expected_result in (
            # ASCII addresses.
            ("to@example.com", "ascii", "to@example.com"),
@ -1261,6 +1285,11 @@ class MailTests(MailTestsMixin, SimpleTestCase):
                )

    def test_sanitize_address_invalid(self):
+        # This is a unit test for the internal sanitize_address() function.
+        # Note that Django's EmailMessage.message() will _not_ catch these cases,
+        # as it only calls sanitize_address() if an address also includes non-ASCII
+        # chars. Django detects these cases in the SMTP EmailBackend during sending.
+        # See SMTPBackendTests.test_avoids_sending_to_invalid_addresses() below.
        for email_address in (
            # Invalid address with two @ signs.
            "to@other.com@example.com",
@ -1277,6 +1306,9 @@ class MailTests(MailTestsMixin, SimpleTestCase):
                    sanitize_address(email_address, encoding="utf-8")

    def test_sanitize_address_header_injection(self):
+        # This is a unit test for the internal sanitize_address() function.
+        # These cases are also duplicated in test_address_header_encoding(),
+        # which verifies headers in the generated message.
        msg = "Invalid address; address parts cannot contain newlines."
        tests = [
            "Name\nInjection <to@example.com>",
@ -1289,6 +1321,114 @@ class MailTests(MailTestsMixin, SimpleTestCase):
                with self.assertRaisesMessage(ValueError, msg):
                    sanitize_address(email_address, encoding="utf-8")

+    def test_address_header_encoding(self):
+        # This verifies the modern email API's address header handling.
+        # (Adapted from older test_sanitize_address() for legacy email API.)
+        cases = [
+            # (address, expected)
+            ("to@example.com", "to@example.com"),
+            ("localpartonly", "localpartonly"),
+            # Addresses with display-names.
+            ("A name <to@example.com>", "A name <to@example.com>"),
+            ('"A name" <to@example.com>', '"A name" <to@example.com>'),
+            (
+                '"Comma, requires quotes" <to@example.com>',
+                '"Comma, requires quotes" <to@example.com>',
+            ),
+            ('"to@other.com" <to@example.com>', '"to@other.com" <to@example.com>'),
+            # Non-ASCII addr-spec: IDNA encoding for domain.
+            # (Note: no RFC permits encoding a non-ASCII localpart.)
+            ("to@éxample.com", "to@xn--xample-9ua.com"),
+            (
+                "To Example <to@éxample.com>",
+                "To Example <to@xn--xample-9ua.com>",
+            ),
+            # Pre-encoded IDNA domain is left as is.
+            # (Make sure IDNA 2008 is not downgraded to IDNA 2003.)
+            ("to@xn--fa-hia.example.com", "to@xn--fa-hia.example.com"),
+            ("<to@xn--10cl1a0b660p.example.com>", "<to@xn--10cl1a0b660p.example.com>"),
+            (
+                '"Display, Name" <to@xn--nxasmm1c.example.com>',
+                '"Display, Name" <to@xn--nxasmm1c.example.com>',
+            ),
+            # Non-ASCII display-name as RFC-2047 encoded-word.
+            (
+                "Tó Example <to@example.com>",
+                "=?utf-8?q?T=C3=B3_Example?= <to@example.com>",
+            ),
+            # Addresses with two @ signs (quoted-string localpart).
+            ('"to@other.com"@example.com', '"to@other.com"@example.com'),
+            (
+                'To Example <"to@other.com"@example.com>',
+                'To Example <"to@other.com"@example.com>',
+            ),
+            # Addresses with long non-ASCII display names.
+            (
+                "Tó Example very long" * 4 + " <to@example.com>",
+                "=?utf-8?q?T=C3=B3_Example_very_longT=C3=B3_Example_very_longT"
+                "=C3=B3_Example_?="
+                " =?utf-8?q?very_longT=C3=B3_Example_very_long?= <to@example.com>",
+            ),
+            # Address with long display name and non-ASCII domain.
+            (
+                "To Example very long" * 4 + " <to@exampl€.com>",
+                "To Example very longTo Example very longTo Example very lo"
+                "ngTo Example very long <to@xn--exampl-nc1c.com>",
+            ),
+        ]
+        for address, expected in cases:
+            with self.subTest(address=address):
+                email = EmailMessage(to=[address])
+                actual = email.message().as_bytes().decode()
+                # Unfold FWS and extract the To header. (This is not even close
+                # to a complete header parser, but is sufficient for this test.
+                # Note it does not recombine adjacent/folded RFC 2047 encoded-words.)
+                headers = re.sub(r"\s*\r?\n ", " ", actual).splitlines()
+                to_header = [h for h in headers if h.startswith("To:")][0]
+                expected_header = f"To: {expected}"
+                self.assertEqual(to_header, expected_header)
+
+    def test_address_header_injection(self):
+        # (This error message comes from Django's internal forbid_multi_line_headers().)
+        msg = "Header values can't contain newlines"
+        cases = [
+            "Name\nInjection <to@example.com>",
+            '"Name\nInjection" <to@example.com>',
+            '"Name\rInjection" <to@example.com>',
+            '"Name\r\nInjection" <to@example.com>',
+            "Name <to\ninjection@example.com>",
+            "to\ninjection@example.com",
+        ]
+
+        # Structured address header fields (from RFC 5322 3.6.x).
+        headers = [
+            "From",
+            "Sender",
+            "Reply-To",
+            "To",
+            "Cc",
+            # "Bcc" is not checked by EmailMessage.message().
+            # (See SMTPBackendTests.test_avoids_sending_to_invalid_addresses().)
+            "Resent-From",
+            "Resent-Sender",
+            "Resent-To",
+            "Resent-Cc",
+            "Resent-Bcc",
+        ]
+
+        for header in headers:
+            for email_address in cases:
+                with self.subTest(header=header, email_address=email_address):
+                    if header == "From":
+                        email = EmailMessage(from_email=email_address)
+                    elif header in ("To", "Cc", "Bcc", "Reply-To"):
+                        param = header.lower().replace("-", "_")
+                        email = EmailMessage(**{param: [email_address]})
+                    else:
+                        email = EmailMessage(headers={header: email_address})
+                    with self.assertRaisesMessage(ValueError, msg):
+                        email.message()
+
    def test_email_multi_alternatives_content_mimetype_none(self):
        email_msg = EmailMultiAlternatives()
        msg = "Both content and mimetype must be provided."