diff --git a/tests/mail/tests.py b/tests/mail/tests.py index 2a8002ec8b..33d5da78f5 100644 --- a/tests/mail/tests.py +++ b/tests/mail/tests.py @@ -1,5 +1,6 @@ import mimetypes import os +import re import shutil import socket import sys @@ -7,6 +8,7 @@ import tempfile from email import charset, message_from_binary_file from email import message_from_bytes as _message_from_bytes from email import policy +from email.headerregistry import Address from email.message import EmailMessage as PyEmailMessage from email.message import Message as PyMessage from email.mime.image import MIMEImage @@ -176,9 +178,12 @@ class MailTests(MailTestsMixin, SimpleTestCase): same error handling strategy to avoid errors such as: UnicodeEncodeError: 'utf-8' codec can't encode <...>: surrogates not allowed - """ + # This test is specific to Python's legacy MIMEText, and can be safely removed + # if EmailMessage.message() switches to Python's modern email API. + # Using surrogateescape for non-utf8 is already covered in test_encoding(). + def simplified_set_payload(instance, payload, charset): instance._payload = payload @@ -402,16 +407,17 @@ class MailTests(MailTestsMixin, SimpleTestCase): """ email = EmailMessage( "Long subject lines that get wrapped should contain a space continuation " - "character to get expected behavior in Outlook and Thunderbird", - "Content", - "from@example.com", - ["to@example.com"], + "character to comply with RFC 822", ) message = email.message() - self.assertEqual( - message["Subject"].encode(), - b"Long subject lines that get wrapped should contain a space continuation\n" - b" character to get expected behavior in Outlook and Thunderbird", + msg_bytes = message.as_bytes() + # Python's legacy email wraps this more than strictly necessary + # (but uses FWS properly at each wrap). Modern email wraps it better. 
+ self.assertIn( + b"Subject: Long subject lines that get wrapped should contain a space\n" + b" continuation\n" + b" character to comply with RFC 822", + msg_bytes, ) def test_message_header_overrides(self): @@ -515,17 +521,25 @@ class MailTests(MailTestsMixin, SimpleTestCase): email = EmailMessage( to=['"Firstname Sürname" ', "other@example.com"], ) + reparsed = message_from_bytes(email.message().as_bytes()) self.assertEqual( - email.message()["To"], - "=?utf-8?q?Firstname_S=C3=BCrname?= , other@example.com", + reparsed["To"].addresses, + ( + Address(display_name="Firstname Sürname", addr_spec="to@example.com"), + Address(addr_spec="other@example.com"), + ), ) email = EmailMessage( to=['"Sürname, Firstname" ', "other@example.com"], ) + reparsed = message_from_bytes(email.message().as_bytes()) self.assertEqual( - email.message()["To"], - "=?utf-8?q?S=C3=BCrname=2C_Firstname?= , other@example.com", + reparsed["To"].addresses, + ( + Address(display_name="Sürname, Firstname", addr_spec="to@example.com"), + Address(addr_spec="other@example.com"), + ), ) def test_unicode_headers(self): @@ -538,13 +552,24 @@ class MailTests(MailTestsMixin, SimpleTestCase): }, ) message = email.message() - self.assertEqual(message["Subject"], "=?utf-8?b?R8W8ZWfFvMOzxYJrYQ==?=") - self.assertEqual( - message["Sender"], "=?utf-8?q?Firstname_S=C3=BCrname?= " + + # Verify sent headers use RFC 2047 encoded-words. + msg_bytes = message.as_bytes() + self.assertIn(b"Subject: =?utf-8?b?R8W8ZWfFvMOzxYJrYQ==?=", msg_bytes) + self.assertIn( + b"Sender: =?utf-8?q?Firstname_S=C3=BCrname?= ", + msg_bytes, ) + self.assertIn(b"Comments: =?utf-8?q?My_S=C3=BCrname_is_non-ASCII?=", msg_bytes) + + # Verify sent headers parse to original values. 
+ reparsed = message_from_bytes(msg_bytes) + self.assertEqual(reparsed["Subject"], "Gżegżółka") self.assertEqual( - message["Comments"], "=?utf-8?q?My_S=C3=BCrname_is_non-ASCII?=" + reparsed["Sender"].address, + Address(display_name="Firstname Sürname", addr_spec="sender@example.com"), ) + self.assertEqual(reparsed["Comments"], "My Sürname is non-ASCII") def test_non_utf8_headers_multipart(self): """ @@ -556,22 +581,32 @@ class MailTests(MailTestsMixin, SimpleTestCase): to = '"Sürname, Firstname" ' text_content = "This is an important message." html_content = "

This is an important message.

" - msg = EmailMultiAlternatives( + email = EmailMultiAlternatives( "Message from Firstname Sürname", text_content, from_email, [to], headers=headers, ) - msg.attach_alternative(html_content, "text/html") - msg.encoding = "iso-8859-1" - self.assertEqual( - msg.message()["To"], - "=?iso-8859-1?q?S=FCrname=2C_Firstname?= ", + email.attach_alternative(html_content, "text/html") + email.encoding = "iso-8859-1" + message = email.message() + + # Verify sent headers use RFC 2047 encoded-words. + msg_bytes = message.as_bytes() + self.assertIn( + b"To: =?iso-8859-1?q?S=FCrname=2C_Firstname?= ", msg_bytes ) + self.assertIn( + b"Subject: =?iso-8859-1?q?Message_from_Firstname_S=FCrname?=", msg_bytes + ) + + # Verify sent headers parse to original values. + reparsed = message_from_bytes(msg_bytes) + self.assertEqual(reparsed["Subject"], "Message from Firstname Sürname") self.assertEqual( - msg.message()["Subject"], - "=?iso-8859-1?q?Message_from_Firstname_S=FCrname?=", + reparsed["To"].addresses, + (Address(display_name="Sürname, Firstname", addr_spec="to@example.com"),), ) def test_multipart_with_attachments(self): @@ -680,9 +715,6 @@ class MailTests(MailTestsMixin, SimpleTestCase): self.assertMessageHasHeaders( payload0, { - # (The MIME-Version header is neither required nor meaningful - # in a subpart, and this check for it can be safely removed.) - ("MIME-Version", "1.0"), ("Content-Type", 'text/plain; charset="iso-8859-1"'), ("Content-Transfer-Encoding", "quoted-printable"), }, @@ -695,9 +727,6 @@ class MailTests(MailTestsMixin, SimpleTestCase): self.assertMessageHasHeaders( payload1, { - # (The MIME-Version header is neither required nor meaningful - # in a subpart, and this check for it can be safely removed.) 
- ("MIME-Version", "1.0"), ("Content-Type", 'text/html; charset="iso-8859-1"'), ("Content-Transfer-Encoding", "quoted-printable"), }, @@ -1128,11 +1157,6 @@ class MailTests(MailTestsMixin, SimpleTestCase): msg = EmailMessage(body="Body with latin characters: àáä.") s = msg.message().as_bytes() self.assertIn(b"Content-Transfer-Encoding: 8bit", s) - # The following test is left over from Python 2 and can be safely removed. - # 8bit CTE within a Unicode str is not meaningful, and Python's modern - # email api won't generate it. (The test still works with the legacy api.) - s = msg.message().as_string() - self.assertIn("Content-Transfer-Encoding: 8bit", s) # Long body lines that require folding should use quoted-printable or base64, # whichever is shorter. However, Python's legacy email API avoids re-folding @@ -1147,9 +1171,6 @@ class MailTests(MailTestsMixin, SimpleTestCase): ) s = msg.message().as_bytes() self.assertIn(b"Content-Transfer-Encoding: 8bit", s) - # The following test is left over from Python 2. - s = msg.message().as_string() - self.assertIn("Content-Transfer-Encoding: 8bit", s) # (test_dont_base64_encode_message_rfc822() is now covered # as part of test_attach_rfc822_message() above.) @@ -1174,6 +1195,9 @@ class MailTests(MailTestsMixin, SimpleTestCase): def test_sanitize_address(self): """Email addresses are properly sanitized.""" + # This is a unit test for the internal sanitize_address() function. + # Many of these cases are now duplicated in test_address_header_encoding(), + # which verifies headers in the generated message. for email_address, encoding, expected_result in ( # ASCII addresses. ("to@example.com", "ascii", "to@example.com"), @@ -1261,6 +1285,11 @@ class MailTests(MailTestsMixin, SimpleTestCase): ) def test_sanitize_address_invalid(self): + # This is a unit test for the internal sanitize_address() function. 
+ # Note that Django's EmailMessage.message() will _not_ catch these cases, + # as it only calls sanitize_address() if an address also includes non-ASCII + # chars. Django detects these cases in the SMTP EmailBackend during sending. + # See SMTPBackendTests.test_avoids_sending_to_invalid_addresses() below. for email_address in ( # Invalid address with two @ signs. "to@other.com@example.com", @@ -1277,6 +1306,9 @@ class MailTests(MailTestsMixin, SimpleTestCase): sanitize_address(email_address, encoding="utf-8") def test_sanitize_address_header_injection(self): + # This is a unit test for the internal sanitize_address() function. + # These cases are also duplicated in test_address_header_encoding(), + # which verifies headers in the generated message. msg = "Invalid address; address parts cannot contain newlines." tests = [ "Name\nInjection ", @@ -1289,6 +1321,114 @@ class MailTests(MailTestsMixin, SimpleTestCase): with self.assertRaisesMessage(ValueError, msg): sanitize_address(email_address, encoding="utf-8") + def test_address_header_encoding(self): + # This verifies the modern email API's address header handling. + # (Adapted from older test_sanitize_address() for legacy email API.) + cases = [ + # (address, expected) + ("to@example.com", "to@example.com"), + ("localpartonly", "localpartonly"), + # Addresses with display-names. + ("A name ", "A name "), + ('"A name" ', '"A name" '), + ( + '"Comma, requires quotes" ', + '"Comma, requires quotes" ', + ), + ('"to@other.com" ', '"to@other.com" '), + # Non-ASCII addr-spec: IDNA encoding for domain. + # (Note: no RFC permits encoding a non-ASCII localpart.) + ("to@éxample.com", "to@xn--xample-9ua.com"), + ( + "To Example ", + "To Example ", + ), + # Pre-encoded IDNA domain is left as is. + # (Make sure IDNA 2008 is not downgraded to IDNA 2003.) 
+ ("to@xn--fa-hia.example.com", "to@xn--fa-hia.example.com"), + ("", ""), + ( + '"Display, Name" ', + '"Display, Name" ', + ), + # Non-ASCII display-name as RFC-2047 encoded-word. + ( + "Tó Example ", + "=?utf-8?q?T=C3=B3_Example?= ", + ), + # Addresses with two @ signs (quoted-string localpart). + ('"to@other.com"@example.com', '"to@other.com"@example.com'), + ( + 'To Example <"to@other.com"@example.com>', + 'To Example <"to@other.com"@example.com>', + ), + # Addresses with long non-ASCII display names. + ( + "Tó Example very long" * 4 + " ", + "=?utf-8?q?T=C3=B3_Example_very_longT=C3=B3_Example_very_longT" + "=C3=B3_Example_?=" + " =?utf-8?q?very_longT=C3=B3_Example_very_long?= ", + ), + # Address with long display name and non-ASCII domain. + ( + "To Example very long" * 4 + " ", + "To Example very longTo Example very longTo Example very lo" + "ngTo Example very long ", + ), + ] + for address, expected in cases: + with self.subTest(address=address): + email = EmailMessage(to=[address]) + actual = email.message().as_bytes().decode() + # Unfold FWS and extract the To header. (This is not even close + # to a complete header parser, but is sufficient for this test. + # Note it does not recombine adjacent/folded RFC 2047 encoded-words.) + headers = re.sub(r"\s*\r?\n ", " ", actual).splitlines() + to_header = [h for h in headers if h.startswith("To:")][0] + expected_header = f"To: {expected}" + self.assertEqual(to_header, expected_header) + + def test_address_header_injection(self): + # (This error message comes from Django's internal forbid_multi_line_headers().) + msg = "Header values can't contain newlines" + cases = [ + "Name\nInjection ", + '"Name\nInjection" ', + '"Name\rInjection" ', + '"Name\r\nInjection" ', + "Name ", + "to\ninjection@example.com", + ] + + # Structured address header fields (from RFC 5322 3.6.x). + headers = [ + "From", + "Sender", + "Reply-To", + "To", + "Cc", + # "Bcc" is not checked by EmailMessage.message(). 
+ # (See SMTPBackendTests.test_avoids_sending_to_invalid_addresses().) + "Resent-From", + "Resent-Sender", + "Resent-To", + "Resent-Cc", + "Resent-Bcc", + ] + + for header in headers: + for email_address in cases: + with self.subTest(header=header, email_address=email_address): + if header == "From": + email = EmailMessage(from_email=email_address) + elif header in ("To", "Cc", "Bcc", "Reply-To"): + param = header.lower().replace("-", "_") + email = EmailMessage(**{param: [email_address]}) + else: + email = EmailMessage(headers={header: email_address}) + with self.assertRaisesMessage(ValueError, msg): + email.message() + def test_email_multi_alternatives_content_mimetype_none(self): email_msg = EmailMultiAlternatives() msg = "Both content and mimetype must be provided."