0
0
mirror of https://github.com/django/django.git synced 2024-11-21 19:09:18 +01:00

Refs #35581 -- Updated implementation-dependent mail tests.

- Updated tests that depended on specific legacy email APIs or legacy
  behavior to be more implementation-agnostic.
- Added comments to identify other things that are legacy specific
  and can be expected to change if using Python's modern email API.
This commit is contained in:
Mike Edmunds 2024-08-24 14:23:54 -07:00
parent 0f2492066a
commit 003bec266a
No known key found for this signature in database

View File

@ -1,5 +1,6 @@
import mimetypes
import os
import re
import shutil
import socket
import sys
@ -7,6 +8,7 @@ import tempfile
from email import charset, message_from_binary_file
from email import message_from_bytes as _message_from_bytes
from email import policy
from email.headerregistry import Address
from email.message import EmailMessage as PyEmailMessage
from email.message import Message as PyMessage
from email.mime.image import MIMEImage
@ -176,9 +178,12 @@ class MailTests(MailTestsMixin, SimpleTestCase):
same error handling strategy to avoid errors such as:
UnicodeEncodeError: 'utf-8' codec can't encode <...>: surrogates not allowed
"""
# This test is specific to Python's legacy MIMEText, and can be safely removed
# if EmailMessage.message() switches to Python's modern email API.
# Using surrogateescape for non-utf8 is already covered in test_encoding().
def simplified_set_payload(instance, payload, charset):
instance._payload = payload
@ -402,16 +407,17 @@ class MailTests(MailTestsMixin, SimpleTestCase):
"""
email = EmailMessage(
"Long subject lines that get wrapped should contain a space continuation "
"character to get expected behavior in Outlook and Thunderbird",
"Content",
"from@example.com",
["to@example.com"],
"character to comply with RFC 822",
)
message = email.message()
self.assertEqual(
message["Subject"].encode(),
b"Long subject lines that get wrapped should contain a space continuation\n"
b" character to get expected behavior in Outlook and Thunderbird",
msg_bytes = message.as_bytes()
# Python's legacy email wraps this more than strictly necessary
# (but uses FWS properly at each wrap). Modern email wraps it better.
self.assertIn(
b"Subject: Long subject lines that get wrapped should contain a space\n"
b" continuation\n"
b" character to comply with RFC 822",
msg_bytes,
)
def test_message_header_overrides(self):
@ -515,17 +521,25 @@ class MailTests(MailTestsMixin, SimpleTestCase):
email = EmailMessage(
to=['"Firstname Sürname" <to@example.com>', "other@example.com"],
)
reparsed = message_from_bytes(email.message().as_bytes())
self.assertEqual(
email.message()["To"],
"=?utf-8?q?Firstname_S=C3=BCrname?= <to@example.com>, other@example.com",
reparsed["To"].addresses,
(
Address(display_name="Firstname Sürname", addr_spec="to@example.com"),
Address(addr_spec="other@example.com"),
),
)
email = EmailMessage(
to=['"Sürname, Firstname" <to@example.com>', "other@example.com"],
)
reparsed = message_from_bytes(email.message().as_bytes())
self.assertEqual(
email.message()["To"],
"=?utf-8?q?S=C3=BCrname=2C_Firstname?= <to@example.com>, other@example.com",
reparsed["To"].addresses,
(
Address(display_name="Sürname, Firstname", addr_spec="to@example.com"),
Address(addr_spec="other@example.com"),
),
)
def test_unicode_headers(self):
@ -538,13 +552,24 @@ class MailTests(MailTestsMixin, SimpleTestCase):
},
)
message = email.message()
self.assertEqual(message["Subject"], "=?utf-8?b?R8W8ZWfFvMOzxYJrYQ==?=")
self.assertEqual(
message["Sender"], "=?utf-8?q?Firstname_S=C3=BCrname?= <sender@example.com>"
# Verify sent headers use RFC 2047 encoded-words.
msg_bytes = message.as_bytes()
self.assertIn(b"Subject: =?utf-8?b?R8W8ZWfFvMOzxYJrYQ==?=", msg_bytes)
self.assertIn(
b"Sender: =?utf-8?q?Firstname_S=C3=BCrname?= <sender@example.com>",
msg_bytes,
)
self.assertIn(b"Comments: =?utf-8?q?My_S=C3=BCrname_is_non-ASCII?=", msg_bytes)
# Verify sent headers parse to original values.
reparsed = message_from_bytes(msg_bytes)
self.assertEqual(reparsed["Subject"], "Gżegżółka")
self.assertEqual(
message["Comments"], "=?utf-8?q?My_S=C3=BCrname_is_non-ASCII?="
reparsed["Sender"].address,
Address(display_name="Firstname Sürname", addr_spec="sender@example.com"),
)
self.assertEqual(reparsed["Comments"], "My Sürname is non-ASCII")
def test_non_utf8_headers_multipart(self):
"""
@ -556,22 +581,32 @@ class MailTests(MailTestsMixin, SimpleTestCase):
to = '"Sürname, Firstname" <to@example.com>'
text_content = "This is an important message."
html_content = "<p>This is an <strong>important</strong> message.</p>"
msg = EmailMultiAlternatives(
email = EmailMultiAlternatives(
"Message from Firstname Sürname",
text_content,
from_email,
[to],
headers=headers,
)
msg.attach_alternative(html_content, "text/html")
msg.encoding = "iso-8859-1"
self.assertEqual(
msg.message()["To"],
"=?iso-8859-1?q?S=FCrname=2C_Firstname?= <to@example.com>",
email.attach_alternative(html_content, "text/html")
email.encoding = "iso-8859-1"
message = email.message()
# Verify sent headers use RFC 2047 encoded-words.
msg_bytes = message.as_bytes()
self.assertIn(
b"To: =?iso-8859-1?q?S=FCrname=2C_Firstname?= <to@example.com>", msg_bytes
)
self.assertIn(
b"Subject: =?iso-8859-1?q?Message_from_Firstname_S=FCrname?=", msg_bytes
)
# Verify sent headers parse to original values.
reparsed = message_from_bytes(msg_bytes)
self.assertEqual(reparsed["Subject"], "Message from Firstname Sürname")
self.assertEqual(
msg.message()["Subject"],
"=?iso-8859-1?q?Message_from_Firstname_S=FCrname?=",
reparsed["To"].addresses,
(Address(display_name="Sürname, Firstname", addr_spec="to@example.com"),),
)
def test_multipart_with_attachments(self):
@ -680,9 +715,6 @@ class MailTests(MailTestsMixin, SimpleTestCase):
self.assertMessageHasHeaders(
payload0,
{
# (The MIME-Version header is neither required nor meaningful
# in a subpart, and this check for it can be safely removed.)
("MIME-Version", "1.0"),
("Content-Type", 'text/plain; charset="iso-8859-1"'),
("Content-Transfer-Encoding", "quoted-printable"),
},
@ -695,9 +727,6 @@ class MailTests(MailTestsMixin, SimpleTestCase):
self.assertMessageHasHeaders(
payload1,
{
# (The MIME-Version header is neither required nor meaningful
# in a subpart, and this check for it can be safely removed.)
("MIME-Version", "1.0"),
("Content-Type", 'text/html; charset="iso-8859-1"'),
("Content-Transfer-Encoding", "quoted-printable"),
},
@ -1128,11 +1157,6 @@ class MailTests(MailTestsMixin, SimpleTestCase):
msg = EmailMessage(body="Body with latin characters: àáä.")
s = msg.message().as_bytes()
self.assertIn(b"Content-Transfer-Encoding: 8bit", s)
# The following test is left over from Python 2 and can be safely removed.
# 8bit CTE within a Unicode str is not meaningful, and Python's modern
# email api won't generate it. (The test still works with the legacy api.)
s = msg.message().as_string()
self.assertIn("Content-Transfer-Encoding: 8bit", s)
# Long body lines that require folding should use quoted-printable or base64,
# whichever is shorter. However, Python's legacy email API avoids re-folding
@ -1147,9 +1171,6 @@ class MailTests(MailTestsMixin, SimpleTestCase):
)
s = msg.message().as_bytes()
self.assertIn(b"Content-Transfer-Encoding: 8bit", s)
# The following test is left over from Python 2.
s = msg.message().as_string()
self.assertIn("Content-Transfer-Encoding: 8bit", s)
# (test_dont_base64_encode_message_rfc822() is now covered
# as part of test_attach_rfc822_message() above.)
@ -1174,6 +1195,9 @@ class MailTests(MailTestsMixin, SimpleTestCase):
def test_sanitize_address(self):
"""Email addresses are properly sanitized."""
# This is a unit test for the internal sanitize_address() function.
# Many of these cases are now duplicated in test_address_header_encoding(),
# which verifies headers in the generated message.
for email_address, encoding, expected_result in (
# ASCII addresses.
("to@example.com", "ascii", "to@example.com"),
@ -1261,6 +1285,11 @@ class MailTests(MailTestsMixin, SimpleTestCase):
)
def test_sanitize_address_invalid(self):
# This is a unit test for the internal sanitize_address() function.
# Note that Django's EmailMessage.message() will _not_ catch these cases,
# as it only calls sanitize_address() if an address also includes non-ASCII
# chars. Django detects these cases in the SMTP EmailBackend during sending.
# See SMTPBackendTests.test_avoids_sending_to_invalid_addresses() below.
for email_address in (
# Invalid address with two @ signs.
"to@other.com@example.com",
@ -1277,6 +1306,9 @@ class MailTests(MailTestsMixin, SimpleTestCase):
sanitize_address(email_address, encoding="utf-8")
def test_sanitize_address_header_injection(self):
# This is a unit test for the internal sanitize_address() function.
# These cases are also duplicated in test_address_header_encoding(),
# which verifies headers in the generated message.
msg = "Invalid address; address parts cannot contain newlines."
tests = [
"Name\nInjection <to@example.com>",
@ -1289,6 +1321,114 @@ class MailTests(MailTestsMixin, SimpleTestCase):
with self.assertRaisesMessage(ValueError, msg):
sanitize_address(email_address, encoding="utf-8")
def test_address_header_encoding(self):
    # Checks how each address is rendered in the "To" header of the
    # serialized message, using the modern email API's address header handling.
    # (Adapted from the older test_sanitize_address() for the legacy email API.)
    expectations = [
        # (address, expected)
        ("to@example.com", "to@example.com"),
        ("localpartonly", "localpartonly"),
        # Addresses with display-names.
        ("A name <to@example.com>", "A name <to@example.com>"),
        ('"A name" <to@example.com>', '"A name" <to@example.com>'),
        (
            '"Comma, requires quotes" <to@example.com>',
            '"Comma, requires quotes" <to@example.com>',
        ),
        ('"to@other.com" <to@example.com>', '"to@other.com" <to@example.com>'),
        # Non-ASCII addr-spec: IDNA encoding for domain.
        # (Note: no RFC permits encoding a non-ASCII localpart.)
        ("to@éxample.com", "to@xn--xample-9ua.com"),
        (
            "To Example <to@éxample.com>",
            "To Example <to@xn--xample-9ua.com>",
        ),
        # Pre-encoded IDNA domain is left as is.
        # (Make sure IDNA 2008 is not downgraded to IDNA 2003.)
        ("to@xn--fa-hia.example.com", "to@xn--fa-hia.example.com"),
        ("<to@xn--10cl1a0b660p.example.com>", "<to@xn--10cl1a0b660p.example.com>"),
        (
            '"Display, Name" <to@xn--nxasmm1c.example.com>',
            '"Display, Name" <to@xn--nxasmm1c.example.com>',
        ),
        # Non-ASCII display-name as RFC-2047 encoded-word.
        (
            "Tó Example <to@example.com>",
            "=?utf-8?q?T=C3=B3_Example?= <to@example.com>",
        ),
        # Addresses with two @ signs (quoted-string localpart).
        ('"to@other.com"@example.com', '"to@other.com"@example.com'),
        (
            'To Example <"to@other.com"@example.com>',
            'To Example <"to@other.com"@example.com>',
        ),
        # Addresses with long non-ASCII display names.
        (
            "Tó Example very long" * 4 + " <to@example.com>",
            "=?utf-8?q?T=C3=B3_Example_very_longT=C3=B3_Example_very_longT"
            "=C3=B3_Example_?="
            " =?utf-8?q?very_longT=C3=B3_Example_very_long?= <to@example.com>",
        ),
        # Address with long display name and non-ASCII domain.
        (
            "To Example very long" * 4 + " <to@exampl€.com>",
            "To Example very longTo Example very longTo Example very lo"
            "ngTo Example very long <to@xn--exampl-nc1c.com>",
        ),
    ]
    for address, expected in expectations:
        with self.subTest(address=address):
            serialized = EmailMessage(to=[address]).message().as_bytes().decode()
            # Unfold FWS so a wrapped header becomes a single line, then pick
            # out the To header. This is far from a complete header parser,
            # but it is enough here. Adjacent/folded RFC 2047 encoded-words
            # are deliberately not recombined.
            unfolded = re.sub(r"\s*\r?\n ", " ", serialized)
            to_lines = [
                line for line in unfolded.splitlines() if line.startswith("To:")
            ]
            self.assertEqual(to_lines[0], f"To: {expected}")
def test_address_header_injection(self):
    def build_email(header, address):
        # "From" is passed as a single address string.
        if header == "From":
            return EmailMessage(from_email=address)
        # These headers are passed as list-valued keyword arguments whose
        # name is the lowercased header with "-" replaced by "_".
        if header in ("To", "Cc", "Bcc", "Reply-To"):
            keyword = header.lower().replace("-", "_")
            return EmailMessage(**{keyword: [address]})
        # Everything else is passed through the headers dict.
        return EmailMessage(headers={header: address})

    # (This error message comes from Django's internal forbid_multi_line_headers().)
    expected_error = "Header values can't contain newlines"
    injected_addresses = [
        "Name\nInjection <to@example.com>",
        '"Name\nInjection" <to@example.com>',
        '"Name\rInjection" <to@example.com>',
        '"Name\r\nInjection" <to@example.com>',
        "Name <to\ninjection@example.com>",
        "to\ninjection@example.com",
    ]
    # Structured address header fields (from RFC 5322 3.6.x).
    address_headers = [
        "From",
        "Sender",
        "Reply-To",
        "To",
        "Cc",
        # "Bcc" is not checked by EmailMessage.message().
        # (See SMTPBackendTests.test_avoids_sending_to_invalid_addresses().)
        "Resent-From",
        "Resent-Sender",
        "Resent-To",
        "Resent-Cc",
        "Resent-Bcc",
    ]
    for header in address_headers:
        for email_address in injected_addresses:
            with self.subTest(header=header, email_address=email_address):
                # Construction stays inside the subTest so a failure there is
                # reported against this header/address pair.
                email = build_email(header, email_address)
                with self.assertRaisesMessage(ValueError, expected_error):
                    email.message()
def test_email_multi_alternatives_content_mimetype_none(self):
email_msg = EmailMultiAlternatives()
msg = "Both content and mimetype must be provided."