Fixed #22971 -- Properly parsed RFC 2231 encoded headers (as used by RFC 2388 multipart/form-data)

Thanks homm for the report, Cea Stapleton for patch improvements, and Ian Cordasco, Christian Schmitt, and Tim Graham for the review.
2024-12-01 15:42:04 +01:00 · 2014-07-12 14:08:50 +02:00 · 2014-07-12 14:08:50 +02:00 · b42e5ca058
commit b42e5ca058
parent 7244a8d0ae
2 changed files with 78 additions and 1 deletions
--- a/django/http/multipartparser.py
+++ b/django/http/multipartparser.py
@ -16,6 +16,7 @@ from django.core.exceptions import SuspiciousMultipartForm
 from django.utils.datastructures import MultiValueDict
 from django.utils.encoding import force_text
 from django.utils import six
+from django.utils.six.moves.urllib.parse import unquote
 from django.utils.text import unescape_entities
 from django.core.files.uploadhandler import StopUpload, SkipFile, StopFutureHandlers

@ -631,8 +632,20 @@ def parse_header(line):
    for p in plist:
        i = p.find(b'=')
        if i >= 0:
+            has_encoding = False
            name = p[:i].strip().lower().decode('ascii')
+            if name.endswith('*'):
+                # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
+                # http://tools.ietf.org/html/rfc2231#section-4
+                name = name[:-1]
+                has_encoding = True
            value = p[i + 1:].strip()
+            if has_encoding:
+                encoding, lang, value = value.split(b"'")
+                if six.PY3:
+                    value = unquote(value.decode(), encoding=encoding.decode())
+                else:
+                    value = unquote(value).decode(encoding)
            if len(value) >= 2 and value[:1] == value[-1:] == b'"':
                value = value[1:-1]
                value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"')
--- a/tests/file_uploads/tests.py
+++ b/tests/file_uploads/tests.py
@ -12,10 +12,11 @@ import unittest

 from django.core.files import temp as tempfile
 from django.core.files.uploadedfile import SimpleUploadedFile
-from django.http.multipartparser import MultiPartParser
+from django.http.multipartparser import MultiPartParser, parse_header
 from django.test import TestCase, client
 from django.test import override_settings
 from django.utils.encoding import force_bytes
+from django.utils.http import urlquote
 from django.utils.six import StringIO

 from . import uploadhandler
@ -120,6 +121,56 @@ class FileUploadTests(TestCase):

        self.assertEqual(response.status_code, 200)

+    def test_unicode_file_name_rfc2231(self):
+        """
+        Test receiving file upload when filename is encoded with RFC2231
+        (#22971).
+        """
+        payload = client.FakePayload()
+        payload.write('\r\n'.join([
+            '--' + client.BOUNDARY,
+            'Content-Disposition: form-data; name="file_unicode"; filename*=UTF-8\'\'%s' % urlquote(UNICODE_FILENAME),
+            'Content-Type: application/octet-stream',
+            '',
+            'You got pwnd.\r\n',
+            '\r\n--' + client.BOUNDARY + '--\r\n'
+        ]))
+
+        r = {
+            'CONTENT_LENGTH': len(payload),
+            'CONTENT_TYPE': client.MULTIPART_CONTENT,
+            'PATH_INFO': "/unicode_name/",
+            'REQUEST_METHOD': 'POST',
+            'wsgi.input': payload,
+        }
+        response = self.client.request(**r)
+        self.assertEqual(response.status_code, 200)
+
+    def test_unicode_name_rfc2231(self):
+        """
+        Test receiving file upload when both the field name and the filename
+        are encoded with RFC2231 (#22971).
+        """
+        payload = client.FakePayload()
+        payload.write('\r\n'.join([
+            '--' + client.BOUNDARY,
+            'Content-Disposition: form-data; name*=UTF-8\'\'file_unicode; filename*=UTF-8\'\'%s' % urlquote(UNICODE_FILENAME),
+            'Content-Type: application/octet-stream',
+            '',
+            'You got pwnd.\r\n',
+            '\r\n--' + client.BOUNDARY + '--\r\n'
+        ]))
+
+        r = {
+            'CONTENT_LENGTH': len(payload),
+            'CONTENT_TYPE': client.MULTIPART_CONTENT,
+            'PATH_INFO': "/unicode_name/",
+            'REQUEST_METHOD': 'POST',
+            'wsgi.input': payload,
+        }
+        response = self.client.request(**r)
+        self.assertEqual(response.status_code, 200)
+
    def test_dangerous_file_names(self):
        """Uploaded file names should be sanitized before ever reaching the view."""
        # This test simulates possible directory traversal attacks by a
@ -483,3 +534,16 @@ class MultiParserTests(unittest.TestCase):
            'CONTENT_TYPE': 'multipart/form-data; boundary=_foo',
            'CONTENT_LENGTH': '1'
        }, StringIO('x'), [], 'utf-8')
+
+    def test_rfc2231_parsing(self):
+        test_data = (
+            (b"Content-Type: application/x-stuff; title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
+             "This is ***fun***"),
+            (b"Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
+             "foo-ä.html"),
+            (b"Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
+             "foo-ä.html"),
+        )
+        for raw_line, expected_title in test_data:
+            parsed = parse_header(raw_line)
+            self.assertEqual(parsed[1]['title'], expected_title)