From 2dbd0a49f9e73ea4d39c5c9abc1b73207a79f3c9 Mon Sep 17 00:00:00 2001 From: Rich Brennan Date: Tue, 30 Apr 2019 16:08:13 +0100 Subject: [PATCH] Fix document serve response filename when non-ascii characters used * url encode the document filename in the Content-Disposition header in the document serve view --- CHANGELOG.txt | 1 + docs/releases/2.9.rst | 1 + wagtail/documents/tests/test_views.py | 75 +++++++++++++++++++++++++++ wagtail/documents/views/serve.py | 5 +- 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 12bcdb172f..5002edd478 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -45,6 +45,7 @@ Changelog * Fix: Invalid focal_point attribute on image edit view (Michał (Quadric) Sieradzki) * Fix: No longer expose the `.delete()` method on the default Page.objects manager (Nick Smith) * Fix: `exclude_fields_in_copy` on Page models will now work for for modelcluster parental / many to many relations (LB (Ben Johnston)) + * Fix: Response header (content disposition) now correctly handles filenames with non-ascii characters when using a storage backend (Rich Brennan) 2.8.1 (14.04.2020) diff --git a/docs/releases/2.9.rst b/docs/releases/2.9.rst index 2591f6840f..1f60311be5 100644 --- a/docs/releases/2.9.rst +++ b/docs/releases/2.9.rst @@ -63,6 +63,7 @@ Bug fixes * Invalid focal_point attribute on image edit view (Michał (Quadric) Sieradzki) * No longer expose the ``.delete()`` method on the default Page.objects manager (Nick Smith) * ``exclude_fields_in_copy`` on Page models will now work for for modelcluster parental / many to many relations (LB (Ben Johnston)) + * Response header (content disposition) now correctly handles filenames with non-ascii characters when using a storage backend (Rich Brennan) Upgrade considerations diff --git a/wagtail/documents/tests/test_views.py b/wagtail/documents/tests/test_views.py index 49e741e47c..918ab7715c 100644 --- a/wagtail/documents/tests/test_views.py +++ b/wagtail/documents/tests/test_views.py @@ -1,5 +1,7 @@ import os.path import unittest +import urllib +from io import StringIO from unittest import mock from django.conf import settings @@ -41,6 +43,38 @@ class TestServeView(TestCase): self.get()['Content-Disposition'], 'attachment; filename="{}"'.format(self.document.filename)) + @mock.patch('wagtail.documents.views.serve.hooks') + @mock.patch('wagtail.documents.views.serve.get_object_or_404') + def test_non_local_filesystem_content_disposition_header( + self, mock_get_object_or_404, mock_hooks + ): + """ + Tests the 'Content-Disposition' header in a response when using a + storage backend that doesn't expose filesystem paths. + """ + # Create a mock document with no local file to hit the correct code path + mock_doc = mock.Mock() + mock_doc.filename = self.document.filename + mock_doc.file = StringIO('file-like object' * 10) + mock_doc.file.path = None + mock_doc.file.url = None + mock_doc.file.size = 30 + mock_get_object_or_404.return_value = mock_doc + + # Bypass 'before_serve_document' hooks + mock_hooks.get_hooks.return_value = [] + + response = self.get() + + self.assertEqual(response.status_code, 200) + + self.assertEqual( + response['Content-Disposition'], + "attachment; filename={0}; filename*=UTF-8''{0}".format( + urllib.parse.quote(self.document.filename) + ) + ) + def test_content_length_header(self): self.assertEqual(self.get()['Content-Length'], '25') @@ -221,6 +255,47 @@ class TestServeWithUnicodeFilename(TestCase): except UnicodeEncodeError: raise unittest.SkipTest("Filesystem doesn't support unicode filenames") + def tearDown(self): + # delete the FieldFile directly because the TestCase does not commit + # transactions to trigger transaction.on_commit() in the signal handler + self.document.file.delete() + def test_response_code(self): response = self.client.get(reverse('wagtaildocs_serve', args=(self.document.id, self.filename))) self.assertEqual(response.status_code, 200) + + @mock.patch('wagtail.documents.views.serve.hooks') + @mock.patch('wagtail.documents.views.serve.get_object_or_404') + def test_non_local_filesystem_unicode_content_disposition_header( + self, mock_get_object_or_404, mock_hooks + ): + """ + Tests that a unicode 'Content-Disposition' header (for a response using + a storage backend that doesn't expose filesystem paths) doesn't cause an + error if encoded differently. + """ + # Create a mock document to hit the correct code path. + mock_doc = mock.Mock() + mock_doc.filename = 'TÈST.doc' + mock_doc.file = StringIO('file-like object' * 10) + mock_doc.file.path = None + mock_doc.file.url = None + mock_doc.file.size = 30 + mock_get_object_or_404.return_value = mock_doc + + # Bypass 'before_serve_document' hooks + mock_hooks.get_hooks.return_value = [] + + response = self.client.get(reverse('wagtaildocs_serve', args=(self.document.id, mock_doc.filename))) + + self.assertEqual(response.status_code, 200) + + try: + response['Content-Disposition'].encode('ascii') + except UnicodeDecodeError: + self.fail('Content-Disposition with unicode characters failed ascii encoding.') + + try: + response['Content-Disposition'].encode('latin-1') + except UnicodeDecodeError: + self.fail('Content-Disposition with unicode characters failed latin-1 encoding.') diff --git a/wagtail/documents/views/serve.py b/wagtail/documents/views/serve.py index 3077e1a450..bc2422a002 100644 --- a/wagtail/documents/views/serve.py +++ b/wagtail/documents/views/serve.py @@ -1,3 +1,4 @@ +import urllib from wsgiref.util import FileWrapper from django.conf import settings @@ -101,7 +102,9 @@ def serve(request, document_id, document_filename): wrapper = FileWrapper(doc.file) response = StreamingHttpResponse(wrapper, content_type='application/octet-stream') - response['Content-Disposition'] = 'attachment; filename=%s' % doc.filename + # set filename and filename* to handle non-ascii characters in filename + # see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition + response['Content-Disposition'] = "attachment; filename={0}; filename*=UTF-8''{0}".format(urllib.parse.quote(doc.filename)) # FIXME: storage backends are not guaranteed to implement 'size' response['Content-Length'] = doc.file.size