0
0
mirror of https://github.com/django/django.git synced 2024-12-01 15:42:04 +01:00

Fixed #717 - If-Modified-Since handling should compare dates according to RFC 2616

Thanks to Maniac for the report, julienb for the initial patch, and
especially to aaugustin for the final patch and tests.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@15696 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Luke Plant 2011-03-01 14:28:06 +00:00
parent 3f38a99174
commit dbe6ced0d6
7 changed files with 196 additions and 26 deletions

View File

@ -1,5 +1,5 @@
from django.core.exceptions import MiddlewareNotUsed from django.core.exceptions import MiddlewareNotUsed
from django.utils.http import http_date from django.utils.http import http_date, parse_http_date_safe
class ConditionalGetMiddleware(object): class ConditionalGetMiddleware(object):
""" """
@ -15,7 +15,7 @@ class ConditionalGetMiddleware(object):
response['Content-Length'] = str(len(response.content)) response['Content-Length'] = str(len(response.content))
if response.has_header('ETag'): if response.has_header('ETag'):
if_none_match = request.META.get('HTTP_IF_NONE_MATCH', None) if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
if if_none_match == response['ETag']: if if_none_match == response['ETag']:
# Setting the status is enough here. The response handling path # Setting the status is enough here. The response handling path
# automatically removes content for this status code (in # automatically removes content for this status code (in
@ -23,10 +23,14 @@ class ConditionalGetMiddleware(object):
response.status_code = 304 response.status_code = 304
if response.has_header('Last-Modified'): if response.has_header('Last-Modified'):
if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE', None) if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
if if_modified_since == response['Last-Modified']: if if_modified_since is not None:
# Setting the status code is enough here (same reasons as if_modified_since = parse_http_date_safe(if_modified_since)
# above). if if_modified_since is not None:
response.status_code = 304 last_modified = parse_http_date_safe(response['Last-Modified'])
if last_modified is not None and last_modified <= if_modified_since:
# Setting the status code is enough here (same reasons as
# above).
response.status_code = 304
return response return response

View File

@ -1,3 +1,5 @@
import calendar
import datetime
import re import re
import sys import sys
import urllib import urllib
@ -8,6 +10,17 @@ from django.utils.functional import allow_lazy
ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"') ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"')
# Lowercase three-letter month abbreviations in calendar order;
# parse_http_date turns a matched month name into its number with
# MONTHS.index(name) + 1.
MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split()
# Regex fragments with named groups, combined into the three full patterns
# below.
# Two-digit day of month.
__D = r'(?P<day>\d{2})'
# Day of month whose first character may be a space instead of a digit
# (used by the asctime pattern only).
__D2 = r'(?P<day>[ \d]\d)'
# Month name: any three word characters; validated later against MONTHS.
__M = r'(?P<mon>\w{3})'
# Four-digit year.
__Y = r'(?P<year>\d{4})'
# Two-digit year (used by the RFC 850 pattern only).
__Y2 = r'(?P<year>\d{2})'
# Time of day as HH:MM:SS.
__T = r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})'
# e.g. 'Sun, 06 Nov 1994 08:49:37 GMT'
RFC1123_DATE = re.compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T))
# e.g. 'Sunday, 06-Nov-94 08:49:37 GMT'
RFC850_DATE = re.compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T))
# asctime layout: weekday, month, day, time, then a four-digit year and no
# timezone suffix.
ASCTIME_DATE = re.compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y))
def urlquote(url, safe='/'): def urlquote(url, safe='/'):
""" """
A version of Python's urllib.quote() function that can operate on unicode A version of Python's urllib.quote() function that can operate on unicode
@ -70,6 +83,48 @@ def http_date(epoch_seconds=None):
rfcdate = formatdate(epoch_seconds) rfcdate = formatdate(epoch_seconds)
return '%s GMT' % rfcdate[:25] return '%s GMT' % rfcdate[:25]
def parse_http_date(date):
    """
    Parses a date format as specified by HTTP RFC2616 section 3.3.1.

    The three formats allowed by the RFC are accepted, even if only the first
    one is still in widespread use.

    Returns an integer expressed in seconds since the epoch, in UTC.

    Raises ValueError if the input matches none of the three formats, or
    matches one of them but does not describe a real date.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import time

    # email.Utils.parsedate does the job for RFC1123 dates; unfortunately
    # RFC2616 makes it mandatory to support RFC850 dates too. So we roll
    # our own RFC-compliant parsing.
    for regex in RFC1123_DATE, RFC850_DATE, ASCTIME_DATE:
        m = regex.match(date)
        if m is not None:
            break
    else:
        raise ValueError("%r is not in a valid HTTP date format" % date)
    try:
        year = int(m.group('year'))
        if year < 100:
            # Two-digit year: place it in the current century, then move it
            # back one century if that lands more than 50 years in the
            # future. A fixed pivot would start misreading dates as the
            # calendar advances.
            current_year = time.gmtime().tm_year
            year += current_year - current_year % 100
            if year - current_year > 50:
                year -= 100
        month = MONTHS.index(m.group('mon').lower()) + 1
        day = int(m.group('day'))
        hour = int(m.group('hour'))
        minute = int(m.group('min'))
        second = int(m.group('sec'))
        result = datetime.datetime(year, month, day, hour, minute, second)
        return calendar.timegm(result.utctimetuple())
    except Exception:
        # Unknown month names and out-of-range fields all surface as the
        # same ValueError so callers have one exception type to handle.
        raise ValueError("%r is not a valid date" % date)
def parse_http_date_safe(date):
    """
    Lenient variant of parse_http_date: gives back the parsed timestamp, or
    None when parse_http_date raises.
    """
    try:
        timestamp = parse_http_date(date)
    except Exception:
        # Any failure is reported to the caller as None instead of an error.
        return None
    return timestamp
# Base 36 functions: useful for generating compact URLs # Base 36 functions: useful for generating compact URLs
def base36_to_int(s): def base36_to_int(s):

View File

@ -9,10 +9,9 @@ except ImportError:
from calendar import timegm from calendar import timegm
from datetime import timedelta from datetime import timedelta
from email.Utils import formatdate
from django.utils.decorators import decorator_from_middleware, available_attrs from django.utils.decorators import decorator_from_middleware, available_attrs
from django.utils.http import parse_etags, quote_etag from django.utils.http import http_date, parse_http_date_safe, parse_etags, quote_etag
from django.utils.log import getLogger from django.utils.log import getLogger
from django.middleware.http import ConditionalGetMiddleware from django.middleware.http import ConditionalGetMiddleware
from django.http import HttpResponseNotAllowed, HttpResponseNotModified, HttpResponse from django.http import HttpResponseNotAllowed, HttpResponseNotModified, HttpResponse
@ -79,6 +78,8 @@ def condition(etag_func=None, last_modified_func=None):
def inner(request, *args, **kwargs): def inner(request, *args, **kwargs):
# Get HTTP request headers # Get HTTP request headers
if_modified_since = request.META.get("HTTP_IF_MODIFIED_SINCE") if_modified_since = request.META.get("HTTP_IF_MODIFIED_SINCE")
if if_modified_since:
if_modified_since = parse_http_date_safe(if_modified_since)
if_none_match = request.META.get("HTTP_IF_NONE_MATCH") if_none_match = request.META.get("HTTP_IF_NONE_MATCH")
if_match = request.META.get("HTTP_IF_MATCH") if_match = request.META.get("HTTP_IF_MATCH")
if if_none_match or if_match: if if_none_match or if_match:
@ -102,7 +103,7 @@ def condition(etag_func=None, last_modified_func=None):
if last_modified_func: if last_modified_func:
dt = last_modified_func(request, *args, **kwargs) dt = last_modified_func(request, *args, **kwargs)
if dt: if dt:
res_last_modified = formatdate(timegm(dt.utctimetuple()))[:26] + 'GMT' res_last_modified = timegm(dt.utctimetuple())
else: else:
res_last_modified = None res_last_modified = None
else: else:
@ -116,7 +117,8 @@ def condition(etag_func=None, last_modified_func=None):
if ((if_none_match and (res_etag in etags or if ((if_none_match and (res_etag in etags or
"*" in etags and res_etag)) and "*" in etags and res_etag)) and
(not if_modified_since or (not if_modified_since or
res_last_modified == if_modified_since)): (res_last_modified and if_modified_since and
res_last_modified <= if_modified_since))):
if request.method in ("GET", "HEAD"): if request.method in ("GET", "HEAD"):
response = HttpResponseNotModified() response = HttpResponseNotModified()
else: else:
@ -136,9 +138,9 @@ def condition(etag_func=None, last_modified_func=None):
} }
) )
response = HttpResponse(status=412) response = HttpResponse(status=412)
elif (not if_none_match and if_modified_since and elif (not if_none_match and request.method == "GET" and
request.method == "GET" and res_last_modified and if_modified_since and
res_last_modified == if_modified_since): res_last_modified <= if_modified_since):
response = HttpResponseNotModified() response = HttpResponseNotModified()
if response is None: if response is None:
@ -146,7 +148,7 @@ def condition(etag_func=None, last_modified_func=None):
# Set relevant headers on the response if they don't already exist. # Set relevant headers on the response if they don't already exist.
if res_last_modified and not response.has_header('Last-Modified'): if res_last_modified and not response.has_header('Last-Modified'):
response['Last-Modified'] = res_last_modified response['Last-Modified'] = http_date(res_last_modified)
if res_etag and not response.has_header('ETag'): if res_etag and not response.has_header('ETag'):
response['ETag'] = quote_etag(res_etag) response['ETag'] = quote_etag(res_etag)

View File

@ -9,12 +9,11 @@ import posixpath
import re import re
import stat import stat
import urllib import urllib
from email.Utils import parsedate_tz, mktime_tz
from django.template import loader from django.template import loader
from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseNotModified from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseNotModified
from django.template import Template, Context, TemplateDoesNotExist from django.template import Template, Context, TemplateDoesNotExist
from django.utils.http import http_date from django.utils.http import http_date, parse_http_date
def serve(request, path, document_root=None, show_indexes=False): def serve(request, path, document_root=None, show_indexes=False):
""" """
@ -128,10 +127,7 @@ def was_modified_since(header=None, mtime=0, size=0):
raise ValueError raise ValueError
matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", header, matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", header,
re.IGNORECASE) re.IGNORECASE)
header_date = parsedate_tz(matches.group(1)) header_mtime = parse_http_date(matches.group(1))
if header_date is None:
raise ValueError
header_mtime = mktime_tz(header_date)
header_len = matches.group(3) header_len = matches.group(3)
if header_len and int(header_len) != size: if header_len and int(header_len) != size:
raise ValueError raise ValueError

View File

@ -1,17 +1,20 @@
# -*- coding:utf-8 -*- # -*- coding:utf-8 -*-
from datetime import datetime, timedelta from datetime import datetime
from calendar import timegm
from django.test import TestCase from django.test import TestCase
from django.utils.http import parse_etags, quote_etag from django.utils import unittest
from django.utils.http import parse_etags, quote_etag, parse_http_date
FULL_RESPONSE = 'Test conditional get response' FULL_RESPONSE = 'Test conditional get response'
LAST_MODIFIED = datetime(2007, 10, 21, 23, 21, 47) LAST_MODIFIED = datetime(2007, 10, 21, 23, 21, 47)
LAST_MODIFIED_STR = 'Sun, 21 Oct 2007 23:21:47 GMT' LAST_MODIFIED_STR = 'Sun, 21 Oct 2007 23:21:47 GMT'
LAST_MODIFIED_NEWER_STR = 'Mon, 18 Oct 2010 16:56:23 GMT'
LAST_MODIFIED_INVALID_STR = 'Mon, 32 Oct 2010 16:56:23 GMT'
EXPIRED_LAST_MODIFIED_STR = 'Sat, 20 Oct 2007 23:21:47 GMT' EXPIRED_LAST_MODIFIED_STR = 'Sat, 20 Oct 2007 23:21:47 GMT'
ETAG = 'b4246ffc4f62314ca13147c9d4f76974' ETAG = 'b4246ffc4f62314ca13147c9d4f76974'
EXPIRED_ETAG = '7fae4cd4b0f81e7d2914700043aa8ed6' EXPIRED_ETAG = '7fae4cd4b0f81e7d2914700043aa8ed6'
class ConditionalGet(TestCase): class ConditionalGet(TestCase):
def assertFullResponse(self, response, check_last_modified=True, check_etag=True): def assertFullResponse(self, response, check_last_modified=True, check_etag=True):
self.assertEquals(response.status_code, 200) self.assertEquals(response.status_code, 200)
@ -33,6 +36,12 @@ class ConditionalGet(TestCase):
self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
response = self.client.get('/condition/') response = self.client.get('/condition/')
self.assertNotModified(response) self.assertNotModified(response)
self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_NEWER_STR
response = self.client.get('/condition/')
self.assertNotModified(response)
self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_INVALID_STR
response = self.client.get('/condition/')
self.assertFullResponse(response)
self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
response = self.client.get('/condition/') response = self.client.get('/condition/')
self.assertFullResponse(response) self.assertFullResponse(response)
@ -118,7 +127,7 @@ class ConditionalGet(TestCase):
self.assertFullResponse(response, check_last_modified=False) self.assertFullResponse(response, check_last_modified=False)
class ETagProcesing(TestCase): class ETagProcessing(unittest.TestCase):
def testParsing(self): def testParsing(self):
etags = parse_etags(r'"", "etag", "e\"t\"ag", "e\\tag", W/"weak"') etags = parse_etags(r'"", "etag", "e\"t\"ag", "e\\tag", W/"weak"')
self.assertEquals(etags, ['', 'etag', 'e"t"ag', r'e\tag', 'weak']) self.assertEquals(etags, ['', 'etag', 'e"t"ag', r'e\tag', 'weak'])
@ -126,3 +135,20 @@ class ETagProcesing(TestCase):
def testQuoting(self): def testQuoting(self):
quoted_etag = quote_etag(r'e\t"ag') quoted_etag = quote_etag(r'e\t"ag')
self.assertEquals(quoted_etag, r'"e\\t\"ag"') self.assertEquals(quoted_etag, r'"e\\t\"ag"')
class HttpDateProcessing(unittest.TestCase):
    """
    Checks parse_http_date with one sample per accepted date format. Every
    sample spells the same instant: 6 November 1994, 08:49:37 UTC.
    """

    def testParsingRfc1123(self):
        parsed = parse_http_date('Sun, 06 Nov 1994 08:49:37 GMT')
        self.assertEqual(datetime.utcfromtimestamp(parsed),
                         datetime(1994, 11, 6, 8, 49, 37))

    def testParsingRfc850(self):
        parsed = parse_http_date('Sunday, 06-Nov-94 08:49:37 GMT')
        self.assertEqual(datetime.utcfromtimestamp(parsed),
                         datetime(1994, 11, 6, 8, 49, 37))

    def testParsingAsctime(self):
        # The day field is two characters wide in this format, so a
        # single-digit day is padded with a space: two spaces follow 'Nov'.
        parsed = parse_http_date('Sun Nov  6 08:49:37 1994')
        self.assertEqual(datetime.utcfromtimestamp(parsed),
                         datetime(1994, 11, 6, 8, 49, 37))

View File

@ -3,6 +3,7 @@
from django.conf import settings from django.conf import settings
from django.http import HttpRequest from django.http import HttpRequest
from django.middleware.common import CommonMiddleware from django.middleware.common import CommonMiddleware
from django.middleware.http import ConditionalGetMiddleware
from django.test import TestCase from django.test import TestCase
@ -247,3 +248,89 @@ class CommonMiddlewareTest(TestCase):
self.assertEquals(r.status_code, 301) self.assertEquals(r.status_code, 301)
self.assertEquals(r['Location'], self.assertEquals(r['Location'],
'http://www.testserver/middleware/customurlconf/slash/') 'http://www.testserver/middleware/customurlconf/slash/')
class ConditionalGetMiddlewareTest(TestCase):
    """
    Passes a request/response pair through
    ConditionalGetMiddleware.process_response and checks the headers and
    status code of the response that comes back.
    """
    # NOTE(review): presumably the URLconf the test client resolves "/"
    # against for this test case -- confirm.
    urls = 'regressiontests.middleware.cond_get_urls'

    def setUp(self):
        # A hand-built request for "/" plus the response the test client
        # gets for the same path; individual tests add headers to either one
        # before running the middleware.
        self.req = HttpRequest()
        self.req.META = {
            'SERVER_NAME': 'testserver',
            'SERVER_PORT': 80,
        }
        self.req.path = self.req.path_info = "/"
        self.resp = self.client.get(self.req.path)

    def _process(self):
        """
        Runs a fresh ConditionalGetMiddleware over self.req and self.resp
        and stores the returned response back in self.resp.
        """
        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)

    # Tests for the Date header

    def test_date_header_added(self):
        self.assertFalse('Date' in self.resp)
        self._process()
        self.assertTrue('Date' in self.resp)

    # Tests for the Content-Length header

    def test_content_length_header_added(self):
        content_length = len(self.resp.content)
        self.assertFalse('Content-Length' in self.resp)
        self._process()
        self.assertTrue('Content-Length' in self.resp)
        self.assertEqual(int(self.resp['Content-Length']), content_length)

    def test_content_length_header_not_changed(self):
        # A Content-Length that is already present must be left alone, even
        # when it does not match the body.
        bad_content_length = len(self.resp.content) + 10
        self.resp['Content-Length'] = bad_content_length
        self._process()
        self.assertEqual(int(self.resp['Content-Length']), bad_content_length)

    # Tests for the ETag header

    def test_if_none_match_and_no_etag(self):
        self.req.META['HTTP_IF_NONE_MATCH'] = 'spam'
        self._process()
        self.assertEqual(self.resp.status_code, 200)

    def test_no_if_none_match_and_etag(self):
        self.resp['ETag'] = 'eggs'
        self._process()
        self.assertEqual(self.resp.status_code, 200)

    def test_if_none_match_and_same_etag(self):
        self.req.META['HTTP_IF_NONE_MATCH'] = self.resp['ETag'] = 'spam'
        self._process()
        self.assertEqual(self.resp.status_code, 304)

    def test_if_none_match_and_different_etag(self):
        self.req.META['HTTP_IF_NONE_MATCH'] = 'spam'
        self.resp['ETag'] = 'eggs'
        self._process()
        self.assertEqual(self.resp.status_code, 200)

    # Tests for the Last-Modified header

    def test_if_modified_since_and_no_last_modified(self):
        self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT'
        self._process()
        self.assertEqual(self.resp.status_code, 200)

    def test_no_if_modified_since_and_last_modified(self):
        self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:38:44 GMT'
        self._process()
        self.assertEqual(self.resp.status_code, 200)

    def test_if_modified_since_and_same_last_modified(self):
        self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT'
        self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:38:44 GMT'
        self._process()
        self.assertEqual(self.resp.status_code, 304)

    def test_if_modified_since_and_last_modified_in_the_past(self):
        # Last-Modified is three minutes before If-Modified-Since.
        self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT'
        self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:35:44 GMT'
        self._process()
        self.assertEqual(self.resp.status_code, 304)

    def test_if_modified_since_and_last_modified_in_the_future(self):
        # Last-Modified is three minutes after If-Modified-Since.
        self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT'
        self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:41:44 GMT'
        self._process()
        self.assertEqual(self.resp.status_code, 200)

View File

@ -51,7 +51,7 @@ class StaticTests(TestCase):
file_name = 'file.txt' file_name = 'file.txt'
response = self.client.get( response = self.client.get(
'/views/%s/%s' % (self.prefix, file_name), '/views/%s/%s' % (self.prefix, file_name),
HTTP_IF_MODIFIED_SINCE='Mon, 18 Jan 2038 05:14:07 UTC' HTTP_IF_MODIFIED_SINCE='Mon, 18 Jan 2038 05:14:07 GMT'
# This is 24h before max Unix time. Remember to fix Django and # This is 24h before max Unix time. Remember to fix Django and
# update this test well before 2038 :) # update this test well before 2038 :)
) )