0
0
mirror of https://github.com/wagtail/wagtail.git synced 2024-11-28 08:48:04 +01:00

Optimize the querycount for the sitemap.xml page

By optionally passing the request object to Page.get_sitemap_urls() it
will now use the cached site root on the request object instead of
retrieving it for each call. This cuts the number of queries required
for a sitemap roughly in half.
This commit is contained in:
Michael van Tellingen 2018-03-30 20:46:34 +02:00 committed by Matt Westcott
parent 40981b0c65
commit bad95cf37c
7 changed files with 64 additions and 18 deletions

View File

@ -20,6 +20,7 @@ Changelog
* Added faceted search using the `.facet()` method (Karl Hobley)
* Admin modal views no longer rely on Javascript `eval()`, for better CSP compliance (Matt Westcott)
* Update editor guide for embeds and documents in rich text (Kevin Howbrook)
* Improved performance of sitemap generation (Michael van Tellingen)
* Fix: Handle all exceptions from `Image.get_file_size` (Andrew Plummer)
* Fix: Fix display of breadcrumbs in ModelAdmin (LB (Ben Johnston))
* Fix: Remove duplicate border radius of avatars (Benjamin Thurm)

View File

@ -97,10 +97,11 @@ Customising
URLs
----
The ``Page`` class defines a ``get_sitemap_urls`` method which you can override
to customise sitemaps per ``Page`` instance. This method must return a list of
dictionaries, one dictionary per URL entry in the sitemap. You can exclude
pages from the sitemap by returning an empty list.
The ``Page`` class defines a ``get_sitemap_urls`` method which you can
override to customise sitemaps per ``Page`` instance. This method must accept
a request object and return a list of dictionaries, one dictionary per URL
entry in the sitemap. You can exclude pages from the sitemap by returning an
empty list.
Each dictionary can contain the following:

View File

@ -29,6 +29,7 @@ Other features
* Added faceted search using the ``.facet()`` method (Karl Hobley)
* Admin modal views no longer rely on Javascript ``eval()``, for better CSP compliance (Matt Westcott)
* Update editor guide for embeds and documents in rich text (Kevin Howbrook)
* Improved performance of sitemap generation (Michael van Tellingen)
Bug fixes
~~~~~~~~~
@ -55,3 +56,9 @@ The ``wagtail.admin.modal_workflow`` module (used internally by Wagtail to handl
* At the point where you call the ``ModalWorkflow`` constructor, add an ``onload`` option - a dictionary of functions to be called on loading each step of the workflow. Move the code from the .js template into this dictionary. Then, on the call to ``render_modal_workflow``, rather than passing the .js template name (which should now be replaced by ``None``), pass a ``step`` item in the ``json_data`` dictionary to indicate the ``onload`` function to be called.
Additionally, if your code calls ``loadResponseText`` as part of a jQuery AJAX callback, this should now be passed all three arguments from the callback (the response data, status string and XMLHttpRequest object).
``Page.get_sitemap_urls()`` now accepts an optional ``request`` keyword argument
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The ``Page.get_sitemap_urls()`` method used by the ``wagtail.contrib.sitemaps`` module has been updated to receive an optional ``request`` keyword argument. If you have overridden this method in your page models, you will need to update the method signature to accept this argument (and pass it on when calling ``super``, if applicable).

View File

@ -1,13 +1,19 @@
import warnings
from django.contrib.sitemaps import Sitemap as DjangoSitemap
from wagtail.core.utils import accepts_kwarg
from wagtail.utils.deprecation import RemovedInWagtail24Warning
class Sitemap(DjangoSitemap):
def __init__(self, site=None):
def __init__(self, site=None, request=None):
self.site = site
self.request = request
def location(self, obj):
return obj.url
return obj.get_full_url(self.request)
def lastmod(self, obj):
# fall back on latest_revision_created_at if last_published_at is null
@ -29,7 +35,18 @@ class Sitemap(DjangoSitemap):
last_mods = set()
for item in self.paginator.page(page).object_list:
for url_info in item.get_sitemap_urls():
if not accepts_kwarg(item.get_sitemap_urls, 'request'):
warnings.warn(
"%s.get_sitemap_urls() must be updated to accept an optional "
"'request' keyword argument" % type(item).__name__,
category=RemovedInWagtail24Warning)
url_info_items = item.get_sitemap_urls()
else:
url_info_items = item.get_sitemap_urls(self.request)
for url_info in url_info_items:
urls.append(url_info)
last_mods.add(url_info.get('lastmod'))

View File

@ -1,6 +1,7 @@
import datetime
import pytz
from django.contrib.contenttypes.models import ContentType
from django.contrib.sites.shortcuts import get_current_site
from django.test import RequestFactory, TestCase
@ -48,21 +49,40 @@ class TestSitemapGenerator(TestCase):
self.site = Site.objects.get(is_default_site=True)
# Clear the cache to that runs are deterministic regarding the sql count
ContentType.objects.clear_cache()
def test_items(self):
sitemap = Sitemap(self.site)
request = RequestFactory().get('/sitemap.xml')
sitemap = Sitemap(self.site, request)
pages = sitemap.items()
self.assertIn(self.child_page.page_ptr.specific, pages)
self.assertNotIn(self.unpublished_child_page.page_ptr.specific, pages)
self.assertNotIn(self.protected_child_page.page_ptr.specific, pages)
def test_get_urls(self):
def test_get_urls_without_request(self):
request = RequestFactory().get('/sitemap.xml')
req_protocol = request.scheme
req_site = get_current_site(request)
sitemap = Sitemap(self.site)
urls = [url['location'] for url in sitemap.get_urls(1, req_site, req_protocol)]
with self.assertNumQueries(18):
urls = [url['location'] for url in sitemap.get_urls(1, req_site, req_protocol)]
self.assertIn('http://localhost/', urls) # Homepage
self.assertIn('http://localhost/hello-world/', urls) # Child page
def test_get_urls_with_request_site_cache(self):
request = RequestFactory().get('/sitemap.xml')
req_protocol = request.scheme
req_site = get_current_site(request)
sitemap = Sitemap(self.site, request)
with self.assertNumQueries(16):
urls = [url['location'] for url in sitemap.get_urls(1, req_site, req_protocol)]
self.assertIn('http://localhost/', urls) # Homepage
self.assertIn('http://localhost/hello-world/', urls) # Child page
@ -79,7 +99,7 @@ class TestSitemapGenerator(TestCase):
live=True,
))
sitemap = Sitemap(self.site)
sitemap = Sitemap(self.site, request)
urls = [url['location'] for url in sitemap.get_urls(1, req_site, req_protocol)]
self.assertIn('http://localhost/events/', urls) # Main view
@ -90,7 +110,7 @@ class TestSitemapGenerator(TestCase):
req_protocol = request.scheme
req_site = get_current_site(request)
sitemap = Sitemap(self.site)
sitemap = Sitemap(self.site, request)
urls = sitemap.get_urls(1, req_site, req_protocol)
child_page_lastmod = [
@ -115,7 +135,7 @@ class TestSitemapGenerator(TestCase):
req_protocol = request.scheme
req_site = get_current_site(request)
sitemap = Sitemap(self.site)
sitemap = Sitemap(self.site, request)
sitemap.get_urls(1, req_site, req_protocol)
self.assertEqual(sitemap.latest_lastmod, datetime.datetime(2017, 3, 1, 12, 0, 0, tzinfo=pytz.utc))
@ -129,7 +149,7 @@ class TestSitemapGenerator(TestCase):
req_protocol = request.scheme
req_site = get_current_site(request)
sitemap = Sitemap(self.site)
sitemap = Sitemap(self.site, request)
sitemap.get_urls(1, req_site, req_protocol)
self.assertFalse(hasattr(sitemap, 'latest_lastmod'))

View File

@ -12,7 +12,7 @@ def sitemap(request, sitemaps=None, **kwargs):
if sitemaps:
sitemaps = prepare_sitemaps(request, sitemaps)
else:
sitemaps = {'wagtail': Sitemap(request.site)}
sitemaps = {'wagtail': Sitemap(request.site, request)}
return sitemap_views.sitemap(request, sitemaps, **kwargs)
@ -21,7 +21,7 @@ def prepare_sitemaps(request, sitemaps):
initialised_sitemaps = {}
for name, sitemap_cls in sitemaps.items():
if issubclass(sitemap_cls, Sitemap):
initialised_sitemaps[name] = sitemap_cls(request.site)
initialised_sitemaps[name] = sitemap_cls(request.site, request)
else:
initialised_sitemaps[name] = sitemap_cls
return initialised_sitemaps

View File

@ -1328,10 +1328,10 @@ class Page(AbstractPage, index.Indexed, ClusterableModel, metaclass=PageBase):
"""
return ['/']
def get_sitemap_urls(self):
def get_sitemap_urls(self, request=None):
return [
{
'location': self.full_url,
'location': self.get_full_url(request),
# fall back on latest_revision_created_at if last_published_at is null
# (for backwards compatibility from before last_published_at was added)
'lastmod': (self.last_published_at or self.latest_revision_created_at),