From bad95cf37c51898a67d26d4ed92ddf54bca48e21 Mon Sep 17 00:00:00 2001 From: Michael van Tellingen Date: Fri, 30 Mar 2018 20:46:34 +0200 Subject: [PATCH] Optimize the querycount for the sitemap.xml page By optionally passing the request object to Page.get_sitemap_urls() it will now use the cached site root on the request object instead of retrieving it for each call. This cuts the number of queries required for a sitemap roughly in half. --- CHANGELOG.txt | 1 + docs/reference/contrib/sitemaps.rst | 9 ++--- docs/releases/2.2.rst | 7 ++++ wagtail/contrib/sitemaps/sitemap_generator.py | 23 +++++++++++-- wagtail/contrib/sitemaps/tests.py | 34 +++++++++++++++---- wagtail/contrib/sitemaps/views.py | 4 +-- wagtail/core/models.py | 4 +-- 7 files changed, 64 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index fa7cbc327e..e510b3e011 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -20,6 +20,7 @@ Changelog * Added faceted search using the `.facet()` method (Karl Hobley) * Admin modal views no longer rely on Javascript `eval()`, for better CSP compliance (Matt Westcott) * Update editor guide for embeds and documents in rich text (Kevin Howbrook) + * Improved performance of sitemap generation (Michael van Tellingen) * Fix: Handle all exceptions from `Image.get_file_size` (Andrew Plummer) * Fix: Fix display of breadcrumbs in ModelAdmin (LB (Ben Johnston)) * Fix: Remove duplicate border radius of avatars (Benjamin Thurm) diff --git a/docs/reference/contrib/sitemaps.rst b/docs/reference/contrib/sitemaps.rst index 15336bec0c..c3b4a85922 100644 --- a/docs/reference/contrib/sitemaps.rst +++ b/docs/reference/contrib/sitemaps.rst @@ -97,10 +97,11 @@ Customising URLs ---- -The ``Page`` class defines a ``get_sitemap_urls`` method which you can override -to customise sitemaps per ``Page`` instance. This method must return a list of -dictionaries, one dictionary per URL entry in the sitemap. You can exclude -pages from the sitemap by returning an empty list. +The ``Page`` class defines a ``get_sitemap_urls`` method which you can +override to customise sitemaps per ``Page`` instance. This method must accept +a request object and return a list of dictionaries, one dictionary per URL +entry in the sitemap. You can exclude pages from the sitemap by returning an +empty list. Each dictionary can contain the following: diff --git a/docs/releases/2.2.rst b/docs/releases/2.2.rst index 3c0905f0f5..75ec770620 100644 --- a/docs/releases/2.2.rst +++ b/docs/releases/2.2.rst @@ -29,6 +29,7 @@ Other features * Added faceted search using the ``.facet()`` method (Karl Hobley) * Admin modal views no longer rely on Javascript ``eval()``, for better CSP compliance (Matt Westcott) * Update editor guide for embeds and documents in rich text (Kevin Howbrook) + * Improved performance of sitemap generation (Michael van Tellingen) Bug fixes ~~~~~~~~~ @@ -55,3 +56,9 @@ The ``wagtail.admin.modal_workflow`` module (used internally by Wagtail to handl * At the point where you call the ``ModalWorkflow`` constructor, add an ``onload`` option - a dictionary of functions to be called on loading each step of the workflow. Move the code from the .js template into this dictionary. Then, on the call to ``render_modal_workflow``, rather than passing the .js template name (which should now be replaced by ``None``), pass a ``step`` item in the ``json_data`` dictionary to indicate the ``onload`` function to be called. Additionally, if your code calls ``loadResponseText`` as part of a jQuery AJAX callback, this should now be passed all three arguments from the callback (the response data, status string and XMLHttpRequest object). + + +``Page.get_sitemap_urls()`` now accepts an optional ``request`` keyword argument +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``Page.get_sitemap_urls()`` method used by the ``wagtail.contrib.sitemaps`` module has been updated to receive an optional ``request`` keyword argument. If you have overridden this method in your page models, you will need to update the method signature to accept this argument (and pass it on when calling ``super``, if applicable). diff --git a/wagtail/contrib/sitemaps/sitemap_generator.py b/wagtail/contrib/sitemaps/sitemap_generator.py index 49a59c3b68..73b94773b1 100644 --- a/wagtail/contrib/sitemaps/sitemap_generator.py +++ b/wagtail/contrib/sitemaps/sitemap_generator.py @@ -1,13 +1,19 @@ +import warnings + from django.contrib.sitemaps import Sitemap as DjangoSitemap +from wagtail.core.utils import accepts_kwarg +from wagtail.utils.deprecation import RemovedInWagtail24Warning + class Sitemap(DjangoSitemap): - def __init__(self, site=None): + def __init__(self, site=None, request=None): self.site = site + self.request = request def location(self, obj): - return obj.url + return obj.get_full_url(self.request) def lastmod(self, obj): # fall back on latest_revision_created_at if last_published_at is null @@ -29,7 +35,18 @@ class Sitemap(DjangoSitemap): last_mods = set() for item in self.paginator.page(page).object_list: - for url_info in item.get_sitemap_urls(): + + if not accepts_kwarg(item.get_sitemap_urls, 'request'): + warnings.warn( + "%s.get_sitemap_urls() must be updated to accept an optional " + "'request' keyword argument" % type(item).__name__, + category=RemovedInWagtail24Warning) + + url_info_items = item.get_sitemap_urls() + else: + url_info_items = item.get_sitemap_urls(self.request) + + for url_info in url_info_items: urls.append(url_info) last_mods.add(url_info.get('lastmod')) diff --git a/wagtail/contrib/sitemaps/tests.py b/wagtail/contrib/sitemaps/tests.py index 7bfe8bd157..e88a96f8d5 100644 --- a/wagtail/contrib/sitemaps/tests.py +++ b/wagtail/contrib/sitemaps/tests.py @@ -1,6 +1,7 @@ import datetime import pytz +from django.contrib.contenttypes.models import ContentType from django.contrib.sites.shortcuts import get_current_site from django.test import RequestFactory, TestCase @@ -48,21 +49,40 @@ class TestSitemapGenerator(TestCase): self.site = Site.objects.get(is_default_site=True) + # Clear the cache to that runs are deterministic regarding the sql count + ContentType.objects.clear_cache() + def test_items(self): - sitemap = Sitemap(self.site) + request = RequestFactory().get('/sitemap.xml') + + sitemap = Sitemap(self.site, request) pages = sitemap.items() self.assertIn(self.child_page.page_ptr.specific, pages) self.assertNotIn(self.unpublished_child_page.page_ptr.specific, pages) self.assertNotIn(self.protected_child_page.page_ptr.specific, pages) - def test_get_urls(self): + def test_get_urls_without_request(self): request = RequestFactory().get('/sitemap.xml') req_protocol = request.scheme req_site = get_current_site(request) sitemap = Sitemap(self.site) - urls = [url['location'] for url in sitemap.get_urls(1, req_site, req_protocol)] + with self.assertNumQueries(18): + urls = [url['location'] for url in sitemap.get_urls(1, req_site, req_protocol)] + + self.assertIn('http://localhost/', urls) # Homepage + self.assertIn('http://localhost/hello-world/', urls) # Child page + + def test_get_urls_with_request_site_cache(self): + request = RequestFactory().get('/sitemap.xml') + req_protocol = request.scheme + req_site = get_current_site(request) + + sitemap = Sitemap(self.site, request) + + with self.assertNumQueries(16): + urls = [url['location'] for url in sitemap.get_urls(1, req_site, req_protocol)] self.assertIn('http://localhost/', urls) # Homepage self.assertIn('http://localhost/hello-world/', urls) # Child page @@ -79,7 +99,7 @@ class TestSitemapGenerator(TestCase): live=True, )) - sitemap = Sitemap(self.site) + sitemap = Sitemap(self.site, request) urls = [url['location'] for url in sitemap.get_urls(1, req_site, req_protocol)] self.assertIn('http://localhost/events/', urls) # Main view @@ -90,7 +110,7 @@ class TestSitemapGenerator(TestCase): req_protocol = request.scheme req_site = get_current_site(request) - sitemap = Sitemap(self.site) + sitemap = Sitemap(self.site, request) urls = sitemap.get_urls(1, req_site, req_protocol) child_page_lastmod = [ @@ -115,7 +135,7 @@ class TestSitemapGenerator(TestCase): req_protocol = request.scheme req_site = get_current_site(request) - sitemap = Sitemap(self.site) + sitemap = Sitemap(self.site, request) sitemap.get_urls(1, req_site, req_protocol) self.assertEqual(sitemap.latest_lastmod, datetime.datetime(2017, 3, 1, 12, 0, 0, tzinfo=pytz.utc)) @@ -129,7 +149,7 @@ class TestSitemapGenerator(TestCase): req_protocol = request.scheme req_site = get_current_site(request) - sitemap = Sitemap(self.site) + sitemap = Sitemap(self.site, request) sitemap.get_urls(1, req_site, req_protocol) self.assertFalse(hasattr(sitemap, 'latest_lastmod')) diff --git a/wagtail/contrib/sitemaps/views.py b/wagtail/contrib/sitemaps/views.py index ecf881123f..3d7aa2dd33 100644 --- a/wagtail/contrib/sitemaps/views.py +++ b/wagtail/contrib/sitemaps/views.py @@ -12,7 +12,7 @@ def sitemap(request, sitemaps=None, **kwargs): if sitemaps: sitemaps = prepare_sitemaps(request, sitemaps) else: - sitemaps = {'wagtail': Sitemap(request.site)} + sitemaps = {'wagtail': Sitemap(request.site, request)} return sitemap_views.sitemap(request, sitemaps, **kwargs) @@ -21,7 +21,7 @@ def prepare_sitemaps(request, sitemaps): initialised_sitemaps = {} for name, sitemap_cls in sitemaps.items(): if issubclass(sitemap_cls, Sitemap): - initialised_sitemaps[name] = sitemap_cls(request.site) + initialised_sitemaps[name] = sitemap_cls(request.site, request) else: initialised_sitemaps[name] = sitemap_cls return initialised_sitemaps diff --git a/wagtail/core/models.py b/wagtail/core/models.py index 52bbeffc9e..ae1567d343 100644 --- a/wagtail/core/models.py +++ b/wagtail/core/models.py @@ -1328,10 +1328,10 @@ class Page(AbstractPage, index.Indexed, ClusterableModel, metaclass=PageBase): """ return ['/'] - def get_sitemap_urls(self): + def get_sitemap_urls(self, request=None): return [ { - 'location': self.full_url, + 'location': self.get_full_url(request), # fall back on latest_revision_created_at if last_published_at is null # (for backwards compatibility from before last_published_at was added) 'lastmod': (self.last_published_at or self.latest_revision_created_at),