def first_common_ancestor(self, include_self=False, strict=False):
    """
    Find the first ancestor that all pages in this queryset have in common.
    For example, consider a page hierarchy like::

        - Home/
          - Foo Event Index/
            - Foo Event Page 1/
            - Foo Event Page 2/
          - Bar Event Index/
            - Bar Event Page 1/
            - Bar Event Page 2/

    The common ancestors for some queries would be:

    .. code-block:: python

        >>> Page.objects\\
        ...     .type(EventPage)\\
        ...     .first_common_ancestor()
        <Page: Home>
        >>> Page.objects\\
        ...     .type(EventPage)\\
        ...     .filter(title__contains='Foo')\\
        ...     .first_common_ancestor()
        <Page: Foo Event Index>

    This method tries to be efficient, but if you have millions of pages
    scattered across your page tree, it will be slow.

    If ``include_self`` is True, the ancestor can be one of the pages in the
    queryset:

    .. code-block:: python

        >>> Page.objects\\
        ...     .filter(title__contains='Foo')\\
        ...     .first_common_ancestor(include_self=True)
        <Page: Foo Event Index>
        >>> Page.objects\\
        ...     .filter(title__exact='Bar Event Index')\\
        ...     .first_common_ancestor(include_self=True)
        <Page: Bar Event Index>

    A few invalid cases exist: when the queryset is empty, when the root
    Page is in the queryset and ``include_self`` is False, and when there
    are multiple page trees with no common root (a case Wagtail does not
    support).

    :param include_self: when True, the pages' own paths are compared, so
        the result may itself be a member of the queryset; when False
        (the default) the paths of their parents are compared instead.
    :param strict: when True, the invalid cases above raise instead of
        falling back to the first root node.
    :returns: the page whose ``path`` is the longest common path prefix,
        or ``self.model.get_first_root_node()`` for an invalid case when
        ``strict`` is False.
    :raises self.model.DoesNotExist: for an invalid case when ``strict``
        is True.
    """
    model = self.model

    # An empty queryset has no pages and therefore no ancestors.
    if not self.exists():
        if strict:
            raise model.DoesNotExist('Can not find ancestor of empty queryset')
        return model.get_first_root_node()

    if include_self:
        # Compare the paths of the matched pages themselves.
        paths = self.order_by().values_list('path', flat=True)
    else:
        # Compare the distinct parent paths of the matched pages, i.e. each
        # path with its last `steplen` characters removed.
        # The empty `.order_by()` ensures that `Page.path` is not also
        # selected to order the results, which lets `.distinct()` work.
        paths = self.order_by()\
            .annotate(parent_path=Substr(
                'path', 1, Length('path') - model.steplen,
                output_field=CharField(max_length=255)))\
            .values_list('parent_path', flat=True)\
            .distinct()

    # `commonprefix` is a plain character-wise string operation, so it works
    # on anything, not just file system paths. Materialise the queryset once
    # so that it is handed a concrete list.
    common_parent_path = posixpath.commonprefix(list(paths))

    # A character-wise prefix of e.g. (0001, 0002) is "000", which stops in
    # the middle of a path step. Trim the prefix back to a whole number of
    # `steplen`-sized steps.
    extra_chars = len(common_parent_path) % model.steplen
    if extra_chars != 0:
        common_parent_path = common_parent_path[:-extra_chars]

    # Compare by value: an identity check (`is ''`) only works by accident of
    # string interning and triggers a SyntaxWarning on modern Python.
    if not common_parent_path:
        # This should only happen when there are multiple trees,
        # a situation that Wagtail does not support;
        # or when the root node itself is part of the queryset.
        if strict:
            raise model.DoesNotExist('No common ancestor found!')

        # Assuming the situation is the latter, just return the root node.
        # The root node is not its own ancestor, so this is technically
        # incorrect. Callers that need exact behaviour should pass
        # `strict=True` and handle the error.
        return model.get_first_root_node()

    # Assuming the database is in a consistent state, a page with this path
    # should always exist.
    return model.objects.get(path=common_parent_path)
class TestFirstCommonAncestor(TestCase):
    """
    Tests for ``first_common_ancestor`` on page querysets.

    Loads the same fixture as TestSpecificQuery; see that class for the
    layout of pages.
    """
    fixtures = ['test_specific.json']

    def setUp(self):
        event_pages = Page.objects.type(EventPage)
        self.all_events = event_pages
        # Event pages that do not live under an "/other/" URL path.
        self.regular_events = event_pages.exclude(url_path__contains='/other/')

    def test_bookkeeping(self):
        """Sanity-check the number of event pages the other tests rely on."""
        total = self.all_events.count()
        regular = self.regular_events.count()
        self.assertEqual(total, 4)
        self.assertEqual(regular, 3)

    def test_event_pages(self):
        """Common ancestor for EventPages"""
        # Event pages live in more than one subtree under /home/, so the
        # home page is the common ancestor.
        home = Page.objects.get(slug='home')
        found = self.all_events.first_common_ancestor()
        self.assertEqual(home, found)

    def test_normal_event_pages(self):
        """Common ancestor for EventPages, excluding /other/ events"""
        events_index = Page.objects.get(slug='events')
        found = self.regular_events.first_common_ancestor()
        self.assertEqual(events_index, found)

    def test_normal_event_pages_include_self(self):
        """
        Common ancestor for EventPages, excluding /other/ events, with
        include_self=True
        """
        events_index = Page.objects.get(slug='events')
        found = self.regular_events.first_common_ancestor(include_self=True)
        self.assertEqual(events_index, found)

    def test_single_page_no_include_self(self):
        """Test getting a single page, with include_self=False."""
        events_index = Page.objects.get(slug='events')
        christmas_only = Page.objects.filter(title='Christmas')
        self.assertEqual(events_index, christmas_only.first_common_ancestor())

    def test_single_page_include_self(self):
        """Test getting a single page, with include_self=True."""
        christmas = Page.objects.get(title='Christmas')
        christmas_only = Page.objects.filter(title='Christmas')
        self.assertEqual(
            christmas,
            christmas_only.first_common_ancestor(include_self=True))

    def test_all_pages(self):
        """Non-strict lookup over every page yields the first root node."""
        root = Page.get_first_root_node()
        found = Page.objects.first_common_ancestor()
        self.assertEqual(root, found)

    def test_all_pages_strict(self):
        """Strict lookup over every page raises Page.DoesNotExist."""
        self.assertRaises(
            Page.DoesNotExist,
            Page.objects.first_common_ancestor, strict=True)

    def test_all_pages_include_self_strict(self):
        """Strict lookup with include_self=True yields the first root node."""
        root = Page.get_first_root_node()
        found = Page.objects.first_common_ancestor(
            include_self=True, strict=True)
        self.assertEqual(root, found)

    def test_empty_queryset(self):
        """Non-strict lookup on an empty queryset yields the first root node."""
        root = Page.get_first_root_node()
        nothing = Page.objects.none()
        self.assertEqual(root, nothing.first_common_ancestor())

    def test_empty_queryset_strict(self):
        """Strict lookup on an empty queryset raises Page.DoesNotExist."""
        nothing = Page.objects.none()
        self.assertRaises(
            Page.DoesNotExist,
            nothing.first_common_ancestor, strict=True)