0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-28 09:16:49 +01:00

properties_to_Q: don't support event model anymore (#8667)

* Remove dead querying methods

* Remove a dead filtering method

* Remove capability to filter events from properties_to_Q

Note: I've also duplicated the test_filter tests so that properties_to_Q
still has sufficient coverage (now via persons), while the existing tests
are kept for EE.

* Remove test-only SQL

* Remove events-related cruft
This commit is contained in:
Karl-Aksel Puulmann 2022-02-17 13:23:57 +02:00 committed by GitHub
parent 2e038d4ae7
commit f8daa4a05d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 538 additions and 535 deletions

View File

@ -5,7 +5,6 @@ from uuid import uuid4
from ee.clickhouse.client import sync_execute
from ee.clickhouse.models.action import filter_event, format_action_filter
from ee.clickhouse.models.event import create_event
from ee.clickhouse.sql.actions import ACTION_QUERY
from ee.clickhouse.util import ClickhouseTestMixin
from posthog.models.action import Action
from posthog.models.action_step import ActionStep
@ -29,8 +28,15 @@ class MockEvent:
def _get_events_for_action(action: Action) -> List[MockEvent]:
    """Return the events matching ``action`` as ``MockEvent`` objects, newest first.

    The WHERE fragment and its parameters come from ``format_action_filter``;
    the query is additionally restricted to the action's team.
    """
    formatted_query, params = format_action_filter(team_id=action.team_id, action=action, prepend="")
    # The SQL is inlined here; the earlier assignment through ACTION_QUERY was a
    # dead store that was immediately overwritten.
    query = f"""
        SELECT
            events.uuid,
            events.distinct_id
        FROM events
        WHERE {formatted_query}
        AND events.team_id = %(team_id)s
        ORDER BY events.timestamp DESC
    """
    events = sync_execute(query, {"team_id": action.team_id, **params})
    return [MockEvent(str(uuid), distinct_id) for uuid, distinct_id in events]

View File

@ -1,36 +1,31 @@
import json
from typing import Optional
from uuid import uuid4
from django.utils import timezone
from ee.clickhouse.client import sync_execute
from ee.clickhouse.models.event import ClickhouseEventSerializer, create_event
from ee.clickhouse.models.property import parse_prop_grouped_clauses
from ee.clickhouse.models.util import PersonPropertiesMode
from ee.clickhouse.sql.events import GET_EVENTS_WITH_PROPERTIES
from ee.clickhouse.util import ClickhouseTestMixin
from posthog.constants import FILTER_TEST_ACCOUNTS
from posthog.models import Element, Organization, Person, Team
from posthog.models.cohort import Cohort
from posthog.models.filters import Filter
from posthog.models.filters.retention_filter import RetentionFilter
from posthog.models.filters.test.test_filter import TestFilter as PGTestFilters
from posthog.models.filters.test.test_filter import property_to_Q_test_factory
from posthog.models.person import Person
from posthog.models.team import Team
def _filter_events(
filter: Filter, team: Team, person_query: Optional[bool] = False, order_by: Optional[str] = None,
):
def _filter_events(filter: Filter, team: Team, order_by: Optional[str] = None):
prop_filters, prop_filter_params = parse_prop_grouped_clauses(
property_group=filter.property_groups, team_id=team.pk
)
params = {"team_id": team.pk, **prop_filter_params}
if order_by == "id":
order_by = "uuid"
events = sync_execute(
GET_EVENTS_WITH_PROPERTIES.format(
filters=prop_filters, order_by="ORDER BY {}".format(order_by) if order_by else "",
filters=prop_filters, order_by="ORDER BY {}".format(order_by) if order_by else ""
),
params,
)
@ -38,9 +33,23 @@ def _filter_events(
return parsed_events
def _filter_persons(filter: Filter, team: Team):
    """Query the ``person`` table with ``filter``'s property groups and return matching ids.

    The property clauses are built in ``USING_PERSON_PROPERTIES_COLUMN`` mode and
    appended to a team-scoped SELECT. Ids are returned as strings.
    """
    prop_filters, clause_params = parse_prop_grouped_clauses(
        property_group=filter.property_groups,
        team_id=team.pk,
        person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN,
    )
    query_params = {"team_id": team.pk, **clause_params}
    # Note this query does not handle person rows changing over time
    sql = f"SELECT id, properties AS person_props FROM person WHERE team_id = %(team_id)s {prop_filters}"

    person_ids = []
    for person_id, _props in sync_execute(sql, query_params):
        person_ids.append(str(person_id))
    return person_ids
def _create_person(**kwargs):
    """Create a ``Person`` from ``kwargs`` and return its UUID as a string.

    A string is returned so the value can be compared directly with the string
    ids produced by ``_filter_persons``.
    """
    person = Person.objects.create(**kwargs)
    return str(person.uuid)
def _create_event(**kwargs):
@ -193,8 +202,340 @@ class TestFilters(PGTestFilters):
class TestFiltering(
ClickhouseTestMixin, property_to_Q_test_factory(_filter_events, _create_event, _create_person), # type: ignore
ClickhouseTestMixin, property_to_Q_test_factory(_filter_persons, _create_person), # type: ignore
):
def test_simple(self):
    """An exact-match property filter returns only the event holding that exact string."""
    matching_url = "https://whatever.com"
    _create_event(team=self.team, distinct_id="test", event="$pageview")
    # Non-string values stored under the same key: test for type incompatibility.
    for odd_value in (1, {"bla": "bla"}):
        _create_event(team=self.team, distinct_id="test", event="$pageview", properties={"$current_url": odd_value})
    _create_event(team=self.team, event="$pageview", distinct_id="test", properties={"$current_url": matching_url})

    matched = _filter_events(Filter(data={"properties": {"$current_url": matching_url}}), self.team)
    self.assertEqual(len(matched), 1)
def test_multiple_equality(self):
    """A list-valued filter matches events whose property equals any of the listed values."""
    wanted_urls = ("https://whatever.com", "https://example.com")
    _create_event(team=self.team, distinct_id="test", event="$pageview")
    # Non-string values stored under the same key: test for type incompatibility.
    for odd_value in (1, {"bla": "bla"}):
        _create_event(team=self.team, distinct_id="test", event="$pageview", properties={"$current_url": odd_value})
    for url in wanted_urls:
        _create_event(team=self.team, event="$pageview", distinct_id="test", properties={"$current_url": url})

    matched = _filter_events(Filter(data={"properties": {"$current_url": list(wanted_urls)}}), self.team)
    self.assertEqual(len(matched), 2)
def test_numerical(self):
    """``gt``, exact-match and ``lt`` filters on a numeric property return the expected first event."""
    five_uuid = _create_event(team=self.team, distinct_id="test", event="$pageview", properties={"$a_number": 5})
    six_uuid = _create_event(team=self.team, event="$pageview", distinct_id="test", properties={"$a_number": 6})
    _create_event(team=self.team, event="$pageview", distinct_id="test", properties={"$a_number": "rubbish"})

    # (filter properties, uuid expected as the first returned event)
    expectations = (
        ({"$a_number__gt": 5}, six_uuid),
        ({"$a_number": 5}, five_uuid),
        ({"$a_number__lt": 6}, five_uuid),
    )
    for properties, expected_uuid in expectations:
        events = _filter_events(Filter(data={"properties": properties}), self.team)
        self.assertEqual(events[0]["id"], expected_uuid)
def test_contains(self):
    """An ``icontains`` filter returns the event whose URL contains the substring."""
    _create_event(team=self.team, distinct_id="test", event="$pageview")
    url_event_uuid = _create_event(
        team=self.team, event="$pageview", distinct_id="test", properties={"$current_url": "https://whatever.com"}
    )

    properties = {"$current_url__icontains": "whatever"}
    matched = _filter_events(Filter(data={"properties": properties}), self.team)
    self.assertEqual(matched[0]["id"], url_event_uuid)
def test_regex(self):
    """``regex`` selects the matching URL; ``not_regex`` returns both events, including the one with no URL."""
    no_url_uuid = _create_event(team=self.team, distinct_id="test", event="$pageview")
    url_uuid = _create_event(
        team=self.team, event="$pageview", distinct_id="test", properties={"$current_url": "https://whatever.com"}
    )

    regex_filter = Filter(data={"properties": {"$current_url__regex": r"\.com$"}})
    matched = _filter_events(regex_filter, self.team)
    self.assertEqual(matched[0]["id"], url_uuid)

    not_regex_filter = Filter(data={"properties": {"$current_url__not_regex": r"\.eee$"}})
    # Explicit timestamp ordering; the assertions below expect creation order.
    ordered = _filter_events(not_regex_filter, self.team, order_by="timestamp")
    self.assertEqual(ordered[0]["id"], no_url_uuid)
    self.assertEqual(ordered[1]["id"], url_uuid)
def test_invalid_regex(self):
    """An invalid pattern yields no events for both ``regex`` and ``not_regex``."""
    _create_event(team=self.team, distinct_id="test", event="$pageview")
    _create_event(
        team=self.team, event="$pageview", distinct_id="test", properties={"$current_url": "https://whatever.com"}
    )

    for property_key in ("$current_url__regex", "$current_url__not_regex"):
        invalid_filter = Filter(data={"properties": {property_key: "?*"}})
        self.assertEqual(len(_filter_events(invalid_filter, self.team)), 0)
def test_is_not(self):
    """``is_not`` returns every event whose URL differs from the value, including one with no URL."""
    event1_uuid = _create_event(team=self.team, distinct_id="test", event="$pageview")
    event2_uuid = _create_event(
        team=self.team, event="$pageview", distinct_id="test", properties={"$current_url": "https://something.com"}
    )
    _create_event(
        team=self.team, event="$pageview", distinct_id="test", properties={"$current_url": "https://whatever.com"}
    )

    filter = Filter(data={"properties": {"$current_url__is_not": "https://whatever.com"}})
    events = _filter_events(filter, self.team)
    # Check the count first so a wrong result size fails as an assertion, not an IndexError.
    self.assertEqual(len(events), 2)
    self.assertCountEqual([event["id"] for event in events], [event1_uuid, event2_uuid])
def test_does_not_contain(self):
    """``not_icontains`` keeps events with no URL, a different URL, or a ``None`` URL."""
    no_url_uuid = _create_event(team=self.team, event="$pageview", distinct_id="test")
    other_url_uuid = _create_event(
        team=self.team, event="$pageview", distinct_id="test", properties={"$current_url": "https://something.com"}
    )
    _create_event(
        team=self.team, event="$pageview", distinct_id="test", properties={"$current_url": "https://whatever.com"}
    )
    null_url_uuid = _create_event(
        team=self.team, event="$pageview", distinct_id="test", properties={"$current_url": None}
    )

    properties = {"$current_url__not_icontains": "whatever.com"}
    matched = _filter_events(Filter(data={"properties": properties}), self.team)

    expected_ids = [no_url_uuid, other_url_uuid, null_url_uuid]
    self.assertCountEqual([row["id"] for row in matched], expected_ids)
    self.assertEqual(len(matched), 3)
def test_multiple(self):
    """Two property conditions are combined: only the event satisfying both is returned."""
    event2_uuid = _create_event(
        team=self.team,
        event="$pageview",
        distinct_id="test",
        properties={"$current_url": "https://something.com", "another_key": "value"},
    )
    _create_event(
        team=self.team, event="$pageview", distinct_id="test", properties={"$current_url": "https://something.com"}
    )

    filter = Filter(data={"properties": {"$current_url__icontains": "something.com", "another_key": "value"}})
    events = _filter_events(filter, self.team)
    # Check the count first so an empty result fails as an assertion, not an IndexError.
    self.assertEqual(len(events), 1)
    self.assertEqual(events[0]["id"], event2_uuid)
def test_user_properties(self):
    """Person-property filters pick events by the person's properties, within this team only."""
    _create_person(team_id=self.team.pk, distinct_ids=["person1"], properties={"group": "some group"})
    _create_person(team_id=self.team.pk, distinct_ids=["person2"], properties={"group": "another group"})
    event2_uuid = _create_event(
        team=self.team,
        distinct_id="person1",
        event="$pageview",
        properties={"$current_url": "https://something.com", "another_key": "value"},
    )
    event_p2_uuid = _create_event(
        team=self.team,
        distinct_id="person2",
        event="$pageview",
        properties={"$current_url": "https://something.com"},
    )

    # test for leakage: a person and event in a second team must never show up below
    _, _, team2 = Organization.objects.bootstrap(None)
    _create_person(team_id=team2.pk, distinct_ids=["person_team_2"], properties={"group": "another group"})
    _create_event(
        team=team2,
        distinct_id="person_team_2",
        event="$pageview",
        properties={"$current_url": "https://something.com", "another_key": "value"},
    )

    filter = Filter(data={"properties": [{"key": "group", "value": "some group", "type": "person"}]})
    events = _filter_events(filter=filter, team=self.team, order_by=None)
    self.assertEqual(len(events), 1)
    self.assertEqual(events[0]["id"], event2_uuid)

    filter = Filter(
        data={"properties": [{"key": "group", "operator": "is_not", "value": "some group", "type": "person"}]}
    )
    events = _filter_events(filter=filter, team=self.team, order_by=None)
    # Check the count first so an empty result fails as an assertion, not an IndexError.
    self.assertEqual(len(events), 1)
    self.assertEqual(events[0]["id"], event_p2_uuid)
def test_user_properties_numerical(self):
    """Combined ``lt``/``gt`` person-property filters select the event of the person in range."""
    _create_person(team_id=self.team.pk, distinct_ids=["person1"], properties={"group": 1})
    _create_person(team_id=self.team.pk, distinct_ids=["person2"], properties={"group": 2})
    event2_uuid = _create_event(
        team=self.team,
        distinct_id="person1",
        event="$pageview",
        properties={"$current_url": "https://something.com", "another_key": "value"},
    )
    _create_event(
        team=self.team,
        distinct_id="person2",
        event="$pageview",
        properties={"$current_url": "https://something.com"},
    )

    filter = Filter(
        data={
            "properties": [
                {"key": "group", "operator": "lt", "value": 2, "type": "person"},
                {"key": "group", "operator": "gt", "value": 0, "type": "person"},
            ]
        }
    )
    events = _filter_events(filter=filter, team=self.team, order_by=None)
    # Check the count first so an empty result fails as an assertion, not an IndexError.
    self.assertEqual(len(events), 1)
    self.assertEqual(events[0]["id"], event2_uuid)
def test_boolean_filters(self):
    """A ``"true"`` filter value matches the event whose property is boolean ``True``."""
    _create_event(team=self.team, event="$pageview", distinct_id="test")
    event2_uuid = _create_event(
        team=self.team, event="$pageview", distinct_id="test", properties={"is_first_user": True}
    )

    filter = Filter(data={"properties": [{"key": "is_first_user", "value": "true"}]})
    events = _filter_events(filter, self.team)
    # Check the count first so an empty result fails as an assertion, not an IndexError.
    self.assertEqual(len(events), 1)
    self.assertEqual(events[0]["id"], event2_uuid)
def test_is_not_set_and_is_set(self):
    """``is_not_set`` returns the event lacking the property; ``is_set`` returns the one having it."""
    event1_uuid = _create_event(team=self.team, event="$pageview", distinct_id="test")
    event2_uuid = _create_event(
        team=self.team, event="$pageview", distinct_id="test", properties={"is_first_user": True}
    )

    filter = Filter(
        data={"properties": [{"key": "is_first_user", "operator": "is_not_set", "value": "is_not_set"}]}
    )
    events = _filter_events(filter, self.team)
    # Check the count first so an empty result fails as an assertion, not an IndexError.
    self.assertEqual(len(events), 1)
    self.assertEqual(events[0]["id"], event1_uuid)

    filter = Filter(data={"properties": [{"key": "is_first_user", "operator": "is_set", "value": "is_set"}]})
    events = _filter_events(filter, self.team)
    self.assertEqual(len(events), 1)
    self.assertEqual(events[0]["id"], event2_uuid)
def test_true_false(self):
    """Both ``"true"`` and ``["true"]`` filter values match a boolean ``True`` property."""
    _create_event(team=self.team, distinct_id="test", event="$pageview")
    flagged_uuid = _create_event(
        team=self.team, event="$pageview", distinct_id="test", properties={"is_first": True}
    )

    # Scalar form first, then the single-element list form.
    for filter_value in ("true", ["true"]):
        matched = _filter_events(Filter(data={"properties": {"is_first": filter_value}}), self.team)
        self.assertEqual(matched[0]["id"], flagged_uuid)
def test_is_not_true_false(self):
    """``is_not`` ``"true"`` returns the event that does not carry the boolean property."""
    event_uuid = _create_event(team=self.team, distinct_id="test", event="$pageview")
    _create_event(team=self.team, event="$pageview", distinct_id="test", properties={"is_first": True})

    filter = Filter(data={"properties": [{"key": "is_first", "value": "true", "operator": "is_not"}]})
    events = _filter_events(filter, self.team)
    self.assertEqual(events[0]["id"], event_uuid)
def test_json_object(self):
    """A person property holding a JSON object is matched by its ``json.dumps`` serialisation."""
    _create_person(
        team_id=self.team.pk,
        distinct_ids=["person1"],
        properties={"name": {"first_name": "Mary", "last_name": "Smith"}},
    )
    event1_uuid = _create_event(
        team=self.team,
        distinct_id="person1",
        event="$pageview",
        properties={"$current_url": "https://something.com"},
    )

    filter = Filter(
        data={
            "properties": [
                {
                    "key": "name",
                    "value": json.dumps({"first_name": "Mary", "last_name": "Smith"}),
                    "type": "person",
                }
            ]
        }
    )
    events = _filter_events(filter=filter, team=self.team, order_by=None)
    # Check the count first so an empty result fails as an assertion, not an IndexError.
    self.assertEqual(len(events), 1)
    self.assertEqual(events[0]["id"], event1_uuid)
def test_element_selectors(self):
    """A ``div > a`` selector filter matches only the autocapture event that has elements."""
    chain = [Element.objects.create(tag_name="a"), Element.objects.create(tag_name="div")]
    _create_event(team=self.team, event="$autocapture", distinct_id="distinct_id", elements=chain)
    _create_event(team=self.team, event="$autocapture", distinct_id="distinct_id")

    selector_filter = Filter(data={"properties": [{"key": "selector", "value": "div > a", "type": "element"}]})
    matched = _filter_events(filter=selector_filter, team=self.team)
    self.assertEqual(len(matched), 1)
def test_element_filter(self):
    """Element ``text`` filters match by any listed value, or by a single exact value."""
    first_chain = [Element.objects.create(tag_name="a", text="some text"), Element.objects.create(tag_name="div")]
    _create_event(team=self.team, event="$autocapture", distinct_id="distinct_id", elements=first_chain)

    second_chain = [
        Element.objects.create(tag_name="a", text="some other text"),
        Element.objects.create(tag_name="div"),
    ]
    _create_event(team=self.team, event="$autocapture", distinct_id="distinct_id", elements=second_chain)

    _create_event(team=self.team, event="$autocapture", distinct_id="distinct_id")

    either_text = Filter(
        data={"properties": [{"key": "text", "value": ["some text", "some other text"], "type": "element"}]}
    )
    self.assertEqual(len(_filter_events(filter=either_text, team=self.team)), 2)

    single_text = Filter(data={"properties": [{"key": "text", "value": "some text", "type": "element"}]})
    self.assertEqual(len(_filter_events(filter=single_text, team=self.team)), 1)
def test_filter_out_team_members(self):
    """With ``FILTER_TEST_ACCOUNTS`` on, the team's test-account filters exclude matching persons' events."""
    _create_person(team_id=self.team.pk, distinct_ids=["team_member"], properties={"email": "test@posthog.com"})
    _create_person(team_id=self.team.pk, distinct_ids=["random_user"], properties={"email": "test@gmail.com"})

    self.team.test_account_filters = [
        {"key": "email", "value": "@posthog.com", "operator": "not_icontains", "type": "person"}
    ]
    self.team.save()

    _create_event(team=self.team, distinct_id="team_member", event="$pageview")
    _create_event(team=self.team, distinct_id="random_user", event="$pageview")

    filter = Filter(data={FILTER_TEST_ACCOUNTS: True, "events": [{"id": "$pageview"}]}, team=self.team)
    events = _filter_events(filter=filter, team=self.team)
    self.assertEqual(len(events), 1)
def test_person_cohort_properties(self):
person1_distinct_id = "person1"
person1 = Person.objects.create(

View File

@ -1,9 +0,0 @@
# SQL template selecting the uuid and distinct_id of a team's events, newest first.
# `{action_filter}` is filled in with str.format; `%(team_id)s` is a named query
# parameter supplied when the query is executed.
ACTION_QUERY = """
SELECT
events.uuid,
events.distinct_id
FROM events
WHERE {action_filter}
AND events.team_id = %(team_id)s
ORDER BY events.timestamp DESC
"""

View File

@ -2,16 +2,12 @@ import copy
import datetime
import re
from collections import defaultdict
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Dict, List, Optional, Union
from dateutil.relativedelta import relativedelta
from django.db import models, transaction
from django.db.models import F, OuterRef, Q, Subquery
from django.db import models
from django.utils import timezone
from .element import Element
from .element_group import ElementGroup
from .person import PersonDistinctId
from .team import Team
SELECTOR_ATTRIBUTE_REGEX = r"([a-zA-Z]*)\[(.*)=[\'|\"](.*)[\'|\"]\]"
@ -118,88 +114,6 @@ class Selector:
yield "".join(part)
class EventManager(models.QuerySet):
    """QuerySet for events with element-based filtering, person annotation and element-aware creation."""

    def _element_subquery(self, selector: Selector) -> Tuple[Dict[str, Subquery], Dict[str, Union[F, bool]]]:
        """Build the ``match_<index>`` subqueries and chaining filter for ``selector``.

        Returns a ``(subqueries, filter)`` pair: one subquery per entry of
        ``selector.parts`` selecting an element ``order`` within the outer row's
        group, and filter kwargs requiring every part to match in sequence.
        """
        match_filter: Dict[str, Union[F, bool]] = {}
        subqueries = {}
        for index, tag in enumerate(selector.parts):
            subqueries[f"match_{index}"] = Subquery(
                Element.objects.filter(group_id=OuterRef("pk"))
                .values("order")
                .order_by("order")
                .extra(**tag.extra_query)  # type: ignore
                # If there's two of the same element, for the second one we need to shift one
                [tag.unique_order : tag.unique_order + 1]
            )
            match_filter[f"match_{index}__isnull"] = False
            if index > 0:
                # If direct descendant, the next element has to have order +1
                if tag.direct_descendant:
                    match_filter[f"match_{index}"] = F(f"match_{index - 1}") + 1
                else:
                    # If not, it can have any order as long as it's bigger than current element
                    match_filter[f"match_{index}__gt"] = F(f"match_{index - 1}")
        return (subqueries, match_filter)

    def filter_by_element(self, filters: Dict, team_id: int):
        """Translate element ``filters`` into queryset filter kwargs on ``elements_hash``.

        ``filters`` may contain a ``selector`` string and/or ``tag_name``, ``text``
        and ``href`` values (a single value or a list; list entries are OR-ed,
        different keys are AND-ed). Returns ``{}`` when no condition results,
        otherwise ``{"elements_hash__in": <hashes of matching element groups>}``.
        """
        groups = ElementGroup.objects.filter(team_id=team_id)

        element_filter = Q()
        if filters.get("selector"):
            selector = Selector(filters["selector"])
            subqueries, subq_filter = self._element_subquery(selector)
            element_filter = Q(**subq_filter)
            groups = groups.annotate(**subqueries)  # type: ignore

        for key in ["tag_name", "text", "href"]:
            values = filters.get(key, [])
            if not values:
                continue

            # A truthy scalar is wrapped, so ``values`` is always a non-empty list here.
            values = values if isinstance(values, list) else [values]
            condition = Q()
            for searched_value in values:
                condition |= Q(**{f"element__{key}": searched_value})
            element_filter &= condition

        if not element_filter:
            return {}

        groups = groups.filter(element_filter)
        return {"elements_hash__in": groups.values_list("hash", flat=True)}

    def add_person_id(self, team_id: int):
        """Annotate each row with ``person_id`` looked up from ``PersonDistinctId`` by ``distinct_id``."""
        return self.annotate(
            person_id=Subquery(
                PersonDistinctId.objects.filter(team_id=team_id, distinct_id=OuterRef("distinct_id"))
                .order_by()
                .values("person_id")[:1]
            )
        )

    def create(self, *args: Any, **kwargs: Any):
        """Create an event, first storing any truthy ``elements`` as an ``ElementGroup``.

        When ``elements`` is given, an ``ElementGroup`` is created for the team
        (``team`` if truthy, otherwise ``team_id``), ``elements`` is removed from
        ``kwargs`` and the group's hash is passed on as ``elements_hash``. Both
        inserts run inside one ``transaction.atomic()`` block.
        """
        with transaction.atomic():
            if kwargs.get("elements"):
                # Resolve the team reference before popping ``elements`` so a missing
                # ``team_id`` raises KeyError with ``kwargs`` left untouched.
                if kwargs.get("team"):
                    group_owner = {"team": kwargs["team"]}
                else:
                    group_owner = {"team_id": kwargs["team_id"]}
                kwargs["elements_hash"] = ElementGroup.objects.create(
                    **group_owner, elements=kwargs.pop("elements")
                ).hash
            return super().create(*args, **kwargs)
class Event(models.Model):
class Meta:
indexes = [
@ -224,5 +138,3 @@ class Event(models.Model):
# DEPRECATED: elements are stored against element groups now
elements: models.JSONField = models.JSONField(default=list, null=True, blank=True)
objects: EventManager = EventManager.as_manager() # type: ignore

View File

@ -1,5 +1,5 @@
import json
from typing import Callable, Optional
from typing import Callable
from dateutil.relativedelta import relativedelta
from django.db.models import Q
@ -7,7 +7,7 @@ from django.utils import timezone
from freezegun.api import freeze_time
from posthog.constants import FILTER_TEST_ACCOUNTS
from posthog.models import Cohort, Element, Event, Filter, Organization, Person, Team
from posthog.models import Cohort, Filter, Person, Team
from posthog.queries.base import properties_to_Q
from posthog.test.base import BaseTest
@ -77,49 +77,36 @@ class TestFilter(BaseTest):
)
def property_to_Q_test_factory(filter_events: Callable, event_factory, person_factory):
def property_to_Q_test_factory(filter_persons: Callable, person_factory):
class TestPropertiesToQ(BaseTest):
def test_simple(self):
event_factory(team=self.team, distinct_id="test", event="$pageview")
event_factory(
team=self.team, distinct_id="test", event="$pageview", properties={"$current_url": 1}
) # test for type incompatibility
event_factory(
team=self.team, distinct_id="test", event="$pageview", properties={"$current_url": {"bla": "bla"}}
) # test for type incompatibility
event_factory(
team=self.team,
event="$pageview",
distinct_id="test",
properties={"$current_url": "https://whatever.com"},
)
filter = Filter(data={"properties": {"$current_url": "https://whatever.com"}})
events = filter_events(filter, self.team)
self.assertEqual(len(events), 1)
def test_simple_persons(self):
person_factory(team_id=self.team.pk, distinct_ids=["person1"], properties={"url": "https://whatever.com"})
person_factory(team_id=self.team.pk, distinct_ids=["person2"], properties={"url": 1})
person_factory(team_id=self.team.pk, distinct_ids=["person3"], properties={"url": {"bla": "bla"}})
person_factory(team_id=self.team.pk, distinct_ids=["person4"])
def test_multiple_equality(self):
event_factory(team=self.team, distinct_id="test", event="$pageview")
event_factory(
team=self.team, distinct_id="test", event="$pageview", properties={"$current_url": 1}
) # test for type incompatibility
event_factory(
team=self.team, distinct_id="test", event="$pageview", properties={"$current_url": {"bla": "bla"}}
) # test for type incompatibility
event_factory(
team=self.team,
event="$pageview",
distinct_id="test",
properties={"$current_url": "https://whatever.com"},
filter = Filter(data={"properties": [{"type": "person", "key": "url", "value": "https://whatever.com"}]})
results = filter_persons(filter, self.team)
self.assertEqual(len(results), 1)
def test_multiple_equality_persons(self):
person_factory(team_id=self.team.pk, distinct_ids=["person1"], properties={"url": "https://whatever.com"})
person_factory(team_id=self.team.pk, distinct_ids=["person2"], properties={"url": 1})
person_factory(team_id=self.team.pk, distinct_ids=["person3"], properties={"url": {"bla": "bla"}})
person_factory(team_id=self.team.pk, distinct_ids=["person4"])
person_factory(team_id=self.team.pk, distinct_ids=["person5"], properties={"url": "https://example.com"})
filter = Filter(
data={
"properties": [
{"type": "person", "key": "url", "value": ["https://whatever.com", "https://example.com"]}
]
}
)
event_factory(
team=self.team,
event="$pageview",
distinct_id="test",
properties={"$current_url": "https://example.com"},
)
filter = Filter(data={"properties": {"$current_url": ["https://whatever.com", "https://example.com"]}})
events = filter_events(filter, self.team)
self.assertEqual(len(events), 2)
results = filter_persons(filter, self.team)
self.assertEqual(len(results), 2)
def test_incomplete_data(self):
filter = Filter(
@ -127,262 +114,158 @@ def property_to_Q_test_factory(filter_events: Callable, event_factory, person_fa
)
self.assertListEqual(filter.properties, [])
def test_numerical(self):
event1_uuid = event_factory(
team=self.team, distinct_id="test", event="$pageview", properties={"$a_number": 5}
)
event2_uuid = event_factory(
team=self.team, event="$pageview", distinct_id="test", properties={"$a_number": 6},
)
event_factory(
team=self.team, event="$pageview", distinct_id="test", properties={"$a_number": "rubbish"},
)
filter = Filter(data={"properties": {"$a_number__gt": 5}})
events = filter_events(filter, self.team)
self.assertEqual(events[0]["id"], event2_uuid)
def test_numerical_person_properties(self):
person_factory(team_id=self.team.pk, distinct_ids=["p1"], properties={"$a_number": 4})
person_factory(team_id=self.team.pk, distinct_ids=["p2"], properties={"$a_number": 5})
person_factory(team_id=self.team.pk, distinct_ids=["p3"], properties={"$a_number": 6})
filter = Filter(data={"properties": {"$a_number": 5}})
events = filter_events(filter, self.team)
self.assertEqual(events[0]["id"], event1_uuid)
filter = Filter(data={"properties": [{"type": "person", "key": "$a_number", "value": 4, "operator": "gt"}]})
self.assertEqual(len(filter_persons(filter, self.team)), 2)
filter = Filter(data={"properties": {"$a_number__lt": 6}})
events = filter_events(filter, self.team)
self.assertEqual(events[0]["id"], event1_uuid)
filter = Filter(data={"properties": [{"type": "person", "key": "$a_number", "value": 5}]})
self.assertEqual(len(filter_persons(filter, self.team)), 1)
def test_contains(self):
event_factory(team=self.team, distinct_id="test", event="$pageview")
event2_uuid = event_factory(
team=self.team,
event="$pageview",
distinct_id="test",
properties={"$current_url": "https://whatever.com"},
)
filter = Filter(data={"properties": {"$current_url__icontains": "whatever"}})
events = filter_events(filter, self.team)
self.assertEqual(events[0]["id"], event2_uuid)
filter = Filter(data={"properties": [{"type": "person", "key": "$a_number", "value": 6, "operator": "lt"}]})
self.assertEqual(len(filter_persons(filter, self.team)), 2)
def test_regex(self):
event1_uuid = event_factory(team=self.team, distinct_id="test", event="$pageview")
event2_uuid = event_factory(
team=self.team,
event="$pageview",
distinct_id="test",
properties={"$current_url": "https://whatever.com"},
)
filter = Filter(data={"properties": {"$current_url__regex": r"\.com$"}})
events = filter_events(filter, self.team)
self.assertEqual(events[0]["id"], event2_uuid)
filter = Filter(data={"properties": {"$current_url__not_regex": r"\.eee$"}})
events = filter_events(filter, self.team, order_by="timestamp")
self.assertEqual(events[0]["id"], event1_uuid)
self.assertEqual(events[1]["id"], event2_uuid)
def test_invalid_regex(self):
event_factory(team=self.team, distinct_id="test", event="$pageview")
event_factory(
team=self.team,
event="$pageview",
distinct_id="test",
properties={"$current_url": "https://whatever.com"},
)
filter = Filter(data={"properties": {"$current_url__regex": "?*"}})
self.assertEqual(len(filter_events(filter, self.team)), 0)
filter = Filter(data={"properties": {"$current_url__not_regex": "?*"}})
self.assertEqual(len(filter_events(filter, self.team)), 0)
def test_is_not(self):
event1_uuid = event_factory(team=self.team, distinct_id="test", event="$pageview")
event2_uuid = event_factory(
team=self.team,
event="$pageview",
distinct_id="test",
properties={"$current_url": "https://something.com"},
)
event_factory(
team=self.team,
event="$pageview",
distinct_id="test",
properties={"$current_url": "https://whatever.com"},
)
filter = Filter(data={"properties": {"$current_url__is_not": "https://whatever.com"}})
events = filter_events(filter, self.team)
self.assertEqual(sorted([events[0]["id"], events[1]["id"]]), sorted([event1_uuid, event2_uuid]))
self.assertEqual(len(events), 2)
def test_does_not_contain(self):
event1_uuid = event_factory(team=self.team, event="$pageview", distinct_id="test",)
event2_uuid = event_factory(
team=self.team,
event="$pageview",
distinct_id="test",
properties={"$current_url": "https://something.com"},
)
event_factory(
team=self.team,
event="$pageview",
distinct_id="test",
properties={"$current_url": "https://whatever.com"},
)
event3_uuid = event_factory(
team=self.team, event="$pageview", distinct_id="test", properties={"$current_url": None},
)
filter = Filter(data={"properties": {"$current_url__not_icontains": "whatever.com"}})
events = filter_events(filter, self.team, order_by="id")
self.assertEqual(sorted(event["id"] for event in events), sorted([event1_uuid, event2_uuid, event3_uuid]))
self.assertEqual(len(events), 3)
def test_multiple(self):
event2_uuid = event_factory(
team=self.team,
event="$pageview",
distinct_id="test",
properties={"$current_url": "https://something.com", "another_key": "value",},
)
event_factory(
team=self.team,
event="$pageview",
distinct_id="test",
properties={"$current_url": "https://something.com"},
)
filter = Filter(data={"properties": {"$current_url__icontains": "something.com", "another_key": "value",}})
events = filter_events(filter, self.team)
self.assertEqual(events[0]["id"], event2_uuid)
self.assertEqual(len(events), 1)
def test_user_properties(self):
person1 = person_factory(team_id=self.team.pk, distinct_ids=["person1"], properties={"group": "some group"})
person2 = person_factory(
team_id=self.team.pk, distinct_ids=["person2"], properties={"group": "another group"}
)
event2_uuid = event_factory(
team=self.team,
distinct_id="person1",
event="$pageview",
properties={"$current_url": "https://something.com", "another_key": "value",},
)
event_p2_uuid = event_factory(
team=self.team,
distinct_id="person2",
event="$pageview",
properties={"$current_url": "https://something.com"},
)
# test for leakage
_, _, team2 = Organization.objects.bootstrap(None)
person_team2 = person_factory(
team_id=team2.pk, distinct_ids=["person_team_2"], properties={"group": "another group"}
)
event_team2 = event_factory(
team=team2,
distinct_id="person_team_2",
event="$pageview",
properties={"$current_url": "https://something.com", "another_key": "value",},
)
filter = Filter(data={"properties": [{"key": "group", "value": "some group", "type": "person"}]})
events = filter_events(filter=filter, team=self.team, person_query=True, order_by=None)
self.assertEqual(len(events), 1)
self.assertEqual(events[0]["id"], event2_uuid)
def test_contains_persons(self):
person_factory(team_id=self.team.pk, distinct_ids=["p1"], properties={"url": "https://whatever.com"})
person_factory(team_id=self.team.pk, distinct_ids=["p2"], properties={"url": "https://example.com"})
filter = Filter(
data={"properties": [{"key": "group", "operator": "is_not", "value": "some group", "type": "person"}]}
data={"properties": [{"type": "person", "key": "url", "value": "whatever", "operator": "icontains"}]}
)
events = filter_events(filter=filter, team=self.team, person_query=True, order_by=None)
self.assertEqual(events[0]["id"], event_p2_uuid)
self.assertEqual(len(events), 1)
def test_user_properties_numerical(self):
person1 = person_factory(team_id=self.team.pk, distinct_ids=["person1"], properties={"group": 1})
person2 = person_factory(team_id=self.team.pk, distinct_ids=["person2"], properties={"group": 2})
event2_uuid = event_factory(
team=self.team,
distinct_id="person1",
event="$pageview",
properties={"$current_url": "https://something.com", "another_key": "value",},
results = filter_persons(filter, self.team)
self.assertEqual(len(results), 1)
def test_regex_persons(self):
p1_uuid = person_factory(
team_id=self.team.pk, distinct_ids=["p1"], properties={"url": "https://whatever.com"}
)
event_factory(
team=self.team,
distinct_id="person2",
event="$pageview",
properties={"$current_url": "https://something.com"},
p2_uuid = person_factory(team_id=self.team.pk, distinct_ids=["p2"])
filter = Filter(
data={"properties": [{"type": "person", "key": "url", "value": r"\.com$", "operator": "regex"}]}
)
results = filter_persons(filter, self.team)
self.assertCountEqual(results, [p1_uuid])
filter = Filter(
data={"properties": [{"type": "person", "key": "url", "value": r"\.eee$", "operator": "not_regex"}]}
)
results = filter_persons(filter, self.team)
self.assertCountEqual(results, [p1_uuid, p2_uuid])
def test_invalid_regex_persons(self):
    # `?*` is used here as an invalid pattern: neither `regex` nor its
    # negation `not_regex` is expected to match any person.
    for distinct_id, url in (("p1", "https://whatever.com"), ("p2", "https://example.com")):
        person_factory(team_id=self.team.pk, distinct_ids=[distinct_id], properties={"url": url})

    for operator in ("regex", "not_regex"):
        invalid_pattern_filter = Filter(
            data={"properties": [{"type": "person", "key": "url", "value": r"?*", "operator": operator}]}
        )
        matched = filter_persons(invalid_pattern_filter, self.team)
        self.assertEqual(len(matched), 0)
def test_is_not_persons(self):
person_factory(team_id=self.team.pk, distinct_ids=["p1"], properties={"url": "https://whatever.com"})
p2_uuid = person_factory(
team_id=self.team.pk, distinct_ids=["p2"], properties={"url": "https://example.com"}
)
filter = Filter(
data={
"properties": [
{"key": "group", "operator": "lt", "value": 2, "type": "person"},
{"key": "group", "operator": "gt", "value": 0, "type": "person"},
{"type": "person", "key": "url", "value": "https://whatever.com", "operator": "is_not"}
]
}
)
events = filter_events(filter=filter, team=self.team, person_query=True, order_by=None)
self.assertEqual(events[0]["id"], event2_uuid)
self.assertEqual(len(events), 1)
results = filter_persons(filter, self.team)
self.assertCountEqual(results, [p2_uuid])
def test_boolean_filters(self):
event1_uuid = event_factory(team=self.team, event="$pageview", distinct_id="test",)
event2_uuid = event_factory(
team=self.team, event="$pageview", distinct_id="test", properties={"is_first_user": True}
def test_does_not_contain_persons(self):
person_factory(team_id=self.team.pk, distinct_ids=["p1"], properties={"url": "https://whatever.com"})
p2_uuid = person_factory(
team_id=self.team.pk, distinct_ids=["p2"], properties={"url": "https://example.com"}
)
filter = Filter(data={"properties": [{"key": "is_first_user", "value": "true"}]})
events = filter_events(filter, self.team)
self.assertEqual(events[0]["id"], event2_uuid)
self.assertEqual(len(events), 1)
p3_uuid = person_factory(team_id=self.team.pk, distinct_ids=["p3"])
p4_uuid = person_factory(team_id=self.team.pk, distinct_ids=["p4"], properties={"url": None})
def test_is_not_set_and_is_set(self):
event1_uuid = event_factory(team=self.team, event="$pageview", distinct_id="test",)
event2_uuid = event_factory(
team=self.team, event="$pageview", distinct_id="test", properties={"is_first_user": True}
)
filter = Filter(
data={"properties": [{"key": "is_first_user", "operator": "is_not_set", "value": "is_not_set",}]}
data={
"properties": [
{"type": "person", "key": "url", "value": "whatever.com", "operator": "not_icontains"}
]
}
)
events = filter_events(filter, self.team)
self.assertEqual(events[0]["id"], event1_uuid)
self.assertEqual(len(events), 1)
results = filter_persons(filter, self.team)
self.assertCountEqual(results, [p2_uuid, p3_uuid, p4_uuid])
filter = Filter(data={"properties": [{"key": "is_first_user", "operator": "is_set", "value": "is_set"}]})
events = filter_events(filter, self.team)
self.assertEqual(events[0]["id"], event2_uuid)
self.assertEqual(len(events), 1)
def test_true_false(self):
event_factory(team=self.team, distinct_id="test", event="$pageview")
event2_uuid = event_factory(
team=self.team, event="$pageview", distinct_id="test", properties={"is_first": True},
def test_multiple_persons(self):
p1_uuid = person_factory(
team_id=self.team.pk,
distinct_ids=["p1"],
properties={"url": "https://whatever.com", "another_key": "value"},
)
filter = Filter(data={"properties": {"is_first": "true"}})
events = filter_events(filter, self.team)
self.assertEqual(events[0]["id"], event2_uuid)
person_factory(team_id=self.team.pk, distinct_ids=["p2"], properties={"url": "https://whatever.com"})
filter = Filter(data={"properties": {"is_first": ["true"]}})
events = filter_events(filter, self.team)
self.assertEqual(events[0]["id"], event2_uuid)
def test_is_not_true_false(self):
event_uuid = event_factory(team=self.team, distinct_id="test", event="$pageview")
event2_uuid = event_factory(
team=self.team, event="$pageview", distinct_id="test", properties={"is_first": True},
filter = Filter(
data={
"properties": [
{"type": "person", "key": "url", "value": "whatever.com", "operator": "icontains"},
{"type": "person", "key": "another_key", "value": "value"},
]
}
)
filter = Filter(data={"properties": [{"key": "is_first", "value": "true", "operator": "is_not"}]})
events = filter_events(filter, self.team)
self.assertEqual(events[0]["id"], event_uuid)
results = filter_persons(filter, self.team)
self.assertCountEqual(results, [p1_uuid])
def test_boolean_filters_persons(self):
    # Only p1 has `is_first_user` set; the filter supplies the boolean as
    # the string list ["true"] and must return exactly that person.
    flagged_uuid = person_factory(team_id=self.team.pk, distinct_ids=["p1"], properties={"is_first_user": True})
    person_factory(team_id=self.team.pk, distinct_ids=["p2"])

    property_spec = {"type": "person", "key": "is_first_user", "value": ["true"]}
    matched = filter_persons(Filter(data={"properties": [property_spec]}), self.team)

    self.assertEqual(matched, [flagged_uuid])
def test_is_not_set_and_is_set_persons(self):
    # p1 carries the property, p2 does not; `is_set` and `is_not_set`
    # should each select exactly one of them.
    with_property_uuid = person_factory(
        team_id=self.team.pk, distinct_ids=["p1"], properties={"is_first_user": True}
    )
    without_property_uuid = person_factory(team_id=self.team.pk, distinct_ids=["p2"])

    def persons_matching(operator):
        # Same property spec for both assertions; only the operator varies.
        person_filter = Filter(
            data={"properties": [{"type": "person", "key": "is_first_user", "value": "", "operator": operator}]}
        )
        return filter_persons(person_filter, self.team)

    self.assertEqual(persons_matching("is_set"), [with_property_uuid])
    self.assertEqual(persons_matching("is_not_set"), [without_property_uuid])
def test_is_not_true_false_persons(self):
    # `is_not ["true"]` should exclude p1 (is_first_user=True) and keep p2,
    # which has no such property.
    person_factory(team_id=self.team.pk, distinct_ids=["p1"], properties={"is_first_user": True})
    unflagged_uuid = person_factory(team_id=self.team.pk, distinct_ids=["p2"])

    property_spec = {"type": "person", "key": "is_first_user", "value": ["true"], "operator": "is_not"}
    person_filter = Filter(data={"properties": [property_spec]})

    matched = filter_persons(person_filter, self.team)
    self.assertEqual(matched, [unflagged_uuid])
def test_json_object(self):
person1 = person_factory(
p1_uuid = person_factory(
team_id=self.team.pk,
distinct_ids=["person1"],
properties={"name": {"first_name": "Mary", "last_name": "Smith"}},
)
event1_uuid = event_factory(
team=self.team,
distinct_id="person1",
event="$pageview",
properties={"$current_url": "https://something.com"},
)
filter = Filter(
data={
"properties": [
@ -394,93 +277,38 @@ def property_to_Q_test_factory(filter_events: Callable, event_factory, person_fa
]
}
)
events = filter_events(filter=filter, team=self.team, person_query=True, order_by=None)
self.assertEqual(events[0]["id"], event1_uuid)
self.assertEqual(len(events), 1)
results = filter_persons(filter, self.team)
self.assertEqual(results, [p1_uuid])
def test_element_selectors(self):
    # One autocapture event has an `a` + `div` element chain, the other has
    # no elements at all; the "div > a" selector should match only one.
    element_chain = [Element.objects.create(tag_name="a"), Element.objects.create(tag_name="div")]
    event_factory(team=self.team, event="$autocapture", distinct_id="distinct_id", elements=element_chain)
    event_factory(team=self.team, event="$autocapture", distinct_id="distinct_id")

    selector_filter = Filter(data={"properties": [{"key": "selector", "value": "div > a", "type": "element"}]})
    matched = filter_events(filter=selector_filter, team=self.team)

    self.assertEqual(len(matched), 1)
def test_element_filter(self):
    # Two autocapture events whose `a` element has distinct text, plus one
    # event without any elements.
    for link_text in ("some text", "some other text"):
        event_factory(
            team=self.team,
            event="$autocapture",
            distinct_id="distinct_id",
            elements=[
                Element.objects.create(tag_name="a", text=link_text),
                Element.objects.create(tag_name="div"),
            ],
        )
    event_factory(team=self.team, event="$autocapture", distinct_id="distinct_id")

    # A list value matches either text.
    either_text = Filter(
        data={"properties": [{"key": "text", "value": ["some text", "some other text"], "type": "element"}]}
    )
    self.assertEqual(len(filter_events(filter=either_text, team=self.team)), 2)

    # A single value matches only the event with that exact text.
    single_text = Filter(data={"properties": [{"key": "text", "value": "some text", "type": "element"}]})
    self.assertEqual(len(filter_events(filter=single_text, team=self.team)), 1)
def test_filter_out_team_members(self):
person1 = person_factory(
team_id=self.team.pk, distinct_ids=["team_member"], properties={"email": "test@posthog.com"}
)
person1 = person_factory(
def test_filter_out_team_members_persons(self):
person_factory(team_id=self.team.pk, distinct_ids=["team_member"], properties={"email": "test@posthog.com"})
p2_uuid = person_factory(
team_id=self.team.pk, distinct_ids=["random_user"], properties={"email": "test@gmail.com"}
)
self.team.test_account_filters = [
{"key": "email", "value": "@posthog.com", "operator": "not_icontains", "type": "person"}
]
self.team.save()
event_factory(team=self.team, distinct_id="team_member", event="$pageview")
event_factory(team=self.team, distinct_id="random_user", event="$pageview")
filter = Filter(data={FILTER_TEST_ACCOUNTS: True, "events": [{"id": "$pageview"}]}, team=self.team)
events = filter_events(filter=filter, team=self.team, person_query=True)
self.assertEqual(len(events), 1)
filter = Filter(data={FILTER_TEST_ACCOUNTS: True}, team=self.team)
results = filter_persons(filter, self.team)
self.assertEqual(results, [p2_uuid])
return TestPropertiesToQ
def _filter_events(filter: Filter, team: Team, person_query: Optional[bool] = False, order_by: Optional[str] = None):
    """Return the team's events matching ``filter.properties`` as a ``.values()`` queryset.

    When ``person_query`` is truthy the queryset is first passed through
    ``add_person_id`` for the team; when ``order_by`` is truthy the result is
    ordered by that field.
    """
    queryset = Event.objects.add_person_id(team.pk) if person_query else Event.objects
    property_q = properties_to_Q(filter.properties, team_id=team.pk)
    # Kept as two separate .filter() calls, as in the original.
    queryset = queryset.filter(property_q).filter(team_id=team.pk)
    if order_by:
        queryset = queryset.order_by(order_by)
    return queryset.values()
def _filter_persons(filter: Filter, team: Team):
    """Return the UUIDs (as strings) of the team's persons matching ``filter.properties``."""
    property_q = properties_to_Q(filter.properties, team_id=team.pk, is_direct_query=True)
    matching_uuids = Person.objects.filter(property_q).filter(team_id=team.pk).values_list("uuid", flat=True)
    return list(map(str, matching_uuids))
def _create_event(**kwargs):
    """Create an ``Event`` from ``kwargs`` and return its primary key."""
    return Event.objects.create(**kwargs).pk
def _create_person(**kwargs):
    """Create a ``Person`` from ``kwargs`` and return its UUID as a string."""
    created = Person.objects.create(**kwargs)
    return str(created.uuid)
class TestDjangoPropertiesToQ(property_to_Q_test_factory(_filter_events, _create_event, Person.objects.create)): # type: ignore
class TestDjangoPropertiesToQ(property_to_Q_test_factory(_filter_persons, _create_person)): # type: ignore
def test_person_cohort_properties(self):
person1_distinct_id = "person1"
person1 = Person.objects.create(

View File

@ -1,16 +1,9 @@
import json
from datetime import datetime, time
from typing import Any, Callable, Dict, List, Optional, Union, cast
from typing import Any, Callable, Dict, List, Union, cast
from dateutil.relativedelta import relativedelta
from django.db.models import Exists, OuterRef, Q, QuerySet
from django.db.models.query import Prefetch
from rest_framework import request
from django.db.models import Exists, OuterRef, Q
from rest_framework.exceptions import ValidationError
from posthog.constants import TREND_FILTER_TYPE_ACTIONS
from posthog.models.cohort import Cohort
from posthog.models.entity import Entity
from posthog.models.filters.filter import Filter
from posthog.models.person import Person
from posthog.models.property import Property
@ -74,43 +67,6 @@ TIME_IN_SECONDS: Dict[str, Any] = {
"month": 3600 * 24 * 30, # TODO: Let's get rid of this lie! Months are not all 30 days long
}
"""
filter_events takes team_id, filter and entity, and generates a Q object that you can use to filter a QuerySet
"""
# Build a single Q object combining optional timestamp bounds with the
# property filters of `filter` and, when given, of `entity`.
#
# - team_id: forwarded to properties_to_Q for every property filter.
# - filter: object exposing date_from, date_to, interval and properties.
# - entity: optional; only its `properties` are read here.
# - include_dates: when falsy, no timestamp conditions are added at all.
# - interval_annotation: accepted but never referenced in this body.
def filter_events(
team_id: int, filter, entity: Optional[Entity] = None, include_dates: bool = True, interval_annotation=None
) -> Q:
filters = Q()
# Default padding added to the upper bound; overridden per interval below.
relativity = relativedelta(days=1)
date_from = filter.date_from
date_to = filter.date_to
if filter.interval == "hour":
relativity = relativedelta(hours=1)
elif filter.interval == "week":
relativity = relativedelta(weeks=1)
# Shift the lower bound back by weekday() + 1 days.
# presumably date_from is a datetime, in which case this lands on the Sunday before it — TODO confirm
# NOTE(review): this dereferences filter.date_from before the truthiness check further down.
date_from = filter.date_from - relativedelta(days=filter.date_from.weekday() + 1)
elif filter.interval == "month":
# Uses the one-day default assigned above, i.e. one month minus one day.
relativity = relativedelta(months=1) - relativity # go to last day of month instead of first of next
# Snap the lower bound to the first day of its month.
# NOTE(review): also dereferences filter.date_from before the truthiness check further down.
date_from = filter.date_from.replace(day=1)
else:
# Any other interval value (including the default day granularity) ends up here.
date_to = datetime.combine(
filter.date_to, time()
) # round time to start of day. Relativity addition will make sure the current day is included
# The lower bound is applied only when the filter actually has a date_from.
if filter.date_from and include_dates:
filters &= Q(timestamp__gte=date_from)
# The upper bound is always applied when dates are included, padded by `relativity`.
if include_dates:
filters &= Q(timestamp__lte=date_to + relativity)
if filter.properties:
filters &= properties_to_Q(filter.properties, team_id=team_id)
if entity and entity.properties:
filters &= properties_to_Q(entity.properties, team_id=team_id)
return filters
def properties_to_Q(properties: List[Property], team_id: int, is_direct_query: bool = False) -> Q:
"""
@ -134,26 +90,15 @@ def properties_to_Q(properties: List[Property], team_id: int, is_direct_query: b
person_Q &= property.property_to_Q()
filters &= Q(Exists(Person.objects.filter(person_Q, id=OuterRef("person_id"),).only("pk")))
for property in [prop for prop in properties if prop.type == "event"]:
filters &= property.property_to_Q()
event_properties = [prop for prop in properties if prop.type == "event"]
if len(event_properties) > 0:
raise ValueError("Event properties are no longer supported in properties_to_Q")
# importing from .event and .cohort below to avoid importing from partially initialized modules
element_properties = [prop for prop in properties if prop.type == "element"]
if len(element_properties) > 0:
from posthog.models.event import Event
filters &= Q(
Exists(
Event.objects.filter(pk=OuterRef("id"))
.filter(
**Event.objects.filter_by_element(
{item.key: item.value for item in element_properties}, team_id=team_id,
)
)
.only("id")
)
)
raise ValueError("Element properties are no longer supported in properties_to_Q")
cohort_properties = [prop for prop in properties if prop.type == "cohort"]
if len(cohort_properties) > 0:
@ -175,23 +120,3 @@ def properties_to_Q(properties: List[Property], team_id: int, is_direct_query: b
raise ValueError("Group properties are not supported for indirect filtering via postgres")
return filters
def entity_to_Q(entity: Entity, team_id: int) -> Q:
    """Translate ``entity`` into a Q object.

    Action entities match on ``action__pk``; every other entity type matches
    on ``event``. If the entity has properties, they are AND-ed in via
    ``properties_to_Q``.
    """
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        entity_q = Q(action__pk=entity.id)
    else:
        entity_q = Q(event=entity.id)

    if not entity.properties:
        return entity_q
    return entity_q & properties_to_Q(entity.properties, team_id)
def filter_persons(team_id: int, request: request.Request, queryset: QuerySet) -> QuerySet:
    """Narrow ``queryset`` according to the request's GET parameters.

    An ``id`` parameter is treated as a comma-separated list of ids. The
    remaining parameters are applied through ``PersonFilter``, and distinct
    ids are prefetched into ``distinct_ids_cache``. ``team_id`` is not used
    by this function.
    """
    raw_ids = request.GET.get("id")
    if raw_ids:
        queryset = queryset.filter(id__in=raw_ids.split(","))

    # Function-scope import, kept where the original placed it.
    from posthog.api.person import PersonFilter

    filtered = PersonFilter(data=request.GET, request=request, queryset=queryset).qs
    return filtered.prefetch_related(Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))