0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-28 09:16:49 +01:00

Simplification of property groups usage (#8641)

* initial working version for trends with simplify

* simplify logic only if property_groups exists

* purge empty clauses

* fix tests

* fix snapshots

* update subquery logic

* update snapshots

* update snapshots

* columnoptimizer

* combine tests

* proof of concept simplification

* diff snapshots

* fix tests

* helpers and property_groups everywhere

* update snapshots

* fix simplify

* type

* missing argument

* add old properties back to simplify logic

* resolve merge conflicts, address a few comments

* wip cleanup

* rm simplify filters

* redo removal

* update snapshots

* last snapshot

* update snapshot

Co-authored-by: Eric <eeoneric@gmail.com>
This commit is contained in:
Neil Kakkar 2022-02-17 17:46:08 +00:00 committed by GitHub
parent 0fb19f87d7
commit 101e1c8d1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 289 additions and 153 deletions

View File

@ -238,10 +238,7 @@ def is_precalculated_query(cohort: Cohort) -> bool:
def format_filter_query(cohort: Cohort, index: int = 0, id_column: str = "distinct_id") -> Tuple[str, Dict[str, Any]]:
is_precalculated = is_precalculated_query(cohort)
person_query, params = (
format_precalculated_cohort_query(cohort.pk, index) if is_precalculated else format_person_query(cohort, index)
)
person_query, params = format_cohort_subquery(cohort, index)
person_id_query = CALCULATE_COHORT_PEOPLE_SQL.format(
query=person_query,
@ -251,6 +248,16 @@ def format_filter_query(cohort: Cohort, index: int = 0, id_column: str = "distin
return person_id_query, params
def format_cohort_subquery(cohort: Cohort, index: int, custom_match_field="person_id") -> Tuple[str, Dict[str, Any]]:
is_precalculated = is_precalculated_query(cohort)
person_query, params = (
format_precalculated_cohort_query(cohort.pk, index, custom_match_field=custom_match_field)
if is_precalculated
else format_person_query(cohort, index, custom_match_field=custom_match_field)
)
return person_query, params
def get_person_ids_by_cohort_id(team: Team, cohort_id: int, limit: Optional[int] = None, offset: Optional[int] = None):
from ee.clickhouse.models.property import parse_prop_grouped_clauses
@ -355,8 +362,10 @@ def simplified_cohort_filter_properties(cohort: Cohort, team: Team) -> List[Prop
return [Property(type="cohort", key="id", value=cohort.pk)]
elif group.get("properties"):
# :TRICKY: This will recursively simplify all the properties
# :TRICKY: cohort groups will only contain 1 level deep properties which means we can use _property_groups_flat to return
# TODO: Update this when cohort groups use property_groups
filter = Filter(data=group, team=team)
group_filters.append(filter.properties)
group_filters.append(filter.property_groups_flat)
if len(group_filters) > 1:
# :TODO: Support or properties

View File

@ -19,6 +19,7 @@ from rest_framework import exceptions
from ee.clickhouse.client import sync_execute
from ee.clickhouse.materialized_columns.columns import TableWithProperties, get_materialized_columns
from ee.clickhouse.models.cohort import (
format_cohort_subquery,
format_filter_query,
format_precalculated_cohort_query,
format_static_cohort_query,
@ -100,6 +101,8 @@ def parse_prop_grouped_clauses(
group_clauses.append(clause)
final_params.update(params)
# purge empty returns
group_clauses = [clause for clause in group_clauses if clause]
_final = f"{property_group.type} ".join(group_clauses)
else:
_final, final_params = parse_prop_clauses(
@ -158,9 +161,17 @@ def parse_prop_clauses(
f"{property_operator} 0 = 13"
) # If cohort doesn't exist, nothing can match, unless an OR operator is used
else:
person_id_query, cohort_filter_params = format_filter_query(cohort, idx)
params = {**params, **cohort_filter_params}
final.append(f"{property_operator} {table_name}distinct_id IN ({person_id_query})")
if person_properties_mode == PersonPropertiesMode.EXCLUDE:
person_id_query, cohort_filter_params = format_cohort_subquery(
cohort, idx, custom_match_field=f"{person_id_joined_alias}"
)
params = {**params, **cohort_filter_params}
final.append(f"{property_operator} {person_id_query}")
else:
person_id_query, cohort_filter_params = format_filter_query(cohort, idx)
params = {**params, **cohort_filter_params}
final.append(f"{property_operator} {table_name}distinct_id IN ({person_id_query})")
elif prop.type == "person" and person_properties_mode != PersonPropertiesMode.EXCLUDE:
# :TODO: Clean this up by using ClickhousePersonQuery over GET_DISTINCT_IDS_BY_PROPERTY_SQL to have access
# to materialized columns

View File

@ -4,11 +4,11 @@
FROM events
WHERE team_id = 2
AND ((has(['val_1'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'attr')))
FROM JSONExtractRaw(properties, 'attr_1')))
AND has(['val_2'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'attr_2'))))
OR (has(['val_2'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'attr')))))
FROM JSONExtractRaw(properties, 'attr_1')))))
'
---
# name: TestPropFormat.test_parse_groups_persons
@ -62,7 +62,7 @@
---
# name: test_parse_groups_persons_edge_case_with_single_filter
<class 'tuple'> (
'AND ( has(%(vglobalperson_0)s, trim(BOTH \'"\' FROM JSONExtractRaw(person_props, %(kglobalperson_0)s))))',
'AND ( has(%(vglobalperson_0)s, pmat_email))',
<class 'dict'> {
'kglobalperson_0': 'email',
'vglobalperson_0': <class 'list'> [

View File

@ -5,6 +5,7 @@ from uuid import UUID, uuid4
import pytest
from freezegun.api import freeze_time
from rest_framework.exceptions import ValidationError
from ee.clickhouse.client import sync_execute
from ee.clickhouse.materialized_columns.columns import materialize
@ -362,27 +363,27 @@ class TestPropFormat(ClickhouseTestMixin, BaseTest):
def test_parse_groups(self):
_create_event(
event="$pageview", team=self.team, distinct_id="some_id", properties={"attr": "val_1", "attr_2": "val_2"},
event="$pageview", team=self.team, distinct_id="some_id", properties={"attr_1": "val_1", "attr_2": "val_2"},
)
_create_event(
event="$pageview", team=self.team, distinct_id="some_id", properties={"attr": "val_2"},
event="$pageview", team=self.team, distinct_id="some_id", properties={"attr_1": "val_2"},
)
_create_event(
event="$pageview", team=self.team, distinct_id="some_other_id", properties={"attr": "val_3"},
event="$pageview", team=self.team, distinct_id="some_other_id", properties={"attr_1": "val_3"},
)
filter = Filter(
data={
"property_groups": {
"properties": {
"type": "OR",
"groups": [
{
"type": "AND",
"groups": [{"key": "attr", "value": "val_1"}, {"key": "attr_2", "value": "val_2"}],
"groups": [{"key": "attr_1", "value": "val_1"}, {"key": "attr_2", "value": "val_2"}],
},
{"type": "OR", "groups": [{"key": "attr", "value": "val_2"}],},
{"type": "OR", "groups": [{"key": "attr_1", "value": "val_2"}],},
],
}
}
@ -390,6 +391,25 @@ class TestPropFormat(ClickhouseTestMixin, BaseTest):
self.assertEqual(len(self._run_query(filter)), 2)
def test_parse_groups_invalid_type(self):
filter = Filter(
data={
"properties": {
"type": "OR",
"groups": [
{
"type": "AND",
"groups": [{"key": "attr", "value": "val_1"}, {"key": "attr_2", "value": "val_2"}],
},
{"type": "XOR", "groups": [{"key": "attr", "value": "val_2"}],},
],
}
}
)
with self.assertRaises(ValidationError):
self._run_query(filter)
@snapshot_clickhouse_queries
def test_parse_groups_persons(self):
_create_person(distinct_ids=["some_id"], team_id=self.team.pk, properties={"email": "1@posthog.com"})
@ -413,7 +433,7 @@ class TestPropFormat(ClickhouseTestMixin, BaseTest):
filter = Filter(
data={
"property_groups": {
"properties": {
"type": "OR",
"groups": [
{"type": "OR", "groups": [{"key": "email", "type": "person", "value": "1@posthog.com"}],},
@ -572,11 +592,10 @@ def test_parse_prop_clauses_defaults(snapshot):
)
@pytest.mark.django_db
def test_parse_groups_persons_edge_case_with_single_filter(snapshot):
filter = Filter(
data={
"property_groups": {"type": "OR", "groups": [{"key": "email", "type": "person", "value": "1@posthog.com"}],}
}
data={"properties": {"type": "OR", "groups": [{"key": "email", "type": "person", "value": "1@posthog.com"}],}}
)
assert (
parse_prop_grouped_clauses(

View File

@ -57,7 +57,7 @@ class ColumnOptimizer:
"Returns whether this query uses elements_chain"
has_element_type_property = lambda properties: any(prop.type == "element" for prop in properties)
if has_element_type_property(self.filter.properties):
if has_element_type_property(self.filter.property_groups_flat):
return True
# Both entities and funnel exclusions can contain nested elements_chain inclusions
@ -77,7 +77,7 @@ class ColumnOptimizer:
@cached_property
def properties_used_in_filter(self) -> Counter[PropertyIdentifier]:
"Returns collection of properties + types that this query would use"
counter: Counter[PropertyIdentifier] = extract_tables_and_properties(self.filter.properties)
counter: Counter[PropertyIdentifier] = extract_tables_and_properties(self.filter.property_groups_flat)
if not isinstance(self.filter, StickinessFilter):
# Some breakdown types read properties

View File

@ -1,8 +1,13 @@
from abc import ABCMeta, abstractmethod
from typing import Any, Dict, List, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union
from ee.clickhouse.materialized_columns.columns import ColumnName
from ee.clickhouse.models.cohort import format_person_query, format_precalculated_cohort_query, is_precalculated_query
from ee.clickhouse.models.cohort import (
format_cohort_subquery,
format_person_query,
format_precalculated_cohort_query,
is_precalculated_query,
)
from ee.clickhouse.models.property import parse_prop_clauses, parse_prop_grouped_clauses
from ee.clickhouse.models.util import PersonPropertiesMode
from ee.clickhouse.queries.column_optimizer import ColumnOptimizer
@ -95,7 +100,7 @@ class ClickhouseEventQuery(metaclass=ABCMeta):
# :KLUDGE: The following is mostly making sure if cohorts are included as well.
# Can be simplified significantly after https://github.com/PostHog/posthog/issues/5854
if any(self._should_property_join_persons(prop) for prop in self._filter.properties):
if any(self._should_property_join_persons(prop) for prop in self._filter.property_groups_flat):
self._should_join_distinct_ids = True
self._should_join_persons = True
return
@ -157,6 +162,20 @@ class ClickhouseEventQuery(metaclass=ABCMeta):
return query, date_params
def _get_prop_groups(self, prop_group: Optional[PropertyGroup]) -> Tuple[str, Dict]:
if not prop_group:
return "", {}
return parse_prop_grouped_clauses(
team_id=self._team_id,
property_group=prop_group,
prepend="global",
table_name=self.EVENT_TABLE_ALIAS,
allow_denormalized_props=True,
person_properties_mode=PersonPropertiesMode.EXCLUDE,
person_id_joined_alias=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id",
)
def _get_props(self, filters: List[Property]) -> Tuple[str, Dict]:
final = []
params: Dict[str, Any] = {}
@ -184,14 +203,8 @@ class ClickhouseEventQuery(metaclass=ABCMeta):
except Cohort.DoesNotExist:
return "0 = 11", {} # If cohort doesn't exist, nothing can match
is_precalculated = is_precalculated_query(cohort)
person_id_query, cohort_filter_params = (
format_precalculated_cohort_query(
cohort.pk, 0, custom_match_field=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id"
)
if is_precalculated
else format_person_query(cohort, 0, custom_match_field=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id")
person_id_query, cohort_filter_params = format_cohort_subquery(
cohort, 0, custom_match_field=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id"
)
return person_id_query, cohort_filter_params

View File

@ -51,8 +51,8 @@ class FunnelEventQuery(ClickhouseEventQuery):
date_query, date_params = self._get_date_filter()
self.params.update(date_params)
prop_filters = self._filter.properties
prop_query, prop_params = self._get_props(prop_filters)
prop_query, prop_params = self._get_prop_groups(self._filter.property_groups)
self.params.update(prop_params)
if skip_entity_filter:

View File

@ -75,8 +75,8 @@ class PathEventQuery(ClickhouseEventQuery):
date_query, date_params = self._get_date_filter()
self.params.update(date_params)
prop_filters = self._filter.properties
prop_query, prop_params = self._get_props(prop_filters)
prop_query, prop_params = self._get_prop_groups(self._filter.property_groups)
self.params.update(prop_params)
event_query, event_params = self._get_event_query()

View File

@ -72,7 +72,7 @@ class ClickhousePersonQuery:
@property
def is_used(self):
"Returns whether properties or any other columns are actually being queried"
if any(self._uses_person_id(prop) for prop in self._filter.properties):
if any(self._uses_person_id(prop) for prop in self._filter.property_groups_flat):
return True
if any(self._uses_person_id(prop) for entity in self._filter.entities for prop in entity.properties):
return True
@ -88,7 +88,7 @@ class ClickhousePersonQuery:
# Here, we remove the ones only to be used for filtering.
# The same property might be present for both querying and filtering, and hence the Counter.
properties_to_query = self._column_optimizer._used_properties_with_type("person")
properties_to_query -= extract_tables_and_properties(self._filter.properties)
properties_to_query -= extract_tables_and_properties(self._filter.property_groups_flat)
if self._entity is not None:
properties_to_query -= extract_tables_and_properties(self._entity.properties)
@ -100,7 +100,7 @@ class ClickhousePersonQuery:
def _get_person_filters(self) -> Tuple[str, Dict]:
conditions, params = [""], {}
properties = self._filter.properties + (self._entity.properties if self._entity else [])
properties = self._filter.property_groups_flat + (self._entity.properties if self._entity else [])
for index, property in enumerate(properties):
if property.type != "person":

View File

@ -107,8 +107,8 @@ class RetentionEventsQuery(ClickhouseEventQuery):
date_query, date_params = self._get_date_filter()
self.params.update(date_params)
prop_filters = [*self._filter.properties]
prop_query, prop_params = self._get_props(prop_filters)
prop_query, prop_params = self._get_prop_groups(self._filter.property_groups)
self.params.update(prop_params)
entity_query, entity_params = self._get_entity_query(

View File

@ -265,7 +265,9 @@ class ClickhouseSessionRecordingList(ClickhouseEventQuery):
offset = self._filter.offset or 0
base_params = {"team_id": self._team_id, "limit": self.limit + 1, "offset": offset}
person_query, person_query_params = self._get_person_query()
prop_query, prop_params = self._get_props(self._filter.properties)
prop_query, prop_params = self._get_prop_groups(self._filter.property_groups)
events_timestamp_clause, events_timestamp_params = self._get_events_timestamp_clause()
recording_start_time_clause, recording_start_time_params = self._get_recording_start_time_clause()
person_id_clause, person_id_params = self._get_person_id_clause()

View File

@ -178,7 +178,7 @@
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE 1 = 1
AND (person_id IN
AND (pdi.person_id IN
(SELECT person_id
FROM cohortpeople
WHERE team_id = 2

View File

@ -5,9 +5,10 @@ from ee.clickhouse.models.action import format_action_filter
from ee.clickhouse.models.group import get_aggregation_target_field
from ee.clickhouse.queries.event_query import ClickhouseEventQuery
from ee.clickhouse.queries.util import get_trunc_func_ch
from posthog.constants import TREND_FILTER_TYPE_ACTIONS
from posthog.constants import TREND_FILTER_TYPE_ACTIONS, PropertyOperatorType
from posthog.models import Entity
from posthog.models.filters.stickiness_filter import StickinessFilter
from posthog.models.property import PropertyGroup
class StickinessEventsQuery(ClickhouseEventQuery):
@ -19,7 +20,11 @@ class StickinessEventsQuery(ClickhouseEventQuery):
super().__init__(*args, **kwargs)
def get_query(self) -> Tuple[str, Dict[str, Any]]:
prop_query, prop_params = self._get_props(self._filter.properties + self._entity.properties)
prop_query, prop_params = self._get_prop_groups(
self._filter.property_groups.combine_properties(PropertyOperatorType.AND, self._entity.properties)
)
self.params.update(prop_params)
actions_query, actions_params = self.get_actions_query()

View File

@ -39,7 +39,7 @@
AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime('2021-01-14 00:00:00'))
AND timestamp <= '2021-01-21 23:59:59'
AND (has(['Jane'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'name'))))
FROM JSONExtractRaw(e.properties, 'name'))))
'
---
# name: TestEventQuery.test_account_filters.3
@ -109,7 +109,7 @@
AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime('2021-01-14 00:00:00'))
AND timestamp <= '2021-01-21 23:59:59'
AND (has(['Jane'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'name'))))
FROM JSONExtractRaw(e.properties, 'name'))))
'
---
# name: TestEventQuery.test_basic_event_filter
@ -146,23 +146,23 @@
AND event = 'viewed'
AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime('2021-05-01 00:00:00'))
AND timestamp <= '2021-05-07 23:59:59'
AND pdi.person_id IN
(select id
from
(SELECT *
FROM person
JOIN
(SELECT id,
max(_timestamp) as _timestamp,
max(is_deleted) as is_deleted
FROM person
WHERE team_id = 2
GROUP BY id) as person_max ON person.id = person_max.id
AND person._timestamp = person_max._timestamp
WHERE team_id = 2
AND person_max.is_deleted = 0
AND (has(['test'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'name')))) ))
AND (pdi.person_id IN
(select id
from
(SELECT *
FROM person
JOIN
(SELECT id,
max(_timestamp) as _timestamp,
max(is_deleted) as is_deleted
FROM person
WHERE team_id = 2
GROUP BY id) as person_max ON person.id = person_max.id
AND person._timestamp = person_max._timestamp
WHERE team_id = 2
AND person_max.is_deleted = 0
AND (has(['test'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'name')))) )))
'
---
# name: TestEventQuery.test_denormalised_props
@ -175,8 +175,8 @@
AND event = 'user signed up'
AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime('2020-01-01 00:00:00'))
AND timestamp <= '2020-01-14 23:59:59'
AND (has(['hi'], mat_test_prop))
AND (has(['hi'], mat_test_prop))
AND ((has(['hi'], mat_test_prop))
AND (has(['hi'], mat_test_prop)))
'
---
# name: TestEventQuery.test_element
@ -226,23 +226,23 @@
AND event = '$pageview'
AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime('2021-05-01 00:00:00'))
AND timestamp <= '2021-05-07 23:59:59'
AND pdi.person_id IN
(select id
from
(SELECT *
FROM person
JOIN
(SELECT id,
max(_timestamp) as _timestamp,
max(is_deleted) as is_deleted
FROM person
WHERE team_id = 2
GROUP BY id) as person_max ON person.id = person_max.id
AND person._timestamp = person_max._timestamp
WHERE team_id = 2
AND person_max.is_deleted = 0
AND (has(['test'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'name')))) ))
AND (pdi.person_id IN
(select id
from
(SELECT *
FROM person
JOIN
(SELECT id,
max(_timestamp) as _timestamp,
max(is_deleted) as is_deleted
FROM person
WHERE team_id = 2
GROUP BY id) as person_max ON person.id = person_max.id
AND person._timestamp = person_max._timestamp
WHERE team_id = 2
AND person_max.is_deleted = 0
AND (has(['test'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'name')))) )))
'
---
# name: TestEventQuery.test_event_properties_filter
@ -256,7 +256,7 @@
AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime('2021-05-01 00:00:00'))
AND timestamp <= '2021-05-07 23:59:59'
AND (has(['test_val'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'some_key'))))
FROM JSONExtractRaw(e.properties, 'some_key'))))
'
---
# name: TestEventQuery.test_event_properties_filter.1
@ -269,7 +269,7 @@
AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime('2021-05-01 00:00:00'))
AND timestamp <= '2021-05-07 23:59:59'
AND (has(['test_val'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'some_key'))))
FROM JSONExtractRaw(e.properties, 'some_key'))))
'
---
# name: TestEventQuery.test_groups_filters
@ -296,9 +296,9 @@
AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime('2020-01-01 00:00:00'))
AND timestamp <= '2020-01-12 23:59:59'
AND (has(['finance'], trim(BOTH '"'
FROM JSONExtractRaw(group_properties_0, 'industry'))))
AND (has(['value'], trim(BOTH '"'
FROM JSONExtractRaw(group_properties_1, 'another'))))
FROM JSONExtractRaw(group_properties_0, 'industry')))
AND has(['value'], trim(BOTH '"'
FROM JSONExtractRaw(group_properties_1, 'another'))))
'
---
# name: TestEventQuery.test_groups_filters_mixed
@ -360,7 +360,7 @@
AND event = 'viewed'
AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime('2021-05-01 00:00:00'))
AND timestamp <= '2021-05-07 23:59:59'
AND (person_id IN
AND (pdi.person_id IN
(SELECT person_id
FROM person_static_cohort
WHERE cohort_id = 2

View File

@ -1,3 +1,5 @@
import pytest
from ee.clickhouse.materialized_columns import materialize
from ee.clickhouse.queries.column_optimizer import ColumnOptimizer
from ee.clickhouse.util import ClickhouseTestMixin
@ -15,6 +17,7 @@ PROPERTIES_OF_ALL_TYPES = [
BASE_FILTER = Filter({"events": [{"id": "$pageview", "type": "events", "order": 0}]})
FILTER_WITH_PROPERTIES = BASE_FILTER.with_data({"properties": PROPERTIES_OF_ALL_TYPES})
FILTER_WITH_GROUPS = BASE_FILTER.with_data({"properties": {"type": "AND", "groups": PROPERTIES_OF_ALL_TYPES}})
class TestColumnOptimizer(ClickhouseTestMixin, APIBaseTest):
@ -37,6 +40,16 @@ class TestColumnOptimizer(ClickhouseTestMixin, APIBaseTest):
("group_prop", "group", 2): 1,
},
)
self.assertEqual(
properties_used_in_filter(FILTER_WITH_GROUPS),
{
("event_prop", "event", None): 1,
("person_prop", "person", None): 1,
("id", "cohort", None): 1,
("tag_name", "element", None): 1,
("group_prop", "group", 2): 1,
},
)
# Breakdown cases
filter = BASE_FILTER.with_data({"breakdown": "some_prop", "breakdown_type": "person"})
@ -150,15 +163,20 @@ class TestColumnOptimizer(ClickhouseTestMixin, APIBaseTest):
def test_materialized_columns_checks(self):
optimizer = lambda: ColumnOptimizer(FILTER_WITH_PROPERTIES, self.team.id)
optimizer_groups = lambda: ColumnOptimizer(FILTER_WITH_GROUPS, self.team.id)
self.assertEqual(optimizer().event_columns_to_query, {"properties"})
self.assertEqual(optimizer().person_columns_to_query, {"properties"})
self.assertEqual(optimizer_groups().event_columns_to_query, {"properties"})
self.assertEqual(optimizer_groups().person_columns_to_query, {"properties"})
materialize("events", "event_prop")
materialize("person", "person_prop")
self.assertEqual(optimizer().event_columns_to_query, {"mat_event_prop"})
self.assertEqual(optimizer().person_columns_to_query, {"pmat_person_prop"})
self.assertEqual(optimizer_groups().event_columns_to_query, {"mat_event_prop"})
self.assertEqual(optimizer_groups().person_columns_to_query, {"pmat_person_prop"})
def test_should_query_element_chain_column(self):
should_query_elements_chain_column = lambda filter: ColumnOptimizer(
@ -167,6 +185,7 @@ class TestColumnOptimizer(ClickhouseTestMixin, APIBaseTest):
self.assertEqual(should_query_elements_chain_column(BASE_FILTER), False)
self.assertEqual(should_query_elements_chain_column(FILTER_WITH_PROPERTIES), True)
self.assertEqual(should_query_elements_chain_column(FILTER_WITH_GROUPS), True)
filter = Filter(
data={"events": [{"id": "$pageview", "type": "events", "order": 0, "properties": PROPERTIES_OF_ALL_TYPES,}]}
@ -197,8 +216,9 @@ class TestColumnOptimizer(ClickhouseTestMixin, APIBaseTest):
ColumnOptimizer(filter, self.team.id).should_query_elements_chain_column, True,
)
def group_types_to_query(self):
def test_group_types_to_query(self):
group_types_to_query = lambda filter: ColumnOptimizer(filter, self.team.id).group_types_to_query
self.assertEqual(group_types_to_query(BASE_FILTER), set())
self.assertEqual(group_types_to_query(FILTER_WITH_PROPERTIES), {2})
self.assertEqual(group_types_to_query(FILTER_WITH_GROUPS), {2})

View File

@ -65,10 +65,12 @@ class ClickhouseTrendsBreakdown:
)
_, parsed_date_to, date_params = parse_timestamps(filter=self.filter, team_id=self.team_id)
props_to_filter = [*self.filter.properties, *self.entity.properties]
props_to_filter = self.filter.property_groups.combine_properties(
PropertyOperatorType.AND, self.entity.properties
)
prop_filters, prop_filter_params = parse_prop_grouped_clauses(
team_id=self.team_id,
property_group=PropertyGroup(type=PropertyOperatorType.AND, groups=props_to_filter),
property_group=props_to_filter,
table_name="e",
person_properties_mode=PersonPropertiesMode.EXCLUDE,
)
@ -95,7 +97,7 @@ class ClickhouseTrendsBreakdown:
"parsed_date_to": parsed_date_to,
"actions_query": "AND {}".format(action_query) if action_query else "",
"event_filter": "AND event = %(event)s" if not action_query else "",
"filters": prop_filters if props_to_filter else "",
"filters": prop_filters if props_to_filter.groups else "",
}
_params, _breakdown_filter_params = {}, {}

View File

@ -86,7 +86,8 @@ class LifecycleEventQuery(ClickhouseEventQuery):
date_query, date_params = self._get_date_filter()
self.params.update(date_params)
prop_query, prop_params = self._get_props(self._filter.properties)
prop_query, prop_params = self._get_prop_groups(self._filter.property_groups)
self.params.update(prop_params)
person_query, person_params = self._get_person_query()

View File

@ -6,10 +6,11 @@ from ee.clickhouse.queries.event_query import ClickhouseEventQuery
from ee.clickhouse.queries.person_query import ClickhousePersonQuery
from ee.clickhouse.queries.trends.util import get_active_user_params
from ee.clickhouse.queries.util import date_from_clause, get_time_diff, get_trunc_func_ch, parse_timestamps
from posthog.constants import MONTHLY_ACTIVE, WEEKLY_ACTIVE
from posthog.constants import MONTHLY_ACTIVE, WEEKLY_ACTIVE, PropertyOperatorType
from posthog.models import Entity
from posthog.models.filters.filter import Filter
from posthog.models.filters.mixins.utils import cached_property
from posthog.models.property import PropertyGroup
class TrendsEventQuery(ClickhouseEventQuery):
@ -54,8 +55,10 @@ class TrendsEventQuery(ClickhouseEventQuery):
date_query, date_params = self._get_date_filter()
self.params.update(date_params)
prop_filters = [*self._filter.properties, *self._entity.properties]
prop_query, prop_params = self._get_props(prop_filters)
prop_query, prop_params = self._get_prop_groups(
self._filter.property_groups.combine_properties(PropertyOperatorType.AND, self._entity.properties)
)
self.params.update(prop_params)
entity_query, entity_params = self._get_entity_query()

View File

@ -156,7 +156,7 @@
AND timestamp >= '2020-01-01 00:00:00'
AND timestamp <= '2020-01-06 23:59:59'
AND (has(['control', 'test'], trim(BOTH '"'
FROM JSONExtractRaw(properties, '$feature/a-b-test')))) ) events
FROM JSONExtractRaw(e.properties, '$feature/a-b-test')))) ) events
WHERE (step_0 = 1
OR step_1 = 1) ))
WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 ))

View File

@ -86,7 +86,7 @@
AND timestamp >= '2020-01-01 00:00:00'
AND timestamp <= '2020-01-06 23:59:59'
AND (has(['control', 'test'], trim(BOTH '"'
FROM JSONExtractRaw(properties, '$feature/a-b-test')))) ) events
FROM JSONExtractRaw(e.properties, '$feature/a-b-test')))) ) events
WHERE (step_0 = 1
OR step_1 = 1) ))
WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 ))
@ -185,7 +185,7 @@
AND timestamp >= '2020-01-01 00:00:00'
AND timestamp <= '2020-01-06 23:59:59'
AND (has(['control', 'test_1', 'test_2', 'test'], trim(BOTH '"'
FROM JSONExtractRaw(properties, '$feature/a-b-test')))) ) events
FROM JSONExtractRaw(e.properties, '$feature/a-b-test')))) ) events
WHERE (step_0 = 1
OR step_1 = 1) ))
WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 ))

View File

@ -44,8 +44,8 @@
AND timestamp >= '2020-01-01 00:00:00'
AND timestamp <= '2020-02-15 23:59:59'
AND event = 'watched movie'
AND (NOT has([''], $group_0))
AND (NOT has([''], $group_0))
AND (NOT has([''], $group_0)
AND NOT has([''], $group_0))
GROUP BY aggregation_target)
WHERE num_intervals = 1
LIMIT 100
@ -71,8 +71,8 @@
AND timestamp >= '2020-01-01 00:00:00'
AND timestamp <= '2020-02-15 23:59:59'
AND event = 'watched movie'
AND (NOT has([''], $group_0))
AND (NOT has([''], $group_0))
AND (NOT has([''], $group_0)
AND NOT has([''], $group_0))
GROUP BY aggregation_target)
WHERE num_intervals = 2
LIMIT 100
@ -98,8 +98,8 @@
AND timestamp >= '2020-01-01 00:00:00'
AND timestamp <= '2020-02-15 23:59:59'
AND event = 'watched movie'
AND (NOT has([''], $group_0))
AND (NOT has([''], $group_0))
AND (NOT has([''], $group_0)
AND NOT has([''], $group_0))
GROUP BY aggregation_target)
WHERE num_intervals = 3
LIMIT 100

View File

@ -136,7 +136,7 @@
AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime('2012-01-01 00:00:00'))
AND timestamp <= '2012-01-15 23:59:59'
AND (has(['val'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'key')))) )
FROM JSONExtractRaw(e.properties, 'key')))) )
GROUP BY toStartOfDay(timestamp))
group by day_start
order by day_start) SETTINGS timeout_before_checking_execution_speed = 60
@ -171,7 +171,7 @@
AND timestamp >= '2012-01-14 00:00:00'
AND timestamp <= '2012-01-14 23:59:59'
AND (has(['val'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'key')))) )
FROM JSONExtractRaw(e.properties, 'key')))) )
GROUP BY actor_id
LIMIT 200
OFFSET 0
@ -404,7 +404,7 @@
AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime('2012-01-01 00:00:00'))
AND timestamp <= '2012-01-14 23:59:59'
AND (has(['val'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'key')))) )
FROM JSONExtractRaw(e.properties, 'key')))) )
GROUP BY actor_id
LIMIT 200
OFFSET 0
@ -522,7 +522,7 @@
AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime('2012-01-01 00:00:00'))
AND timestamp <= '2012-01-14 23:59:59'
AND (has(['val'], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'key')))) )
FROM JSONExtractRaw(e.properties, 'key')))) )
GROUP BY actor_id
LIMIT 200
OFFSET 0
@ -570,8 +570,8 @@
AND event = '$pageview'
AND timestamp >= '2020-01-02 00:00:00'
AND timestamp <= '2020-01-02 23:59:59'
AND (NOT has([''], $group_0))
AND (NOT has([''], $group_0)) )
AND (NOT has([''], $group_0)
AND NOT has([''], $group_0)) )
GROUP BY actor_id
LIMIT 200
OFFSET 0

View File

@ -118,8 +118,8 @@
AND event IN ['paid', 'user signed up']
AND timestamp >= '2020-01-01 00:00:00'
AND timestamp <= '2020-01-14 23:59:59'
AND (NOT has([''], $group_0))
AND (NOT has([''], $group_0)) ) events
AND (NOT has([''], $group_0)
AND NOT has([''], $group_0)) ) events
WHERE (step_0 = 1
OR step_1 = 1) ))
WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 ))
@ -254,8 +254,8 @@
AND event IN ['paid', 'user signed up']
AND timestamp >= '2020-01-01 00:00:00'
AND timestamp <= '2020-01-14 23:59:59'
AND (NOT has([''], $group_0))
AND (NOT has([''], $group_0)) ) events
AND (NOT has([''], $group_0)
AND NOT has([''], $group_0)) ) events
WHERE (step_0 = 1
OR step_1 = 1) ))
WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 ))

View File

@ -164,8 +164,8 @@
AND event IN ['paid', 'user signed up']
AND timestamp >= '2020-01-01 00:00:00'
AND timestamp <= '2020-01-14 23:59:59'
AND (NOT has([''], $group_0))
AND (NOT has([''], $group_0)) ) events
AND (NOT has([''], $group_0)
AND NOT has([''], $group_0)) ) events
WHERE (step_0 = 1
OR step_1 = 1) ))
WHERE step_0 = 1
@ -209,8 +209,8 @@
AND event IN ['paid', 'user signed up']
AND timestamp >= '2020-01-01 00:00:00'
AND timestamp <= '2020-01-14 23:59:59'
AND (NOT has([''], $group_0))
AND (NOT has([''], $group_0)) ) events
AND (NOT has([''], $group_0)
AND NOT has([''], $group_0)) ) events
WHERE (step_0 = 1
OR step_1 = 1) ))
WHERE step_0 = 1 ))

View File

@ -34,7 +34,7 @@
AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime('2020-01-08 00:00:00'))
AND timestamp <= '2020-01-12 23:59:59'
AND (has([''], trim(BOTH '"'
FROM JSONExtractRaw(properties, 'key')))) )
FROM JSONExtractRaw(e.properties, 'key')))) )
GROUP BY actor_id
LIMIT 200
OFFSET 0

View File

@ -1,5 +1,5 @@
import json
from typing import Any, Dict, List, Optional, Union, cast
from typing import Any, Dict, Iterator, List, Optional, Union, cast
from rest_framework.exceptions import ValidationError
@ -22,14 +22,18 @@ class PropertyMixin(BaseParamMixin):
else:
loaded_props = _props
return self._parse_properties(loaded_props)
# if grouped properties
if isinstance(loaded_props, dict) and "type" in loaded_props and "groups" in loaded_props:
# property_groups is main function from now on
# TODO: this function will go away at end of migration
return []
else:
# old style dict properties or a list of properties
return self._parse_properties(loaded_props)
@cached_property
def property_groups(self) -> PropertyGroup:
_props = self._data.get(PROPERTY_GROUPS, None)
if not _props:
return PropertyGroup(type=PropertyOperatorType.AND, groups=self.properties)
_props = self._data.get(PROPERTIES)
if isinstance(_props, str):
try:
@ -39,7 +43,28 @@ class PropertyMixin(BaseParamMixin):
else:
loaded_props = _props
return self._parse_property_group(loaded_props)
# if grouped properties
if isinstance(loaded_props, dict) and "type" in loaded_props and "groups" in loaded_props:
try:
return self._parse_property_group(loaded_props)
except ValidationError as e:
raise e
except ValueError as e:
raise ValidationError(f"PropertyGroup is unparsable: {e}")
# old properties
return PropertyGroup(type=PropertyOperatorType.AND, groups=self.properties)
@cached_property
def property_groups_flat(self) -> List[Property]:
    """Return every leaf Property of the (possibly nested) property-group tree as a flat list."""
    leaves = self._property_groups_flat(self.property_groups)
    return list(leaves)
def _property_groups_flat(self, prop_group: PropertyGroup):
    """Depth-first generator over the group tree, yielding each leaf Property in order."""
    for member in prop_group.groups:
        if not isinstance(member, PropertyGroup):
            # leaf: a plain Property
            yield member
        else:
            # nested group: recurse and splice its leaves into this stream
            yield from self._property_groups_flat(member)
def _parse_properties(self, properties: Optional[Any]) -> List[Property]:
if isinstance(properties, list):
@ -70,7 +95,9 @@ class PropertyMixin(BaseParamMixin):
def _parse_property_group(self, group: Optional[Dict]) -> PropertyGroup:
    """Parse a raw ``{"type": ..., "groups": [...]}`` dict into a PropertyGroup.

    Any input that is missing or not in grouped form falls through to an
    empty AND group.
    """
    # NOTE(review): this span is diff residue — the first return below is the
    # removed (old) line and the multi-line return after it is its added
    # replacement (which upper-cases the operator before constructing
    # PropertyOperatorType); as written here, the second return is
    # unreachable. Confirm against the real file before relying on this.
    if group and "type" in group and "groups" in group:
        return PropertyGroup(group["type"], self._parse_property_group_list(group["groups"]))
        return PropertyGroup(
            PropertyOperatorType(group["type"].upper()), self._parse_property_group_list(group["groups"])
        )
    # fallback: empty AND group when input is not a grouped-properties dict
    return PropertyGroup(PropertyOperatorType.AND, cast(List[Property], []))
@ -78,7 +105,6 @@ class PropertyMixin(BaseParamMixin):
if not prop_list:
# empty prop list
return cast(List[Property], [])
has_property_groups = False
has_simple_properties = False
for prop in prop_list:
@ -97,12 +123,9 @@ class PropertyMixin(BaseParamMixin):
@include_dict
def properties_to_dict(self):
return {PROPERTIES: [prop.to_dict() for prop in self.properties]} if self.properties else {}
if self.properties:
return {PROPERTIES: [prop.to_dict() for prop in self.properties]}
@include_dict
def property_groups_to_dict(self):
return (
{PROPERTY_GROUPS: self.property_groups.to_dict()}
if self.property_groups and self.property_groups.groups and not self.properties
else {}
{PROPERTIES: self.property_groups.to_dict()} if self.property_groups and self.property_groups.groups else {}
)

View File

@ -10,7 +10,7 @@ from posthog.models.property import Property, PropertyGroup
def test_property_group_multi_level_parsing():
filter = Filter(
data={
"property_groups": {
"properties": {
"type": "AND",
"groups": [
{
@ -43,7 +43,7 @@ def test_property_group_multi_level_parsing():
def test_property_group_simple_parsing():
filter = Filter(
data={
"property_groups": {
"properties": {
"type": "AND",
"groups": [{"key": "attr", "value": "val_1"}, {"key": "attr_2", "value": "val_2"}],
}
@ -60,7 +60,7 @@ def test_property_group_simple_parsing():
def test_property_group_empty_parsing():
filter = Filter(data={"property_groups": {}})
filter = Filter(data={"properties": {}})
assert filter.property_groups.type == "AND"
assert filter.property_groups.groups == []
@ -70,9 +70,25 @@ def test_property_group_invalid_parsing():
filter = Filter(
data={
"property_groups": {
"properties": {
"type": "XaND",
"groups": [{"key": "attr", "value": "val_1"}, {"key": "attr_2", "value": "val_2"},],
}
}
)
with pytest.raises(ValidationError):
filter.property_groups
def test_property_group_includes_unhomogenous_groups():
filter = Filter(
data={
"properties": {
"type": "AND",
"groups": [
{"type": "or", "groups": [{"key": "attr", "value": "val_1"}]},
{"key": "attr", "value": "val_1"},
{"key": "attr_2", "value": "val_2"},
{"type": "OR", "groups": []},
@ -88,7 +104,7 @@ def test_property_group_invalid_parsing():
def test_property_multi_level_to_dict():
filter = Filter(
data={
"property_groups": {
"properties": {
"type": "AND",
"groups": [
{
@ -102,22 +118,24 @@ def test_property_multi_level_to_dict():
)
assert filter.property_groups.to_dict() == {
"AND": [
"type": "AND",
"groups": [
{
"AND": [
"type": "AND",
"groups": [
{"key": "attr", "value": "val_1", "operator": None, "type": "event"},
{"key": "attr_2", "value": "val_2", "operator": None, "type": "event"},
],
},
{"OR": [{"key": "attr", "value": "val_2", "operator": None, "type": "event"}]},
]
{"type": "OR", "groups": [{"key": "attr", "value": "val_2", "operator": None, "type": "event"}]},
],
}
def test_property_group_simple_to_dict():
filter = Filter(
data={
"property_groups": {
"properties": {
"type": "AND",
"groups": [{"key": "attr", "value": "val_1"}, {"key": "attr_2", "value": "val_2"}],
}
@ -125,17 +143,18 @@ def test_property_group_simple_to_dict():
)
assert filter.property_groups.to_dict() == {
"AND": [
"type": "AND",
"groups": [
{"key": "attr", "value": "val_1", "operator": None, "type": "event"},
{"key": "attr_2", "value": "val_2", "operator": None, "type": "event"},
]
],
}
def test_property_group_simple_json_parsing():
filter = Filter(
data={
"property_groups": json.dumps(
"properties": json.dumps(
{"type": "AND", "groups": [{"key": "attr", "value": "val_1"}, {"key": "attr_2", "value": "val_2"}],}
)
}
@ -154,7 +173,7 @@ def test_property_group_simple_json_parsing():
def test_property_group_multi_level_json_parsing():
filter = Filter(
data={
"property_groups": json.dumps(
"properties": json.dumps(
{
"type": "AND",
"groups": [

View File

@ -152,12 +152,21 @@ class PropertyGroup:
self.type = type
self.groups = groups
def combine_properties(self, operator: PropertyOperatorType, properties: List[Property]) -> "PropertyGroup":
    """Combine this group with extra properties under the given operator.

    Returns self unchanged when there is nothing to add; when this group is
    itself empty, the new properties simply become a standalone AND group.
    """
    if not properties:
        return self
    addition = PropertyGroup(PropertyOperatorType.AND, properties)
    if len(self.groups) == 0:
        return addition
    return PropertyGroup(operator, [self, addition])
def to_dict(self):
    """Serialize this PropertyGroup to a dict; returns {} when it has no members."""
    # NOTE(review): diff residue — the two return statements below are the old
    # and new serialization formats ({operator: [...]} vs
    # {"type": ..., "groups": [...]}); as written here the second is
    # unreachable. Confirm against the real file.
    result: Dict = {}
    if not self.groups:
        return result
    return {f"{self.type}": [prop.to_dict() for prop in self.groups]}
    return {"type": self.type, "groups": [prop.to_dict() for prop in self.groups]}
def __repr__(self):
params_repr = ", ".join(f"{repr(prop)}" for prop in self.groups)