From a314b61a0243e038a91aed4d73b92502319abced Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Wed, 16 Feb 2022 15:32:24 +0000 Subject: [PATCH] Query person_distinct_id2 not person_distinct_id (#8358) * add the fix and let CI say if that breaks any tests * allow getting team IDS without direct substitution and mark all the places to check with TODOs * pass team id in from everywhere and see what tests fail in CI * named params everywhere * address mypyc * get tests passing * fix tests after merge from master * remove SQL query that's not referenced from anywhere * check some marked queries are templated correctly now * remove unused SQL * check some marked queries are templated correctly now * no optional team id * more passing of team id into things * don't trust autocomplete * address review --- ee/clickhouse/models/action.py | 14 ++++-- ee/clickhouse/models/cohort.py | 8 +++- ee/clickhouse/models/entity.py | 2 +- ee/clickhouse/models/property.py | 10 ++-- .../test/__snapshots__/test_property.ambr | 46 ++++++------------- ee/clickhouse/models/test/test_action.py | 2 +- ee/clickhouse/models/test/test_cohort.py | 20 ++++---- ee/clickhouse/models/test/test_filters.py | 12 +++-- ee/clickhouse/models/test/test_property.py | 25 +++++++--- ee/clickhouse/queries/breakdown_props.py | 8 ++-- ee/clickhouse/queries/event_query.py | 1 + ee/clickhouse/queries/funnels/base.py | 7 ++- .../retention/retention_event_query.py | 4 +- .../clickhouse_session_recording_list.py | 11 +++-- .../stickiness/clickhouse_stickiness.py | 9 +--- .../stickiness/stickiness_event_query.py | 2 +- ee/clickhouse/queries/trends/breakdown.py | 5 +- ee/clickhouse/queries/trends/lifecycle.py | 2 +- .../queries/trends/trend_event_query.py | 2 +- ee/clickhouse/sql/person.py | 10 ---- ee/clickhouse/sql/trends/volume.py | 15 ------ ee/clickhouse/views/element.py | 0 ee/clickhouse/views/events.py | 0 posthog/api/action.py | 2 +- posthog/api/element.py | 4 +- posthog/api/event.py | 4 +- 26 files changed, 112 insertions(+), 113 deletions(-) create mode 100644 ee/clickhouse/views/element.py create mode 100644 ee/clickhouse/views/events.py diff --git a/ee/clickhouse/models/action.py b/ee/clickhouse/models/action.py index f511045cf1f..3ab0db0d193 100644 --- a/ee/clickhouse/models/action.py +++ b/ee/clickhouse/models/action.py @@ -6,10 +6,11 @@ from ee.clickhouse.models.util import PersonPropertiesMode from posthog.constants import AUTOCAPTURE_EVENT, TREND_FILTER_TYPE_ACTIONS from posthog.models import Action, Entity, Filter from posthog.models.action_step import ActionStep -from posthog.models.property import Property, PropertyIdentifier, PropertyName, PropertyType +from posthog.models.property import Property, PropertyIdentifier def format_action_filter( + team_id: int, action: Action, prepend: str = "action", use_loop: bool = False, @@ -45,7 +46,8 @@ def format_action_filter( from ee.clickhouse.models.property import parse_prop_grouped_clauses prop_query, prop_params = parse_prop_grouped_clauses( - Filter(data={"properties": step.properties}).property_groups, + team_id=team_id, + property_group=Filter(data={"properties": step.properties}).property_groups, prepend=f"action_props_{action.pk}_{step.pk}", table_name=table_name, person_properties_mode=person_properties_mode, @@ -93,10 +95,14 @@ def filter_event( return conditions, params -def format_entity_filter(entity: Entity, prepend: str = "action", filter_by_team=True) -> Tuple[str, Dict]: +def format_entity_filter( + team_id: int, entity: Entity, prepend: str = "action", filter_by_team=True +) -> Tuple[str, Dict]: if entity.type == TREND_FILTER_TYPE_ACTIONS: action = entity.get_action() - entity_filter, params = format_action_filter(action, prepend=prepend, filter_by_team=filter_by_team) + entity_filter, params = format_action_filter( + team_id=team_id, action=action, prepend=prepend, filter_by_team=filter_by_team + ) else: key = f"{prepend}_event" entity_filter = f"event = %({key})s" diff --git a/ee/clickhouse/models/cohort.py b/ee/clickhouse/models/cohort.py index 8c34a82f32c..e8fd6c789af 100644 --- a/ee/clickhouse/models/cohort.py +++ b/ee/clickhouse/models/cohort.py @@ -179,7 +179,9 @@ def _get_entity_query( return "event = %(event)s", {"event": event_id} elif action_id: action = Action.objects.get(pk=action_id, team_id=team_id) - action_filter_query, action_params = format_action_filter(action, prepend="_{}_action".format(group_idx)) + action_filter_query, action_params = format_action_filter( + team_id=team_id, action=action, prepend="_{}_action".format(group_idx) + ) return action_filter_query, action_params else: raise ValidationError("Cohort query requires action_id or event_id") @@ -253,7 +255,9 @@ def get_person_ids_by_cohort_id(team: Team, cohort_id: int, limit: Optional[int] from ee.clickhouse.models.property import parse_prop_grouped_clauses filters = Filter(data={"properties": [{"key": "id", "value": cohort_id, "type": "cohort"}],}) - filter_query, filter_params = parse_prop_grouped_clauses(filters.property_groups, table_name="pdi") + filter_query, filter_params = parse_prop_grouped_clauses( + team_id=team.pk, property_group=filters.property_groups, table_name="pdi" + ) results = sync_execute( GET_PERSON_IDS_BY_FILTER.format( diff --git a/ee/clickhouse/models/entity.py b/ee/clickhouse/models/entity.py index 65c4d31040b..ab6fb9f7732 100644 --- a/ee/clickhouse/models/entity.py +++ b/ee/clickhouse/models/entity.py @@ -18,7 +18,7 @@ def get_entity_filtering_params( if entity.type == TREND_FILTER_TYPE_ACTIONS: action = entity.get_action() action_query, action_params = format_action_filter( - action, table_name=table_name, person_properties_mode=person_properties_mode, + team_id=team_id, action=action, table_name=table_name, person_properties_mode=person_properties_mode, ) params.update(action_params) content_sql_params = {"entity_query": f"AND {action_query}"} diff --git a/ee/clickhouse/models/property.py b/ee/clickhouse/models/property.py index 67e37289ecf..5b0a49b81f8 100644 --- a/ee/clickhouse/models/property.py +++ b/ee/clickhouse/models/property.py @@ -24,6 +24,7 @@ from ee.clickhouse.models.cohort import ( format_static_cohort_query, ) from ee.clickhouse.models.util import PersonPropertiesMode, is_json +from ee.clickhouse.queries.person_distinct_id_query import get_team_distinct_ids_query from ee.clickhouse.sql.events import SELECT_PROP_VALUES_SQL, SELECT_PROP_VALUES_SQL_WITH_FILTER from ee.clickhouse.sql.groups import GET_GROUP_IDS_BY_PROPERTY_SQL from ee.clickhouse.sql.person import ( @@ -64,6 +65,7 @@ from posthog.utils import is_valid_regex, relative_date_parse def parse_prop_grouped_clauses( + team_id: int, property_group: PropertyGroup, prepend: str = "global", table_name: str = "", @@ -84,6 +86,7 @@ def parse_prop_grouped_clauses( for idx, group in enumerate(property_group.groups): if isinstance(group, PropertyGroup): clause, params = parse_prop_grouped_clauses( + team_id=team_id, property_group=group, prepend=f"{prepend}_{idx}", table_name=table_name, @@ -109,6 +112,7 @@ def parse_prop_grouped_clauses( person_id_joined_alias=person_id_joined_alias, group_properties_joined=group_properties_joined, property_operator=property_group.type, + team_id=team_id, ) if not _final: @@ -129,6 +133,7 @@ def is_property_group(group: Union[Property, "PropertyGroup"]): def parse_prop_clauses( + team_id: int, filters: List[Property], prepend: str = "global", table_name: str = "", @@ -178,7 +183,7 @@ def parse_prop_clauses( final.append( " {property_operator} {table_name}distinct_id IN ({filter_query})".format( filter_query=GET_DISTINCT_IDS_BY_PROPERTY_SQL.format( - filters=filter_query, GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS + filters=filter_query, GET_TEAM_PERSON_DISTINCT_IDS=get_team_distinct_ids_query(team_id), ), table_name=table_name, property_operator=property_operator, @@ -238,9 +243,8 @@ def parse_prop_clauses( final.append(f"{property_operator} {filter_query}") else: # :TODO: (performance) Avoid subqueries whenever possible, use joins instead - # :TODO: Use get_team_distinct_ids_query instead when possible instead of GET_TEAM_PERSON_DISTINCT_IDS subquery = GET_DISTINCT_IDS_BY_PERSON_ID_FILTER.format( - filters=filter_query, GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS + filters=filter_query, GET_TEAM_PERSON_DISTINCT_IDS=get_team_distinct_ids_query(team_id), ) final.append(f"{property_operator} {table_name}distinct_id IN ({subquery})") params.update(filter_params) diff --git a/ee/clickhouse/models/test/__snapshots__/test_property.ambr b/ee/clickhouse/models/test/__snapshots__/test_property.ambr index baa9225c949..54155914c60 100644 --- a/ee/clickhouse/models/test/__snapshots__/test_property.ambr +++ b/ee/clickhouse/models/test/__snapshots__/test_property.ambr @@ -20,18 +20,11 @@ (SELECT distinct_id FROM (SELECT distinct_id, - argMax(person_id, _timestamp) as person_id - FROM - (SELECT distinct_id, - person_id, - max(_timestamp) as _timestamp - FROM person_distinct_id - WHERE team_id = 2 - GROUP BY person_id, - distinct_id, - team_id - HAVING max(is_deleted) = 0) - GROUP BY distinct_id) + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) WHERE person_id IN (SELECT id FROM @@ -48,18 +41,11 @@ (SELECT distinct_id FROM (SELECT distinct_id, - argMax(person_id, _timestamp) as person_id - FROM - (SELECT distinct_id, - person_id, - max(_timestamp) as _timestamp - FROM person_distinct_id - WHERE team_id = 2 - GROUP BY person_id, - distinct_id, - team_id - HAVING max(is_deleted) = 0) - GROUP BY distinct_id) + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) WHERE person_id IN (SELECT id FROM @@ -92,15 +78,11 @@ SELECT distinct_id FROM ( - SELECT distinct_id, argMax(person_id, _timestamp) as person_id - FROM ( - SELECT distinct_id, person_id, max(_timestamp) as _timestamp - FROM person_distinct_id - WHERE team_id = %(team_id)s - GROUP BY person_id, distinct_id, team_id - HAVING max(is_deleted) = 0 - ) + SELECT distinct_id, argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 1 GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0 ) WHERE person_id IN diff --git a/ee/clickhouse/models/test/test_action.py b/ee/clickhouse/models/test/test_action.py index a9c7a43ea1a..466d8f61a95 100644 --- a/ee/clickhouse/models/test/test_action.py +++ b/ee/clickhouse/models/test/test_action.py @@ -23,7 +23,7 @@ def _create_event(**kwargs) -> Event: def query_action(action: Action) -> Optional[List]: - formatted_query, params = format_action_filter(action, "") + formatted_query, params = format_action_filter(team_id=action.team_id, action=action, prepend="") query = ACTION_QUERY.format(action_filter=formatted_query) diff --git a/ee/clickhouse/models/test/test_cohort.py b/ee/clickhouse/models/test/test_cohort.py index e7d3180ad77..565fbf8b64f 100644 --- a/ee/clickhouse/models/test/test_cohort.py +++ b/ee/clickhouse/models/test/test_cohort.py @@ -7,7 +7,7 @@ from django.utils import timezone from freezegun import freeze_time from ee.clickhouse.client import sync_execute -from ee.clickhouse.models.cohort import format_filter_query, get_person_ids_by_cohort_id, recalculate_cohortpeople +from ee.clickhouse.models.cohort import format_filter_query, get_person_ids_by_cohort_id from ee.clickhouse.models.event import create_event from ee.clickhouse.models.person import create_person, create_person_distinct_id from ee.clickhouse.models.property import parse_prop_grouped_clauses @@ -78,7 +78,7 @@ class TestCohort(ClickhouseTestMixin, BaseTest): ) filter = Filter(data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}],}) - query, params = parse_prop_grouped_clauses(filter.property_groups) + query, params = parse_prop_grouped_clauses(team_id=self.team.pk, property_group=filter.property_groups) final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) result = sync_execute(final_query, {**params, "team_id": self.team.pk}) self.assertEqual(len(result), 1) @@ -106,7 +106,7 @@ class TestCohort(ClickhouseTestMixin, BaseTest): cohort1 = Cohort.objects.create(team=self.team, groups=[{"action_id": action.pk}], name="cohort1",) filter = Filter(data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}],}, team=self.team) - query, params = parse_prop_grouped_clauses(filter.property_groups) + query, params = parse_prop_grouped_clauses(team_id=self.team.pk, property_group=filter.property_groups) final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) result = sync_execute(final_query, {**params, "team_id": self.team.pk}) self.assertEqual(len(result), 1) @@ -145,7 +145,7 @@ class TestCohort(ClickhouseTestMixin, BaseTest): filter = Filter( data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}],}, team=self.team ) - query, params = parse_prop_grouped_clauses(filter.property_groups) + query, params = parse_prop_grouped_clauses(team_id=self.team.pk, property_group=filter.property_groups) final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) result = sync_execute(final_query, {**params, "team_id": self.team.pk}) self.assertEqual(len(result), 1) @@ -157,7 +157,7 @@ class TestCohort(ClickhouseTestMixin, BaseTest): filter = Filter( data={"properties": [{"key": "id", "value": cohort2.pk, "type": "cohort"}],}, team=self.team ) - query, params = parse_prop_grouped_clauses(filter.property_groups) + query, params = parse_prop_grouped_clauses(team_id=self.team.pk, property_group=filter.property_groups) final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) result = sync_execute(final_query, {**params, "team_id": self.team.pk}) self.assertEqual(len(result), 2) @@ -197,7 +197,7 @@ class TestCohort(ClickhouseTestMixin, BaseTest): filter = Filter( data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}],}, team=self.team ) - query, params = parse_prop_grouped_clauses(filter.property_groups) + query, params = parse_prop_grouped_clauses(team_id=self.team.pk, property_group=filter.property_groups) final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) result = sync_execute(final_query, {**params, "team_id": self.team.pk}) self.assertEqual(len(result), 1) @@ -209,7 +209,7 @@ class TestCohort(ClickhouseTestMixin, BaseTest): filter = Filter( data={"properties": [{"key": "id", "value": cohort2.pk, "type": "cohort"}],}, team=self.team ) - query, params = parse_prop_grouped_clauses(filter.property_groups) + query, params = parse_prop_grouped_clauses(team_id=self.team.pk, property_group=filter.property_groups) final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) result = sync_execute(final_query, {**params, "team_id": self.team.pk}) self.assertEqual(len(result), 2) @@ -234,7 +234,7 @@ class TestCohort(ClickhouseTestMixin, BaseTest): ) filter = Filter(data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}],}, team=self.team) - query, params = parse_prop_grouped_clauses(filter.property_groups) + query, params = parse_prop_grouped_clauses(team_id=self.team.pk, property_group=filter.property_groups) final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) result = sync_execute(final_query, {**params, "team_id": self.team.pk}) self.assertEqual(len(result), 2) @@ -262,8 +262,10 @@ class TestCohort(ClickhouseTestMixin, BaseTest): ) filter = Filter(data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}],}, team=self.team) - query, params = parse_prop_grouped_clauses(filter.property_groups) + query, params = parse_prop_grouped_clauses(team_id=self.team.pk, property_group=filter.property_groups) final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) + self.assertIn("\nFROM person_distinct_id2\n", final_query) + result = sync_execute(final_query, {**params, "team_id": self.team.pk}) self.assertEqual(len(result), 0) diff --git a/ee/clickhouse/models/test/test_filters.py b/ee/clickhouse/models/test/test_filters.py index f1e9330651d..9ba21cd22a6 100644 --- a/ee/clickhouse/models/test/test_filters.py +++ b/ee/clickhouse/models/test/test_filters.py @@ -21,7 +21,9 @@ from posthog.models.team import Team def _filter_events( filter: Filter, team: Team, person_query: Optional[bool] = False, order_by: Optional[str] = None, ): - prop_filters, prop_filter_params = parse_prop_grouped_clauses(filter.property_groups) + prop_filters, prop_filter_params = parse_prop_grouped_clauses( + property_group=filter.property_groups, team_id=team.pk + ) params = {"team_id": team.pk, **prop_filter_params} if order_by == "id": @@ -220,7 +222,9 @@ class TestFiltering( filter = Filter(data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}],}, team=self.team) - prop_clause, prop_clause_params = parse_prop_grouped_clauses(filter.property_groups, has_person_id_joined=False) + prop_clause, prop_clause_params = parse_prop_grouped_clauses( + property_group=filter.property_groups, has_person_id_joined=False, team_id=self.team.pk + ) query = """ SELECT distinct_id FROM person_distinct_id WHERE team_id = %(team_id)s {prop_clause} """.format( @@ -232,7 +236,9 @@ class TestFiltering( # test cohort2 with negation filter = Filter(data={"properties": [{"key": "id", "value": cohort2.pk, "type": "cohort"}],}, team=self.team) - prop_clause, prop_clause_params = parse_prop_grouped_clauses(filter.property_groups, has_person_id_joined=False) + prop_clause, prop_clause_params = parse_prop_grouped_clauses( + property_group=filter.property_groups, has_person_id_joined=False, team_id=self.team.pk + ) query = """ SELECT distinct_id FROM person_distinct_id WHERE team_id = %(team_id)s {prop_clause} """.format( diff --git a/ee/clickhouse/models/test/test_property.py b/ee/clickhouse/models/test/test_property.py index 17a0767d03a..8ce7bb47708 100644 --- a/ee/clickhouse/models/test/test_property.py +++ b/ee/clickhouse/models/test/test_property.py @@ -44,7 +44,9 @@ class TestPropFormat(ClickhouseTestMixin, BaseTest): CLASS_DATA_LEVEL_SETUP = False def _run_query(self, filter: Filter, **kwargs) -> List: - query, params = parse_prop_grouped_clauses(filter.property_groups, allow_denormalized_props=True, **kwargs) + query, params = parse_prop_grouped_clauses( + property_group=filter.property_groups, allow_denormalized_props=True, team_id=self.team.pk, **kwargs + ) final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) return sync_execute(final_query, {**params, "team_id": self.team.pk}) @@ -429,7 +431,10 @@ class TestPropDenormalized(ClickhouseTestMixin, BaseTest): def _run_query(self, filter: Filter, join_person_tables=False) -> List: query, params = parse_prop_grouped_clauses( - filter.property_groups, allow_denormalized_props=True, person_properties_mode=PersonPropertiesMode.EXCLUDE, + team_id=self.team.pk, + property_group=filter.property_groups, + allow_denormalized_props=True, + person_properties_mode=PersonPropertiesMode.EXCLUDE, ) joins = "" if join_person_tables: @@ -543,18 +548,25 @@ def test_parse_prop_clauses_defaults(snapshot): } ) - assert parse_prop_grouped_clauses(filter.property_groups, allow_denormalized_props=False) == snapshot + assert ( + parse_prop_grouped_clauses(property_group=filter.property_groups, allow_denormalized_props=False, team_id=1) + == snapshot + ) assert ( parse_prop_grouped_clauses( - filter.property_groups, + property_group=filter.property_groups, person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN, allow_denormalized_props=False, + team_id=1, ) == snapshot ) assert ( parse_prop_grouped_clauses( - filter.property_groups, person_properties_mode=PersonPropertiesMode.EXCLUDE, allow_denormalized_props=False + team_id=1, + property_group=filter.property_groups, + person_properties_mode=PersonPropertiesMode.EXCLUDE, + allow_denormalized_props=False, ) == snapshot ) @@ -568,7 +580,8 @@ def test_parse_groups_persons_edge_case_with_single_filter(snapshot): ) assert ( parse_prop_grouped_clauses( - filter.property_groups, + team_id=1, + property_group=filter.property_groups, person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN, allow_denormalized_props=True, ) diff --git a/ee/clickhouse/queries/breakdown_props.py b/ee/clickhouse/queries/breakdown_props.py index 1348c0e0827..cfb4be933e7 100644 --- a/ee/clickhouse/queries/breakdown_props.py +++ b/ee/clickhouse/queries/breakdown_props.py @@ -51,14 +51,15 @@ def get_breakdown_prop_values( parsed_date_from, parsed_date_to, date_params = parse_timestamps(filter=filter, team_id=team_id) prop_filters, prop_filter_params = parse_prop_grouped_clauses( - PropertyGroup(type=PropertyOperatorType.AND, groups=filter.properties + entity.properties), + team_id=team_id, + property_group=PropertyGroup(type=PropertyOperatorType.AND, groups=filter.properties + entity.properties), table_name="e", prepend="e_brkdwn", person_properties_mode=PersonPropertiesMode.EXCLUDE, allow_denormalized_props=True, ) - entity_params, entity_format_params = get_entity_filtering_params(entity, team_id, table_name="e") + entity_params, entity_format_params = get_entity_filtering_params(entity=entity, team_id=team_id, table_name="e") value_expression = _to_value_expression(filter.breakdown_type, filter.breakdown, filter.breakdown_group_type_index) @@ -133,7 +134,8 @@ def _format_all_query(team_id: int, filter: Filter, **kwargs) -> Tuple[str, Dict props_to_filter = [*props_to_filter, *entity.properties] prop_filters, prop_filter_params = parse_prop_grouped_clauses( - PropertyGroup(type=PropertyOperatorType.AND, groups=props_to_filter), + team_id=team_id, + property_group=PropertyGroup(type=PropertyOperatorType.AND, groups=props_to_filter), prepend="all_cohort_", table_name="all_events", ) diff --git a/ee/clickhouse/queries/event_query.py b/ee/clickhouse/queries/event_query.py index af6330812bd..59b034fb285 100644 --- a/ee/clickhouse/queries/event_query.py +++ b/ee/clickhouse/queries/event_query.py @@ -168,6 +168,7 @@ class ClickhouseEventQuery(metaclass=ABCMeta): final.append(f"AND {person_id_query}") else: filter_query, filter_params = parse_prop_grouped_clauses( + team_id=self._team_id, property_group=PropertyGroup(type=PropertyOperatorType.AND, groups=[prop]), prepend=f"global_{idx}", allow_denormalized_props=True, diff --git a/ee/clickhouse/queries/funnels/base.py b/ee/clickhouse/queries/funnels/base.py index 960bfeb80e6..da7aa0a0c00 100644 --- a/ee/clickhouse/queries/funnels/base.py +++ b/ee/clickhouse/queries/funnels/base.py @@ -440,7 +440,9 @@ class ClickhouseFunnelBase(ABC): for action_step in action.steps.all(): if entity_name not in self.params[entity_name]: self.params[entity_name].append(action_step.event) - action_query, action_params = format_action_filter(action, f"{entity_name}_{step_prefix}step_{index}") + action_query, action_params = format_action_filter( + team_id=self._team.pk, action=action, prepend=f"{entity_name}_{step_prefix}step_{index}" + ) if action_query == "": return "" @@ -456,7 +458,8 @@ class ClickhouseFunnelBase(ABC): def _build_filters(self, entity: Entity, index: int) -> str: prop_filters, prop_filter_params = parse_prop_grouped_clauses( - entity.property_groups, + team_id=self._team.pk, + property_group=entity.property_groups, prepend=str(index), person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN, person_id_joined_alias="aggregation_target", diff --git a/ee/clickhouse/queries/retention/retention_event_query.py b/ee/clickhouse/queries/retention/retention_event_query.py index aa91c8b87d6..675fa2cf0d2 100644 --- a/ee/clickhouse/queries/retention/retention_event_query.py +++ b/ee/clickhouse/queries/retention/retention_event_query.py @@ -157,7 +157,9 @@ class RetentionEventsQuery(ClickhouseEventQuery): prepend = self._event_query_type if entity.type == TREND_FILTER_TYPE_ACTIONS: action = Action.objects.get(pk=entity.id) - action_query, params = format_action_filter(action, prepend=prepend, use_loop=False) + action_query, params = format_action_filter( + team_id=self._team_id, action=action, prepend=prepend, use_loop=False + ) condition = action_query elif entity.type == TREND_FILTER_TYPE_EVENTS: condition = f"{self.EVENT_TABLE_ALIAS}.event = %({prepend}_event)s" diff --git a/ee/clickhouse/queries/session_recordings/clickhouse_session_recording_list.py b/ee/clickhouse/queries/session_recordings/clickhouse_session_recording_list.py index fa5b722c08b..4b2ef150bd2 100644 --- a/ee/clickhouse/queries/session_recordings/clickhouse_session_recording_list.py +++ b/ee/clickhouse/queries/session_recordings/clickhouse_session_recording_list.py @@ -211,11 +211,12 @@ class ClickhouseSessionRecordingList(ClickhouseEventQuery): } return duration_clause, duration_params - def format_event_filter(self, entity: Entity, prepend: str) -> Tuple[str, Dict[str, Any]]: - filter_sql, params = format_entity_filter(entity, prepend=prepend, filter_by_team=False) + def format_event_filter(self, entity: Entity, prepend: str, team_id: int) -> Tuple[str, Dict[str, Any]]: + filter_sql, params = format_entity_filter(team_id=team_id, entity=entity, prepend=prepend, filter_by_team=False) if entity.properties: filters, filter_params = parse_prop_grouped_clauses( - entity.property_groups, + team_id=team_id, + property_group=entity.property_groups, prepend=prepend, allow_denormalized_props=True, has_person_id_joined=True, @@ -245,7 +246,9 @@ class ClickhouseSessionRecordingList(ClickhouseEventQuery): if entity.id not in event_names_to_filter: event_names_to_filter.append(entity.id) - condition_sql, filter_params = self.format_event_filter(entity, prepend=f"event_matcher_{index}") + condition_sql, filter_params = self.format_event_filter( + entity, prepend=f"event_matcher_{index}", team_id=self._team_id + ) aggregate_select_clause += f", sum(if({condition_sql}, 1, 0)) as count_event_match_{index}" aggregate_having_clause += f"\nAND count_event_match_{index} > 0" params = {**params, **filter_params} diff --git a/ee/clickhouse/queries/stickiness/clickhouse_stickiness.py b/ee/clickhouse/queries/stickiness/clickhouse_stickiness.py index 77bfa21115b..0c1e0e36cca 100644 --- a/ee/clickhouse/queries/stickiness/clickhouse_stickiness.py +++ b/ee/clickhouse/queries/stickiness/clickhouse_stickiness.py @@ -5,20 +5,13 @@ from django.conf import settings from django.db.models.expressions import F from django.utils import timezone from rest_framework.request import Request -from rest_framework.utils.serializer_helpers import ReturnDict from sentry_sdk.api import capture_exception from ee.clickhouse.client import sync_execute -from ee.clickhouse.models.person import ClickhousePersonSerializer from ee.clickhouse.queries.person_distinct_id_query import get_team_distinct_ids_query from ee.clickhouse.queries.stickiness.stickiness_actors import ClickhouseStickinessActors from ee.clickhouse.queries.stickiness.stickiness_event_query import StickinessEventsQuery -from ee.clickhouse.sql.person import ( - GET_LATEST_PERSON_SQL, - INSERT_COHORT_ALL_PEOPLE_SQL, - PEOPLE_SQL, - PERSON_STATIC_COHORT_TABLE, -) +from ee.clickhouse.sql.person import GET_LATEST_PERSON_SQL, INSERT_COHORT_ALL_PEOPLE_SQL, PERSON_STATIC_COHORT_TABLE from posthog.models.cohort import Cohort from posthog.models.entity import Entity from posthog.models.filters.stickiness_filter import StickinessFilter diff --git a/ee/clickhouse/queries/stickiness/stickiness_event_query.py b/ee/clickhouse/queries/stickiness/stickiness_event_query.py index e06a5375bad..3f2c846940a 100644 --- a/ee/clickhouse/queries/stickiness/stickiness_event_query.py +++ b/ee/clickhouse/queries/stickiness/stickiness_event_query.py @@ -61,6 +61,6 @@ class StickinessEventsQuery(ClickhouseEventQuery): def get_actions_query(self) -> Tuple[str, Dict[str, Any]]: if self._entity.type == TREND_FILTER_TYPE_ACTIONS: - return format_action_filter(self._entity.get_action()) + return format_action_filter(team_id=self._team_id, action=self._entity.get_action()) else: return "event = %(event)s", {"event": self._entity.id} diff --git a/ee/clickhouse/queries/trends/breakdown.py b/ee/clickhouse/queries/trends/breakdown.py index c5304b22790..2de64d575a4 100644 --- a/ee/clickhouse/queries/trends/breakdown.py +++ b/ee/clickhouse/queries/trends/breakdown.py @@ -67,7 +67,8 @@ class ClickhouseTrendsBreakdown: props_to_filter = [*self.filter.properties, *self.entity.properties] prop_filters, prop_filter_params = parse_prop_grouped_clauses( - PropertyGroup(type=PropertyOperatorType.AND, groups=props_to_filter), + team_id=self.team_id, + property_group=PropertyGroup(type=PropertyOperatorType.AND, groups=props_to_filter), table_name="e", person_properties_mode=PersonPropertiesMode.EXCLUDE, ) @@ -77,7 +78,7 @@ class ClickhouseTrendsBreakdown: action_params: Dict = {} if self.entity.type == TREND_FILTER_TYPE_ACTIONS: action = self.entity.get_action() - action_query, action_params = format_action_filter(action, table_name="e") + action_query, action_params = format_action_filter(team_id=self.team_id, action=action, table_name="e") self.params = { **self.params, diff --git a/ee/clickhouse/queries/trends/lifecycle.py b/ee/clickhouse/queries/trends/lifecycle.py index 5f6b36db69d..a5cf3114a8c 100644 --- a/ee/clickhouse/queries/trends/lifecycle.py +++ b/ee/clickhouse/queries/trends/lifecycle.py @@ -98,7 +98,7 @@ class LifecycleEventQuery(ClickhouseEventQuery): self.params.update(groups_params) entity_params, entity_format_params = get_entity_filtering_params( - self._filter.entities[0], self._team_id, table_name=self.EVENT_TABLE_ALIAS + entity=self._filter.entities[0], team_id=self._team_id, table_name=self.EVENT_TABLE_ALIAS ) self.params.update(entity_params) diff --git a/ee/clickhouse/queries/trends/trend_event_query.py b/ee/clickhouse/queries/trends/trend_event_query.py index d53f6444114..a2bba42f7ea 100644 --- a/ee/clickhouse/queries/trends/trend_event_query.py +++ b/ee/clickhouse/queries/trends/trend_event_query.py @@ -112,7 +112,7 @@ class TrendsEventQuery(ClickhouseEventQuery): def _get_entity_query(self) -> Tuple[str, Dict]: entity_params, entity_format_params = get_entity_filtering_params( - self._entity, self._team_id, table_name=self.EVENT_TABLE_ALIAS + entity=self._entity, team_id=self._team_id, table_name=self.EVENT_TABLE_ALIAS ) return entity_format_params["entity_query"], entity_params diff --git a/ee/clickhouse/sql/person.py b/ee/clickhouse/sql/person.py index 6f3677a0216..a357ff29fbc 100644 --- a/ee/clickhouse/sql/person.py +++ b/ee/clickhouse/sql/person.py @@ -319,16 +319,6 @@ INSERT INTO {cohort_table} SELECT generateUUIDv4(), actor_id, %(cohort_id)s, %(t ) """ -PEOPLE_SQL = """ -SELECT id, created_at, team_id, properties, is_identified, groupArray(distinct_id) FROM ( - {latest_person_sql} -) as person INNER JOIN ( - SELECT person_id, distinct_id FROM ({GET_TEAM_PERSON_DISTINCT_IDS}) WHERE person_id IN ({content_sql}) -) as pdi ON person.id = pdi.person_id -GROUP BY id, created_at, team_id, properties, is_identified -LIMIT 100 OFFSET %(offset)s -""" - INSERT_COHORT_ALL_PEOPLE_SQL = """ INSERT INTO {cohort_table} SELECT generateUUIDv4(), id, %(cohort_id)s, %(team_id)s, %(_timestamp)s, 0 FROM ( SELECT id FROM ( diff --git a/ee/clickhouse/sql/trends/volume.py b/ee/clickhouse/sql/trends/volume.py index 8bea2d89540..48178294e9d 100644 --- a/ee/clickhouse/sql/trends/volume.py +++ b/ee/clickhouse/sql/trends/volume.py @@ -19,21 +19,6 @@ SELECT counts as total, timestamp as day_start FROM ( ) WHERE 1 = 1 {parsed_date_from} {parsed_date_to} """ -PERSONS_ACTIVE_USER_SQL = """ -SELECT DISTINCT person_id FROM ( - SELECT d.timestamp, person_id FROM ( - SELECT toStartOfDay(timestamp) as timestamp FROM events WHERE team_id = %(team_id)s {parsed_date_from_prev_range} {parsed_date_to} GROUP BY timestamp - ) d - CROSS JOIN ( - SELECT toStartOfDay(timestamp) as timestamp, person_id FROM events INNER JOIN ( - {GET_TEAM_PERSON_DISTINCT_IDS} - ) AS pdi - ON events.distinct_id = pdi.distinct_id - WHERE team_id = %(team_id)s {entity_query} {filters} {parsed_date_from_prev_range} {parsed_date_to} GROUP BY timestamp, person_id - ) e WHERE e.timestamp <= d.timestamp AND e.timestamp > d.timestamp - INTERVAL {prev_interval} -) WHERE 1 = 1 {parsed_date_from} {parsed_date_to} -""" - AGGREGATE_SQL = """ SELECT groupArray(day_start) as date, groupArray(count) as data FROM ( SELECT SUM(total) AS count, day_start from ({null_sql} UNION ALL {content_sql}) group by day_start order by day_start diff --git a/ee/clickhouse/views/element.py b/ee/clickhouse/views/element.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ee/clickhouse/views/events.py b/ee/clickhouse/views/events.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/posthog/api/action.py b/posthog/api/action.py index 03b88264e7c..a66ab48ef01 100644 --- a/posthog/api/action.py +++ b/posthog/api/action.py @@ -249,7 +249,7 @@ class ActionViewSet(TaggedItemViewSetMixin, StructuredViewSetMixin, viewsets.Mod @action(methods=["GET"], detail=True) def count(self, request: request.Request, **kwargs) -> Response: action = self.get_object() - query, params = format_action_filter(action) + query, params = format_action_filter(team_id=action.team_id, action=action) if query == "": return Response({"count": 0}) diff --git a/posthog/api/element.py b/posthog/api/element.py index 7dfcecdad6b..54cc4d18a1b 100644 --- a/posthog/api/element.py +++ b/posthog/api/element.py @@ -50,7 +50,9 @@ class ElementViewSet(StructuredViewSetMixin, viewsets.ModelViewSet): date_from, date_to, date_params = parse_timestamps(filter, team_id=self.team.pk) - prop_filters, prop_filter_params = parse_prop_grouped_clauses(filter.property_groups) + prop_filters, prop_filter_params = parse_prop_grouped_clauses( + team_id=self.team.pk, property_group=filter.property_groups + ) result = sync_execute( GET_ELEMENTS.format(date_from=date_from, date_to=date_to, query=prop_filters), {"team_id": self.team.pk, **prop_filter_params, **date_params}, diff --git a/posthog/api/event.py b/posthog/api/event.py index 883a8209f31..4c261e938b5 100644 --- a/posthog/api/event.py +++ b/posthog/api/event.py @@ -137,7 +137,7 @@ class EventViewSet(StructuredViewSetMixin, mixins.RetrieveModelMixin, mixins.Lis long_date_from, ) prop_filters, prop_filter_params = parse_prop_grouped_clauses( - filter.property_groups, has_person_id_joined=False + team_id=team.pk, property_group=filter.property_groups, has_person_id_joined=False ) if request.GET.get("action_id"): @@ -147,7 +147,7 @@ class EventViewSet(StructuredViewSetMixin, mixins.RetrieveModelMixin, mixins.Lis return [] if action.steps.count() == 0: return [] - action_query, params = format_action_filter(action) + action_query, params = format_action_filter(team_id=team.pk, action=action) prop_filters += " AND {}".format(action_query) prop_filter_params = {**prop_filter_params, **params}