diff --git a/ee/clickhouse/materialized_columns/test/test_columns.py b/ee/clickhouse/materialized_columns/test/test_columns.py index 5adb6815ffa..20fef28abc0 100644 --- a/ee/clickhouse/materialized_columns/test/test_columns.py +++ b/ee/clickhouse/materialized_columns/test/test_columns.py @@ -18,7 +18,10 @@ from posthog.constants import GROUP_TYPES_LIMIT from posthog.settings import CLICKHOUSE_DATABASE from posthog.test.base import BaseTest -GROUPS_COLUMNS = [f"$group_{i}" for i in range(GROUP_TYPES_LIMIT)] +EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS = [f"$group_{i}" for i in range(GROUP_TYPES_LIMIT)] + [ + "$session_id", + "$window_id", +] def _create_event(**kwargs): @@ -30,7 +33,7 @@ def _create_event(**kwargs): class TestMaterializedColumns(ClickhouseTestMixin, ClickhouseDestroyTablesMixin, BaseTest): def test_get_columns_default(self): - self.assertCountEqual(get_materialized_columns("events"), GROUPS_COLUMNS) + self.assertCountEqual(get_materialized_columns("events"), EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS) self.assertCountEqual(get_materialized_columns("person"), []) self.assertEqual( get_materialized_columns("session_recording_events"), {"has_full_snapshot": "has_full_snapshot"} @@ -43,19 +46,22 @@ class TestMaterializedColumns(ClickhouseTestMixin, ClickhouseDestroyTablesMixin, materialize("person", "$zeta") self.assertCountEqual( - get_materialized_columns("events", use_cache=True).keys(), ["$foo", "$bar", *GROUPS_COLUMNS] + get_materialized_columns("events", use_cache=True).keys(), + ["$foo", "$bar", *EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS], ) self.assertCountEqual(get_materialized_columns("person", use_cache=True).keys(), ["$zeta"]) materialize("events", "abc") self.assertCountEqual( - get_materialized_columns("events", use_cache=True).keys(), ["$foo", "$bar", *GROUPS_COLUMNS] + get_materialized_columns("events", use_cache=True).keys(), + ["$foo", "$bar", *EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS], ) with freeze_time("2020-01-04T14:00:01Z"): self.assertCountEqual( - get_materialized_columns("events", use_cache=True).keys(), ["$foo", "$bar", "abc", *GROUPS_COLUMNS] + get_materialized_columns("events", use_cache=True).keys(), + ["$foo", "$bar", "abc", *EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS], ) def test_materialized_column_naming(self): diff --git a/ee/clickhouse/migrations/0024_materialize_window_and_session_id.py b/ee/clickhouse/migrations/0024_materialize_window_and_session_id.py new file mode 100644 index 00000000000..0e96d060f39 --- /dev/null +++ b/ee/clickhouse/migrations/0024_materialize_window_and_session_id.py @@ -0,0 +1,19 @@ +from infi.clickhouse_orm import migrations + +from ee.clickhouse.materialized_columns.columns import materialize + + +def create_materialized_columns(database): + try: + materialize("events", "$session_id", "$session_id") + except ValueError: + # session_id is already materialized, skip + pass + try: + materialize("events", "$window_id", "$window_id") + except ValueError: + # window_id is already materialized, skip + pass + + +operations = [migrations.RunPython(create_materialized_columns)] diff --git a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation_persons.ambr b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation_persons.ambr index 327eeedb65b..77080782028 100644 --- a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation_persons.ambr +++ b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation_persons.ambr @@ -87,10 +87,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, @@ -310,10 +308,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, @@ -463,10 +459,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, @@ -605,10 +599,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, @@ -745,10 +737,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, diff --git a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_persons.ambr b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_persons.ambr index 9f8cb39cf4f..16d7e36f13b 100644 --- a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_persons.ambr +++ b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_persons.ambr @@ -148,10 +148,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, @@ -337,10 +335,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, @@ -526,10 +522,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, diff --git a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_strict_persons.ambr b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_strict_persons.ambr index d9212c0f8bc..93ff192bff5 100644 --- a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_strict_persons.ambr +++ b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_strict_persons.ambr @@ -108,10 +108,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, @@ -254,10 +252,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, @@ -400,10 +396,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, diff --git a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_unordered_persons.ambr b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_unordered_persons.ambr index c720ef970c4..247b2970716 100644 --- a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_unordered_persons.ambr +++ b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_unordered_persons.ambr @@ -76,10 +76,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, @@ -155,10 +153,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, @@ -234,10 +230,8 @@ e.timestamp as timestamp, pdi.person_id as aggregation_target, e.uuid AS uuid, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id", - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id" + e.$session_id as "$session_id", + e.$window_id as "$window_id" FROM events e INNER JOIN (SELECT distinct_id, diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_correlation_persons.py b/ee/clickhouse/queries/funnels/test/test_funnel_correlation_persons.py index 36def668046..4611572edfc 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel_correlation_persons.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_correlation_persons.py @@ -15,7 +15,7 @@ from posthog.constants import INSIGHT_FUNNELS from posthog.models import Cohort, Filter from posthog.models.person import Person from posthog.tasks.calculate_cohort import insert_cohort_from_insight_filter -from posthog.test.base import APIBaseTest, test_with_materialized_columns +from posthog.test.base import APIBaseTest FORMAT_TIME = "%Y-%m-%d 00:00:00" MAX_STEP_COLUMN = 0 @@ -265,7 +265,6 @@ class TestClickhouseFunnelCorrelationActors(ClickhouseTestMixin, APIBaseTest): self.assertCountEqual([str(val["id"]) for val in serialized_actors], [str(people["user_1"].uuid)]) @snapshot_clickhouse_queries - @test_with_materialized_columns(event_properties=["$window_id", "$session_id"]) @freeze_time("2021-01-02 00:00:00.000Z") def test_funnel_correlation_on_event_with_recordings(self): p1 = _create_person(distinct_ids=["user_1"], team=self.team, properties={"foo": "bar"}) @@ -365,7 +364,6 @@ class TestClickhouseFunnelCorrelationActors(ClickhouseTestMixin, APIBaseTest): ) @snapshot_clickhouse_queries - @test_with_materialized_columns(event_properties=["$window_id", "$session_id"]) @freeze_time("2021-01-02 00:00:00.000Z") def test_funnel_correlation_on_properties_with_recordings(self): p1 = _create_person(distinct_ids=["user_1"], team=self.team, properties={"foo": "bar"}) @@ -423,7 +421,6 @@ class TestClickhouseFunnelCorrelationActors(ClickhouseTestMixin, APIBaseTest): ) @snapshot_clickhouse_queries - @test_with_materialized_columns(event_properties=["$window_id", "$session_id"]) @freeze_time("2021-01-02 00:00:00.000Z") def test_strict_funnel_correlation_with_recordings(self): diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_persons.py b/ee/clickhouse/queries/funnels/test/test_funnel_persons.py index 64b88374411..463ad48e345 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel_persons.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_persons.py @@ -433,7 +433,6 @@ class TestFunnelPersons(ClickhouseTestMixin, APIBaseTest): self.assertEqual(results[0]["id"], person.uuid) @snapshot_clickhouse_queries - @test_with_materialized_columns(event_properties=["$window_id", "$session_id"]) @freeze_time("2021-01-02 00:00:00.000Z") def test_funnel_person_recordings(self): p1 = _create_person(distinct_ids=[f"user_1"], team=self.team) diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_strict_persons.py b/ee/clickhouse/queries/funnels/test/test_funnel_strict_persons.py index ca967ed96a9..309e0228808 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel_strict_persons.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_strict_persons.py @@ -12,7 +12,7 @@ from ee.clickhouse.util import ClickhouseTestMixin, snapshot_clickhouse_queries from posthog.constants import INSIGHT_FUNNELS from posthog.models.filters import Filter from posthog.models.person import Person -from posthog.test.base import APIBaseTest, test_with_materialized_columns +from posthog.test.base import APIBaseTest FORMAT_TIME = "%Y-%m-%d 00:00:00" @@ -139,7 +139,6 @@ class TestFunnelStrictStepsPersons(ClickhouseTestMixin, APIBaseTest): self.assertEqual(0, len(serialized_results)) @snapshot_clickhouse_queries - @test_with_materialized_columns(event_properties=["$window_id", "$session_id"]) @freeze_time("2021-01-02 00:00:00.000Z") def test_strict_funnel_person_recordings(self): p1 = _create_person(distinct_ids=[f"user_1"], team=self.team) diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_unordered_persons.py b/ee/clickhouse/queries/funnels/test/test_funnel_unordered_persons.py index dfcf7e3267d..1fb791ff04f 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel_unordered_persons.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_unordered_persons.py @@ -13,7 +13,7 @@ from ee.clickhouse.util import ClickhouseTestMixin, snapshot_clickhouse_queries from posthog.constants import INSIGHT_FUNNELS from posthog.models.filters import Filter from posthog.models.person import Person -from posthog.test.base import APIBaseTest, test_with_materialized_columns +from posthog.test.base import APIBaseTest FORMAT_TIME = "%Y-%m-%d 00:00:00" @@ -161,7 +161,6 @@ class TestFunnelUnorderedStepsPersons(ClickhouseTestMixin, APIBaseTest): self.assertEqual(10, len(serialized_results)) @snapshot_clickhouse_queries - @test_with_materialized_columns(event_properties=["$window_id", "$session_id"]) @freeze_time("2021-01-02 00:00:00.000Z") def test_unordered_funnel_does_not_return_recordings(self): p1 = _create_person(distinct_ids=[f"user_1"], team=self.team) diff --git a/ee/clickhouse/queries/session_recordings/test/__snapshots__/test_clickhouse_session_recording_list.ambr b/ee/clickhouse/queries/session_recordings/test/__snapshots__/test_clickhouse_session_recording_list.ambr index 585e1cd45fc..a53572461ce 100644 --- a/ee/clickhouse/queries/session_recordings/test/__snapshots__/test_clickhouse_session_recording_list.ambr +++ b/ee/clickhouse/queries/session_recordings/test/__snapshots__/test_clickhouse_session_recording_list.ambr @@ -12,8 +12,7 @@ event, team_id, timestamp , - trim(BOTH '"' - FROM JSONExtractRaw(properties, '$session_id')) as session_id + $session_id as session_id FROM events WHERE team_id = 2 AND event IN ['$pageview'] @@ -69,8 +68,7 @@ event, team_id, timestamp , - trim(BOTH '"' - FROM JSONExtractRaw(properties, '$session_id')) as session_id + $session_id as session_id FROM events WHERE team_id = 2 AND event IN ['$autocapture'] @@ -304,115 +302,6 @@ OFFSET 0 ' --- -# name: TestClickhouseSessionRecordingsList.test_event_filter_with_cohort_properties.5 - ' - - SELECT DISTINCT p.id - FROM - (SELECT * - FROM person - JOIN - (SELECT id, - max(_timestamp) as _timestamp, - max(is_deleted) as is_deleted - FROM person - WHERE team_id = 2 - GROUP BY id) as person_max ON person.id = person_max.id - AND person._timestamp = person_max._timestamp - WHERE team_id = 2 - AND person_max.is_deleted = 0 ) AS p - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON p.id = pdi.person_id - WHERE team_id = 2 - AND pdi.distinct_id IN - (SELECT distinct_id - FROM - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) - WHERE person_id IN - (select id - from - (SELECT * - FROM person - JOIN - (SELECT id, - max(_timestamp) as _timestamp, - max(is_deleted) as is_deleted - FROM person - WHERE team_id = 2 - GROUP BY id) as person_max ON person.id = person_max.id - AND person._timestamp = person_max._timestamp - WHERE team_id = 2 - AND person_max.is_deleted = 0 - AND (has(['some_val'], trim(BOTH '"' - FROM JSONExtractRaw(properties, '$some_prop')))) )) ) - ORDER BY _timestamp ASC - LIMIT 50000 - OFFSET 50000 - ' ---- -# name: TestClickhouseSessionRecordingsList.test_event_filter_with_cohort_properties.6 - ' - - SELECT session_recordings.session_id, - any(session_recordings.start_time) as start_time, - any(session_recordings.end_time) as end_time, - any(session_recordings.duration) as duration, - any(session_recordings.distinct_id) as distinct_id - FROM - (SELECT session_id, - any(window_id) as window_id, - MIN(timestamp) AS start_time, - MAX(timestamp) AS end_time, - dateDiff('second', toDateTime(MIN(timestamp)), toDateTime(MAX(timestamp))) as duration, - any(distinct_id) as distinct_id, - SUM(has_full_snapshot) as full_snapshots - FROM session_recording_events - WHERE team_id = 2 - AND timestamp >= '2021-08-13 12:00:00' - AND timestamp <= '2021-08-22 08:00:00' - GROUP BY session_id - HAVING full_snapshots > 0 - AND start_time >= '2021-08-14 00:00:00' - AND start_time <= '2021-08-21 20:00:00') AS session_recordings - JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) as pdi ON pdi.distinct_id = session_recordings.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 2 - GROUP BY id - HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id - WHERE 1 = 1 - AND person_id IN - (SELECT person_id - FROM cohortpeople - WHERE team_id = 2 - AND cohort_id = 2 - GROUP BY person_id, - cohort_id, - team_id - HAVING sum(sign) > 0) - GROUP BY session_recordings.session_id - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 - ' ---- # name: TestClickhouseSessionRecordingsList.test_event_filter_with_matching_on_session_id ' @@ -427,8 +316,7 @@ event, team_id, timestamp , - trim(BOTH '"' - FROM JSONExtractRaw(properties, '$session_id')) as session_id + $session_id as session_id FROM events WHERE team_id = 2 AND event IN ['$pageview'] @@ -484,8 +372,7 @@ event, team_id, timestamp , - trim(BOTH '"' - FROM JSONExtractRaw(properties, '$session_id')) as session_id + $session_id as session_id FROM events WHERE team_id = 2 AND event IN ['$autocapture'] diff --git a/ee/clickhouse/queries/session_recordings/test/test_clickhouse_session_recording_list.py b/ee/clickhouse/queries/session_recordings/test/test_clickhouse_session_recording_list.py index 424cda9263d..35373bf65df 100644 --- a/ee/clickhouse/queries/session_recordings/test/test_clickhouse_session_recording_list.py +++ b/ee/clickhouse/queries/session_recordings/test/test_clickhouse_session_recording_list.py @@ -79,7 +79,7 @@ class TestClickhouseSessionRecordingsList(ClickhouseTestMixin, factory_session_r @freeze_time("2021-01-21T20:00:00.000Z") @snapshot_clickhouse_queries - @test_with_materialized_columns(["$current_url", "$session_id"]) + @test_with_materialized_columns(["$current_url"]) def test_event_filter_with_matching_on_session_id(self): Person.objects.create(team=self.team, distinct_ids=["user"], properties={"email": "bla"}) self.create_snapshot("user", "1", self.base_time, window_id="1") @@ -104,7 +104,7 @@ class TestClickhouseSessionRecordingsList(ClickhouseTestMixin, factory_session_r @freeze_time("2021-01-21T20:00:00.000Z") @snapshot_clickhouse_queries - @test_with_materialized_columns(["$current_url", "$session_id"]) + @test_with_materialized_columns(["$current_url"]) def test_event_filter_matching_with_no_session_id(self): Person.objects.create(team=self.team, distinct_ids=["user"], properties={"email": "bla"}) self.create_snapshot("user", "1", self.base_time, window_id="1") diff --git a/ee/clickhouse/queries/test/__snapshots__/test_paths.ambr b/ee/clickhouse/queries/test/__snapshots__/test_paths.ambr index fb3b6f065a0..825133ee7bb 100644 --- a/ee/clickhouse/queries/test/__snapshots__/test_paths.ambr +++ b/ee/clickhouse/queries/test/__snapshots__/test_paths.ambr @@ -1307,10 +1307,8 @@ pdi.person_id as person_id, e.uuid AS uuid, e.timestamp AS timestamp, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as $session_id, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as $window_id, + e.$session_id as $session_id, + e.$window_id as $window_id, if(0, '', if(e.event = '$pageview', if(length(trim(BOTH '"' FROM JSONExtractRaw(properties, '$current_url'))) > 1, trim(TRAILING '/' FROM trim(BOTH '"' @@ -1434,10 +1432,8 @@ pdi.person_id as person_id, e.uuid AS uuid, e.timestamp AS timestamp, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as $session_id, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as $window_id, + e.$session_id as $session_id, + e.$window_id as $window_id, if(0, '', if(e.event = '$pageview', if(length(trim(BOTH '"' FROM JSONExtractRaw(properties, '$current_url'))) > 1, trim(TRAILING '/' FROM trim(BOTH '"' @@ -1561,10 +1557,8 @@ pdi.person_id as person_id, e.uuid AS uuid, e.timestamp AS timestamp, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as $session_id, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as $window_id, + e.$session_id as $session_id, + e.$window_id as $window_id, if(0, '', if(e.event = '$pageview', if(length(trim(BOTH '"' FROM JSONExtractRaw(properties, '$current_url'))) > 1, trim(TRAILING '/' FROM trim(BOTH '"' @@ -1694,10 +1688,8 @@ pdi.person_id as person_id, e.uuid AS uuid, e.timestamp AS timestamp, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as $session_id, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as $window_id, + e.$session_id as $session_id, + e.$window_id as $window_id, if(0, '', if(e.event = '$pageview', if(length(trim(BOTH '"' FROM JSONExtractRaw(properties, '$current_url'))) > 1, trim(TRAILING '/' FROM trim(BOTH '"' diff --git a/ee/clickhouse/queries/test/test_paths.py b/ee/clickhouse/queries/test/test_paths.py index 8671e2a09e2..c103fe1fec6 100644 --- a/ee/clickhouse/queries/test/test_paths.py +++ b/ee/clickhouse/queries/test/test_paths.py @@ -2782,7 +2782,6 @@ class TestClickhousePaths(ClickhouseTestMixin, paths_test_factory(ClickhousePath # Note: not using `@snapshot_clickhouse_queries` here because the ordering of the session_ids in the recording # query is not guaranteed, so adding it would lead to a flaky test. - @test_with_materialized_columns(["$current_url", "$window_id", "$session_id"]) @freeze_time("2012-01-01T03:21:34.000Z") def test_path_recording(self): # User with 2 matching paths with recordings @@ -2886,7 +2885,6 @@ class TestClickhousePaths(ClickhouseTestMixin, paths_test_factory(ClickhousePath self.assertEqual([], matched_recordings[1]) @snapshot_clickhouse_queries - @test_with_materialized_columns(["$current_url", "$window_id", "$session_id"]) @freeze_time("2012-01-01T03:21:34.000Z") def test_path_recording_with_no_window_or_session_id(self): p1 = Person.objects.create(team_id=self.team.pk, distinct_ids=["p1"]) @@ -2925,7 +2923,6 @@ class TestClickhousePaths(ClickhouseTestMixin, paths_test_factory(ClickhousePath ) @snapshot_clickhouse_queries - @test_with_materialized_columns(["$current_url", "$window_id", "$session_id"]) @freeze_time("2012-01-01T03:21:34.000Z") def test_path_recording_with_start_and_end(self): p1 = Person.objects.create(team_id=self.team.pk, distinct_ids=["p1"]) @@ -2990,7 +2987,6 @@ class TestClickhousePaths(ClickhouseTestMixin, paths_test_factory(ClickhousePath ) @snapshot_clickhouse_queries - @test_with_materialized_columns(["$current_url", "$window_id", "$session_id"]) @freeze_time("2012-01-01T03:21:34.000Z") def test_path_recording_for_dropoff(self): p1 = Person.objects.create(team_id=self.team.pk, distinct_ids=["p1"]) diff --git a/ee/clickhouse/queries/trends/test/__snapshots__/test_person.ambr b/ee/clickhouse/queries/trends/test/__snapshots__/test_person.ambr index 3d93c37ad06..4a4e17aa5bd 100644 --- a/ee/clickhouse/queries/trends/test/__snapshots__/test_person.ambr +++ b/ee/clickhouse/queries/trends/test/__snapshots__/test_person.ambr @@ -9,10 +9,8 @@ FROM (SELECT e.timestamp as timestamp, e.$group_0 as $group_0, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as $window_id, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as $session_id, + e.$window_id as $window_id, + e.$session_id as $session_id, e.uuid as uuid FROM events e WHERE team_id = 2 diff --git a/ee/clickhouse/queries/trends/test/test_person.py b/ee/clickhouse/queries/trends/test/test_person.py index 1b648afbdf1..58cd23b4192 100644 --- a/ee/clickhouse/queries/trends/test/test_person.py +++ b/ee/clickhouse/queries/trends/test/test_person.py @@ -13,7 +13,7 @@ from posthog.models.entity import Entity from posthog.models.filters import Filter from posthog.models.group_type_mapping import GroupTypeMapping from posthog.models.person import Person -from posthog.test.base import APIBaseTest, test_with_materialized_columns +from posthog.test.base import APIBaseTest def _create_person(**kwargs): @@ -42,7 +42,6 @@ class TestPerson(ClickhouseTestMixin, APIBaseTest): # Note: not using `@snapshot_clickhouse_queries` here because the ordering of the session_ids in the recording # query is not guaranteed, so adding it would lead to a flaky test. - @test_with_materialized_columns(event_properties=["$session_id", "$window_id"]) @freeze_time("2021-01-21T20:00:00.000Z") def test_person_query_includes_recording_events(self): _create_person(team_id=self.team.pk, distinct_ids=["u1"], properties={"email": "bla"}) @@ -132,7 +131,6 @@ class TestPerson(ClickhouseTestMixin, APIBaseTest): self.assertEqual(serialized_actors[0].get("matched_recordings"), None) @snapshot_clickhouse_queries - @test_with_materialized_columns(event_properties=["$session_id", "$window_id"]) @freeze_time("2021-01-21T20:00:00.000Z") def test_group_query_includes_recording_events(self): GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=0) diff --git a/ee/clickhouse/sql/events.py b/ee/clickhouse/sql/events.py index 64fb283790a..47c469f33cb 100644 --- a/ee/clickhouse/sql/events.py +++ b/ee/clickhouse/sql/events.py @@ -2,7 +2,6 @@ from ee.kafka_client.topics import KAFKA_EVENTS from posthog.settings import CLICKHOUSE_CLUSTER, CLICKHOUSE_DATABASE, DEBUG from .clickhouse import KAFKA_COLUMNS, REPLACING_MERGE_TREE, STORAGE_POLICY, kafka_engine, table_engine -from .person import GET_TEAM_PERSON_DISTINCT_IDS EVENTS_TABLE = "events" @@ -31,6 +30,9 @@ EVENTS_TABLE_MATERIALIZED_COLUMNS = """ , $group_2 VARCHAR materialized trim(BOTH '\"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2' , $group_3 VARCHAR materialized trim(BOTH '\"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer::$group_3' , $group_4 VARCHAR materialized trim(BOTH '\"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4' + , $window_id VARCHAR materialized trim(BOTH '\"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer::$window_id' + , $session_id VARCHAR materialized trim(BOTH '\"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer::$session_id' + """ # :KLUDGE: This is not in sync with reality on cloud! Instead a distributed table engine is used with a sharded_events table. diff --git a/ee/clickhouse/sql/test/__snapshots__/test_schema.ambr b/ee/clickhouse/sql/test/__snapshots__/test_schema.ambr index 35e68492ae1..2dad45b54aa 100644 --- a/ee/clickhouse/sql/test/__snapshots__/test_schema.ambr +++ b/ee/clickhouse/sql/test/__snapshots__/test_schema.ambr @@ -341,7 +341,7 @@ ' --- -# name: test_create_table_query[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer.$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer.$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer.$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer.$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer.test_create_table_query[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer::$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer::$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer::$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4'\n\n \n, _timestamp DateTime\n, _offset UInt64\n\n) ENGINE = ReplacingMergeTree(_timestamp)\nPARTITION BY toYYYYMM(timestamp)\nORDER BY (team_id, toDate(timestamp), event, cityHash64(distinct_id), cityHash64(uuid))\n\n\n] +# name: test_create_table_query[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer.$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer.$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer.$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer.$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer.$group_4'\n , $window_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer.$window_id'\n , $session_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer.test_create_table_query[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer::$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer::$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer::$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4'\n , $window_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer::$window_id'\n , $session_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer::$session_id'\n\n\n \n, _timestamp DateTime\n, _offset UInt64\n\n) ENGINE = ReplacingMergeTree(_timestamp)\nPARTITION BY toYYYYMM(timestamp)\nORDER BY (team_id, toDate(timestamp), event, cityHash64(distinct_id), cityHash64(uuid))\n\n\n] ' CREATE TABLE IF NOT EXISTS events ON CLUSTER posthog @@ -360,6 +360,9 @@ , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2' , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer::$group_3' , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4' + , $window_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer::$window_id' + , $session_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer::$session_id' + , _timestamp DateTime @@ -726,7 +729,7 @@ ' --- -# name: test_create_table_query_replicated_and_storage[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer.$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer.$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer.$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer.$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer.test_create_table_query_replicated_and_storage[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer::$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer::$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer::$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4'\n\n \n, _timestamp DateTime\n, _offset UInt64\n\n) ENGINE = ReplacingMergeTree(_timestamp)\nPARTITION BY toYYYYMM(timestamp)\nORDER BY (team_id, toDate(timestamp), event, cityHash64(distinct_id), cityHash64(uuid))\n\n\n] +# name: test_create_table_query_replicated_and_storage[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer.$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer.$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer.$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer.$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer.$group_4'\n , $window_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer.$window_id'\n , $session_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer.test_create_table_query_replicated_and_storage[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer::$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer::$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer::$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4'\n , $window_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer::$window_id'\n , $session_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer::$session_id'\n\n\n \n, _timestamp DateTime\n, _offset UInt64\n\n) ENGINE = ReplacingMergeTree(_timestamp)\nPARTITION BY toYYYYMM(timestamp)\nORDER BY (team_id, toDate(timestamp), event, cityHash64(distinct_id), cityHash64(uuid))\n\n\n] ' CREATE TABLE IF NOT EXISTS events ON CLUSTER posthog @@ -745,6 +748,9 @@ , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2' , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer::$group_3' , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4' + , $window_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer::$window_id' + , $session_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer::$session_id' + , _timestamp DateTime diff --git a/posthog/api/test/__snapshots__/test_action_people.ambr b/posthog/api/test/__snapshots__/test_action_people.ambr index 58df13fa87e..446660500ef 100644 --- a/posthog/api/test/__snapshots__/test_action_people.ambr +++ b/posthog/api/test/__snapshots__/test_action_people.ambr @@ -9,10 +9,8 @@ FROM (SELECT e.timestamp as timestamp, e.properties as properties, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$window_id')) as $window_id, - trim(BOTH '"' - FROM JSONExtractRaw(e.properties, '$session_id')) as $session_id, + e.$window_id as $window_id, + e.$session_id as $session_id, pdi.person_id as person_id, e.distinct_id as distinct_id, e.team_id as team_id,