0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-28 09:16:49 +01:00

Materialize the window_id and session_id on the events table (#8532)

* materialize columns

* update snapshots

* remove test_with_materialized_column

* update recording snapshot

* update new funnel tests

* update snapshots

* fix test

* update snapshots pt 2
This commit is contained in:
Rick Marron 2022-02-15 16:41:30 -08:00 committed by GitHub
parent b406b316ba
commit 52a9054900
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 91 additions and 223 deletions

View File

@ -18,7 +18,10 @@ from posthog.constants import GROUP_TYPES_LIMIT
from posthog.settings import CLICKHOUSE_DATABASE
from posthog.test.base import BaseTest
GROUPS_COLUMNS = [f"$group_{i}" for i in range(GROUP_TYPES_LIMIT)]
EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS = [f"$group_{i}" for i in range(GROUP_TYPES_LIMIT)] + [
"$session_id",
"$window_id",
]
def _create_event(**kwargs):
@ -30,7 +33,7 @@ def _create_event(**kwargs):
class TestMaterializedColumns(ClickhouseTestMixin, ClickhouseDestroyTablesMixin, BaseTest):
def test_get_columns_default(self):
self.assertCountEqual(get_materialized_columns("events"), GROUPS_COLUMNS)
self.assertCountEqual(get_materialized_columns("events"), EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS)
self.assertCountEqual(get_materialized_columns("person"), [])
self.assertEqual(
get_materialized_columns("session_recording_events"), {"has_full_snapshot": "has_full_snapshot"}
@ -43,19 +46,22 @@ class TestMaterializedColumns(ClickhouseTestMixin, ClickhouseDestroyTablesMixin,
materialize("person", "$zeta")
self.assertCountEqual(
get_materialized_columns("events", use_cache=True).keys(), ["$foo", "$bar", *GROUPS_COLUMNS]
get_materialized_columns("events", use_cache=True).keys(),
["$foo", "$bar", *EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS],
)
self.assertCountEqual(get_materialized_columns("person", use_cache=True).keys(), ["$zeta"])
materialize("events", "abc")
self.assertCountEqual(
get_materialized_columns("events", use_cache=True).keys(), ["$foo", "$bar", *GROUPS_COLUMNS]
get_materialized_columns("events", use_cache=True).keys(),
["$foo", "$bar", *EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS],
)
with freeze_time("2020-01-04T14:00:01Z"):
self.assertCountEqual(
get_materialized_columns("events", use_cache=True).keys(), ["$foo", "$bar", "abc", *GROUPS_COLUMNS]
get_materialized_columns("events", use_cache=True).keys(),
["$foo", "$bar", "abc", *EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS],
)
def test_materialized_column_naming(self):

View File

@ -0,0 +1,19 @@
from infi.clickhouse_orm import migrations
from ee.clickhouse.materialized_columns.columns import materialize
def create_materialized_columns(database):
    """Materialize the `$session_id` and `$window_id` event properties on the `events` table.

    Each property is materialized into a column of the same name. The
    `database` argument is accepted because this function is registered as a
    `migrations.RunPython` callback; it is not used in the body.
    """
    for property_name in ("$session_id", "$window_id"):
        try:
            materialize("events", property_name, property_name)
        except ValueError:
            # This property is already materialized, skip it and move on.
            continue
# Migration operations for this file: a single Python step that runs
# create_materialized_columns.
operations = [migrations.RunPython(create_materialized_columns)]

View File

@ -87,10 +87,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,
@ -310,10 +308,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,
@ -463,10 +459,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,
@ -605,10 +599,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,
@ -745,10 +737,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,

View File

@ -148,10 +148,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,
@ -337,10 +335,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,
@ -526,10 +522,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,

View File

@ -108,10 +108,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,
@ -254,10 +252,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,
@ -400,10 +396,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,

View File

@ -76,10 +76,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,
@ -155,10 +153,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,
@ -234,10 +230,8 @@
e.timestamp as timestamp,
pdi.person_id as aggregation_target,
e.uuid AS uuid,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as "$session_id",
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as "$window_id"
e.$session_id as "$session_id",
e.$window_id as "$window_id"
FROM events e
INNER JOIN
(SELECT distinct_id,

View File

@ -15,7 +15,7 @@ from posthog.constants import INSIGHT_FUNNELS
from posthog.models import Cohort, Filter
from posthog.models.person import Person
from posthog.tasks.calculate_cohort import insert_cohort_from_insight_filter
from posthog.test.base import APIBaseTest, test_with_materialized_columns
from posthog.test.base import APIBaseTest
FORMAT_TIME = "%Y-%m-%d 00:00:00"
MAX_STEP_COLUMN = 0
@ -265,7 +265,6 @@ class TestClickhouseFunnelCorrelationActors(ClickhouseTestMixin, APIBaseTest):
self.assertCountEqual([str(val["id"]) for val in serialized_actors], [str(people["user_1"].uuid)])
@snapshot_clickhouse_queries
@test_with_materialized_columns(event_properties=["$window_id", "$session_id"])
@freeze_time("2021-01-02 00:00:00.000Z")
def test_funnel_correlation_on_event_with_recordings(self):
p1 = _create_person(distinct_ids=["user_1"], team=self.team, properties={"foo": "bar"})
@ -365,7 +364,6 @@ class TestClickhouseFunnelCorrelationActors(ClickhouseTestMixin, APIBaseTest):
)
@snapshot_clickhouse_queries
@test_with_materialized_columns(event_properties=["$window_id", "$session_id"])
@freeze_time("2021-01-02 00:00:00.000Z")
def test_funnel_correlation_on_properties_with_recordings(self):
p1 = _create_person(distinct_ids=["user_1"], team=self.team, properties={"foo": "bar"})
@ -423,7 +421,6 @@ class TestClickhouseFunnelCorrelationActors(ClickhouseTestMixin, APIBaseTest):
)
@snapshot_clickhouse_queries
@test_with_materialized_columns(event_properties=["$window_id", "$session_id"])
@freeze_time("2021-01-02 00:00:00.000Z")
def test_strict_funnel_correlation_with_recordings(self):

View File

@ -433,7 +433,6 @@ class TestFunnelPersons(ClickhouseTestMixin, APIBaseTest):
self.assertEqual(results[0]["id"], person.uuid)
@snapshot_clickhouse_queries
@test_with_materialized_columns(event_properties=["$window_id", "$session_id"])
@freeze_time("2021-01-02 00:00:00.000Z")
def test_funnel_person_recordings(self):
p1 = _create_person(distinct_ids=[f"user_1"], team=self.team)

View File

@ -12,7 +12,7 @@ from ee.clickhouse.util import ClickhouseTestMixin, snapshot_clickhouse_queries
from posthog.constants import INSIGHT_FUNNELS
from posthog.models.filters import Filter
from posthog.models.person import Person
from posthog.test.base import APIBaseTest, test_with_materialized_columns
from posthog.test.base import APIBaseTest
FORMAT_TIME = "%Y-%m-%d 00:00:00"
@ -139,7 +139,6 @@ class TestFunnelStrictStepsPersons(ClickhouseTestMixin, APIBaseTest):
self.assertEqual(0, len(serialized_results))
@snapshot_clickhouse_queries
@test_with_materialized_columns(event_properties=["$window_id", "$session_id"])
@freeze_time("2021-01-02 00:00:00.000Z")
def test_strict_funnel_person_recordings(self):
p1 = _create_person(distinct_ids=[f"user_1"], team=self.team)

View File

@ -13,7 +13,7 @@ from ee.clickhouse.util import ClickhouseTestMixin, snapshot_clickhouse_queries
from posthog.constants import INSIGHT_FUNNELS
from posthog.models.filters import Filter
from posthog.models.person import Person
from posthog.test.base import APIBaseTest, test_with_materialized_columns
from posthog.test.base import APIBaseTest
FORMAT_TIME = "%Y-%m-%d 00:00:00"
@ -161,7 +161,6 @@ class TestFunnelUnorderedStepsPersons(ClickhouseTestMixin, APIBaseTest):
self.assertEqual(10, len(serialized_results))
@snapshot_clickhouse_queries
@test_with_materialized_columns(event_properties=["$window_id", "$session_id"])
@freeze_time("2021-01-02 00:00:00.000Z")
def test_unordered_funnel_does_not_return_recordings(self):
p1 = _create_person(distinct_ids=[f"user_1"], team=self.team)

View File

@ -12,8 +12,7 @@
event,
team_id,
timestamp ,
trim(BOTH '"'
FROM JSONExtractRaw(properties, '$session_id')) as session_id
$session_id as session_id
FROM events
WHERE team_id = 2
AND event IN ['$pageview']
@ -69,8 +68,7 @@
event,
team_id,
timestamp ,
trim(BOTH '"'
FROM JSONExtractRaw(properties, '$session_id')) as session_id
$session_id as session_id
FROM events
WHERE team_id = 2
AND event IN ['$autocapture']
@ -304,115 +302,6 @@
OFFSET 0
'
---
# name: TestClickhouseSessionRecordingsList.test_event_filter_with_cohort_properties.5
'
SELECT DISTINCT p.id
FROM
(SELECT *
FROM person
JOIN
(SELECT id,
max(_timestamp) as _timestamp,
max(is_deleted) as is_deleted
FROM person
WHERE team_id = 2
GROUP BY id) as person_max ON person.id = person_max.id
AND person._timestamp = person_max._timestamp
WHERE team_id = 2
AND person_max.is_deleted = 0 ) AS p
INNER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON p.id = pdi.person_id
WHERE team_id = 2
AND pdi.distinct_id IN
(SELECT distinct_id
FROM
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0)
WHERE person_id IN
(select id
from
(SELECT *
FROM person
JOIN
(SELECT id,
max(_timestamp) as _timestamp,
max(is_deleted) as is_deleted
FROM person
WHERE team_id = 2
GROUP BY id) as person_max ON person.id = person_max.id
AND person._timestamp = person_max._timestamp
WHERE team_id = 2
AND person_max.is_deleted = 0
AND (has(['some_val'], trim(BOTH '"'
FROM JSONExtractRaw(properties, '$some_prop')))) )) )
ORDER BY _timestamp ASC
LIMIT 50000
OFFSET 50000
'
---
# name: TestClickhouseSessionRecordingsList.test_event_filter_with_cohort_properties.6
'
SELECT session_recordings.session_id,
any(session_recordings.start_time) as start_time,
any(session_recordings.end_time) as end_time,
any(session_recordings.duration) as duration,
any(session_recordings.distinct_id) as distinct_id
FROM
(SELECT session_id,
any(window_id) as window_id,
MIN(timestamp) AS start_time,
MAX(timestamp) AS end_time,
dateDiff('second', toDateTime(MIN(timestamp)), toDateTime(MAX(timestamp))) as duration,
any(distinct_id) as distinct_id,
SUM(has_full_snapshot) as full_snapshots
FROM session_recording_events
WHERE team_id = 2
AND timestamp >= '2021-08-13 12:00:00'
AND timestamp <= '2021-08-22 08:00:00'
GROUP BY session_id
HAVING full_snapshots > 0
AND start_time >= '2021-08-14 00:00:00'
AND start_time <= '2021-08-21 20:00:00') AS session_recordings
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi ON pdi.distinct_id = session_recordings.distinct_id
INNER JOIN
(SELECT id
FROM person
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE 1 = 1
AND person_id IN
(SELECT person_id
FROM cohortpeople
WHERE team_id = 2
AND cohort_id = 2
GROUP BY person_id,
cohort_id,
team_id
HAVING sum(sign) > 0)
GROUP BY session_recordings.session_id
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
'
---
# name: TestClickhouseSessionRecordingsList.test_event_filter_with_matching_on_session_id
'
@ -427,8 +316,7 @@
event,
team_id,
timestamp ,
trim(BOTH '"'
FROM JSONExtractRaw(properties, '$session_id')) as session_id
$session_id as session_id
FROM events
WHERE team_id = 2
AND event IN ['$pageview']
@ -484,8 +372,7 @@
event,
team_id,
timestamp ,
trim(BOTH '"'
FROM JSONExtractRaw(properties, '$session_id')) as session_id
$session_id as session_id
FROM events
WHERE team_id = 2
AND event IN ['$autocapture']

View File

@ -79,7 +79,7 @@ class TestClickhouseSessionRecordingsList(ClickhouseTestMixin, factory_session_r
@freeze_time("2021-01-21T20:00:00.000Z")
@snapshot_clickhouse_queries
@test_with_materialized_columns(["$current_url", "$session_id"])
@test_with_materialized_columns(["$current_url"])
def test_event_filter_with_matching_on_session_id(self):
Person.objects.create(team=self.team, distinct_ids=["user"], properties={"email": "bla"})
self.create_snapshot("user", "1", self.base_time, window_id="1")
@ -104,7 +104,7 @@ class TestClickhouseSessionRecordingsList(ClickhouseTestMixin, factory_session_r
@freeze_time("2021-01-21T20:00:00.000Z")
@snapshot_clickhouse_queries
@test_with_materialized_columns(["$current_url", "$session_id"])
@test_with_materialized_columns(["$current_url"])
def test_event_filter_matching_with_no_session_id(self):
Person.objects.create(team=self.team, distinct_ids=["user"], properties={"email": "bla"})
self.create_snapshot("user", "1", self.base_time, window_id="1")

View File

@ -1307,10 +1307,8 @@
pdi.person_id as person_id,
e.uuid AS uuid,
e.timestamp AS timestamp,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as $session_id,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as $window_id,
e.$session_id as $session_id,
e.$window_id as $window_id,
if(0, '', if(e.event = '$pageview', if(length(trim(BOTH '"'
FROM JSONExtractRaw(properties, '$current_url'))) > 1, trim(TRAILING '/'
FROM trim(BOTH '"'
@ -1434,10 +1432,8 @@
pdi.person_id as person_id,
e.uuid AS uuid,
e.timestamp AS timestamp,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as $session_id,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as $window_id,
e.$session_id as $session_id,
e.$window_id as $window_id,
if(0, '', if(e.event = '$pageview', if(length(trim(BOTH '"'
FROM JSONExtractRaw(properties, '$current_url'))) > 1, trim(TRAILING '/'
FROM trim(BOTH '"'
@ -1561,10 +1557,8 @@
pdi.person_id as person_id,
e.uuid AS uuid,
e.timestamp AS timestamp,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as $session_id,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as $window_id,
e.$session_id as $session_id,
e.$window_id as $window_id,
if(0, '', if(e.event = '$pageview', if(length(trim(BOTH '"'
FROM JSONExtractRaw(properties, '$current_url'))) > 1, trim(TRAILING '/'
FROM trim(BOTH '"'
@ -1694,10 +1688,8 @@
pdi.person_id as person_id,
e.uuid AS uuid,
e.timestamp AS timestamp,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as $session_id,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as $window_id,
e.$session_id as $session_id,
e.$window_id as $window_id,
if(0, '', if(e.event = '$pageview', if(length(trim(BOTH '"'
FROM JSONExtractRaw(properties, '$current_url'))) > 1, trim(TRAILING '/'
FROM trim(BOTH '"'

View File

@ -2782,7 +2782,6 @@ class TestClickhousePaths(ClickhouseTestMixin, paths_test_factory(ClickhousePath
# Note: not using `@snapshot_clickhouse_queries` here because the ordering of the session_ids in the recording
# query is not guaranteed, so adding it would lead to a flaky test.
@test_with_materialized_columns(["$current_url", "$window_id", "$session_id"])
@freeze_time("2012-01-01T03:21:34.000Z")
def test_path_recording(self):
# User with 2 matching paths with recordings
@ -2886,7 +2885,6 @@ class TestClickhousePaths(ClickhouseTestMixin, paths_test_factory(ClickhousePath
self.assertEqual([], matched_recordings[1])
@snapshot_clickhouse_queries
@test_with_materialized_columns(["$current_url", "$window_id", "$session_id"])
@freeze_time("2012-01-01T03:21:34.000Z")
def test_path_recording_with_no_window_or_session_id(self):
p1 = Person.objects.create(team_id=self.team.pk, distinct_ids=["p1"])
@ -2925,7 +2923,6 @@ class TestClickhousePaths(ClickhouseTestMixin, paths_test_factory(ClickhousePath
)
@snapshot_clickhouse_queries
@test_with_materialized_columns(["$current_url", "$window_id", "$session_id"])
@freeze_time("2012-01-01T03:21:34.000Z")
def test_path_recording_with_start_and_end(self):
p1 = Person.objects.create(team_id=self.team.pk, distinct_ids=["p1"])
@ -2990,7 +2987,6 @@ class TestClickhousePaths(ClickhouseTestMixin, paths_test_factory(ClickhousePath
)
@snapshot_clickhouse_queries
@test_with_materialized_columns(["$current_url", "$window_id", "$session_id"])
@freeze_time("2012-01-01T03:21:34.000Z")
def test_path_recording_for_dropoff(self):
p1 = Person.objects.create(team_id=self.team.pk, distinct_ids=["p1"])

View File

@ -9,10 +9,8 @@
FROM
(SELECT e.timestamp as timestamp,
e.$group_0 as $group_0,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as $window_id,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as $session_id,
e.$window_id as $window_id,
e.$session_id as $session_id,
e.uuid as uuid
FROM events e
WHERE team_id = 2

View File

@ -13,7 +13,7 @@ from posthog.models.entity import Entity
from posthog.models.filters import Filter
from posthog.models.group_type_mapping import GroupTypeMapping
from posthog.models.person import Person
from posthog.test.base import APIBaseTest, test_with_materialized_columns
from posthog.test.base import APIBaseTest
def _create_person(**kwargs):
@ -42,7 +42,6 @@ class TestPerson(ClickhouseTestMixin, APIBaseTest):
# Note: not using `@snapshot_clickhouse_queries` here because the ordering of the session_ids in the recording
# query is not guaranteed, so adding it would lead to a flaky test.
@test_with_materialized_columns(event_properties=["$session_id", "$window_id"])
@freeze_time("2021-01-21T20:00:00.000Z")
def test_person_query_includes_recording_events(self):
_create_person(team_id=self.team.pk, distinct_ids=["u1"], properties={"email": "bla"})
@ -132,7 +131,6 @@ class TestPerson(ClickhouseTestMixin, APIBaseTest):
self.assertEqual(serialized_actors[0].get("matched_recordings"), None)
@snapshot_clickhouse_queries
@test_with_materialized_columns(event_properties=["$session_id", "$window_id"])
@freeze_time("2021-01-21T20:00:00.000Z")
def test_group_query_includes_recording_events(self):
GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=0)

View File

@ -2,7 +2,6 @@ from ee.kafka_client.topics import KAFKA_EVENTS
from posthog.settings import CLICKHOUSE_CLUSTER, CLICKHOUSE_DATABASE, DEBUG
from .clickhouse import KAFKA_COLUMNS, REPLACING_MERGE_TREE, STORAGE_POLICY, kafka_engine, table_engine
from .person import GET_TEAM_PERSON_DISTINCT_IDS
EVENTS_TABLE = "events"
@ -31,6 +30,9 @@ EVENTS_TABLE_MATERIALIZED_COLUMNS = """
, $group_2 VARCHAR materialized trim(BOTH '\"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2'
, $group_3 VARCHAR materialized trim(BOTH '\"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer::$group_3'
, $group_4 VARCHAR materialized trim(BOTH '\"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4'
, $window_id VARCHAR materialized trim(BOTH '\"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer::$window_id'
, $session_id VARCHAR materialized trim(BOTH '\"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer::$session_id'
"""
# :KLUDGE: This is not in sync with reality on cloud! Instead a distributed table engine is used with a sharded_events table.

View File

@ -341,7 +341,7 @@
'
---
# name: test_create_table_query[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer.$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer.$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer.$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer.$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer.test_create_table_query[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer::$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer::$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer::$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4'\n\n \n, _timestamp DateTime\n, _offset UInt64\n\n) ENGINE = ReplacingMergeTree(_timestamp)\nPARTITION BY toYYYYMM(timestamp)\nORDER BY 
(team_id, toDate(timestamp), event, cityHash64(distinct_id), cityHash64(uuid))\n\n\n]
# name: test_create_table_query[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer.$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer.$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer.$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer.$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer.$group_4'\n , $window_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer.$window_id'\n , $session_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer.test_create_table_query[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer::$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer::$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 
'column_materializer::$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4'\n , $window_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer::$window_id'\n , $session_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer::$session_id'\n\n\n \n, _timestamp DateTime\n, _offset UInt64\n\n) ENGINE = ReplacingMergeTree(_timestamp)\nPARTITION BY toYYYYMM(timestamp)\nORDER BY (team_id, toDate(timestamp), event, cityHash64(distinct_id), cityHash64(uuid))\n\n\n]
'
CREATE TABLE IF NOT EXISTS events ON CLUSTER posthog
@ -360,6 +360,9 @@
, $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2'
, $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer::$group_3'
, $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4'
, $window_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer::$window_id'
, $session_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer::$session_id'
, _timestamp DateTime
@ -726,7 +729,7 @@
'
---
# name: test_create_table_query_replicated_and_storage[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer.$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer.$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer.$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer.$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer.test_create_table_query_replicated_and_storage[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer::$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer::$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer::$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4'\n\n \n, _timestamp DateTime\n, _offset UInt64\n\n) ENGINE = 
ReplacingMergeTree(_timestamp)\nPARTITION BY toYYYYMM(timestamp)\nORDER BY (team_id, toDate(timestamp), event, cityHash64(distinct_id), cityHash64(uuid))\n\n\n]
# name: test_create_table_query_replicated_and_storage[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer.$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer.$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer.$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer.$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer.$group_4'\n , $window_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer.$window_id'\n , $session_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer.test_create_table_query_replicated_and_storage[\nCREATE TABLE IF NOT EXISTS events ON CLUSTER posthog\n(\n uuid UUID,\n event VARCHAR,\n properties VARCHAR,\n timestamp DateTime64(6, 'UTC'),\n team_id Int64,\n distinct_id VARCHAR,\n elements_chain VARCHAR,\n created_at DateTime64(6, 'UTC')\n \n , $group_0 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_0')) COMMENT 'column_materializer::$group_0'\n , $group_1 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_1')) COMMENT 'column_materializer::$group_1'\n , $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2'\n , $group_3 VARCHAR materialized trim(BOTH '"' FROM 
JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer::$group_3'\n , $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4'\n , $window_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer::$window_id'\n , $session_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer::$session_id'\n\n\n \n, _timestamp DateTime\n, _offset UInt64\n\n) ENGINE = ReplacingMergeTree(_timestamp)\nPARTITION BY toYYYYMM(timestamp)\nORDER BY (team_id, toDate(timestamp), event, cityHash64(distinct_id), cityHash64(uuid))\n\n\n]
'
CREATE TABLE IF NOT EXISTS events ON CLUSTER posthog
@ -745,6 +748,9 @@
, $group_2 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_2')) COMMENT 'column_materializer::$group_2'
, $group_3 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_3')) COMMENT 'column_materializer::$group_3'
, $group_4 VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$group_4')) COMMENT 'column_materializer::$group_4'
, $window_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$window_id')) COMMENT 'column_materializer::$window_id'
, $session_id VARCHAR materialized trim(BOTH '"' FROM JSONExtractRaw(properties, '$session_id')) COMMENT 'column_materializer::$session_id'
, _timestamp DateTime

View File

@ -9,10 +9,8 @@
FROM
(SELECT e.timestamp as timestamp,
e.properties as properties,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$window_id')) as $window_id,
trim(BOTH '"'
FROM JSONExtractRaw(e.properties, '$session_id')) as $session_id,
e.$window_id as $window_id,
e.$session_id as $session_id,
pdi.person_id as person_id,
e.distinct_id as distinct_id,
e.team_id as team_id,