diff --git a/ee/clickhouse/queries/session_recordings/clickhouse_session_recording_list.py b/ee/clickhouse/queries/session_recordings/clickhouse_session_recording_list.py index fc72e1d988b..b80993fd151 100644 --- a/ee/clickhouse/queries/session_recordings/clickhouse_session_recording_list.py +++ b/ee/clickhouse/queries/session_recordings/clickhouse_session_recording_list.py @@ -40,16 +40,17 @@ class ClickhouseSessionRecordingList(ClickhouseEventQuery): _event_and_recording_match_conditions_clause = """ ( - -- If there is a window_id on the recording, then it is newer data and we can match - -- the recording and events on session_id + -- If there is a session_id on the event, then it is from posthog-js + -- and we should use the session_id to match the recording ( - notEmpty(session_recordings.window_id) AND + notEmpty(events.session_id) AND events.session_id == session_recordings.session_id ) OR - -- If there's no window_id on the recording, then it is older data and we should match - -- events and recordings on timestamps + -- If there's no session_id on the event, then it is either older data or + -- from a posthog-client that doesn't support session (e.g. 
backend client) + -- and we should match on timestamp ( - empty(session_recordings.window_id) AND + empty(events.session_id) AND ( events.timestamp >= session_recordings.start_time AND events.timestamp <= session_recordings.end_time diff --git a/ee/clickhouse/queries/session_recordings/test/__snapshots__/test_clickhouse_session_recording_list.ambr b/ee/clickhouse/queries/session_recordings/test/__snapshots__/test_clickhouse_session_recording_list.ambr index f9ccbe65ef0..28b92a3a5d8 100644 --- a/ee/clickhouse/queries/session_recordings/test/__snapshots__/test_clickhouse_session_recording_list.ambr +++ b/ee/clickhouse/queries/session_recordings/test/__snapshots__/test_clickhouse_session_recording_list.ambr @@ -1,3 +1,117 @@ +# name: TestClickhouseSessionRecordingsList.test_event_filter_matching_with_no_session_id + ' + + SELECT session_recordings.session_id, + any(session_recordings.start_time) as start_time, + any(session_recordings.end_time) as end_time, + any(session_recordings.duration) as duration, + any(session_recordings.distinct_id) as distinct_id , + sum(if(event = '$pageview', 1, 0)) as count_event_match_0 + FROM + (SELECT distinct_id, + event, + team_id, + timestamp , + trim(BOTH '"' + FROM JSONExtractRaw(properties, '$session_id')) as session_id + FROM events + WHERE team_id = 2 + AND event IN ['$pageview'] + AND timestamp >= '2021-01-13 12:00:00' + AND timestamp <= '2021-01-22 08:00:00' ) AS events + JOIN + (SELECT session_id, + any(window_id) as window_id, + MIN(timestamp) AS start_time, + MAX(timestamp) AS end_time, + dateDiff('second', toDateTime(MIN(timestamp)), toDateTime(MAX(timestamp))) as duration, + any(distinct_id) as distinct_id, + SUM(has_full_snapshot) as full_snapshots + FROM session_recording_events + WHERE team_id = 2 + AND timestamp >= '2021-01-13 12:00:00' + AND timestamp <= '2021-01-22 08:00:00' + GROUP BY session_id + HAVING full_snapshots > 0 + AND start_time >= '2021-01-14 00:00:00' + AND start_time <= '2021-01-21 20:00:00') AS 
session_recordings ON session_recordings.distinct_id = events.distinct_id + JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) as pdi ON pdi.distinct_id = session_recordings.distinct_id + WHERE ((notEmpty(events.session_id) + AND events.session_id == session_recordings.session_id) + OR (empty(events.session_id) + AND (events.timestamp >= session_recordings.start_time + AND events.timestamp <= session_recordings.end_time))) + GROUP BY session_recordings.session_id + HAVING 1 = 1 + AND count_event_match_0 > 0 + ORDER BY start_time DESC + LIMIT 51 + OFFSET 0 + ' +--- +# name: TestClickhouseSessionRecordingsList.test_event_filter_matching_with_no_session_id.1 + ' + + SELECT session_recordings.session_id, + any(session_recordings.start_time) as start_time, + any(session_recordings.end_time) as end_time, + any(session_recordings.duration) as duration, + any(session_recordings.distinct_id) as distinct_id , + sum(if(event = '$autocapture', 1, 0)) as count_event_match_0 + FROM + (SELECT distinct_id, + event, + team_id, + timestamp , + trim(BOTH '"' + FROM JSONExtractRaw(properties, '$session_id')) as session_id + FROM events + WHERE team_id = 2 + AND event IN ['$autocapture'] + AND timestamp >= '2021-01-13 12:00:00' + AND timestamp <= '2021-01-22 08:00:00' ) AS events + JOIN + (SELECT session_id, + any(window_id) as window_id, + MIN(timestamp) AS start_time, + MAX(timestamp) AS end_time, + dateDiff('second', toDateTime(MIN(timestamp)), toDateTime(MAX(timestamp))) as duration, + any(distinct_id) as distinct_id, + SUM(has_full_snapshot) as full_snapshots + FROM session_recording_events + WHERE team_id = 2 + AND timestamp >= '2021-01-13 12:00:00' + AND timestamp <= '2021-01-22 08:00:00' + GROUP BY session_id + HAVING full_snapshots > 0 + AND start_time >= '2021-01-14 00:00:00' + AND start_time <= '2021-01-21 20:00:00') AS session_recordings ON 
session_recordings.distinct_id = events.distinct_id + JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) as pdi ON pdi.distinct_id = session_recordings.distinct_id + WHERE ((notEmpty(events.session_id) + AND events.session_id == session_recordings.session_id) + OR (empty(events.session_id) + AND (events.timestamp >= session_recordings.start_time + AND events.timestamp <= session_recordings.end_time))) + GROUP BY session_recordings.session_id + HAVING 1 = 1 + AND count_event_match_0 > 0 + ORDER BY start_time DESC + LIMIT 51 + OFFSET 0 + ' +--- # name: TestClickhouseSessionRecordingsList.test_event_filter_with_cohort_properties ' @@ -231,9 +345,9 @@ WHERE team_id = 2 GROUP BY distinct_id HAVING argMax(is_deleted, version) = 0) as pdi ON pdi.distinct_id = session_recordings.distinct_id - WHERE ((notEmpty(session_recordings.window_id) + WHERE ((notEmpty(events.session_id) AND events.session_id == session_recordings.session_id) - OR (empty(session_recordings.window_id) + OR (empty(events.session_id) AND (events.timestamp >= session_recordings.start_time AND events.timestamp <= session_recordings.end_time))) GROUP BY session_recordings.session_id @@ -288,9 +402,9 @@ WHERE team_id = 2 GROUP BY distinct_id HAVING argMax(is_deleted, version) = 0) as pdi ON pdi.distinct_id = session_recordings.distinct_id - WHERE ((notEmpty(session_recordings.window_id) + WHERE ((notEmpty(events.session_id) AND events.session_id == session_recordings.session_id) - OR (empty(session_recordings.window_id) + OR (empty(events.session_id) AND (events.timestamp >= session_recordings.start_time AND events.timestamp <= session_recordings.end_time))) GROUP BY session_recordings.session_id diff --git a/ee/clickhouse/queries/session_recordings/test/test_clickhouse_session_recording_list.py 
b/ee/clickhouse/queries/session_recordings/test/test_clickhouse_session_recording_list.py index b24daa0d712..eabf97847c1 100644 --- a/ee/clickhouse/queries/session_recordings/test/test_clickhouse_session_recording_list.py +++ b/ee/clickhouse/queries/session_recordings/test/test_clickhouse_session_recording_list.py @@ -101,3 +101,31 @@ class TestClickhouseSessionRecordingsList(ClickhouseTestMixin, factory_session_r session_recording_list_instance = ClickhouseSessionRecordingList(filter=filter, team_id=self.team.pk) (session_recordings, _) = session_recording_list_instance.run() self.assertEqual(len(session_recordings), 0) + + @freeze_time("2021-01-21T20:00:00.000Z") + @snapshot_clickhouse_queries + @test_with_materialized_columns(["$current_url", "$session_id"]) + def test_event_filter_matching_with_no_session_id(self): + Person.objects.create(team=self.team, distinct_ids=["user"], properties={"email": "bla"}) + self.create_snapshot("user", "1", self.base_time, window_id="1") + self.create_event("user", self.base_time) + self.create_snapshot("user", "1", self.base_time + relativedelta(seconds=30), window_id="1") + self.create_event("user", self.base_time + relativedelta(seconds=31), event_name="$autocapture") + + # Pageview within timestamps matches recording + filter = SessionRecordingsFilter( + team=self.team, data={"events": [{"id": "$pageview", "type": "events", "order": 0, "name": "$pageview"}]}, + ) + session_recording_list_instance = ClickhouseSessionRecordingList(filter=filter, team_id=self.team.pk) + (session_recordings, _) = session_recording_list_instance.run() + self.assertEqual(len(session_recordings), 1) + self.assertEqual(session_recordings[0]["session_id"], "1") + + # Autocapture event outside timestamps does not match recording + filter = SessionRecordingsFilter( + team=self.team, + data={"events": [{"id": "$autocapture", "type": "events", "order": 0, "name": "$autocapture"}]}, + ) + session_recording_list_instance = 
ClickhouseSessionRecordingList(filter=filter, team_id=self.team.pk) + (session_recordings, _) = session_recording_list_instance.run() + self.assertEqual(len(session_recordings), 0)