0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-28 09:16:49 +01:00

recording filters work with backend events (#8276)

This commit is contained in:
Rick Marron 2022-01-25 16:42:01 -08:00 committed by GitHub
parent ad6ef7d4f8
commit b9877a6efe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 153 additions and 10 deletions

View File

@ -40,16 +40,17 @@ class ClickhouseSessionRecordingList(ClickhouseEventQuery):
_event_and_recording_match_conditions_clause = """
(
-- If there is a window_id on the recording, then it is newer data and we can match
-- the recording and events on session_id
-- If there is a session_id on the event, then it is from posthog-js
-- and we should use the session_id to match the recording
(
notEmpty(session_recordings.window_id) AND
notEmpty(events.session_id) AND
events.session_id == session_recordings.session_id
) OR
-- If there's no window_id on the recording, then it is older data and we should match
-- events and recordings on timestamps
-- If there's no session_id on the event, then it is either older data or
-- from a posthog-client that doesn't support session (e.g. backend client)
-- and we should match on timestamp
(
empty(session_recordings.window_id) AND
empty(events.session_id) AND
(
events.timestamp >= session_recordings.start_time
AND events.timestamp <= session_recordings.end_time

View File

@ -1,3 +1,117 @@
# name: TestClickhouseSessionRecordingsList.test_event_filter_matching_with_no_session_id
'
SELECT session_recordings.session_id,
any(session_recordings.start_time) as start_time,
any(session_recordings.end_time) as end_time,
any(session_recordings.duration) as duration,
any(session_recordings.distinct_id) as distinct_id ,
sum(if(event = '$pageview', 1, 0)) as count_event_match_0
FROM
(SELECT distinct_id,
event,
team_id,
timestamp ,
trim(BOTH '"'
FROM JSONExtractRaw(properties, '$session_id')) as session_id
FROM events
WHERE team_id = 2
AND event IN ['$pageview']
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00' ) AS events
JOIN
(SELECT session_id,
any(window_id) as window_id,
MIN(timestamp) AS start_time,
MAX(timestamp) AS end_time,
dateDiff('second', toDateTime(MIN(timestamp)), toDateTime(MAX(timestamp))) as duration,
any(distinct_id) as distinct_id,
SUM(has_full_snapshot) as full_snapshots
FROM session_recording_events
WHERE team_id = 2
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
GROUP BY session_id
HAVING full_snapshots > 0
AND start_time >= '2021-01-14 00:00:00'
AND start_time <= '2021-01-21 20:00:00') AS session_recordings ON session_recordings.distinct_id = events.distinct_id
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi ON pdi.distinct_id = session_recordings.distinct_id
WHERE ((notEmpty(events.session_id)
AND events.session_id == session_recordings.session_id)
OR (empty(events.session_id)
AND (events.timestamp >= session_recordings.start_time
AND events.timestamp <= session_recordings.end_time)))
GROUP BY session_recordings.session_id
HAVING 1 = 1
AND count_event_match_0 > 0
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
'
---
# name: TestClickhouseSessionRecordingsList.test_event_filter_matching_with_no_session_id.1
'
SELECT session_recordings.session_id,
any(session_recordings.start_time) as start_time,
any(session_recordings.end_time) as end_time,
any(session_recordings.duration) as duration,
any(session_recordings.distinct_id) as distinct_id ,
sum(if(event = '$autocapture', 1, 0)) as count_event_match_0
FROM
(SELECT distinct_id,
event,
team_id,
timestamp ,
trim(BOTH '"'
FROM JSONExtractRaw(properties, '$session_id')) as session_id
FROM events
WHERE team_id = 2
AND event IN ['$autocapture']
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00' ) AS events
JOIN
(SELECT session_id,
any(window_id) as window_id,
MIN(timestamp) AS start_time,
MAX(timestamp) AS end_time,
dateDiff('second', toDateTime(MIN(timestamp)), toDateTime(MAX(timestamp))) as duration,
any(distinct_id) as distinct_id,
SUM(has_full_snapshot) as full_snapshots
FROM session_recording_events
WHERE team_id = 2
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
GROUP BY session_id
HAVING full_snapshots > 0
AND start_time >= '2021-01-14 00:00:00'
AND start_time <= '2021-01-21 20:00:00') AS session_recordings ON session_recordings.distinct_id = events.distinct_id
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi ON pdi.distinct_id = session_recordings.distinct_id
WHERE ((notEmpty(events.session_id)
AND events.session_id == session_recordings.session_id)
OR (empty(events.session_id)
AND (events.timestamp >= session_recordings.start_time
AND events.timestamp <= session_recordings.end_time)))
GROUP BY session_recordings.session_id
HAVING 1 = 1
AND count_event_match_0 > 0
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
'
---
# name: TestClickhouseSessionRecordingsList.test_event_filter_with_cohort_properties
'
@ -231,9 +345,9 @@
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi ON pdi.distinct_id = session_recordings.distinct_id
WHERE ((notEmpty(session_recordings.window_id)
WHERE ((notEmpty(events.session_id)
AND events.session_id == session_recordings.session_id)
OR (empty(session_recordings.window_id)
OR (empty(events.session_id)
AND (events.timestamp >= session_recordings.start_time
AND events.timestamp <= session_recordings.end_time)))
GROUP BY session_recordings.session_id
@ -288,9 +402,9 @@
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi ON pdi.distinct_id = session_recordings.distinct_id
WHERE ((notEmpty(session_recordings.window_id)
WHERE ((notEmpty(events.session_id)
AND events.session_id == session_recordings.session_id)
OR (empty(session_recordings.window_id)
OR (empty(events.session_id)
AND (events.timestamp >= session_recordings.start_time
AND events.timestamp <= session_recordings.end_time)))
GROUP BY session_recordings.session_id

View File

@ -101,3 +101,31 @@ class TestClickhouseSessionRecordingsList(ClickhouseTestMixin, factory_session_r
session_recording_list_instance = ClickhouseSessionRecordingList(filter=filter, team_id=self.team.pk)
(session_recordings, _) = session_recording_list_instance.run()
self.assertEqual(len(session_recordings), 0)
@freeze_time("2021-01-21T20:00:00.000Z")
@snapshot_clickhouse_queries
@test_with_materialized_columns(["$current_url", "$session_id"])
def test_event_filter_matching_with_no_session_id(self):
    """Check event filters against a recording when events are matched by timestamp.

    One recording ("1") is built from two snapshots, at ``base_time`` and
    ``base_time + 30s``. Two events are created for the same user: one at
    ``base_time`` and an ``$autocapture`` one second after the last snapshot.

    NOTE(review): the test name implies ``create_event`` stores no
    ``$session_id`` on the events, so the query should fall back to comparing
    event timestamps with the recording's start/end -- confirm against the
    factory helper.
    """
    Person.objects.create(team=self.team, distinct_ids=["user"], properties={"email": "bla"})

    # Recording "1" spans base_time .. base_time + 30s.
    recording_end = self.base_time + relativedelta(seconds=30)
    self.create_snapshot("user", "1", self.base_time, window_id="1")
    # No event_name given; presumably the helper defaults to "$pageview",
    # since the first filter below is expected to find this event.
    self.create_event("user", self.base_time)
    self.create_snapshot("user", "1", recording_end, window_id="1")
    # One second past the final snapshot, i.e. outside the recording window.
    self.create_event("user", self.base_time + relativedelta(seconds=31), event_name="$autocapture")

    # The event inside the recording's time range matches the recording.
    pageview_filter = SessionRecordingsFilter(
        team=self.team,
        data={"events": [{"id": "$pageview", "type": "events", "order": 0, "name": "$pageview"}]},
    )
    pageview_query = ClickhouseSessionRecordingList(filter=pageview_filter, team_id=self.team.pk)
    pageview_recordings, _ = pageview_query.run()
    self.assertEqual(len(pageview_recordings), 1)
    self.assertEqual(pageview_recordings[0]["session_id"], "1")

    # The $autocapture event after the recording's end does not match it.
    autocapture_filter = SessionRecordingsFilter(
        team=self.team,
        data={"events": [{"id": "$autocapture", "type": "events", "order": 0, "name": "$autocapture"}]},
    )
    autocapture_query = ClickhouseSessionRecordingList(filter=autocapture_filter, team_id=self.team.pk)
    autocapture_recordings, _ = autocapture_query.run()
    self.assertEqual(len(autocapture_recordings), 0)