mirror of https://github.com/PostHog/posthog.git synced 2024-11-21 13:39:22 +01:00

feat: remove non-hogql replay filtering (#23345)

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Paul D'Ambra 2024-07-02 09:46:15 +01:00 committed by GitHub
parent b16c3d81ac
commit dd4be4fc3f
14 changed files with 4178 additions and 6223 deletions

View File

@@ -726,42 +726,6 @@
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results.1
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results.2
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results.3
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results.4
'''
/* user_id:0 request:_snapshot_ */
SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '') AS value,
@@ -780,6 +744,138 @@
OFFSET 0
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results.2
'''
/* user_id:0 request:_snapshot_ */
SELECT groupArray(day_start) as date,
groupArray(count) AS total,
breakdown_value
FROM
(SELECT SUM(total) as count,
day_start,
breakdown_value
FROM
(SELECT *
FROM
(SELECT toUInt16(0) AS total,
ticks.day_start as day_start,
breakdown_value
FROM
(SELECT toStartOfDay(toDateTime('2020-01-06 00:00:00', 'UTC')) - toIntervalDay(number) as day_start
FROM numbers(6)
UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) as day_start) as ticks
CROSS JOIN
(SELECT breakdown_value
FROM
(SELECT ['test', 'control'] as breakdown_value) ARRAY
JOIN breakdown_value) as sec
ORDER BY breakdown_value,
day_start
UNION ALL SELECT count(*) as total,
toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start,
transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['test', 'control']), (['test', 'control']), '$$_posthog_breakdown_other_$$') as breakdown_value
FROM events e
WHERE e.team_id = 2
AND event = '$pageview'
AND (((isNull(replaceRegexpAll(JSONExtractRaw(e.properties, 'exclude'), '^"|"$', ''))
OR NOT JSONHas(e.properties, 'exclude')))
AND (has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', ''))))
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC')
GROUP BY day_start,
breakdown_value))
GROUP BY day_start,
breakdown_value
ORDER BY breakdown_value,
day_start)
GROUP BY breakdown_value
ORDER BY breakdown_value
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results.3
'''
/* user_id:0 request:_snapshot_ */
SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', '') AS value,
count(*) as count
FROM events e
WHERE team_id = 2
AND event = '$feature_flag_called'
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC')
AND (((isNull(replaceRegexpAll(JSONExtractRaw(e.properties, 'exclude'), '^"|"$', ''))
OR NOT JSONHas(e.properties, 'exclude')))
AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', '')))
AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', '')))))
GROUP BY value
ORDER BY count DESC, value DESC
LIMIT 26
OFFSET 0
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results.4
'''
/* user_id:0 request:_snapshot_ */
SELECT groupArray(day_start) as date,
groupArray(count) AS total,
breakdown_value
FROM
(SELECT SUM(total) as count,
day_start,
breakdown_value
FROM
(SELECT *
FROM
(SELECT toUInt16(0) AS total,
ticks.day_start as day_start,
breakdown_value
FROM
(SELECT toStartOfDay(toDateTime('2020-01-06 00:00:00', 'UTC')) - toIntervalDay(number) as day_start
FROM numbers(6)
UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) as day_start) as ticks
CROSS JOIN
(SELECT breakdown_value
FROM
(SELECT ['control', 'test'] as breakdown_value) ARRAY
JOIN breakdown_value) as sec
ORDER BY breakdown_value,
day_start
UNION ALL SELECT count(DISTINCT person_id) as total,
toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start,
breakdown_value
FROM
(SELECT person_id,
min(timestamp) as timestamp,
breakdown_value
FROM
(SELECT pdi.person_id as person_id, timestamp, transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['control', 'test']), (['control', 'test']), '$$_posthog_breakdown_other_$$') as breakdown_value
FROM events e
INNER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi ON events.distinct_id = pdi.distinct_id
WHERE e.team_id = 2
AND event = '$feature_flag_called'
AND (((isNull(replaceRegexpAll(JSONExtractRaw(e.properties, 'exclude'), '^"|"$', ''))
OR NOT JSONHas(e.properties, 'exclude')))
AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', '')))
AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', '')))))
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') )
GROUP BY person_id,
breakdown_value) AS pdi
GROUP BY day_start,
breakdown_value))
GROUP BY day_start,
breakdown_value
ORDER BY breakdown_value,
day_start)
GROUP BY breakdown_value
ORDER BY breakdown_value
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results.5
'''
/* user_id:0 request:_snapshot_ */
@@ -925,42 +1021,6 @@
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.1
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.2
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.3
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.4
'''
/* user_id:0 request:_snapshot_ */
SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '') AS value,
@@ -977,6 +1037,79 @@
OFFSET 0
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.2
'''
/* user_id:0 request:_snapshot_ */
SELECT groupArray(day_start) as date,
groupArray(count) AS total,
breakdown_value
FROM
(SELECT SUM(total) as count,
day_start,
breakdown_value
FROM
(SELECT *
FROM
(SELECT toUInt16(0) AS total,
ticks.day_start as day_start,
breakdown_value
FROM
(SELECT toStartOfDay(toDateTime('2020-01-06 00:00:00', 'UTC')) - toIntervalDay(number) as day_start
FROM numbers(6)
UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) as day_start) as ticks
CROSS JOIN
(SELECT breakdown_value
FROM
(SELECT ['control', 'test_1', 'test_2'] as breakdown_value) ARRAY
JOIN breakdown_value) as sec
ORDER BY breakdown_value,
day_start
UNION ALL SELECT count(*) as total,
toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start,
transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['control', 'test_1', 'test_2']), (['control', 'test_1', 'test_2']), '$$_posthog_breakdown_other_$$') as breakdown_value
FROM events e
WHERE e.team_id = 2
AND event = '$pageview1'
AND (has(['control', 'test_1', 'test_2', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', '')))
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC')
GROUP BY day_start,
breakdown_value))
GROUP BY day_start,
breakdown_value
ORDER BY breakdown_value,
day_start)
GROUP BY breakdown_value
ORDER BY breakdown_value
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.3
'''
/* user_id:0 request:_snapshot_ */
SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', '') AS value,
count(*) as count
FROM events e
WHERE team_id = 2
AND event = '$feature_flag_called'
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC')
AND ((has(['control', 'test_1', 'test_2', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', '')))
AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', ''))))
GROUP BY value
ORDER BY count DESC, value DESC
LIMIT 26
OFFSET 0
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.4
'''
/* user_id:0 request:_snapshot_ */
SELECT [now()] AS date,
[0] AS total,
'' AS breakdown_value
LIMIT 0
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.5
'''
/* user_id:0 request:_snapshot_ */
@@ -1063,42 +1196,6 @@
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_out_of_timerange_timezone.1
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_out_of_timerange_timezone.2
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_out_of_timerange_timezone.3
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_out_of_timerange_timezone.4
'''
/* user_id:0 request:_snapshot_ */
SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '') AS value,
@@ -1115,6 +1212,132 @@
OFFSET 0
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_out_of_timerange_timezone.2
'''
/* user_id:0 request:_snapshot_ */
SELECT groupArray(day_start) as date,
groupArray(count) AS total,
breakdown_value
FROM
(SELECT SUM(total) as count,
day_start,
breakdown_value
FROM
(SELECT *
FROM
(SELECT toUInt16(0) AS total,
ticks.day_start as day_start,
breakdown_value
FROM
(SELECT toStartOfDay(toDateTime('2020-01-06 07:00:00', 'US/Pacific')) - toIntervalDay(number) as day_start
FROM numbers(6)
UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 02:10:00', 'US/Pacific')) as day_start) as ticks
CROSS JOIN
(SELECT breakdown_value
FROM
(SELECT ['test', 'control'] as breakdown_value) ARRAY
JOIN breakdown_value) as sec
ORDER BY breakdown_value,
day_start
UNION ALL SELECT count(*) as total,
toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'US/Pacific')) as day_start,
transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['test', 'control']), (['test', 'control']), '$$_posthog_breakdown_other_$$') as breakdown_value
FROM events e
WHERE e.team_id = 2
AND event = '$pageview'
AND (has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', '')))
AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime('2020-01-01 02:10:00', 'US/Pacific')
AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2020-01-06 07:00:00', 'US/Pacific')
GROUP BY day_start,
breakdown_value))
GROUP BY day_start,
breakdown_value
ORDER BY breakdown_value,
day_start)
GROUP BY breakdown_value
ORDER BY breakdown_value
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_out_of_timerange_timezone.3
'''
/* user_id:0 request:_snapshot_ */
SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', '') AS value,
count(*) as count
FROM events e
WHERE team_id = 2
AND event = '$feature_flag_called'
AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime('2020-01-01 02:10:00', 'US/Pacific')
AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2020-01-06 07:00:00', 'US/Pacific')
AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', '')))
AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', ''))))
GROUP BY value
ORDER BY count DESC, value DESC
LIMIT 26
OFFSET 0
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_out_of_timerange_timezone.4
'''
/* user_id:0 request:_snapshot_ */
SELECT groupArray(day_start) as date,
groupArray(count) AS total,
breakdown_value
FROM
(SELECT SUM(total) as count,
day_start,
breakdown_value
FROM
(SELECT *
FROM
(SELECT toUInt16(0) AS total,
ticks.day_start as day_start,
breakdown_value
FROM
(SELECT toStartOfDay(toDateTime('2020-01-06 07:00:00', 'US/Pacific')) - toIntervalDay(number) as day_start
FROM numbers(6)
UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 02:10:00', 'US/Pacific')) as day_start) as ticks
CROSS JOIN
(SELECT breakdown_value
FROM
(SELECT ['control', 'test'] as breakdown_value) ARRAY
JOIN breakdown_value) as sec
ORDER BY breakdown_value,
day_start
UNION ALL SELECT count(DISTINCT person_id) as total,
toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'US/Pacific')) as day_start,
breakdown_value
FROM
(SELECT person_id,
min(timestamp) as timestamp,
breakdown_value
FROM
(SELECT pdi.person_id as person_id, timestamp, transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['control', 'test']), (['control', 'test']), '$$_posthog_breakdown_other_$$') as breakdown_value
FROM events e
INNER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi ON events.distinct_id = pdi.distinct_id
WHERE e.team_id = 2
AND event = '$feature_flag_called'
AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', '')))
AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', ''))))
AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime('2020-01-01 02:10:00', 'US/Pacific')
AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2020-01-06 07:00:00', 'US/Pacific') )
GROUP BY person_id,
breakdown_value) AS pdi
GROUP BY day_start,
breakdown_value))
GROUP BY day_start,
breakdown_value
ORDER BY breakdown_value,
day_start)
GROUP BY breakdown_value
ORDER BY breakdown_value
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_out_of_timerange_timezone.5
'''
/* user_id:0 request:_snapshot_ */
@@ -1254,42 +1477,6 @@
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_with_hogql_filter.1
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_with_hogql_filter.2
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_with_hogql_filter.3
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_with_hogql_filter.4
'''
/* user_id:0 request:_snapshot_ */
SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '') AS value,
@@ -1307,6 +1494,133 @@
OFFSET 0
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_with_hogql_filter.2
'''
/* user_id:0 request:_snapshot_ */
SELECT groupArray(day_start) as date,
groupArray(count) AS total,
breakdown_value
FROM
(SELECT SUM(total) as count,
day_start,
breakdown_value
FROM
(SELECT *
FROM
(SELECT toUInt16(0) AS total,
ticks.day_start as day_start,
breakdown_value
FROM
(SELECT toStartOfDay(toDateTime('2020-01-06 00:00:00', 'UTC')) - toIntervalDay(number) as day_start
FROM numbers(6)
UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) as day_start) as ticks
CROSS JOIN
(SELECT breakdown_value
FROM
(SELECT ['test', 'control'] as breakdown_value) ARRAY
JOIN breakdown_value) as sec
ORDER BY breakdown_value,
day_start
UNION ALL SELECT count(*) as total,
toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start,
transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['test', 'control']), (['test', 'control']), '$$_posthog_breakdown_other_$$') as breakdown_value
FROM events e
WHERE e.team_id = 2
AND event = '$pageview'
AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', '')))
AND (ifNull(ilike(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'hogql'), ''), 'null'), '^"|"$', ''), 'true'), 0)))
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC')
GROUP BY day_start,
breakdown_value))
GROUP BY day_start,
breakdown_value
ORDER BY breakdown_value,
day_start)
GROUP BY breakdown_value
ORDER BY breakdown_value
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_with_hogql_filter.3
'''
/* user_id:0 request:_snapshot_ */
SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', '') AS value,
count(*) as count
FROM events e
WHERE team_id = 2
AND event = '$feature_flag_called'
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC')
AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', '')))
AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', ''))))
GROUP BY value
ORDER BY count DESC, value DESC
LIMIT 26
OFFSET 0
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_with_hogql_filter.4
'''
/* user_id:0 request:_snapshot_ */
SELECT groupArray(day_start) as date,
groupArray(count) AS total,
breakdown_value
FROM
(SELECT SUM(total) as count,
day_start,
breakdown_value
FROM
(SELECT *
FROM
(SELECT toUInt16(0) AS total,
ticks.day_start as day_start,
breakdown_value
FROM
(SELECT toStartOfDay(toDateTime('2020-01-06 00:00:00', 'UTC')) - toIntervalDay(number) as day_start
FROM numbers(6)
UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) as day_start) as ticks
CROSS JOIN
(SELECT breakdown_value
FROM
(SELECT ['control', 'test'] as breakdown_value) ARRAY
JOIN breakdown_value) as sec
ORDER BY breakdown_value,
day_start
UNION ALL SELECT count(DISTINCT person_id) as total,
toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start,
breakdown_value
FROM
(SELECT person_id,
min(timestamp) as timestamp,
breakdown_value
FROM
(SELECT pdi.person_id as person_id, timestamp, transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['control', 'test']), (['control', 'test']), '$$_posthog_breakdown_other_$$') as breakdown_value
FROM events e
INNER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi ON events.distinct_id = pdi.distinct_id
WHERE e.team_id = 2
AND event = '$feature_flag_called'
AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', '')))
AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', ''))))
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') )
GROUP BY person_id,
breakdown_value) AS pdi
GROUP BY day_start,
breakdown_value))
GROUP BY day_start,
breakdown_value
ORDER BY breakdown_value,
day_start)
GROUP BY breakdown_value
ORDER BY breakdown_value
'''
# ---
# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_with_hogql_filter.5
'''
/* user_id:0 request:_snapshot_ */

View File

@@ -1,347 +0,0 @@
from itertools import product
from unittest import mock
from uuid import uuid4
from dateutil.relativedelta import relativedelta
from django.utils.timezone import now
from freezegun import freeze_time
from parameterized import parameterized
from ee.clickhouse.materialized_columns.columns import materialize
from posthog.clickhouse.client import sync_execute
from posthog.models import Person
from posthog.models.filters import SessionRecordingsFilter
from posthog.schema import PersonsOnEventsMode
from posthog.session_recordings.queries.session_recording_list_from_replay_summary import (
SessionRecordingListFromReplaySummary,
)
from posthog.session_recordings.queries.test.session_replay_sql import produce_replay_summary
from posthog.session_recordings.sql.session_replay_event_sql import TRUNCATE_SESSION_REPLAY_EVENTS_TABLE_SQL
from posthog.test.base import (
APIBaseTest,
ClickhouseTestMixin,
QueryMatchingTest,
snapshot_clickhouse_queries,
_create_event,
)
@freeze_time("2021-01-01T13:46:23")
class TestClickhouseSessionRecordingsListFromSessionReplay(ClickhouseTestMixin, APIBaseTest, QueryMatchingTest):
def tearDown(self) -> None:
sync_execute(TRUNCATE_SESSION_REPLAY_EVENTS_TABLE_SQL())
@property
def base_time(self):
return (now() - relativedelta(hours=1)).replace(microsecond=0, second=0)
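# with time frozen above at 2021-01-01T13:46:23, base_time works out to 2021-01-01 12:46:00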
def create_event(
self,
distinct_id,
timestamp,
team=None,
event_name="$pageview",
properties=None,
):
if team is None:
team = self.team
if properties is None:
properties = {"$os": "Windows 95", "$current_url": "aloha.com/2"}
return _create_event(
team=team,
event=event_name,
timestamp=timestamp,
distinct_id=distinct_id,
properties=properties,
)
@parameterized.expand(
[
[
"test_poe_v1_still_falls_back_to_person_subquery",
True,
False,
False,
PersonsOnEventsMode.PERSON_ID_NO_OVERRIDE_PROPERTIES_ON_EVENTS,
{
"kperson_filter_pre__0": "rgInternal",
"kpersonquery_person_filter_fin__0": "rgInternal",
"person_uuid": None,
"vperson_filter_pre__0": ["false"],
"vpersonquery_person_filter_fin__0": ["false"],
},
True,
False,
],
[
"test_poe_being_unavailable_we_fall_back_to_person_subquery",
False,
False,
False,
PersonsOnEventsMode.DISABLED,
{
"kperson_filter_pre__0": "rgInternal",
"kpersonquery_person_filter_fin__0": "rgInternal",
"person_uuid": None,
"vperson_filter_pre__0": ["false"],
"vpersonquery_person_filter_fin__0": ["false"],
},
True,
False,
],
[
"test_allow_denormalised_props_fix_does_not_stop_all_poe_processing",
False,
True,
False,
PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS,
{
"event_names": [],
"event_start_time": mock.ANY,
"event_end_time": mock.ANY,
"kglobal_0": "rgInternal",
"vglobal_0": ["false"],
},
False,
True,
],
[
"test_poe_v2_available_person_properties_are_used_in_replay_listing",
False,
True,
True,
PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS,
{
"event_end_time": mock.ANY,
"event_names": [],
"event_start_time": mock.ANY,
"kglobal_0": "rgInternal",
"vglobal_0": ["false"],
},
False,
True,
],
]
)
def test_effect_of_poe_settings_on_query_generated(
self,
_name: str,
poe_v1: bool,
poe_v2: bool,
allow_denormalized_props: bool,
expected_poe_mode: PersonsOnEventsMode,
expected_query_params: dict,
unmaterialized_person_column_used: bool,
materialized_event_column_used: bool,
) -> None:
with self.settings(
PERSON_ON_EVENTS_OVERRIDE=poe_v1,
PERSON_ON_EVENTS_V2_OVERRIDE=poe_v2,
ALLOW_DENORMALIZED_PROPS_IN_LISTING=allow_denormalized_props,
):
assert self.team.person_on_events_mode == expected_poe_mode
materialize("events", "rgInternal", table_column="person_properties")
filter = SessionRecordingsFilter(
team=self.team,
data={
"properties": [
{
"key": "rgInternal",
"value": ["false"],
"operator": "exact",
"type": "person",
}
]
},
)
session_recording_list_instance = SessionRecordingListFromReplaySummary(filter=filter, team=self.team)
[generated_query, query_params] = session_recording_list_instance.get_query()
assert query_params == {
"clamped_to_storage_ttl": mock.ANY,
"end_time": mock.ANY,
"limit": 51,
"offset": 0,
"start_time": mock.ANY,
"team_id": self.team.id,
**expected_query_params,
}
json_extract_fragment = (
"has(%(vperson_filter_pre__0)s, replaceRegexpAll(JSONExtractRaw(properties, %(kperson_filter_pre__0)s)"
)
materialized_column_fragment = 'AND ( has(%(vglobal_0)s, "mat_pp_rgInternal"))'
# it will always have one of these fragments
assert (json_extract_fragment in generated_query) or (materialized_column_fragment in generated_query)
# the unmaterialized person column
assert (json_extract_fragment in generated_query) is unmaterialized_person_column_used
# materialized event column
assert (materialized_column_fragment in generated_query) is materialized_event_column_used
self.assertQueryMatchesSnapshot(generated_query)
settings_combinations = [
["poe v2 and materialized columns allowed", False, True, True],
["poe v2 and materialized columns off", False, True, False],
["poe off and materialized columns allowed", False, False, True],
["poe off and materialized columns not allowed", False, False, False],
["poe v1 and materialized columns allowed", True, False, True],
["poe v1 and not materialized columns not allowed", True, False, False],
]
# Options for "materialize person columns"
materialization_options = [
[" with materialization", True],
[" without materialization", False],
]
# Expand the parameter list to the product of all combinations with "materialize person columns"
# e.g. [a, b] x [c, d] = [a, c], [a, d], [b, c], [b, d]
test_case_combinations = [
[f"{name}{mat_option}", poe_v1, poe, mat_columns, mat_person]
for (name, poe_v1, poe, mat_columns), (mat_option, mat_person) in product(
settings_combinations, materialization_options
)
]
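# e.g. the first settings row and the first materialization option expand to
# ["poe v2 and materialized columns allowed with materialization", False, True, True, True]
# i.e. (_name, poe_v1, poe_v2, allow_denormalised_props, materialize_person_props)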
@parameterized.expand(test_case_combinations)
@snapshot_clickhouse_queries
def test_event_filter_with_person_properties_materialized(
self,
_name: str,
poe1_enabled: bool,
poe2_enabled: bool,
allow_denormalised_props: bool,
materialize_person_props: bool,
) -> None:
# KLUDGE: I couldn't figure out how to use @also_test_with_materialized_columns(person_properties=["email"])
# KLUDGE: and the parameterized.expand decorator at the same time, so we generate test case combos
# KLUDGE: for materialization on and off to test both sides the way the decorator would have
if materialize_person_props:
materialize("events", "email", table_column="person_properties")
materialize("person", "email")
with self.settings(
PERSON_ON_EVENTS_OVERRIDE=poe1_enabled,
PERSON_ON_EVENTS_V2_OVERRIDE=poe2_enabled,
ALLOW_DENORMALIZED_PROPS_IN_LISTING=allow_denormalised_props,
):
user_one = "test_event_filter_with_person_properties-user"
user_two = "test_event_filter_with_person_properties-user2"
session_id_one = f"test_event_filter_with_person_properties-1-{str(uuid4())}"
session_id_two = f"test_event_filter_with_person_properties-2-{str(uuid4())}"
Person.objects.create(team=self.team, distinct_ids=[user_one], properties={"email": "bla"})
Person.objects.create(team=self.team, distinct_ids=[user_two], properties={"email": "bla2"})
self._add_replay_with_pageview(session_id_one, user_one)
produce_replay_summary(
distinct_id=user_one,
session_id=session_id_one,
first_timestamp=(self.base_time + relativedelta(seconds=30)),
team_id=self.team.id,
)
self._add_replay_with_pageview(session_id_two, user_two)
produce_replay_summary(
distinct_id=user_two,
session_id=session_id_two,
first_timestamp=(self.base_time + relativedelta(seconds=30)),
team_id=self.team.id,
)
match_everyone_filter = SessionRecordingsFilter(
team=self.team,
data={"properties": []},
)
session_recording_list_instance = SessionRecordingListFromReplaySummary(
filter=match_everyone_filter, team=self.team
)
(session_recordings, _) = session_recording_list_instance.run()
assert sorted([x["session_id"] for x in session_recordings]) == sorted([session_id_one, session_id_two])
match_bla_filter = SessionRecordingsFilter(
team=self.team,
data={
"properties": [
{
"key": "email",
"value": ["bla"],
"operator": "exact",
"type": "person",
}
]
},
)
session_recording_list_instance = SessionRecordingListFromReplaySummary(
filter=match_bla_filter, team=self.team
)
(session_recordings, _) = session_recording_list_instance.run()
assert len(session_recordings) == 1
assert session_recordings[0]["session_id"] == session_id_one
def _add_replay_with_pageview(self, session_id: str, user_one):
self.create_event(
user_one,
self.base_time,
properties={"$session_id": session_id, "$window_id": str(uuid4())},
)
produce_replay_summary(
distinct_id=user_one,
session_id=session_id,
first_timestamp=self.base_time,
team_id=self.team.id,
)
@parameterized.expand(test_case_combinations)
@snapshot_clickhouse_queries
def test_person_id_filter(
self,
_name: str,
poe1_enabled: bool,
poe2_enabled: bool,
allow_denormalised_props: bool,
materialize_person_props: bool,
) -> None:
# KLUDGE: I couldn't figure out how to use @also_test_with_materialized_columns(person_properties=["email"])
# KLUDGE: and the parameterized.expand decorator at the same time, so we generate test case combos
# KLUDGE: for materialization on and off to test both sides the way the decorator would have
if materialize_person_props:
# it shouldn't matter to this test whether any column is materialized
# but let's keep the tests in this file similar so we flush out any unexpected interactions
materialize("events", "email", table_column="person_properties")
materialize("person", "email")
with self.settings(
PERSON_ON_EVENTS_OVERRIDE=poe1_enabled,
PERSON_ON_EVENTS_V2_OVERRIDE=poe2_enabled,
ALLOW_DENORMALIZED_PROPS_IN_LISTING=allow_denormalised_props,
):
three_user_ids = ["person-1-distinct-1", "person-1-distinct-2", "person-2"]
session_id_one = "test_person_id_filter-session-one"
session_id_two = "test_person_id_filter-session-two"
session_id_three = "test_person_id_filter-session-three"
p = Person.objects.create(
team=self.team,
distinct_ids=[three_user_ids[0], three_user_ids[1]],
properties={"email": "bla"},
)
Person.objects.create(
team=self.team,
distinct_ids=[three_user_ids[2]],
properties={"email": "bla2"},
)
self._add_replay_with_pageview(session_id_one, three_user_ids[0])
self._add_replay_with_pageview(session_id_two, three_user_ids[1])
self._add_replay_with_pageview(session_id_three, three_user_ids[2])
filter = SessionRecordingsFilter(team=self.team, data={"person_uuid": str(p.uuid)})
session_recording_list_instance = SessionRecordingListFromReplaySummary(filter=filter, team=self.team)
(session_recordings, _) = session_recording_list_instance.run()
assert sorted([r["session_id"] for r in session_recordings]) == sorted([session_id_two, session_id_one])

View File

@@ -194,7 +194,6 @@ export const FEATURE_FLAGS = {
HEATMAPS_UI: 'heatmaps-ui', // owner: @benjackwhite
THEME: 'theme', // owner: @aprilfools
PLUGINS_FILTERING: 'plugins-filtering', // owner: @benjackwhite
SESSION_REPLAY_HOG_QL_FILTERING: 'session-replay-hogql-filtering', // owner: #team-replay
INSIGHT_LOADING_BAR: 'insight-loading-bar', // owner: @aspicer
PROXY_AS_A_SERVICE: 'proxy-as-a-service', // owner: #team-infrastructure
LIVE_EVENTS: 'live-events', // owner: @zach or @jams

View File

@@ -4,9 +4,7 @@ import { DateFilter } from 'lib/components/DateFilter/DateFilter'
import { PropertyFilters } from 'lib/components/PropertyFilters/PropertyFilters'
import { TaxonomicFilterGroupType } from 'lib/components/TaxonomicFilter/types'
import { TestAccountFilterSwitch } from 'lib/components/TestAccountFiltersSwitch'
import { FEATURE_FLAGS } from 'lib/constants'
import { LemonLabel } from 'lib/lemon-ui/LemonLabel/LemonLabel'
import { featureFlagLogic } from 'lib/logic/featureFlagLogic'
import { ActionFilter } from 'scenes/insights/filters/ActionFilter/ActionFilter'
import { MathAvailability } from 'scenes/insights/filters/ActionFilter/ActionFilterRow/ActionFilterRow'
import { defaultRecordingDurationFilter } from 'scenes/session-recordings/playlist/sessionRecordingsPlaylistLogic'
@@ -74,8 +72,6 @@ }): JSX.Element => {
}): JSX.Element => {
const { groupsTaxonomicTypes } = useValues(groupsModel)
const { featureFlags } = useValues(featureFlagLogic)
const allowedPropertyTaxonomyTypes = [
TaxonomicFilterGroupType.EventProperties,
TaxonomicFilterGroupType.EventFeatureFlags,
@@ -84,16 +80,10 @@ ...groupsTaxonomicTypes,
...groupsTaxonomicTypes,
]
const hasHogQLFiltering = featureFlags[FEATURE_FLAGS.SESSION_REPLAY_HOG_QL_FILTERING]
if (hasHogQLFiltering) {
allowedPropertyTaxonomyTypes.push(TaxonomicFilterGroupType.SessionProperties)
}
allowedPropertyTaxonomyTypes.push(TaxonomicFilterGroupType.SessionProperties)
const addFilterTaxonomyTypes = [TaxonomicFilterGroupType.PersonProperties, TaxonomicFilterGroupType.Cohorts]
if (hasHogQLFiltering) {
addFilterTaxonomyTypes.push(TaxonomicFilterGroupType.SessionProperties)
}
addFilterTaxonomyTypes.push(TaxonomicFilterGroupType.SessionProperties)
return (
<div className="space-y-2 bg-light p-3">
@@ -125,15 +115,9 @@ buttonProps={{ type: 'secondary', size: 'small' }}
buttonProps={{ type: 'secondary', size: 'small' }}
/>
{hasHogQLFiltering ? (
<LemonLabel info="Show recordings by persons, cohorts, and more that match the set criteria">
Properties
</LemonLabel>
) : (
<LemonLabel info="Show recordings by persons who match the set criteria">
Persons and cohorts
</LemonLabel>
)}
<LemonLabel info="Show recordings by persons, cohorts, and more that match the set criteria">
Properties
</LemonLabel>
<TestAccountFilterSwitch
checked={filters.filter_test_accounts ?? false}
@@ -144,7 +128,7 @@ {showPropertyFilters && (
{showPropertyFilters && (
<PropertyFilters
pageKey="session-recordings"
buttonText={hasHogQLFiltering ? 'Add filter' : 'Person or cohort'}
buttonText="Add filter"
taxonomicGroupTypes={addFilterTaxonomyTypes}
propertyFilters={filters.properties}
onChange={(properties) => {

View File

@@ -287,7 +287,6 @@ export const sessionRecordingsPlaylistLogic = kea<sessionRecordingsPlaylistLogic
person_uuid: props.personUUID ?? '',
target_entity_order: values.orderBy,
limit: RECORDINGS_LIMIT,
hog_ql_filtering: values.useHogQLFiltering,
}
if (values.orderBy === 'start_time') {
@@ -570,10 +569,6 @@ },
},
})),
selectors({
useHogQLFiltering: [
(s) => [s.featureFlags],
(featureFlags) => !!featureFlags[FEATURE_FLAGS.SESSION_REPLAY_HOG_QL_FILTERING],
],
useUniversalFiltering: [
(s) => [s.featureFlags],
(featureFlags) => !!featureFlags[FEATURE_FLAGS.SESSION_REPLAY_UNIVERSAL_FILTERS],

View File

@@ -378,10 +378,6 @@ posthog/tasks/exports/test/test_export_utils.py:0: error: Function is missing a
posthog/tasks/exports/test/test_csv_exporter_url_sanitising.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
posthog/tasks/exports/test/test_csv_exporter_url_sanitising.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
posthog/tasks/exports/test/test_csv_exporter_renders.py:0: error: Function is missing a type annotation [no-untyped-def]
posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: error: Missing return statement [empty-body]
posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: note: If the method is meant to be abstract, use @abc.abstractmethod
posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: error: Missing return statement [empty-body]
posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: note: If the method is meant to be abstract, use @abc.abstractmethod
posthog/hogql_queries/test/test_query_runner.py:0: error: Variable "TestQueryRunner" is not valid as a type [valid-type]
posthog/hogql_queries/test/test_query_runner.py:0: note: See https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases
posthog/hogql_queries/test/test_query_runner.py:0: error: Invalid base class "TestQueryRunner" [misc]

View File

@@ -62,7 +62,8 @@
"mobile-replay:web:schema:build:json": "ts-json-schema-generator -f tsconfig.json --path 'node_modules/@rrweb/types/dist/index.d.ts' --type 'eventWithTime' --expose all --no-top-ref --out ee/frontend/mobile-replay/schema/web/rr-web-schema.json && prettier --write ee/frontend/mobile-replay/schema/web/rr-web-schema.json",
"mobile-replay:mobile:schema:build:json": "ts-json-schema-generator -f tsconfig.json --path 'ee/frontend/mobile-replay/mobile.types.ts' --type 'mobileEventWithTime' --expose all --no-top-ref --out ee/frontend/mobile-replay/schema/mobile/rr-mobile-schema.json && prettier --write ee/frontend/mobile-replay/schema/mobile/rr-mobile-schema.json",
"mobile-replay:schema:build:json": "pnpm mobile-replay:web:schema:build:json && pnpm mobile-replay:mobile:schema:build:json",
"visualize-toolbar-bundle": "pnpm exec esbuild-visualizer --metadata ./toolbar-esbuild-meta.json --filename=toolbar-esbuild-bundle-visualization.html"
"visualize-toolbar-bundle": "pnpm exec esbuild-visualizer --metadata ./toolbar-esbuild-meta.json --filename=toolbar-esbuild-bundle-visualization.html",
"mypy-baseline-sync": "mypy -p posthog | mypy-baseline sync"
},
"dependencies": {
"@ant-design/icons": "^4.7.0",

View File

@@ -2,9 +2,9 @@ from collections import Counter
from typing import Literal, Optional
from collections import Counter as TCounter
from posthog.constants import AUTOCAPTURE_EVENT, TREND_FILTER_TYPE_ACTIONS
from posthog.constants import AUTOCAPTURE_EVENT
from posthog.hogql.hogql import HogQLContext
from posthog.models import Entity, Filter
from posthog.models import Filter
from posthog.models.action import Action
from posthog.models.action.action import ActionStepJSON
from posthog.models.property import Property, PropertyIdentifier
@@ -149,35 +149,6 @@ return conditions, params
return conditions, params
def format_entity_filter(
team_id: int,
entity: Entity,
hogql_context: HogQLContext,
person_id_joined_alias: str,
prepend: str = "action",
filter_by_team=True,
) -> tuple[str, dict]:
if entity.type == TREND_FILTER_TYPE_ACTIONS:
action = entity.get_action()
entity_filter, params = format_action_filter(
team_id=team_id,
action=action,
prepend=prepend,
filter_by_team=filter_by_team,
person_id_joined_alias=person_id_joined_alias,
hogql_context=hogql_context,
)
elif entity.id is None:
entity_filter = "1 = 1"
params = {}
else:
key = f"{prepend}_event"
entity_filter = f"event = %({key})s"
params = {key: entity.id}
return entity_filter, params
def get_action_tables_and_properties(action: Action) -> TCounter[PropertyIdentifier]:
from posthog.models.property.util import extract_tables_and_properties

View File

@@ -6,6 +6,7 @@ from posthog.hogql import ast
from posthog.hogql.ast import Constant, CompareOperation
from posthog.hogql.parser import parse_select
from posthog.hogql.property import entity_to_expr, property_to_expr
from posthog.hogql.query import execute_hogql_query
from posthog.hogql_queries.insights.paginators import HogQLHasMorePaginator
from posthog.models import Team, Property
from posthog.models.filters.session_recordings_filter import SessionRecordingsFilter
@@ -152,7 +153,8 @@ class SessionRecordingListFromFilters:
if person_id_compare_operation:
exprs.append(person_id_compare_operation)
if self._filter.session_ids:
# we check for session_ids type not for truthiness since we want to allow empty lists
if isinstance(self._filter.session_ids, list):
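# e.g. session_ids=[] still adds the IN clause (matching no sessions), while session_ids=None adds no session-id condition at all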
exprs.append(
ast.CompareOperation(
op=ast.CompareOperationOp.In,
@@ -181,7 +183,7 @@ optional_exprs: list[ast.Expr] = []
optional_exprs: list[ast.Expr] = []
# if in PoE mode then we should be pushing person property queries into here
events_sub_query = EventsSubQuery(self._team, self._filter, self.ttl_days).get_query()
events_sub_query = ReplayFiltersEventsSubQuery(self._team, self._filter).get_query_for_session_id_matching()
if events_sub_query:
optional_exprs.append(
ast.CompareOperation(
@@ -193,7 +195,7 @@
# we want to avoid a join to persons since we don't ever need to select from them,
# so we create our own persons sub query here
# if PoE mode is on then this will be handled in the events subquery and we don't need to do anything here
# if PoE mode is on then this will be handled in the events subquery, and we don't need to do anything here
person_subquery = PersonsPropertiesSubQuery(self._team, self._filter, self.ttl_days).get_query()
if person_subquery:
optional_exprs.append(
@@ -426,15 +428,23 @@ )
)
class EventsSubQuery:
class ReplayFiltersEventsSubQuery:
_team: Team
_filter: SessionRecordingsFilter
_ttl_days: int
def __init__(self, team: Team, filter: SessionRecordingsFilter, ttl_days: int):
@property
def ttl_days(self):
return ttl_days(self._team)
def __init__(
self,
team: Team,
filter: SessionRecordingsFilter,
hogql_query_modifiers: Optional[HogQLQueryModifiers] = None,
):
self._team = team
self._filter = filter
self._ttl_days = ttl_days
self._hogql_query_modifiers = hogql_query_modifiers
@cached_property
def _event_predicates(self):
@@ -459,19 +469,43 @@
return event_exprs, list(event_names)
def get_query(self) -> ast.SelectQuery | ast.SelectUnionQuery | None:
def _select_from_events(self, select_expr: ast.Expr) -> ast.SelectQuery:
return ast.SelectQuery(
select=[select_expr],
select_from=ast.JoinExpr(table=ast.Field(chain=["events"])),
where=self._where_predicates(),
having=self._having_predicates(),
group_by=[ast.Field(chain=["$session_id"])],
)
def get_query_for_session_id_matching(self) -> ast.SelectQuery | ast.SelectUnionQuery | None:
use_poe = poe_is_active(self._team) and self.person_properties
if self._filter.entities or self.event_properties or use_poe:
return ast.SelectQuery(
select=[ast.Alias(alias="session_id", expr=ast.Field(chain=["$session_id"]))],
select_from=ast.JoinExpr(table=ast.Field(chain=["events"])),
where=self._where_predicates(),
having=self._having_predicates(),
group_by=[ast.Field(chain=["$session_id"])],
)
return self._select_from_events(ast.Alias(alias="session_id", expr=ast.Field(chain=["$session_id"])))
else:
return None
def get_query_for_event_id_matching(self) -> ast.SelectQuery | ast.SelectUnionQuery:
return self._select_from_events(ast.Call(name="groupUniqArray", args=[ast.Field(chain=["uuid"])]))
def get_event_ids_for_session(self) -> SessionRecordingQueryResult:
query = self.get_query_for_event_id_matching()
hogql_query_response = execute_hogql_query(
query=query,
team=self._team,
query_type="SessionRecordingMatchingEventsForSessionQuery",
modifiers=self._hogql_query_modifiers,
)
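# each result row holds one session's groupUniqArray(uuid) array; flatten them into a single list of id strings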
flattened_results = [str(uuid) for row in hogql_query_response.results for uuid in row[0]]
return SessionRecordingQueryResult(
results=flattened_results,
has_more_recording=False,
timings=hogql_query_response.timings,
)
def _where_predicates(self) -> ast.Expr:
exprs: list[ast.Expr] = [
ast.Call(
@@ -482,7 +516,7 @@ ast.CompareOperation(
ast.CompareOperation(
op=ast.CompareOperationOp.GtEq,
left=ast.Field(chain=["timestamp"]),
right=ast.Constant(value=datetime.now() - timedelta(days=self._ttl_days)),
right=ast.Constant(value=datetime.now() - timedelta(days=self.ttl_days)),
),
ast.CompareOperation(
op=ast.CompareOperationOp.LtEq,

View File

@@ -1,785 +0,0 @@
import dataclasses
import re
from datetime import datetime, timedelta
from typing import Any, Literal, NamedTuple, Union
from django.conf import settings
from sentry_sdk import capture_exception
from posthog.client import sync_execute
from posthog.constants import TREND_FILTER_TYPE_ACTIONS, PropertyOperatorType
from posthog.models import Entity, Team
from posthog.models.action.util import format_entity_filter
from posthog.models.filters.mixins.utils import cached_property
from posthog.models.filters.session_recordings_filter import SessionRecordingsFilter
from posthog.models.property import PropertyGroup
from posthog.models.property.util import parse_prop_grouped_clauses
from posthog.models.team import PersonsOnEventsMode
from posthog.queries.event_query import EventQuery
from posthog.queries.util import PersonPropertiesMode
from posthog.session_recordings.queries.session_replay_events import ttl_days
@dataclasses.dataclass(frozen=True)
class SummaryEventFiltersSQL:
having_conditions: str
having_select: str
where_conditions: str
params: dict[str, Any]
class SessionRecordingQueryResult(NamedTuple):
results: list
has_more_recording: bool
def _get_recording_start_time_clause(recording_filters: SessionRecordingsFilter) -> tuple[str, dict[str, Any]]:
start_time_clause = ""
start_time_params = {}
if recording_filters.date_from:
start_time_clause += "\nAND start_time >= %(start_time)s"
start_time_params["start_time"] = recording_filters.date_from
if recording_filters.date_to:
start_time_clause += "\nAND start_time <= %(end_time)s"
start_time_params["end_time"] = recording_filters.date_to
return start_time_clause, start_time_params
def _get_order_by_clause(filter_order: str | None) -> str:
order_by = filter_order or "start_time"
return f"ORDER BY {order_by} DESC"
def _get_filter_by_log_text_session_ids_clause(
team: Team, recording_filters: SessionRecordingsFilter, column_name="session_id"
) -> tuple[str, dict[str, Any]]:
if not recording_filters.console_search_query:
return "", {}
log_query, log_params = LogQuery(team=team, filter=recording_filters).get_query()
# we return this _even_ if there are no matching ids since if there are no matching ids
# then no sessions can match...
# sorted so that snapshots are consistent
return f'AND "{column_name}" in ({log_query}) as log_text_matching', log_params
def _get_filter_by_provided_session_ids_clause(
recording_filters: SessionRecordingsFilter, column_name="session_id"
) -> tuple[str, dict[str, Any]]:
if recording_filters.session_ids is None:
return "", {}
return f'AND "{column_name}" in %(session_ids)s', {"session_ids": recording_filters.session_ids}
class LogQuery:
_filter: SessionRecordingsFilter
_team_id: int
_team: Team
def __init__(
self,
team: Team,
filter: SessionRecordingsFilter,
):
self._filter = filter
self._team = team
self._team_id = team.pk
_rawQuery = """
SELECT distinct log_source_id as session_id
FROM log_entries
PREWHERE team_id = %(team_id)s
-- regardless of what other filters are applied
-- limit by storage TTL
AND timestamp >= %(clamped_to_storage_ttl)s
-- make sure we don't get the occasional unexpected future event
AND timestamp <= now()
-- and then any time filter for the events query
{events_timestamp_clause}
WHERE 1=1
{console_log_clause}
AND positionCaseInsensitive(message, %(console_search_query)s) > 0
"""
@property
def ttl_days(self):
return ttl_days(self._team)
@cached_property
def _get_events_timestamp_clause(self) -> tuple[str, dict[str, Any]]:
timestamp_clause = ""
timestamp_params = {}
if self._filter.date_from:
timestamp_clause += "\nAND timestamp >= %(event_start_time)s"
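# reach slightly beyond the filter range, since a recording can span the time boundaries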
timestamp_params["event_start_time"] = self._filter.date_from - timedelta(minutes=2)
if self._filter.date_to:
timestamp_clause += "\nAND timestamp <= %(event_end_time)s"
timestamp_params["event_end_time"] = self._filter.date_to
return timestamp_clause, timestamp_params
@staticmethod
def _get_console_log_clause(
console_logs_filter: list[Literal["error", "warn", "info"]],
) -> tuple[str, dict[str, Any]]:
return (
(
"AND level in %(console_logs_levels)s",
{"console_logs_levels": console_logs_filter},
)
if console_logs_filter
else ("", {})
)
def get_query(self) -> tuple[str, dict]:
if not self._filter.console_search_query:
return "", {}
(
events_timestamp_clause,
events_timestamp_params,
) = self._get_events_timestamp_clause
console_log_clause, console_log_params = self._get_console_log_clause(self._filter.console_logs_filter)
return self._rawQuery.format(
events_timestamp_clause=events_timestamp_clause,
console_log_clause=console_log_clause,
), {
"team_id": self._team_id,
"clamped_to_storage_ttl": (datetime.now() - timedelta(days=self.ttl_days)),
"console_search_query": self._filter.console_search_query,
**events_timestamp_params,
**console_log_params,
}
class ActorsQuery(EventQuery):
_filter: SessionRecordingsFilter
def __init__(
self,
team: Team,
**kwargs,
):
person_on_events_mode = team.person_on_events_mode
super().__init__(
**kwargs,
team=team,
person_on_events_mode=person_on_events_mode,
)
# we have to implement this from EventQuery but don't need it
def _determine_should_join_distinct_ids(self) -> None:
pass
# we have to implement this from EventQuery but don't need it
def _data_to_return(self, results: list[Any]) -> list[dict[str, Any]]:
pass
_raw_persons_query = """
SELECT distinct_id, argMax(person_id, version) as current_person_id
{select_person_props}
FROM person_distinct_id2 as pdi
{filter_persons_clause}
WHERE team_id = %(team_id)s
{prop_filter_clause}
{all_distinct_ids_that_might_match_a_person}
GROUP BY distinct_id
HAVING
argMax(is_deleted, version) = 0
{prop_having_clause}
{filter_by_person_uuid_condition}
"""
def get_query(self) -> tuple[str, dict[str, Any]]:
# we don't support PoE V1 - hopefully that's ok
if self._person_on_events_mode == PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS:
return "", {}
prop_query, prop_params = self._get_prop_groups(
PropertyGroup(
type=PropertyOperatorType.AND,
values=[g for g in self._filter.property_groups.flat if g.type == "person" or "cohort" in g.type],
),
person_id_joined_alias=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id",
)
# hogql person props queries rely on an aggregated column and so have to go in the having clause
# not the where clause
having_prop_query, having_prop_params = self._get_prop_groups(
PropertyGroup(
type=PropertyOperatorType.AND,
values=[
g for g in self._filter.property_groups.flat if g.type == "hogql" and "person.properties" in g.key
],
),
person_id_joined_alias=f"{self.DISTINCT_ID_TABLE_ALIAS}.current_person_id",
)
person_query, person_query_params = self._get_person_query()
should_join_persons = self._filter.person_uuid or person_query
if not should_join_persons:
return "", {}
else:
filter_persons_clause = person_query or ""
filter_by_person_uuid_condition = (
"and current_person_id = %(person_uuid)s" if self._filter.person_uuid else ""
)
all_distinct_ids_that_might_match_a_person = (
"""
AND distinct_id IN (
SELECT distinct_id
FROM person_distinct_id2
WHERE team_id = %(team_id)s
AND person_id = %(person_uuid)s) as all_distinct_ids_that_might_match_a_person
"""
if self._filter.person_uuid
else ""
)
return self._raw_persons_query.format(
filter_persons_clause=filter_persons_clause,
select_person_props=(
", argMax(person_props, version) as person_props" if "person_props" in filter_persons_clause else ""
),
prop_filter_clause=prop_query,
prop_having_clause=having_prop_query,
filter_by_person_uuid_condition=filter_by_person_uuid_condition,
all_distinct_ids_that_might_match_a_person=all_distinct_ids_that_might_match_a_person,
), {
"team_id": self._team_id,
**person_query_params,
"person_uuid": self._filter.person_uuid,
**prop_params,
**having_prop_params,
}
class SessionIdEventsQuery(EventQuery):
_filter: SessionRecordingsFilter
def __init__(
self,
team: Team,
**kwargs,
):
person_on_events_mode = team.person_on_events_mode
super().__init__(
**kwargs,
team=team,
person_on_events_mode=person_on_events_mode,
)
# we have to implement this from EventQuery but don't need it
def _determine_should_join_distinct_ids(self) -> None:
pass
# we have to implement this from EventQuery but don't need it
def _data_to_return(self, results: list[Any]) -> list[dict[str, Any]]:
pass
def _determine_should_join_events(self):
filters_by_event_or_action = self._filter.entities and len(self._filter.entities) > 0
# for e.g. test account filters might have event properties without having an event or action filter
has_event_property_filters = (
len(
[
pg
for pg in self._filter.property_groups.flat
# match when it is an event property filter
# or if its hogql and the key contains "properties." but not "person.properties."
# it's ok to match if there's both "properties." and "person.properties." in the key
# but not when its only "person.properties."
if pg.type == "event" or pg.type == "hogql" and re.search(r"(?<!person\.)properties\.", pg.key)
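# e.g. a key of "properties.$browser" matches, while "person.properties.email" alone does not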
]
)
> 0
)
has_poe_filters = (
self._person_on_events_mode == PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS
and len(
[
pg
for pg in self._filter.property_groups.flat
if pg.type == "person" or (pg.type == "hogql" and "person.properties" in pg.key)
]
)
> 0
)
has_poe_person_filter = (
self._person_on_events_mode == PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS
and self._filter.person_uuid
)
return filters_by_event_or_action or has_event_property_filters or has_poe_filters or has_poe_person_filter
@property
def ttl_days(self):
return ttl_days(self._team)
_raw_events_query = """
{context_comment}
SELECT
{select_event_ids}
{event_filter_having_events_select}
`$session_id`
FROM events e
{groups_query}
-- sometimes we have to join on persons so we can access e.g. person_props in filters
{persons_join}
PREWHERE
team_id = %(team_id)s
-- regardless of what other filters are applied
-- limit by storage TTL
AND e.timestamp >= %(clamped_to_storage_ttl)s
-- make sure we don't get the occasional unexpected future event
AND e.timestamp <= now()
-- and then any time filter for the events query
{events_timestamp_clause}
WHERE
notEmpty(`$session_id`)
{event_filter_where_conditions}
{prop_filter_clause}
{provided_session_ids_clause}
-- other times we can check distinct id against a sub query which should be faster than joining
{persons_sub_query}
GROUP BY `$session_id`
HAVING 1=1 {event_filter_having_events_condition}
"""
def format_event_filter(self, entity: Entity, prepend: str, team_id: int) -> tuple[str, dict[str, Any]]:
filter_sql, params = format_entity_filter(
team_id=team_id,
entity=entity,
prepend=prepend,
filter_by_team=False,
person_id_joined_alias=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id",
hogql_context=self._filter.hogql_context,
)
filters, filter_params = parse_prop_grouped_clauses(
team_id=team_id,
property_group=entity.property_groups,
prepend=prepend,
allow_denormalized_props=True,
has_person_id_joined=True,
person_properties_mode=(
PersonPropertiesMode.DIRECT_ON_EVENTS_WITH_POE_V2
if self._person_on_events_mode == PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS
else PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN
),
hogql_context=self._filter.hogql_context,
)
filter_sql += f" {filters}"
params = {**params, **filter_params}
return filter_sql, params
@cached_property
def build_event_filters(self) -> SummaryEventFiltersSQL:
event_names_to_filter: list[Union[int, str]] = []
params: dict = {}
condition_sql = ""
for index, entity in enumerate(self._filter.entities):
if entity.type == TREND_FILTER_TYPE_ACTIONS:
action = entity.get_action()
# NOTE: Do we need a short circuit here for "none" - i.e. all events?
event_names_to_filter.extend(
[ae for ae in action.get_step_events() if ae and ae not in event_names_to_filter]
)
else:
if entity.id and entity.id not in event_names_to_filter:
event_names_to_filter.append(entity.id)
(
this_entity_condition_sql,
this_entity_filter_params,
) = self.format_event_filter(entity, prepend=f"event_matcher_{index}", team_id=self._team_id)
joining = "OR" if index > 0 else ""
condition_sql += f"{joining} {this_entity_condition_sql}"
            # wrap in parentheses to constrain the scope of the OR
condition_sql = f"( {condition_sql} )"
params = {**params, **this_entity_filter_params}
params = {**params, "event_names": list(event_names_to_filter)}
if len(event_names_to_filter) == 0:
# using "All events"
having_conditions = ""
having_select = ""
else:
having_conditions = "AND hasAll(event_names, %(event_names)s)"
having_select = """
-- select the unique events in this session to support filtering sessions by presence of an event
groupUniqArray(event) as event_names,"""
if self._person_on_events_mode == PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS:
person_id_clause, person_id_params = self._get_person_id_clause
condition_sql += person_id_clause
params = {**params, **person_id_params}
condition_sql = (
f" AND {condition_sql}" if condition_sql and not condition_sql.startswith("AND") else condition_sql
)
return SummaryEventFiltersSQL(
having_conditions=having_conditions,
having_select=having_select,
where_conditions=f"{condition_sql}" if condition_sql else "",
params=params,
)
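    @staticmethod
    def _illustrate_has_all_filtering() -> None:
        # Illustrative sketch only: the hasAll(event_names, ...) HAVING clause
        # built above keeps a session only when it contains every filtered
        # event, i.e. a superset check (hypothetical event names).
        session_event_names = {"$pageview", "$autocapture", "purchase"}
        assert {"$pageview", "purchase"}.issubset(session_event_names)  # session kept
        assert not {"$pageview", "signup"}.issubset(session_event_names)  # session dropped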
def _get_groups_query(self) -> tuple[str, dict]:
try:
from ee.clickhouse.queries.groups_join_query import GroupsJoinQuery
except ImportError:
# if EE not available then we use a no-op version
from posthog.queries.groups_join_query import GroupsJoinQuery
return GroupsJoinQuery(
self._filter,
self._team_id,
self._column_optimizer,
person_on_events_mode=self._person_on_events_mode,
).get_join_query()
# We want to select events beyond the range of the recording to handle the case where
# a recording spans the time boundaries
@cached_property
def _get_events_timestamp_clause(self) -> tuple[str, dict[str, Any]]:
timestamp_clause = ""
timestamp_params = {}
if self._filter.date_from:
timestamp_clause += "\nAND timestamp >= %(event_start_time)s"
timestamp_params["event_start_time"] = self._filter.date_from - timedelta(hours=12)
if self._filter.date_to:
timestamp_clause += "\nAND timestamp <= %(event_end_time)s"
timestamp_params["event_end_time"] = self._filter.date_to + timedelta(hours=12)
return timestamp_clause, timestamp_params
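    @staticmethod
    def _illustrate_expanded_timestamp_window() -> None:
        # Illustrative sketch only: the 12-hour padding above means an event
        # shortly before the queried range still lands inside the window, so a
        # recording that straddles the boundary is not missed (hypothetical dates).
        from datetime import datetime, timedelta

        date_from = datetime(2024, 1, 2)
        event_start_time = date_from - timedelta(hours=12)
        event_ts = date_from - timedelta(hours=1)  # event just before the range
        assert event_start_time <= event_ts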
def get_query(self, select_event_ids: bool = False) -> tuple[str, dict[str, Any]]:
if not self._determine_should_join_events():
return "", {}
base_params = {
"team_id": self._team_id,
"clamped_to_storage_ttl": (datetime.now() - timedelta(days=self.ttl_days)),
}
_, recording_start_time_params = _get_recording_start_time_clause(self._filter)
(
provided_session_ids_clause,
provided_session_ids_params,
) = _get_filter_by_provided_session_ids_clause(recording_filters=self._filter, column_name="$session_id")
event_filters = self.build_event_filters
event_filters_params = event_filters.params
(
events_timestamp_clause,
events_timestamp_params,
) = self._get_events_timestamp_clause
groups_query, groups_params = self._get_groups_query()
# these will be applied to the events table,
# so we only want property filters that make sense in that context
prop_query, prop_params = self._get_prop_groups(
PropertyGroup(
type=PropertyOperatorType.AND,
values=[
g
for g in self._filter.property_groups.flat
if (
self._person_on_events_mode == PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS
and g.type == "person"
)
or (
(g.type == "hogql" and "person.properties" not in g.key)
or (g.type != "hogql" and "cohort" not in g.type and g.type != "person")
)
],
),
person_id_joined_alias=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id",
# TRICKY: we saw unusual memory usage behavior in EU clickhouse cluster
# when allowing use of denormalized properties in this query
# it is likely this can be returned to the default of True in future
# but would need careful monitoring
allow_denormalized_props=settings.ALLOW_DENORMALIZED_PROPS_IN_LISTING,
person_properties_mode=(
PersonPropertiesMode.DIRECT_ON_EVENTS_WITH_POE_V2
if self._person_on_events_mode == PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS
else PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN
),
)
(
persons_join,
persons_select_params,
persons_sub_query,
) = self._persons_join_or_subquery(event_filters, prop_query)
return (
self._raw_events_query.format(
select_event_ids="groupArray(uuid) as event_ids," if select_event_ids else "",
event_filter_where_conditions=event_filters.where_conditions,
event_filter_having_events_condition=event_filters.having_conditions,
event_filter_having_events_select=event_filters.having_select,
events_timestamp_clause=events_timestamp_clause,
prop_filter_clause=prop_query,
provided_session_ids_clause=provided_session_ids_clause,
persons_join=persons_join,
persons_sub_query=persons_sub_query,
groups_query=groups_query,
context_comment=f"-- running in PoE Mode: {self._person_on_events_mode}",
),
{
**base_params,
**recording_start_time_params,
**provided_session_ids_params,
**events_timestamp_params,
**event_filters_params,
**prop_params,
**persons_select_params,
**groups_params,
},
)
def _persons_join_or_subquery(self, event_filters, prop_query):
persons_select, persons_select_params = ActorsQuery(filter=self._filter, team=self._team).get_query()
persons_join = ""
persons_sub_query = ""
if persons_select:
# we want to join as infrequently as possible so only join if there are filters that expect it
if (
"person_props" in prop_query
or "pdi.person_id" in prop_query
or "person_props" in event_filters.where_conditions
):
persons_join = f"JOIN ({persons_select}) as pdi on pdi.distinct_id = e.distinct_id"
else:
persons_sub_query = (
f"AND e.distinct_id in (select distinct_id from ({persons_select}) as events_persons_sub_query)"
)
return persons_join, persons_select_params, persons_sub_query
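    @staticmethod
    def _illustrate_join_or_subquery_choice() -> None:
        # Illustrative sketch only: the JOIN path is taken only when the
        # generated SQL actually references person columns; otherwise the
        # cheaper distinct_id IN (...) sub query is used (hypothetical fragments).
        prop_query = "AND argMax(person_props, version) ILIKE %(p0)s"
        assert "person_props" in prop_query  # -> JOIN path
        assert "person_props" not in "AND event = '$pageview'"  # -> sub query path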
@cached_property
def _get_person_id_clause(self) -> tuple[str, dict[str, Any]]:
person_id_clause = ""
person_id_params = {}
if self._filter.person_uuid:
person_id_clause = "AND person_id = %(person_uuid)s"
person_id_params = {"person_uuid": self._filter.person_uuid}
return person_id_clause, person_id_params
def matching_events(self) -> list[str]:
self._filter.hogql_context.modifiers.personsOnEventsMode = self._person_on_events_mode
query, query_params = self.get_query(select_event_ids=True)
query_results = sync_execute(query, {**query_params, **self._filter.hogql_context.values})
results = [row[0] for row in query_results]
# flatten and return results
return [item for sublist in results for item in sublist]
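    @staticmethod
    def _illustrate_matching_events_flattening() -> None:
        # Illustrative sketch only: each ClickHouse row holds the groupArray of
        # event ids for one session; the comprehension above flattens those
        # per-session lists into a single list (hypothetical ids).
        results = [["uuid-1", "uuid-2"], ["uuid-3"]]
        assert [item for sublist in results for item in sublist] == ["uuid-1", "uuid-2", "uuid-3"]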
class SessionRecordingListFromReplaySummary(EventQuery):
# we have to implement this from EventQuery but don't need it
def _determine_should_join_distinct_ids(self) -> None:
pass
_filter: SessionRecordingsFilter
SESSION_RECORDINGS_DEFAULT_LIMIT = 50
def __init__(
self,
        team: Team,
**kwargs,
):
person_on_events_mode = team.person_on_events_mode
super().__init__(
**kwargs,
team=team,
person_on_events_mode=person_on_events_mode,
)
@property
def ttl_days(self):
return ttl_days(self._team)
_session_recordings_query: str = """
{context_comment}
SELECT
s.session_id,
any(s.team_id),
any(s.distinct_id),
min(s.min_first_timestamp) as start_time,
max(s.max_last_timestamp) as end_time,
dateDiff('SECOND', start_time, end_time) as duration,
argMinMerge(s.first_url) as first_url,
sum(s.click_count),
sum(s.keypress_count),
sum(s.mouse_activity_count),
sum(s.active_milliseconds)/1000 as active_seconds,
duration-active_seconds as inactive_seconds,
sum(s.console_log_count) as console_log_count,
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
WHERE s.team_id = %(team_id)s
-- regardless of what other filters are applied
-- limit by storage TTL
AND s.min_first_timestamp >= %(clamped_to_storage_ttl)s
-- we can filter on the pre-aggregated timestamp columns
            -- because every per-row min value is at least the session's true start,
            -- and every per-row max value is at most the session's true end
AND s.min_first_timestamp >= %(start_time)s
AND s.min_first_timestamp <= %(end_time)s
{persons_sub_query}
{events_sub_query}
{provided_session_ids_clause}
{log_matching_session_ids_clause}
GROUP BY session_id
HAVING 1=1 {duration_clause} {console_log_clause}
{order_by_clause}
LIMIT %(limit)s OFFSET %(offset)s
"""
@staticmethod
def _data_to_return(results: list[Any]) -> list[dict[str, Any]]:
default_columns = [
"session_id",
"team_id",
"distinct_id",
"start_time",
"end_time",
"duration",
"first_url",
"click_count",
"keypress_count",
"mouse_activity_count",
"active_seconds",
"inactive_seconds",
"console_log_count",
"console_warn_count",
"console_error_count",
]
return [
{
**dict(zip(default_columns, row[: len(default_columns)])),
}
for row in results
]
def _paginate_results(self, session_recordings) -> SessionRecordingQueryResult:
more_recordings_available = False
if len(session_recordings) > self.limit:
more_recordings_available = True
session_recordings = session_recordings[0 : self.limit]
return SessionRecordingQueryResult(session_recordings, more_recordings_available)
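    @staticmethod
    def _illustrate_limit_plus_one_pagination() -> None:
        # Illustrative sketch only: get_query() below asks for limit + 1 rows,
        # so a single surplus row is enough to signal that another page exists
        # without a separate count query (hypothetical rows, limit of 2).
        rows = ["session-a", "session-b", "session-c"]  # LIMIT 2 + 1 returned 3 rows
        limit = 2
        assert len(rows) > limit  # more_recordings_available is True
        assert rows[0:limit] == ["session-a", "session-b"]  # the page actually served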
def run(self) -> SessionRecordingQueryResult:
try:
self._filter.hogql_context.modifiers.personsOnEventsMode = self._person_on_events_mode
query, query_params = self.get_query()
query_results = sync_execute(query, {**query_params, **self._filter.hogql_context.values})
session_recordings = self._data_to_return(query_results)
return self._paginate_results(session_recordings)
except Exception as ex:
            # errors here weren't making it to sentry, let's be explicit
capture_exception(ex, tags={"team_id": self._team.pk})
raise
@property
def limit(self):
return self._filter.limit or self.SESSION_RECORDINGS_DEFAULT_LIMIT
def get_query(self) -> tuple[str, dict[str, Any]]:
offset = self._filter.offset or 0
base_params = {
"team_id": self._team_id,
"limit": self.limit + 1,
"offset": offset,
"clamped_to_storage_ttl": (datetime.now() - timedelta(days=self.ttl_days)),
}
_, recording_start_time_params = _get_recording_start_time_clause(self._filter)
(
provided_session_ids_clause,
provided_session_ids_params,
) = _get_filter_by_provided_session_ids_clause(recording_filters=self._filter)
(
log_matching_session_ids_clause,
log_matching_session_ids_params,
) = _get_filter_by_log_text_session_ids_clause(team=self._team, recording_filters=self._filter)
order_by_clause = _get_order_by_clause(self._filter.target_entity_order)
duration_clause, duration_params = self.duration_clause(self._filter.duration_type_filter)
console_log_clause = self._get_console_log_clause(self._filter.console_logs_filter)
events_select, events_join_params = SessionIdEventsQuery(
team=self._team,
filter=self._filter,
).get_query()
if events_select:
events_select = f"AND s.session_id in (select `$session_id` as session_id from ({events_select}) as session_events_sub_query)"
persons_select, persons_select_params = ActorsQuery(filter=self._filter, team=self._team).get_query()
if persons_select:
persons_select = (
f"AND s.distinct_id in (select distinct_id from ({persons_select}) as session_persons_sub_query)"
)
return (
self._session_recordings_query.format(
duration_clause=duration_clause,
provided_session_ids_clause=provided_session_ids_clause,
console_log_clause=console_log_clause,
persons_sub_query=persons_select,
events_sub_query=events_select,
log_matching_session_ids_clause=log_matching_session_ids_clause,
order_by_clause=order_by_clause,
context_comment=f"-- running in PoE Mode: {self._person_on_events_mode}",
),
{
**base_params,
**events_join_params,
**recording_start_time_params,
**duration_params,
**provided_session_ids_params,
**persons_select_params,
**log_matching_session_ids_params,
},
)
def duration_clause(
self,
duration_filter_type: Literal["duration", "active_seconds", "inactive_seconds"],
) -> tuple[str, dict[str, Any]]:
duration_clause = ""
duration_params = {}
if self._filter.recording_duration_filter:
if self._filter.recording_duration_filter.operator == "gt":
operator = ">"
else:
operator = "<"
duration_clause = "\nAND {duration_type} {operator} %(recording_duration)s".format(
duration_type=duration_filter_type, operator=operator
)
duration_params = {
"recording_duration": self._filter.recording_duration_filter.value,
}
return duration_clause, duration_params
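    @staticmethod
    def _illustrate_duration_clause() -> None:
        # Illustrative sketch only: a "greater than" recording-duration filter
        # over the active_seconds column renders like this (hypothetical values).
        clause = "\nAND {duration_type} {operator} %(recording_duration)s".format(
            duration_type="active_seconds", operator=">"
        )
        assert clause == "\nAND active_seconds > %(recording_duration)s"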
@staticmethod
def _get_console_log_clause(console_logs_filter: list[Literal["error", "warn", "info"]]) -> str:
# to avoid a CH migration we map from info to log when constructing the query here
filters = [f"console_{'log' if log == 'info' else log}_count > 0" for log in console_logs_filter]
return f"AND ({' OR '.join(filters)})" if filters else ""


@@ -37,12 +37,9 @@ from posthog.session_recordings.models.session_recording_event import (
     SessionRecordingViewed,
 )
-from posthog.session_recordings.queries.session_recording_list_from_replay_summary import (
-    SessionRecordingListFromReplaySummary,
-    SessionIdEventsQuery,
-)
 from posthog.session_recordings.queries.session_recording_list_from_filters import (
     SessionRecordingListFromFilters,
+    ReplayFiltersEventsSubQuery,
 )
 from posthog.session_recordings.queries.session_recording_properties import (
     SessionRecordingProperties,
@@ -302,8 +299,21 @@ class SessionRecordingViewSet(TeamAndOrgViewSetMixin, viewsets.GenericViewSet):
                 "Must specify at least one event or action filter",
             )
-        matching_events: list[str] = SessionIdEventsQuery(filter=filter, team=self.team).matching_events()
-        return JsonResponse(data={"results": matching_events})
+        distinct_id = str(cast(User, request.user).distinct_id)
+        modifiers = safely_read_modifiers_overrides(distinct_id, self.team)
+        matching_events_query_response = ReplayFiltersEventsSubQuery(
+            filter=filter, team=self.team, hogql_query_modifiers=modifiers
+        ).get_event_ids_for_session()
+
+        response = JsonResponse(data={"results": matching_events_query_response.results})
+
+        response.headers["Server-Timing"] = ", ".join(
+            f"{key};dur={round(duration, ndigits=2)}"
+            for key, duration in _generate_timings(
+                matching_events_query_response.timings, ServerTimingsGathered()
+            ).items()
+        )
+        return response

     # Returns metadata about the recording
     def retrieve(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
@@ -756,23 +766,13 @@ def list_recordings(
         filter = filter.shallow_clone({SESSION_RECORDINGS_FILTER_IDS: remaining_session_ids})

     if (all_session_ids and filter.session_ids) or not all_session_ids:
-        has_hog_ql_filtering = request.GET.get("hog_ql_filtering", "false") == "true"
+        distinct_id = str(cast(User, request.user).distinct_id)
+        modifiers = safely_read_modifiers_overrides(distinct_id, team)
-        if has_hog_ql_filtering:
-            distinct_id = str(cast(User, request.user).distinct_id)
-            modifiers = safely_read_modifiers_overrides(distinct_id, team)
-            with timer("load_recordings_from_hogql"):
-                (ch_session_recordings, more_recordings_available, hogql_timings) = SessionRecordingListFromFilters(
-                    filter=filter, team=team, hogql_query_modifiers=modifiers
-                ).run()
-        else:
-            # Only go to clickhouse if we still have remaining specified IDs, or we are not specifying IDs
-            with timer("load_recordings_from_clickhouse"):
-                (
-                    ch_session_recordings,
-                    more_recordings_available,
-                ) = SessionRecordingListFromReplaySummary(filter=filter, team=team).run()
+        with timer("load_recordings_from_hogql"):
+            (ch_session_recordings, more_recordings_available, hogql_timings) = SessionRecordingListFromFilters(
+                filter=filter, team=team, hogql_query_modifiers=modifiers
+            ).run()

     with timer("build_recordings"):
         recordings_from_clickhouse = SessionRecording.get_or_build_from_clickhouse(team, ch_session_recordings)


@@ -167,7 +167,7 @@ class TestSessionRecordings(APIBaseTest, ClickhouseTestMixin, QueryMatchingTest)
         assert results_[0]["distinct_id"] == "user2"
         assert results_[1]["distinct_id"] in twelve_distinct_ids

-    @patch("posthog.session_recordings.session_recording_api.SessionRecordingListFromReplaySummary")
+    @patch("posthog.session_recordings.session_recording_api.SessionRecordingListFromFilters")
     def test_console_log_filters_are_correctly_passed_to_listing(self, mock_summary_lister):
         mock_summary_lister.return_value.run.return_value = ([], False)