mirror of
https://github.com/PostHog/posthog.git
synced 2024-11-28 18:26:15 +01:00
e3bf0cb31d
* Add scheduled task to wipe session recordings
* Create a new table for session recording
* Save snapshot events to different table
* Use SessionRecordingEvent over Events everywhere
  We can remove a ton of cruft this way as well
* Add missing signature
* Extract util from models/event
* Attempt to update ingest side of clickhouse session recording events
  Note that it's using main kafka topic - not sure if a good idea.
* Get separate table in ch working for session recording events
* WIP: query sessions
* Make both session recording queries work
* Make linter happy
* Rebase migration
* Make tests work
* Apply a TTL to session recordings and other configuration:
  - toYYYYMMDD partitioning should be smoother with TTL setup
  - TTL achieves not needing to archive the data ourselves
  - index_granularity will enable smaller reads per session_id
  - ORDER BY clause is to make single session as well as time range query reasonable
* Convert retention cronjob to new model
* Add tests to process_event changes
* Add test for ee_capture change
* Fixup migration
* Make clickhouse tests drop/create session recording tables
* Make TTL not be there in tests
  Otherwise writes get eaten by it during tests when mocking time
* Fix retention task

Co-authored-by: Tim Glaser <tim@glsr.nl>
59 lines
2.0 KiB
Python
59 lines
2.0 KiB
Python
# ClickHouse query template that derives sessions from rows of the `events` table.
#
# Template inputs:
#   %(team_id)s  - bound query parameter restricting the scan to one team.
#   {date_from}, {date_to}, {filters}, {sessions_limit}
#                - text placeholders spliced into the SQL; presumably filled
#                  with str.format() by the caller (TODO confirm).
#                  NOTE(review): these are inserted verbatim, so callers must
#                  build them from trusted fragments / bound parameters only.
#
# How the query is layered, innermost subquery first:
#   1. Read (uuid, timestamp, distinct_id) from `events`, skipping
#      '$feature_flag_called' events, grouped by those three columns and
#      sorted by distinct_id, then timestamp.
#   2. Use neighbor() to look at the previous and the next row. A row gets
#      is_new_session = 1 when the previous row has a different distinct_id or
#      dateDiff('minute', previous, current) exceeds 30; is_end_session is the
#      mirror image computed against the next row.
#   3. Keep only rows carrying at least one of the two flags. For a row that
#      starts a session, session_uuid is its own uuid. If the same row also
#      ends the session, the duration is 0 and session_end_ts is its own
#      timestamp; otherwise both come from neighbor(timestamp, 1).
#      NOTE(review): this presumably relies on neighbor() being evaluated over
#      the rows that survive this level's WHERE, so that the "next" row is the
#      session's end row - confirm against ClickHouse behaviour.
#   4. The outer query keeps only the session-start rows and appends
#      {sessions_limit}.
SESSIONS_NO_EVENTS_SQL = """
SELECT
    distinct_id,
    uuid,
    session_uuid,
    session_duration_seconds,
    timestamp,
    session_end_ts
FROM
(
    SELECT
        distinct_id,
        uuid,
        if(is_new_session, uuid, NULL) AS session_uuid,
        is_new_session,
        is_end_session,
        if(is_end_session AND is_new_session, 0, if(is_new_session AND (NOT is_end_session), dateDiff('second', toDateTime(timestamp), toDateTime(neighbor(timestamp, 1))), NULL)) AS session_duration_seconds,
        timestamp,
        if(is_end_session AND is_new_session, timestamp, if(is_new_session AND (NOT is_end_session), neighbor(timestamp, 1), NULL)) AS session_end_ts
    FROM
    (
        SELECT
            distinct_id,
            uuid,
            timestamp,
            neighbor(distinct_id, -1) AS start_possible_neighbor,
            neighbor(timestamp, -1) AS start_possible_prev_ts,
            if((start_possible_neighbor != distinct_id) OR (dateDiff('minute', toDateTime(start_possible_prev_ts), toDateTime(timestamp)) > 30), 1, 0) AS is_new_session,
            neighbor(distinct_id, 1) AS end_possible_neighbor,
            neighbor(timestamp, 1) AS end_possible_prev_ts,
            if((end_possible_neighbor != distinct_id) OR (dateDiff('minute', toDateTime(timestamp), toDateTime(end_possible_prev_ts)) > 30), 1, 0) AS is_end_session
        FROM
        (
            SELECT
                uuid,
                timestamp,
                distinct_id
            FROM events
            WHERE
                team_id = %(team_id)s
                AND event != '$feature_flag_called'
                {date_from}
                {date_to}
                {filters}
            GROUP BY
                uuid,
                timestamp,
                distinct_id
            ORDER BY
                distinct_id ASC,
                timestamp ASC
        )
    )
    WHERE (is_new_session AND (NOT is_end_session)) OR (is_end_session AND (NOT is_new_session)) OR (is_end_session AND is_new_session)
)
WHERE is_new_session
{sessions_limit}
"""