0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-24 18:07:17 +01:00

feat: refactor listing to join persons twice (#16760)

This commit is contained in:
Paul D'Ambra 2023-07-26 12:12:37 +01:00 committed by GitHub
parent f303cddb64
commit 0465857218
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 517 additions and 375 deletions

View File

@ -18,6 +18,7 @@
<env name="SESSION_RECORDING_KAFKA_HOSTS" value="localhost" />
<env name="SESSION_RECORDING_KAFKA_MAX_REQUEST_SIZE_BYTES" value="524288" />
<env name="SKIP_SERVICE_VERSION_REQUIREMENTS" value="1" />
<env name="PYDEVD_USE_CYTHON" value="NO" />
</envs>
<option name="SDK_HOME" value="$PROJECT_DIR$/env/bin/python" />
<option name="SDK_NAME" value="Python 3.10 (posthog)" />
@ -46,4 +47,4 @@
<option name="customRunCommand" value="" />
<method v="2" />
</configuration>
</component>
</component>

View File

@ -16,9 +16,49 @@ class SummaryEventFiltersSQL:
params: Dict[str, Any]
class SessionRecordingListFromReplaySummary(SessionRecordingList):
SESSION_RECORDINGS_DEFAULT_LIMIT = 50
class PersonsQuery(SessionRecordingList):
    """Build the ``person_distinct_id2`` sub-select used to filter recordings by person.

    The rendered SQL groups by ``distinct_id`` and exposes
    ``argMax(person_id, version) as person_id``. Callers wrap the result in
    their own ``JOIN (...) as pdi`` clause.
    """

    # SQL template; the ``{...}`` placeholders are filled in by ``get_query``.
    # The ``%(team_id)s`` / ``%(person_uuid)s`` markers are left in place for
    # the query parameters returned alongside the SQL.
    _raw_persons_query = """
SELECT distinct_id, argMax(person_id, version) as person_id
{select_person_props}
FROM person_distinct_id2 as pdi
{filter_persons_clause}
WHERE team_id = %(team_id)s
{prop_filter_clause}
GROUP BY distinct_id
HAVING
argMax(is_deleted, version) = 0
{filter_by_person_uuid_condition}
"""

    def get_query(self) -> Tuple[str, Dict[str, Any]]:
        """Return ``(sql, params)`` for the persons sub-select.

        Returns ``("", {})`` when the filter has no ``person_uuid`` and
        ``_get_person_query()`` yields no SQL, i.e. when there is nothing to
        join on.
        """
        prop_filter_sql, prop_filter_params = self._get_prop_groups(
            self._filter.property_groups,
            person_id_joined_alias=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id",
        )
        persons_sql, persons_params = self._get_person_query()
        person_uuid = self._filter.person_uuid

        # Guard clause: no person filtering requested, so no sub-select is needed.
        if not (person_uuid or persons_sql):
            return "", {}

        persons_clause = persons_sql or ""

        # Only select person_props when the persons clause actually refers to them.
        if "person_props" in persons_clause:
            person_props_select = ", argMax(person_props, version) as person_props"
        else:
            person_props_select = ""

        uuid_condition = "and person_id = %(person_uuid)s" if person_uuid else ""

        sql = self._raw_persons_query.format(
            filter_persons_clause=persons_clause,
            select_person_props=person_props_select,
            prop_filter_clause=prop_filter_sql,
            filter_by_person_uuid_condition=uuid_condition,
        )

        # Merge order matters on key collisions: later sources win.
        params: Dict[str, Any] = {"team_id": self._team_id}
        params.update(persons_params)
        params["person_uuid"] = person_uuid
        params.update(prop_filter_params)
        return sql, params
class SessionIdEventsQuery(SessionRecordingList):
def __init__(
self,
**kwargs,
@ -28,92 +68,26 @@ class SessionRecordingListFromReplaySummary(SessionRecordingList):
)
self.ttl_days = (get_instance_setting("RECORDINGS_TTL_WEEKS") or 3) * 7
_raw_persons_or_events_join = """
join (
SELECT
{event_filter_having_events_select}
`$session_id`,
pdi.distinct_id,
argMax(pdi.person_id, version) as person_id
FROM events e
JOIN person_distinct_id2 as pdi on pdi.distinct_id = e.distinct_id
{filter_persons_clause}
WHERE
e.team_id = %(team_id)s
and notEmpty(`$session_id`)
{events_timestamp_clause}
{event_filter_where_conditions}
{prop_filter_clause}
GROUP BY `$session_id`, pdi.distinct_id
HAVING
argMax(is_deleted, version) = 0
{filter_by_person_uuid_condition}
{event_filter_having_events_condition}
) as e on s.session_id = e.`$session_id`
"""
_raw_persons_join = """
join (
SELECT distinct_id, argMax(person_id, version) as person_id
FROM person_distinct_id2 as pdi
{filter_persons_clause}
WHERE team_id = %(team_id)s
{prop_filter_clause}
GROUP BY distinct_id
HAVING
argMax(is_deleted, version) = 0
{filter_by_person_uuid_condition}
) as pdi on pdi.distinct_id = s.distinct_id
"""
_raw_events_join = """ join (
SELECT
{event_filter_having_events_select}
`$session_id`
FROM events e
WHERE
team_id = %(team_id)s
and notEmpty(`$session_id`)
{events_timestamp_clause}
{event_filter_where_conditions}
{prop_filter_clause}
GROUP BY `$session_id`
HAVING 1=1 {event_filter_having_events_condition}
) as e on s.session_id = e.`$session_id`"""
_session_recordings_query: str = """
SELECT
s.session_id,
any(s.team_id),
any(s.distinct_id),
min(s.min_first_timestamp) as start_time,
max(s.max_last_timestamp) as end_time,
dateDiff('SECOND', start_time, end_time) as duration,
argMinMerge(s.first_url) as first_url,
sum(s.click_count),
sum(s.keypress_count),
sum(s.mouse_activity_count),
sum(s.active_milliseconds)/1000 as active_seconds,
duration-active_seconds as inactive_seconds,
sum(s.console_log_count) as console_log_count,
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
{persons_or_events_join}
WHERE s.team_id = %(team_id)s
-- regardless of what other filters are applied
-- limit by storage TTL
AND s.min_first_timestamp >= %(clamped_to_storage_ttl)s
-- we can filter on the pre-aggregated timestamp columns
-- because any not-the-lowest min value is _more_ greater than the min value
-- and any not-the-highest max value is _less_ lower than the max value
AND s.min_first_timestamp >= %(start_time)s
AND s.max_last_timestamp <= %(end_time)s
{provided_session_ids_clause}
GROUP BY session_id
HAVING 1=1 {duration_clause} {console_log_clause}
ORDER BY start_time DESC
LIMIT %(limit)s OFFSET %(offset)s
_raw_events_query = """
SELECT
{event_filter_having_events_select}
`$session_id`
FROM events e
{persons_join}
PREWHERE
team_id = %(team_id)s
-- regardless of what other filters are applied
-- limit by storage TTL
AND e.timestamp >= %(clamped_to_storage_ttl)s
AND e.timestamp <= now()
WHERE
notEmpty(`$session_id`)
{events_timestamp_clause}
{event_filter_where_conditions}
{prop_filter_clause}
{provided_session_ids_clause}
GROUP BY `$session_id`
HAVING 1=1 {event_filter_having_events_condition}
"""
@cached_property
@ -148,8 +122,8 @@ join (
else:
having_conditions = "AND hasAll(event_names, %(event_names)s)"
having_select = """
-- select the unique events in this session to support filtering sessions by presence of an event
groupUniqArray(event) as event_names,"""
-- select the unique events in this session to support filtering sessions by presence of an event
groupUniqArray(event) as event_names,"""
return SummaryEventFiltersSQL(
having_conditions=having_conditions,
@ -158,6 +132,116 @@ join (
params=params,
)
@cached_property
def _get_filter_by_provided_session_ids_clause(self) -> Tuple[str, Dict[str, Any]]:
if self._filter.session_ids is None:
return "", {}
return "AND `$session_id` in %(session_ids)s", {"session_ids": self._filter.session_ids}
def get_query(self) -> Tuple[str, Dict[str, Any]]:
    """Return ``(sql, params)`` for the events sub-select keyed by ``$session_id``.

    Returns ``("", {})`` when ``_determine_should_join_events()`` is falsy.
    Otherwise ``_raw_events_query`` is rendered with the event, timestamp,
    property and session-id conditions, plus an optional join onto the
    ``PersonsQuery`` sub-select.
    """
    if not self._determine_should_join_events():
        return "", {}

    # Lower bound on event timestamps: now minus the recordings TTL in days.
    ttl_cutoff = datetime.datetime.now() - datetime.timedelta(days=self.ttl_days)
    query_params: Dict[str, Any] = {
        "team_id": self._team_id,
        "clamped_to_storage_ttl": ttl_cutoff,
    }

    # Only the params of the recording start-time clause are used here.
    _, start_time_params = self._get_recording_start_time_clause
    session_ids_sql, session_ids_params = self._get_filter_by_provided_session_ids_clause

    summary_filters = self.build_event_filters
    summary_filter_params = summary_filters.params

    timestamp_sql, timestamp_params = self._get_events_timestamp_clause

    prop_sql, prop_params = self._get_prop_groups(
        self._filter.property_groups,
        person_id_joined_alias=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id",
    )

    persons_sql, persons_params = PersonsQuery(filter=self._filter, team=self._team).get_query()
    # Wrap the persons sub-select in a join on the event's distinct_id, if there is one.
    persons_join = (
        f"JOIN ({persons_sql}) as pdi on pdi.distinct_id = e.distinct_id" if persons_sql else persons_sql
    )

    rendered_sql = self._raw_events_query.format(
        event_filter_where_conditions=summary_filters.where_conditions,
        event_filter_having_events_condition=summary_filters.having_conditions,
        event_filter_having_events_select=summary_filters.having_select,
        events_timestamp_clause=timestamp_sql,
        prop_filter_clause=prop_sql,
        provided_session_ids_clause=session_ids_sql,
        persons_join=persons_join,
    )

    # Merge order matters on key collisions: later sources win.
    for extra in (
        start_time_params,
        session_ids_params,
        timestamp_params,
        summary_filter_params,
        prop_params,
        persons_params,
    ):
        query_params.update(extra)

    return rendered_sql, query_params
@cached_property
def _get_person_id_clause(self) -> Tuple[str, Dict[str, Any]]:
person_id_clause = ""
person_id_params = {}
if self._filter.person_uuid:
person_id_clause = "AND person_id = %(person_uuid)s"
person_id_params = {"person_uuid": self._filter.person_uuid}
return person_id_clause, person_id_params
class SessionRecordingListFromReplaySummary(SessionRecordingList):
SESSION_RECORDINGS_DEFAULT_LIMIT = 50
def __init__(
    self,
    **kwargs,
):
    """Forward all keyword arguments to the parent and derive ``ttl_days``.

    ``ttl_days`` is the ``RECORDINGS_TTL_WEEKS`` instance setting times 7,
    with 3 weeks used when the setting is falsy.
    """
    super().__init__(**kwargs)
    ttl_weeks = get_instance_setting("RECORDINGS_TTL_WEEKS") or 3
    self.ttl_days = ttl_weeks * 7
_session_recordings_query: str = """
SELECT
s.session_id,
any(s.team_id),
any(s.distinct_id),
min(s.min_first_timestamp) as start_time,
max(s.max_last_timestamp) as end_time,
dateDiff('SECOND', start_time, end_time) as duration,
argMinMerge(s.first_url) as first_url,
sum(s.click_count),
sum(s.keypress_count),
sum(s.mouse_activity_count),
sum(s.active_milliseconds)/1000 as active_seconds,
duration-active_seconds as inactive_seconds,
sum(s.console_log_count) as console_log_count,
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
{events_join}
{persons_join}
WHERE s.team_id = %(team_id)s
-- regardless of what other filters are applied
-- limit by storage TTL
AND s.min_first_timestamp >= %(clamped_to_storage_ttl)s
-- we can filter on the pre-aggregated timestamp columns
-- because any not-the-lowest min value is _more_ greater than the min value
-- and any not-the-highest max value is _less_ lower than the max value
AND s.min_first_timestamp >= %(start_time)s
AND s.max_last_timestamp <= %(end_time)s
{provided_session_ids_clause}
GROUP BY session_id
HAVING 1=1 {duration_clause} {console_log_clause}
ORDER BY start_time DESC
LIMIT %(limit)s OFFSET %(offset)s
"""
def _data_to_return(self, results: List[Any]) -> List[Dict[str, Any]]:
default_columns = [
"session_id",
@ -188,6 +272,50 @@ join (
def limit(self):
return self._filter.limit or self.SESSION_RECORDINGS_DEFAULT_LIMIT
def get_query(self) -> Tuple[str, Dict[str, Any]]:
    """Return ``(sql, params)`` for the session recordings listing query.

    Renders ``_session_recordings_query`` with the duration, console-log and
    session-id conditions. The events and persons sub-selects are each
    joined only when their query builder returns non-empty SQL.
    """
    query_params: Dict[str, Any] = {
        "team_id": self._team_id,
        # One row more than the page size is requested.
        "limit": self.limit + 1,
        "offset": self._filter.offset or 0,
        # Lower bound on recording start: now minus the recordings TTL in days.
        "clamped_to_storage_ttl": datetime.datetime.now() - datetime.timedelta(days=self.ttl_days),
    }

    # Only the params of the recording start-time clause are used here.
    _, start_time_params = self._get_recording_start_time_clause
    session_ids_sql, session_ids_params = self._get_filter_by_provided_session_ids_clause
    duration_sql, duration_params = self.duration_clause(self._filter.duration_type_filter)
    console_log_sql = self._get_console_log_clause(self._filter.console_logs_filter)

    events_sql, events_params = SessionIdEventsQuery(
        team=self._team,
        filter=self._filter,
    ).get_query()
    events_join = f"JOIN ({events_sql}) as e on s.session_id = e.`$session_id`" if events_sql else events_sql

    persons_sql, persons_params = PersonsQuery(filter=self._filter, team=self._team).get_query()
    persons_join = (
        f"JOIN ({persons_sql}) as pdi on pdi.distinct_id = s.distinct_id" if persons_sql else persons_sql
    )

    rendered_sql = self._session_recordings_query.format(
        duration_clause=duration_sql,
        events_join=events_join,
        provided_session_ids_clause=session_ids_sql,
        console_log_clause=console_log_sql,
        persons_join=persons_join,
    )

    # Merge order matters on key collisions: later sources win.
    for extra in (
        events_params,
        start_time_params,
        duration_params,
        session_ids_params,
        persons_params,
    ):
        query_params.update(extra)

    return rendered_sql, query_params
def duration_clause(
self, duration_filter_type: Literal["duration", "active_seconds", "inactive_seconds"]
) -> Tuple[str, Dict[str, Any]]:
@ -206,117 +334,7 @@ join (
}
return duration_clause, duration_params
def get_query(self) -> Tuple[str, Dict[str, Any]]:
offset = self._filter.offset or 0
base_params = {
"team_id": self._team_id,
"limit": self.limit + 1,
"offset": offset,
"clamped_to_storage_ttl": (datetime.datetime.now() - datetime.timedelta(days=self.ttl_days)),
}
_, recording_start_time_params = self._get_recording_start_time_clause
provided_session_ids_clause, provided_session_ids_params = self._get_filter_by_provided_session_ids_clause
person_id_clause, person_id_params = self._get_person_id_clause
duration_clause, duration_params = self.duration_clause(self._filter.duration_type_filter)
console_log_clause = self._get_console_log_clause(self._filter.console_logs_filter)
persons_or_events_join, persons_or_events_join_params = self._get_persons_or_events_join
return (
self._session_recordings_query.format(
person_id_clause=person_id_clause,
duration_clause=duration_clause,
persons_or_events_join=persons_or_events_join,
provided_session_ids_clause=provided_session_ids_clause,
console_log_clause=console_log_clause,
),
{
**base_params,
**persons_or_events_join_params,
**recording_start_time_params,
**person_id_params,
**duration_params,
**provided_session_ids_params,
},
)
@cached_property
def _get_persons_or_events_join(self) -> Tuple[str, Dict[str, Any]]:
person_id_params: Dict[str, Any] = {}
events_timestamp_clause = ""
events_timestamp_params: Dict[str, Any] = {}
event_filters_params = {}
event_filters_where_conditions = ""
event_filters_having_conditions = ""
event_filters_having_select = ""
should_join_events = self._determine_should_join_events()
if should_join_events:
event_filters = self.build_event_filters
event_filters_params = event_filters.params
event_filters_where_conditions = event_filters.where_conditions
event_filters_having_conditions = event_filters.having_conditions
event_filters_having_select = event_filters.having_select
events_timestamp_clause, events_timestamp_params = self._get_events_timestamp_clause
prop_query, prop_params = self._get_prop_groups(
self._filter.property_groups, person_id_joined_alias=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id"
)
person_query, person_query_params = self._get_person_query()
filter_persons_clause = ""
filter_by_person_uuid_condition = ""
should_join_persons = self._filter.person_uuid or person_query
if should_join_persons:
person_id_params = {
**person_id_params,
**person_query_params,
"person_uuid": self._filter.person_uuid,
**prop_params,
}
filter_persons_clause = person_query or ""
filter_by_person_uuid_condition = "and person_id = %(person_uuid)s" if self._filter.person_uuid else ""
join_clause = ""
if should_join_persons and should_join_events:
join_clause = self._raw_persons_or_events_join
elif should_join_persons:
join_clause = self._raw_persons_join
elif should_join_events:
join_clause = self._raw_events_join
return (
join_clause.format(
event_filter_where_conditions=event_filters_where_conditions,
event_filter_having_events_condition=event_filters_having_conditions,
event_filter_having_events_select=event_filters_having_select,
events_timestamp_clause=events_timestamp_clause,
filter_persons_clause=filter_persons_clause,
filter_by_person_uuid_condition=filter_by_person_uuid_condition,
prop_filter_clause=prop_query,
),
{
**events_timestamp_params,
**event_filters_params,
**prop_params,
**person_id_params,
},
)
@cached_property
def _get_person_id_clause(self) -> Tuple[str, Dict[str, Any]]:
person_id_clause = ""
person_id_params = {}
if self._filter.person_uuid:
person_id_clause = "AND person_id = %(person_uuid)s"
person_id_params = {"person_uuid": self._filter.person_uuid}
return person_id_clause, person_id_params
def _get_console_log_clause(self, console_logs_filter: List[Literal["error", "warn", "log"]]) -> str:
@staticmethod
def _get_console_log_clause(console_logs_filter: List[Literal["error", "warn", "log"]]) -> str:
filters = [f"console_{log}_count > 0" for log in console_logs_filter]
return f"AND ({' OR '.join(filters)})" if filters else ""

View File

@ -17,12 +17,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2022-12-14 00:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2022-12-27 12:00:00'
AND timestamp <= '2023-01-04 12:00:00'
AND (((event = 'custom-event'
@ -62,12 +63,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2022-12-14 00:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2022-12-27 12:00:00'
AND timestamp <= '2023-01-04 12:00:00'
AND (((event = 'custom-event'
@ -106,12 +108,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2022-12-14 00:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2022-12-27 12:00:00'
AND timestamp <= '2023-01-04 12:00:00'
AND (((event = 'custom-event'
@ -151,12 +154,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2022-12-14 00:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2022-12-27 12:00:00'
AND timestamp <= '2023-01-04 12:00:00'
AND (((event = 'custom-event'
@ -196,30 +200,48 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`,
pdi.distinct_id,
argMax(pdi.person_id, version) as person_id
`$session_id`
FROM events e
JOIN person_distinct_id2 as pdi on pdi.distinct_id = e.distinct_id
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2 as pdi
INNER JOIN
(SELECT id
FROM person
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0
and person_id = '00000000-0000-0000-0000-000000000000') as pdi on pdi.distinct_id = e.distinct_id PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-21 12:00:00'
AND timestamp <= '2021-01-05 11:59:59'
AND ((event = '$pageview')
OR ((event = 'custom-event')))
GROUP BY `$session_id`
HAVING 1=1
AND hasAll(event_names, ['$pageview', 'custom-event'])) as e on s.session_id = e.`$session_id`
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2 as pdi
INNER JOIN
(SELECT id
FROM person
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE e.team_id = 2
and notEmpty(`$session_id`)
AND timestamp >= '2020-12-21 12:00:00'
AND timestamp <= '2021-01-05 11:59:59'
AND ((event = '$pageview')
OR ((event = 'custom-event')))
GROUP BY `$session_id`,
pdi.distinct_id
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0
and person_id = '00000000-0000-0000-0000-000000000000'
AND hasAll(event_names, ['$pageview', 'custom-event'])) as e on s.session_id = e.`$session_id`
and person_id = '00000000-0000-0000-0000-000000000000') as pdi on pdi.distinct_id = s.distinct_id
WHERE s.team_id = 2
AND s.min_first_timestamp >= '2020-12-11 13:46:23'
AND s.min_first_timestamp >= '2020-12-22 00:00:00'
@ -251,11 +273,12 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT `$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-31 20:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (1 = 1)
@ -291,11 +314,12 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT `$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-31 20:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (1 = 1
@ -332,11 +356,12 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT `$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-31 20:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (1 = 1
@ -373,11 +398,12 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT `$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-31 20:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (1 = 1)
@ -413,11 +439,12 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT `$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-31 20:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (1 = 1
@ -454,11 +481,12 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT `$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-31 20:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (1 = 1
@ -980,12 +1008,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview')
@ -1022,12 +1051,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$autocapture')
@ -1064,12 +1094,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview')
@ -1136,12 +1167,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview')
@ -1179,12 +1211,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview')
@ -1222,13 +1255,41 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`,
pdi.distinct_id,
argMax(pdi.person_id, version) as person_id
`$session_id`
FROM events e
JOIN person_distinct_id2 as pdi on pdi.distinct_id = e.distinct_id
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id ,
argMax(person_props, version) as person_props
FROM person_distinct_id2 as pdi
INNER JOIN
(SELECT id,
argMax(properties, version) as person_props
FROM person
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi on pdi.distinct_id = e.distinct_id PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview'
AND (ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Chrome'), 0)
AND ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, 'email'), ''), 'null'), '^"|"$', ''), 'bla'), 0)))
GROUP BY `$session_id`
HAVING 1=1
AND hasAll(event_names, ['$pageview'])) as e on s.session_id = e.`$session_id`
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id ,
argMax(person_props, version) as person_props
FROM person_distinct_id2 as pdi
INNER JOIN
(SELECT id,
argMax(properties, version) as person_props
@ -1236,17 +1297,9 @@
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE e.team_id = 2
and notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview'
AND (ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Chrome'), 0)
AND ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, 'email'), ''), 'null'), '^"|"$', ''), 'bla'), 0)))
GROUP BY `$session_id`,
pdi.distinct_id
HAVING argMax(is_deleted, version) = 0
AND hasAll(event_names, ['$pageview'])) as e on s.session_id = e.`$session_id`
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi on pdi.distinct_id = s.distinct_id
WHERE s.team_id = 2
AND s.min_first_timestamp >= '2020-12-11 13:46:23'
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
@ -1277,12 +1330,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview'
@ -1320,12 +1374,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview')
@ -1362,12 +1417,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$autocapture')
@ -1404,7 +1460,7 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2 as pdi
@ -1453,12 +1509,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview'
@ -1496,12 +1553,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview'
@ -1539,12 +1597,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview'
@ -1582,12 +1641,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview'
@ -1625,12 +1685,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-31 20:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (event = '$pageview')
@ -1668,12 +1729,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-31 20:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (event = '$pageview')
@ -1710,12 +1772,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-31 20:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND ((event = '$pageview')
@ -2146,7 +2209,7 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2 as pdi
@ -2215,21 +2278,32 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`,
pdi.distinct_id,
argMax(pdi.person_id, version) as person_id
`$session_id`
FROM events e
JOIN person_distinct_id2 as pdi on pdi.distinct_id = e.distinct_id
INNER JOIN
(SELECT id
FROM person
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2 as pdi
INNER JOIN
(SELECT id
FROM person
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE e.team_id = 2
and notEmpty(`$session_id`)
AND (pdi.person_id IN
(SELECT DISTINCT person_id
FROM cohortpeople
WHERE team_id = 2
AND cohort_id = 2
AND version = 0 ))
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi on pdi.distinct_id = e.distinct_id PREWHERE team_id = 2
AND e.timestamp >= '2021-07-31 20:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-08-13 12:00:00'
AND timestamp <= '2021-08-22 08:00:00'
AND (event = '$pageview')
@ -2239,10 +2313,28 @@
WHERE team_id = 2
AND cohort_id = 2
AND version = 0 ))
GROUP BY `$session_id`,
pdi.distinct_id
HAVING argMax(is_deleted, version) = 0
GROUP BY `$session_id`
HAVING 1=1
AND hasAll(event_names, ['$pageview'])) as e on s.session_id = e.`$session_id`
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2 as pdi
INNER JOIN
(SELECT id
FROM person
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE team_id = 2
AND (pdi.person_id IN
(SELECT DISTINCT person_id
FROM cohortpeople
WHERE team_id = 2
AND cohort_id = 2
AND version = 0 ))
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi on pdi.distinct_id = s.distinct_id
WHERE s.team_id = 2
AND s.min_first_timestamp >= '2021-07-31 20:00:00'
AND s.min_first_timestamp >= '2021-08-14 00:00:00'
@ -2273,21 +2365,32 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`,
pdi.distinct_id,
argMax(pdi.person_id, version) as person_id
`$session_id`
FROM events e
JOIN person_distinct_id2 as pdi on pdi.distinct_id = e.distinct_id
INNER JOIN
(SELECT id
FROM person
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2 as pdi
INNER JOIN
(SELECT id
FROM person
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE e.team_id = 2
and notEmpty(`$session_id`)
AND (pdi.person_id IN
(SELECT DISTINCT person_id
FROM cohortpeople
WHERE team_id = 2
AND cohort_id = 2
AND version = 0 ))
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi on pdi.distinct_id = e.distinct_id PREWHERE team_id = 2
AND e.timestamp >= '2021-07-31 20:00:00'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-08-13 12:00:00'
AND timestamp <= '2021-08-22 08:00:00'
AND (event = 'custom_event')
@ -2297,10 +2400,28 @@
WHERE team_id = 2
AND cohort_id = 2
AND version = 0 ))
GROUP BY `$session_id`,
pdi.distinct_id
HAVING argMax(is_deleted, version) = 0
GROUP BY `$session_id`
HAVING 1=1
AND hasAll(event_names, ['custom_event'])) as e on s.session_id = e.`$session_id`
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2 as pdi
INNER JOIN
(SELECT id
FROM person
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE team_id = 2
AND (pdi.person_id IN
(SELECT DISTINCT person_id
FROM cohortpeople
WHERE team_id = 2
AND cohort_id = 2
AND version = 0 ))
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as pdi on pdi.distinct_id = s.distinct_id
WHERE s.team_id = 2
AND s.min_first_timestamp >= '2021-07-31 20:00:00'
AND s.min_first_timestamp >= '2021-08-14 00:00:00'
@ -2331,12 +2452,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND ((event = '$pageview')
@ -2374,12 +2496,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
FROM events e
WHERE team_id = 2
and notEmpty(`$session_id`)
FROM events e PREWHERE team_id = 2
AND e.timestamp >= '2020-12-11 13:46:23'
AND e.timestamp <= now()
WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND ((event = '$pageview')
@ -2417,7 +2540,7 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
join
JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2 as pdi