diff --git a/.run/PostHog.run.xml b/.run/PostHog.run.xml
index fe267209927..7dedccabd3b 100644
--- a/.run/PostHog.run.xml
+++ b/.run/PostHog.run.xml
@@ -18,6 +18,7 @@
+
@@ -46,4 +47,4 @@
-
+
\ No newline at end of file
diff --git a/posthog/queries/session_recordings/session_recording_list_from_replay_summary.py b/posthog/queries/session_recordings/session_recording_list_from_replay_summary.py
index cfb2b501a98..d91839a50ee 100644
--- a/posthog/queries/session_recordings/session_recording_list_from_replay_summary.py
+++ b/posthog/queries/session_recordings/session_recording_list_from_replay_summary.py
@@ -16,9 +16,49 @@ class SummaryEventFiltersSQL:
params: Dict[str, Any]
-class SessionRecordingListFromReplaySummary(SessionRecordingList):
- SESSION_RECORDINGS_DEFAULT_LIMIT = 50
+class PersonsQuery(SessionRecordingList):
+ _raw_persons_query = """
+ SELECT distinct_id, argMax(person_id, version) as person_id
+ {select_person_props}
+ FROM person_distinct_id2 as pdi
+ {filter_persons_clause}
+ WHERE team_id = %(team_id)s
+ {prop_filter_clause}
+ GROUP BY distinct_id
+ HAVING
+ argMax(is_deleted, version) = 0
+ {filter_by_person_uuid_condition}
+ """
+ def get_query(self) -> Tuple[str, Dict[str, Any]]:
+ prop_query, prop_params = self._get_prop_groups(
+ self._filter.property_groups, person_id_joined_alias=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id"
+ )
+
+ person_query, person_query_params = self._get_person_query()
+ should_join_persons = self._filter.person_uuid or person_query
+
+ if not should_join_persons:
+ return "", {}
+ else:
+ filter_persons_clause = person_query or ""
+ filter_by_person_uuid_condition = "and person_id = %(person_uuid)s" if self._filter.person_uuid else ""
+ return self._raw_persons_query.format(
+ filter_persons_clause=filter_persons_clause,
+ select_person_props=", argMax(person_props, version) as person_props"
+ if "person_props" in filter_persons_clause
+ else "",
+ prop_filter_clause=prop_query,
+ filter_by_person_uuid_condition=filter_by_person_uuid_condition,
+ ), {
+ "team_id": self._team_id,
+ **person_query_params,
+ "person_uuid": self._filter.person_uuid,
+ **prop_params,
+ }
+
+
+class SessionIdEventsQuery(SessionRecordingList):
def __init__(
self,
**kwargs,
@@ -28,92 +68,26 @@ class SessionRecordingListFromReplaySummary(SessionRecordingList):
)
self.ttl_days = (get_instance_setting("RECORDINGS_TTL_WEEKS") or 3) * 7
- _raw_persons_or_events_join = """
-join (
- SELECT
- {event_filter_having_events_select}
- `$session_id`,
- pdi.distinct_id,
- argMax(pdi.person_id, version) as person_id
- FROM events e
- JOIN person_distinct_id2 as pdi on pdi.distinct_id = e.distinct_id
- {filter_persons_clause}
- WHERE
- e.team_id = %(team_id)s
- and notEmpty(`$session_id`)
- {events_timestamp_clause}
- {event_filter_where_conditions}
- {prop_filter_clause}
- GROUP BY `$session_id`, pdi.distinct_id
- HAVING
- argMax(is_deleted, version) = 0
- {filter_by_person_uuid_condition}
- {event_filter_having_events_condition}
-) as e on s.session_id = e.`$session_id`
- """
-
- _raw_persons_join = """
- join (
- SELECT distinct_id, argMax(person_id, version) as person_id
- FROM person_distinct_id2 as pdi
- {filter_persons_clause}
- WHERE team_id = %(team_id)s
- {prop_filter_clause}
- GROUP BY distinct_id
- HAVING
- argMax(is_deleted, version) = 0
- {filter_by_person_uuid_condition}
- ) as pdi on pdi.distinct_id = s.distinct_id
- """
-
- _raw_events_join = """ join (
- SELECT
- {event_filter_having_events_select}
- `$session_id`
- FROM events e
- WHERE
- team_id = %(team_id)s
- and notEmpty(`$session_id`)
- {events_timestamp_clause}
- {event_filter_where_conditions}
- {prop_filter_clause}
- GROUP BY `$session_id`
- HAVING 1=1 {event_filter_having_events_condition}
- ) as e on s.session_id = e.`$session_id`"""
-
- _session_recordings_query: str = """
- SELECT
- s.session_id,
- any(s.team_id),
- any(s.distinct_id),
- min(s.min_first_timestamp) as start_time,
- max(s.max_last_timestamp) as end_time,
- dateDiff('SECOND', start_time, end_time) as duration,
- argMinMerge(s.first_url) as first_url,
- sum(s.click_count),
- sum(s.keypress_count),
- sum(s.mouse_activity_count),
- sum(s.active_milliseconds)/1000 as active_seconds,
- duration-active_seconds as inactive_seconds,
- sum(s.console_log_count) as console_log_count,
- sum(s.console_warn_count) as console_warn_count,
- sum(s.console_error_count) as console_error_count
- FROM session_replay_events s
- {persons_or_events_join}
- WHERE s.team_id = %(team_id)s
- -- regardless of what other filters are applied
- -- limit by storage TTL
- AND s.min_first_timestamp >= %(clamped_to_storage_ttl)s
- -- we can filter on the pre-aggregated timestamp columns
- -- because any not-the-lowest min value is _more_ greater than the min value
- -- and any not-the-highest max value is _less_ lower than the max value
- AND s.min_first_timestamp >= %(start_time)s
- AND s.max_last_timestamp <= %(end_time)s
- {provided_session_ids_clause}
- GROUP BY session_id
- HAVING 1=1 {duration_clause} {console_log_clause}
- ORDER BY start_time DESC
- LIMIT %(limit)s OFFSET %(offset)s
+ _raw_events_query = """
+ SELECT
+ {event_filter_having_events_select}
+ `$session_id`
+ FROM events e
+ {persons_join}
+ PREWHERE
+ team_id = %(team_id)s
+ -- regardless of what other filters are applied
+ -- limit by storage TTL
+ AND e.timestamp >= %(clamped_to_storage_ttl)s
+ AND e.timestamp <= now()
+ WHERE
+ notEmpty(`$session_id`)
+ {events_timestamp_clause}
+ {event_filter_where_conditions}
+ {prop_filter_clause}
+ {provided_session_ids_clause}
+ GROUP BY `$session_id`
+ HAVING 1=1 {event_filter_having_events_condition}
"""
@cached_property
@@ -148,8 +122,8 @@ join (
else:
having_conditions = "AND hasAll(event_names, %(event_names)s)"
having_select = """
- -- select the unique events in this session to support filtering sessions by presence of an event
- groupUniqArray(event) as event_names,"""
+ -- select the unique events in this session to support filtering sessions by presence of an event
+ groupUniqArray(event) as event_names,"""
return SummaryEventFiltersSQL(
having_conditions=having_conditions,
@@ -158,6 +132,116 @@ join (
params=params,
)
+ @cached_property
+ def _get_filter_by_provided_session_ids_clause(self) -> Tuple[str, Dict[str, Any]]:
+ if self._filter.session_ids is None:
+ return "", {}
+
+ return "AND `$session_id` in %(session_ids)s", {"session_ids": self._filter.session_ids}
+
+ def get_query(self) -> Tuple[str, Dict[str, Any]]:
+ if not self._determine_should_join_events():
+ return "", {}
+
+ base_params = {
+ "team_id": self._team_id,
+ "clamped_to_storage_ttl": (datetime.datetime.now() - datetime.timedelta(days=self.ttl_days)),
+ }
+
+ _, recording_start_time_params = self._get_recording_start_time_clause
+ provided_session_ids_clause, provided_session_ids_params = self._get_filter_by_provided_session_ids_clause
+
+ event_filters = self.build_event_filters
+ event_filters_params = event_filters.params
+ events_timestamp_clause, events_timestamp_params = self._get_events_timestamp_clause
+
+ prop_query, prop_params = self._get_prop_groups(
+ self._filter.property_groups, person_id_joined_alias=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id"
+ )
+
+ persons_select, persons_join_params = PersonsQuery(filter=self._filter, team=self._team).get_query()
+ if persons_select:
+ persons_select = f"JOIN ({persons_select}) as pdi on pdi.distinct_id = e.distinct_id"
+
+ return (
+ self._raw_events_query.format(
+ event_filter_where_conditions=event_filters.where_conditions,
+ event_filter_having_events_condition=event_filters.having_conditions,
+ event_filter_having_events_select=event_filters.having_select,
+ events_timestamp_clause=events_timestamp_clause,
+ prop_filter_clause=prop_query,
+ provided_session_ids_clause=provided_session_ids_clause,
+ persons_join=persons_select,
+ ),
+ {
+ **base_params,
+ **recording_start_time_params,
+ **provided_session_ids_params,
+ **events_timestamp_params,
+ **event_filters_params,
+ **prop_params,
+ **persons_join_params,
+ },
+ )
+
+ @cached_property
+ def _get_person_id_clause(self) -> Tuple[str, Dict[str, Any]]:
+ person_id_clause = ""
+ person_id_params = {}
+ if self._filter.person_uuid:
+ person_id_clause = "AND person_id = %(person_uuid)s"
+ person_id_params = {"person_uuid": self._filter.person_uuid}
+ return person_id_clause, person_id_params
+
+
+class SessionRecordingListFromReplaySummary(SessionRecordingList):
+ SESSION_RECORDINGS_DEFAULT_LIMIT = 50
+
+ def __init__(
+ self,
+ **kwargs,
+ ):
+ super().__init__(
+ **kwargs,
+ )
+ self.ttl_days = (get_instance_setting("RECORDINGS_TTL_WEEKS") or 3) * 7
+
+ _session_recordings_query: str = """
+ SELECT
+ s.session_id,
+ any(s.team_id),
+ any(s.distinct_id),
+ min(s.min_first_timestamp) as start_time,
+ max(s.max_last_timestamp) as end_time,
+ dateDiff('SECOND', start_time, end_time) as duration,
+ argMinMerge(s.first_url) as first_url,
+ sum(s.click_count),
+ sum(s.keypress_count),
+ sum(s.mouse_activity_count),
+ sum(s.active_milliseconds)/1000 as active_seconds,
+ duration-active_seconds as inactive_seconds,
+ sum(s.console_log_count) as console_log_count,
+ sum(s.console_warn_count) as console_warn_count,
+ sum(s.console_error_count) as console_error_count
+ FROM session_replay_events s
+ {events_join}
+ {persons_join}
+ WHERE s.team_id = %(team_id)s
+ -- regardless of what other filters are applied
+ -- limit by storage TTL
+ AND s.min_first_timestamp >= %(clamped_to_storage_ttl)s
+ -- we can filter on the pre-aggregated timestamp columns
+ -- because any not-the-lowest min value is _more_ greater than the min value
+ -- and any not-the-highest max value is _less_ lower than the max value
+ AND s.min_first_timestamp >= %(start_time)s
+ AND s.max_last_timestamp <= %(end_time)s
+ {provided_session_ids_clause}
+ GROUP BY session_id
+ HAVING 1=1 {duration_clause} {console_log_clause}
+ ORDER BY start_time DESC
+ LIMIT %(limit)s OFFSET %(offset)s
+ """
+
def _data_to_return(self, results: List[Any]) -> List[Dict[str, Any]]:
default_columns = [
"session_id",
@@ -188,6 +272,50 @@ join (
def limit(self):
return self._filter.limit or self.SESSION_RECORDINGS_DEFAULT_LIMIT
+ def get_query(self) -> Tuple[str, Dict[str, Any]]:
+ offset = self._filter.offset or 0
+
+ base_params = {
+ "team_id": self._team_id,
+ "limit": self.limit + 1,
+ "offset": offset,
+ "clamped_to_storage_ttl": (datetime.datetime.now() - datetime.timedelta(days=self.ttl_days)),
+ }
+
+ _, recording_start_time_params = self._get_recording_start_time_clause
+ provided_session_ids_clause, provided_session_ids_params = self._get_filter_by_provided_session_ids_clause
+ duration_clause, duration_params = self.duration_clause(self._filter.duration_type_filter)
+ console_log_clause = self._get_console_log_clause(self._filter.console_logs_filter)
+
+ events_select, events_join_params = SessionIdEventsQuery(
+ team=self._team,
+ filter=self._filter,
+ ).get_query()
+ if events_select:
+ events_select = f"JOIN ({events_select}) as e on s.session_id = e.`$session_id`"
+
+ persons_select, persons_join_params = PersonsQuery(filter=self._filter, team=self._team).get_query()
+ if persons_select:
+ persons_select = f"JOIN ({persons_select}) as pdi on pdi.distinct_id = s.distinct_id"
+
+ return (
+ self._session_recordings_query.format(
+ duration_clause=duration_clause,
+ events_join=events_select,
+ provided_session_ids_clause=provided_session_ids_clause,
+ console_log_clause=console_log_clause,
+ persons_join=persons_select,
+ ),
+ {
+ **base_params,
+ **events_join_params,
+ **recording_start_time_params,
+ **duration_params,
+ **provided_session_ids_params,
+ **persons_join_params,
+ },
+ )
+
def duration_clause(
self, duration_filter_type: Literal["duration", "active_seconds", "inactive_seconds"]
) -> Tuple[str, Dict[str, Any]]:
@@ -206,117 +334,7 @@ join (
}
return duration_clause, duration_params
- def get_query(self) -> Tuple[str, Dict[str, Any]]:
- offset = self._filter.offset or 0
-
- base_params = {
- "team_id": self._team_id,
- "limit": self.limit + 1,
- "offset": offset,
- "clamped_to_storage_ttl": (datetime.datetime.now() - datetime.timedelta(days=self.ttl_days)),
- }
-
- _, recording_start_time_params = self._get_recording_start_time_clause
- provided_session_ids_clause, provided_session_ids_params = self._get_filter_by_provided_session_ids_clause
- person_id_clause, person_id_params = self._get_person_id_clause
- duration_clause, duration_params = self.duration_clause(self._filter.duration_type_filter)
-
- console_log_clause = self._get_console_log_clause(self._filter.console_logs_filter)
-
- persons_or_events_join, persons_or_events_join_params = self._get_persons_or_events_join
-
- return (
- self._session_recordings_query.format(
- person_id_clause=person_id_clause,
- duration_clause=duration_clause,
- persons_or_events_join=persons_or_events_join,
- provided_session_ids_clause=provided_session_ids_clause,
- console_log_clause=console_log_clause,
- ),
- {
- **base_params,
- **persons_or_events_join_params,
- **recording_start_time_params,
- **person_id_params,
- **duration_params,
- **provided_session_ids_params,
- },
- )
-
- @cached_property
- def _get_persons_or_events_join(self) -> Tuple[str, Dict[str, Any]]:
- person_id_params: Dict[str, Any] = {}
- events_timestamp_clause = ""
- events_timestamp_params: Dict[str, Any] = {}
- event_filters_params = {}
- event_filters_where_conditions = ""
- event_filters_having_conditions = ""
- event_filters_having_select = ""
-
- should_join_events = self._determine_should_join_events()
- if should_join_events:
- event_filters = self.build_event_filters
- event_filters_params = event_filters.params
- event_filters_where_conditions = event_filters.where_conditions
- event_filters_having_conditions = event_filters.having_conditions
- event_filters_having_select = event_filters.having_select
-
- events_timestamp_clause, events_timestamp_params = self._get_events_timestamp_clause
-
- prop_query, prop_params = self._get_prop_groups(
- self._filter.property_groups, person_id_joined_alias=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id"
- )
-
- person_query, person_query_params = self._get_person_query()
-
- filter_persons_clause = ""
- filter_by_person_uuid_condition = ""
- should_join_persons = self._filter.person_uuid or person_query
- if should_join_persons:
- person_id_params = {
- **person_id_params,
- **person_query_params,
- "person_uuid": self._filter.person_uuid,
- **prop_params,
- }
- filter_persons_clause = person_query or ""
- filter_by_person_uuid_condition = "and person_id = %(person_uuid)s" if self._filter.person_uuid else ""
-
- join_clause = ""
- if should_join_persons and should_join_events:
- join_clause = self._raw_persons_or_events_join
- elif should_join_persons:
- join_clause = self._raw_persons_join
- elif should_join_events:
- join_clause = self._raw_events_join
-
- return (
- join_clause.format(
- event_filter_where_conditions=event_filters_where_conditions,
- event_filter_having_events_condition=event_filters_having_conditions,
- event_filter_having_events_select=event_filters_having_select,
- events_timestamp_clause=events_timestamp_clause,
- filter_persons_clause=filter_persons_clause,
- filter_by_person_uuid_condition=filter_by_person_uuid_condition,
- prop_filter_clause=prop_query,
- ),
- {
- **events_timestamp_params,
- **event_filters_params,
- **prop_params,
- **person_id_params,
- },
- )
-
- @cached_property
- def _get_person_id_clause(self) -> Tuple[str, Dict[str, Any]]:
- person_id_clause = ""
- person_id_params = {}
- if self._filter.person_uuid:
- person_id_clause = "AND person_id = %(person_uuid)s"
- person_id_params = {"person_uuid": self._filter.person_uuid}
- return person_id_clause, person_id_params
-
- def _get_console_log_clause(self, console_logs_filter: List[Literal["error", "warn", "log"]]) -> str:
+ @staticmethod
+ def _get_console_log_clause(console_logs_filter: List[Literal["error", "warn", "log"]]) -> str:
filters = [f"console_{log}_count > 0" for log in console_logs_filter]
return f"AND ({' OR '.join(filters)})" if filters else ""
diff --git a/posthog/queries/session_recordings/test/__snapshots__/test_session_recording_list_from_session_replay.ambr b/posthog/queries/session_recordings/test/__snapshots__/test_session_recording_list_from_session_replay.ambr
index a9a067a103b..970c5d3cd65 100644
--- a/posthog/queries/session_recordings/test/__snapshots__/test_session_recording_list_from_session_replay.ambr
+++ b/posthog/queries/session_recordings/test/__snapshots__/test_session_recording_list_from_session_replay.ambr
@@ -17,12 +17,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2022-12-14 00:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2022-12-27 12:00:00'
AND timestamp <= '2023-01-04 12:00:00'
AND (((event = 'custom-event'
@@ -62,12 +63,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2022-12-14 00:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2022-12-27 12:00:00'
AND timestamp <= '2023-01-04 12:00:00'
AND (((event = 'custom-event'
@@ -106,12 +108,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2022-12-14 00:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2022-12-27 12:00:00'
AND timestamp <= '2023-01-04 12:00:00'
AND (((event = 'custom-event'
@@ -151,12 +154,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2022-12-14 00:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2022-12-27 12:00:00'
AND timestamp <= '2023-01-04 12:00:00'
AND (((event = 'custom-event'
@@ -196,30 +200,48 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
- `$session_id`,
- pdi.distinct_id,
- argMax(pdi.person_id, version) as person_id
+ `$session_id`
FROM events e
- JOIN person_distinct_id2 as pdi on pdi.distinct_id = e.distinct_id
+ JOIN
+ (SELECT distinct_id,
+ argMax(person_id, version) as person_id
+ FROM person_distinct_id2 as pdi
+ INNER JOIN
+ (SELECT id
+ FROM person
+ WHERE team_id = 2
+ GROUP BY id
+ HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
+ WHERE team_id = 2
+ GROUP BY distinct_id
+ HAVING argMax(is_deleted, version) = 0
+ and person_id = '00000000-0000-0000-0000-000000000000') as pdi on pdi.distinct_id = e.distinct_id PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
+ AND timestamp >= '2020-12-21 12:00:00'
+ AND timestamp <= '2021-01-05 11:59:59'
+ AND ((event = '$pageview')
+ OR ((event = 'custom-event')))
+ GROUP BY `$session_id`
+ HAVING 1=1
+ AND hasAll(event_names, ['$pageview', 'custom-event'])) as e on s.session_id = e.`$session_id`
+ JOIN
+ (SELECT distinct_id,
+ argMax(person_id, version) as person_id
+ FROM person_distinct_id2 as pdi
INNER JOIN
(SELECT id
FROM person
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
- WHERE e.team_id = 2
- and notEmpty(`$session_id`)
- AND timestamp >= '2020-12-21 12:00:00'
- AND timestamp <= '2021-01-05 11:59:59'
- AND ((event = '$pageview')
- OR ((event = 'custom-event')))
- GROUP BY `$session_id`,
- pdi.distinct_id
+ WHERE team_id = 2
+ GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0
- and person_id = '00000000-0000-0000-0000-000000000000'
- AND hasAll(event_names, ['$pageview', 'custom-event'])) as e on s.session_id = e.`$session_id`
+ and person_id = '00000000-0000-0000-0000-000000000000') as pdi on pdi.distinct_id = s.distinct_id
WHERE s.team_id = 2
AND s.min_first_timestamp >= '2020-12-11 13:46:23'
AND s.min_first_timestamp >= '2020-12-22 00:00:00'
@@ -251,11 +273,12 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT `$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-31 20:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (1 = 1)
@@ -291,11 +314,12 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT `$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-31 20:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (1 = 1
@@ -332,11 +356,12 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT `$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-31 20:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (1 = 1
@@ -373,11 +398,12 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT `$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-31 20:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (1 = 1)
@@ -413,11 +439,12 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT `$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-31 20:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (1 = 1
@@ -454,11 +481,12 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT `$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-31 20:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (1 = 1
@@ -980,12 +1008,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview')
@@ -1022,12 +1051,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$autocapture')
@@ -1064,12 +1094,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview')
@@ -1136,12 +1167,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview')
@@ -1179,12 +1211,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview')
@@ -1222,13 +1255,41 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
- `$session_id`,
- pdi.distinct_id,
- argMax(pdi.person_id, version) as person_id
+ `$session_id`
FROM events e
- JOIN person_distinct_id2 as pdi on pdi.distinct_id = e.distinct_id
+ JOIN
+ (SELECT distinct_id,
+ argMax(person_id, version) as person_id ,
+ argMax(person_props, version) as person_props
+ FROM person_distinct_id2 as pdi
+ INNER JOIN
+ (SELECT id,
+ argMax(properties, version) as person_props
+ FROM person
+ WHERE team_id = 2
+ GROUP BY id
+ HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
+ WHERE team_id = 2
+ GROUP BY distinct_id
+ HAVING argMax(is_deleted, version) = 0) as pdi on pdi.distinct_id = e.distinct_id PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
+ AND timestamp >= '2020-12-24 12:00:00'
+ AND timestamp <= '2021-01-02 01:46:23'
+ AND (event = '$pageview'
+ AND (ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Chrome'), 0)
+ AND ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, 'email'), ''), 'null'), '^"|"$', ''), 'bla'), 0)))
+ GROUP BY `$session_id`
+ HAVING 1=1
+ AND hasAll(event_names, ['$pageview'])) as e on s.session_id = e.`$session_id`
+ JOIN
+ (SELECT distinct_id,
+ argMax(person_id, version) as person_id ,
+ argMax(person_props, version) as person_props
+ FROM person_distinct_id2 as pdi
INNER JOIN
(SELECT id,
argMax(properties, version) as person_props
@@ -1236,17 +1297,9 @@
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
- WHERE e.team_id = 2
- and notEmpty(`$session_id`)
- AND timestamp >= '2020-12-24 12:00:00'
- AND timestamp <= '2021-01-02 01:46:23'
- AND (event = '$pageview'
- AND (ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Chrome'), 0)
- AND ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, 'email'), ''), 'null'), '^"|"$', ''), 'bla'), 0)))
- GROUP BY `$session_id`,
- pdi.distinct_id
- HAVING argMax(is_deleted, version) = 0
- AND hasAll(event_names, ['$pageview'])) as e on s.session_id = e.`$session_id`
+ WHERE team_id = 2
+ GROUP BY distinct_id
+ HAVING argMax(is_deleted, version) = 0) as pdi on pdi.distinct_id = s.distinct_id
WHERE s.team_id = 2
AND s.min_first_timestamp >= '2020-12-11 13:46:23'
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
@@ -1277,12 +1330,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview'
@@ -1320,12 +1374,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview')
@@ -1362,12 +1417,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$autocapture')
@@ -1404,7 +1460,7 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2 as pdi
@@ -1453,12 +1509,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview'
@@ -1496,12 +1553,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview'
@@ -1539,12 +1597,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview'
@@ -1582,12 +1641,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND (event = '$pageview'
@@ -1625,12 +1685,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-31 20:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (event = '$pageview')
@@ -1668,12 +1729,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-31 20:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND (event = '$pageview')
@@ -1710,12 +1772,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-31 20:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
AND ((event = '$pageview')
@@ -2146,7 +2209,7 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2 as pdi
@@ -2215,21 +2278,32 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
- `$session_id`,
- pdi.distinct_id,
- argMax(pdi.person_id, version) as person_id
+ `$session_id`
FROM events e
- JOIN person_distinct_id2 as pdi on pdi.distinct_id = e.distinct_id
- INNER JOIN
- (SELECT id
- FROM person
+ JOIN
+ (SELECT distinct_id,
+ argMax(person_id, version) as person_id
+ FROM person_distinct_id2 as pdi
+ INNER JOIN
+ (SELECT id
+ FROM person
+ WHERE team_id = 2
+ GROUP BY id
+ HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE team_id = 2
- GROUP BY id
- HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
- WHERE e.team_id = 2
- and notEmpty(`$session_id`)
+ AND (pdi.person_id IN
+ (SELECT DISTINCT person_id
+ FROM cohortpeople
+ WHERE team_id = 2
+ AND cohort_id = 2
+ AND version = 0 ))
+ GROUP BY distinct_id
+ HAVING argMax(is_deleted, version) = 0) as pdi on pdi.distinct_id = e.distinct_id PREWHERE team_id = 2
+ AND e.timestamp >= '2021-07-31 20:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-08-13 12:00:00'
AND timestamp <= '2021-08-22 08:00:00'
AND (event = '$pageview')
@@ -2239,10 +2313,28 @@
WHERE team_id = 2
AND cohort_id = 2
AND version = 0 ))
- GROUP BY `$session_id`,
- pdi.distinct_id
- HAVING argMax(is_deleted, version) = 0
+ GROUP BY `$session_id`
+ HAVING 1=1
AND hasAll(event_names, ['$pageview'])) as e on s.session_id = e.`$session_id`
+ JOIN
+ (SELECT distinct_id,
+ argMax(person_id, version) as person_id
+ FROM person_distinct_id2 as pdi
+ INNER JOIN
+ (SELECT id
+ FROM person
+ WHERE team_id = 2
+ GROUP BY id
+ HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
+ WHERE team_id = 2
+ AND (pdi.person_id IN
+ (SELECT DISTINCT person_id
+ FROM cohortpeople
+ WHERE team_id = 2
+ AND cohort_id = 2
+ AND version = 0 ))
+ GROUP BY distinct_id
+ HAVING argMax(is_deleted, version) = 0) as pdi on pdi.distinct_id = s.distinct_id
WHERE s.team_id = 2
AND s.min_first_timestamp >= '2021-07-31 20:00:00'
AND s.min_first_timestamp >= '2021-08-14 00:00:00'
@@ -2273,21 +2365,32 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
- `$session_id`,
- pdi.distinct_id,
- argMax(pdi.person_id, version) as person_id
+ `$session_id`
FROM events e
- JOIN person_distinct_id2 as pdi on pdi.distinct_id = e.distinct_id
- INNER JOIN
- (SELECT id
- FROM person
+ JOIN
+ (SELECT distinct_id,
+ argMax(person_id, version) as person_id
+ FROM person_distinct_id2 as pdi
+ INNER JOIN
+ (SELECT id
+ FROM person
+ WHERE team_id = 2
+ GROUP BY id
+ HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
WHERE team_id = 2
- GROUP BY id
- HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
- WHERE e.team_id = 2
- and notEmpty(`$session_id`)
+ AND (pdi.person_id IN
+ (SELECT DISTINCT person_id
+ FROM cohortpeople
+ WHERE team_id = 2
+ AND cohort_id = 2
+ AND version = 0 ))
+ GROUP BY distinct_id
+ HAVING argMax(is_deleted, version) = 0) as pdi on pdi.distinct_id = e.distinct_id PREWHERE team_id = 2
+ AND e.timestamp >= '2021-07-31 20:00:00'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2021-08-13 12:00:00'
AND timestamp <= '2021-08-22 08:00:00'
AND (event = 'custom_event')
@@ -2297,10 +2400,28 @@
WHERE team_id = 2
AND cohort_id = 2
AND version = 0 ))
- GROUP BY `$session_id`,
- pdi.distinct_id
- HAVING argMax(is_deleted, version) = 0
+ GROUP BY `$session_id`
+ HAVING 1=1
AND hasAll(event_names, ['custom_event'])) as e on s.session_id = e.`$session_id`
+ JOIN
+ (SELECT distinct_id,
+ argMax(person_id, version) as person_id
+ FROM person_distinct_id2 as pdi
+ INNER JOIN
+ (SELECT id
+ FROM person
+ WHERE team_id = 2
+ GROUP BY id
+ HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id
+ WHERE team_id = 2
+ AND (pdi.person_id IN
+ (SELECT DISTINCT person_id
+ FROM cohortpeople
+ WHERE team_id = 2
+ AND cohort_id = 2
+ AND version = 0 ))
+ GROUP BY distinct_id
+ HAVING argMax(is_deleted, version) = 0) as pdi on pdi.distinct_id = s.distinct_id
WHERE s.team_id = 2
AND s.min_first_timestamp >= '2021-07-31 20:00:00'
AND s.min_first_timestamp >= '2021-08-14 00:00:00'
@@ -2331,12 +2452,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND ((event = '$pageview')
@@ -2374,12 +2496,13 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT groupUniqArray(event) as event_names,
`$session_id`
- FROM events e
- WHERE team_id = 2
- and notEmpty(`$session_id`)
+ FROM events e PREWHERE team_id = 2
+ AND e.timestamp >= '2020-12-11 13:46:23'
+ AND e.timestamp <= now()
+ WHERE notEmpty(`$session_id`)
AND timestamp >= '2020-12-24 12:00:00'
AND timestamp <= '2021-01-02 01:46:23'
AND ((event = '$pageview')
@@ -2417,7 +2540,7 @@
sum(s.console_warn_count) as console_warn_count,
sum(s.console_error_count) as console_error_count
FROM session_replay_events s
- join
+ JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2 as pdi