2022-01-06 16:38:29 +01:00
|
|
|
from typing import Dict, Optional, Tuple, cast
|
2021-09-22 11:53:10 +02:00
|
|
|
|
2021-12-01 07:46:14 +01:00
|
|
|
from ee.clickhouse.queries.actor_base_query import ActorBaseQuery
|
2021-09-22 11:53:10 +02:00
|
|
|
from ee.clickhouse.queries.paths.paths import ClickhousePaths
|
|
|
|
from posthog.models.filters.filter import Filter
|
|
|
|
|
|
|
|
|
2021-12-01 13:10:29 +01:00
|
|
|
class ClickhousePathsActors(ClickhousePaths, ActorBaseQuery):  # type: ignore
    """
    Actors query for Paths, narrowed by three optional filter params:
    `path_start_key`, `path_end_key`, and `path_dropoff_key`.

    How the keys combine:

    * none given: "get me all users on this path query".
    * only `path_start_key`: "get me all users whose paths start at this key".
    * only `path_end_key`: "get me all users whose paths end at this key".
    * both start and end: "get me all users whose path starts at `start_key`
      and ends at `end_key`".
    * `path_dropoff_key`: "get me users who dropped off after this key". When
      this key is given, the start and end keys are not used.

    Note that persons are calculated only between direct paths: there should
    not be any other path item between the start and end key.
    """

    def actor_query(self, limit_actors: Optional[bool] = True) -> Tuple[str, Dict]:
        """
        Build the SQL text and parameter dict for the actors-on-path query.

        `limit` and `offset` are always copied from the filter into
        `self.params`; the LIMIT / OFFSET clauses themselves are only emitted
        when `limit_actors` is truthy.

        When the filter has `include_recordings` set, rows are grouped by
        person and a `matching_events` column is selected in addition to
        `actor_id`; otherwise only distinct person ids are selected.

        Returns a `(query, params)` tuple, where `params` is `self.params`.
        """
        # get_person_path_filter writes into self.params, so the helper call
        # order is kept exactly as it was.
        per_person_sql = self.get_paths_per_person_query()
        where_clause = self.get_person_path_filter()
        funnel_cte = (
            self.get_path_query_funnel_cte(cast(Filter, self._funnel_filter)) if self.should_query_funnel() else ""
        )

        if self._filter.include_recordings:
            select_clause = """
                person_id AS actor_id
                , groupUniqArray(10)((uuid, timestamp, $session_id, $window_id)) as matching_events
            """
            group_clause = "GROUP BY person_id"
        else:
            select_clause = "DISTINCT person_id AS actor_id"
            group_clause = ""

        self.params.update({"limit": self._filter.limit, "offset": self._filter.offset})

        # Pagination fragments collapse to empty strings when not requested.
        limit_clause = "LIMIT %(limit)s" if limit_actors else ""
        offset_clause = "OFFSET %(offset)s" if limit_actors else ""

        sql = f"""
            {funnel_cte}
            SELECT
                {select_clause}
            FROM (
                {per_person_sql}
            )
            WHERE {where_clause}
            {group_clause}
            ORDER BY person_id
            {limit_clause}
            {offset_clause}
        """
        return sql, self.params

    def get_person_path_filter(self) -> str:
        """
        Return the WHERE condition that selects the requested path edge.

        Side effect: every key that contributes a condition is also stored in
        `self.params` under its own name, so the placeholder can be resolved.

        A drop-off key takes precedence and makes the start / end keys
        irrelevant. With no keys at all the neutral condition "1=1" is
        returned.
        """
        dropoff_key = self._filter.path_dropoff_key
        if dropoff_key:
            self.params["path_dropoff_key"] = dropoff_key
            return "path_dropoff_key = %(path_dropoff_key)s AND path_dropoff_key = path_key"

        # (filter attribute / placeholder name, column it is compared against)
        edge_columns = (
            ("path_start_key", "last_path_key"),
            ("path_end_key", "path_key"),
        )
        clauses = []
        for key_name, column in edge_columns:
            key_value = getattr(self._filter, key_name)
            if key_value:
                clauses.append(f"{column} = %({key_name})s")
                self.params[key_name] = key_value

        return " AND ".join(clauses) or "1=1"
|