2021-06-22 17:44:55 +02:00
|
|
|
from abc import ABCMeta, abstractmethod
|
|
|
|
from typing import Any, Dict, List, Tuple
|
2021-06-10 20:47:33 +02:00
|
|
|
|
|
|
|
from ee.clickhouse.models.cohort import format_person_query, get_precalculated_query, is_precalculated_query
|
|
|
|
from ee.clickhouse.models.property import filter_element, prop_filter_json_extract
|
2021-06-22 17:44:55 +02:00
|
|
|
from ee.clickhouse.queries.util import parse_timestamps
|
2021-06-28 16:20:08 +02:00
|
|
|
from posthog.models import Cohort, Filter, Property, Team
|
2021-06-10 20:47:33 +02:00
|
|
|
|
|
|
|
|
2021-06-22 17:44:55 +02:00
|
|
|
class ClickhouseEventQuery(metaclass=ABCMeta):
    """Abstract base for ClickHouse queries built around the events table.

    Subclasses must implement:
      * ``get_query`` — build the final SQL string and its parameters.
      * ``_determine_should_join_distinct_ids`` — decide whether the
        ``person_distinct_id`` table join is required.

    This base class decides whether the ``person`` table join is needed
    (person properties, cohorts that require person data, person
    breakdowns, or test-account filters) and provides the shared SQL
    fragments for joins, date filtering, and property filtering.
    """

    DISTINCT_ID_TABLE_ALIAS = "pdi"
    PERSON_TABLE_ALIAS = "person"
    EVENT_TABLE_ALIAS = "e"
    _PERSON_PROPERTIES_ALIAS = "person_props"

    # Declared here for type checkers; assigned in __init__.
    _filter: Filter
    _team_id: int
    _should_join_distinct_ids = False
    _should_join_persons = False
    _should_round_interval = False

    def __init__(
        self,
        filter: Filter,
        team_id: int,
        round_interval=False,
        should_join_distinct_ids=False,
        should_join_persons=False,
        **kwargs,
    ) -> None:
        """Store the filter/team and work out which joins are required.

        ``should_join_distinct_ids`` / ``should_join_persons`` let callers
        force the joins on; when left False the need is inferred from the
        filter's properties, breakdown, and test-account settings.
        """
        self._filter = filter
        self._team_id = team_id
        self.params: Dict[str, Any] = {
            "team_id": self._team_id,
        }

        self._should_join_distinct_ids = should_join_distinct_ids
        self._should_join_persons = should_join_persons

        # Only inspect the filter when the caller has not already forced
        # the join on — the determine methods never unset the flags.
        if not self._should_join_distinct_ids:
            self._determine_should_join_distinct_ids()

        if not self._should_join_persons:
            self._determine_should_join_persons()

        self._should_round_interval = round_interval

    @abstractmethod
    def get_query(self) -> Tuple[str, Dict[str, Any]]:
        """Return the complete SQL query and its bind parameters."""
        pass

    @abstractmethod
    def _determine_should_join_distinct_ids(self) -> None:
        """Set ``self._should_join_distinct_ids`` based on the query's needs."""
        pass

    def _get_disintct_id_query(self) -> str:
        """Return the JOIN clause for the latest person_distinct_id rows.

        NOTE(review): "disintct" is a historical typo in this method name,
        kept so existing callers keep working; prefer the correctly
        spelled ``_get_distinct_id_query`` alias below in new code.

        Picks, per distinct_id, the row with the highest ``_offset``
        (i.e. the most recent mapping) and joins it onto events.
        Returns an empty string when the join is not needed.
        """
        if self._should_join_distinct_ids:
            return f"""
            INNER JOIN (
                SELECT person_id,
                    distinct_id
                FROM (
                    SELECT *
                    FROM person_distinct_id
                    JOIN (
                        SELECT distinct_id,
                            max(_offset) as _offset
                        FROM person_distinct_id
                        WHERE team_id = %(team_id)s
                        GROUP BY distinct_id
                    ) as person_max
                    USING (distinct_id, _offset)
                    WHERE team_id = %(team_id)s
                )
                WHERE team_id = %(team_id)s
            ) AS {self.DISTINCT_ID_TABLE_ALIAS}
            ON events.distinct_id = {self.DISTINCT_ID_TABLE_ALIAS}.distinct_id
            """
        else:
            return ""

    # Correctly-spelled alias for the method above (same object, so any
    # subclass override of either name is seen by neither automatically —
    # callers should converge on this spelling).
    _get_distinct_id_query = _get_disintct_id_query

    def _determine_should_join_persons(self) -> None:
        """Set the persons (and distinct_ids) join flags if any part of the
        filter needs person-level data.

        Triggers on: person properties, cohort properties whose cohort
        needs person data, a "person" breakdown, or person properties in
        the team's test-account filters.
        """
        for prop in self._filter.properties:
            if prop.type == "person":
                # Person table is only reachable through the distinct_ids join.
                self._should_join_distinct_ids = True
                self._should_join_persons = True
                return

            if prop.type == "cohort" and self._does_cohort_need_persons(prop):
                self._should_join_distinct_ids = True
                self._should_join_persons = True
                return

        if self._filter.breakdown_type == "person":
            self._should_join_distinct_ids = True
            self._should_join_persons = True

        if self._filter.filter_test_accounts:
            test_account_filters = Team.objects.only("test_account_filters").get(id=self._team_id).test_account_filters
            test_filter_props = [Property(**prop) for prop in test_account_filters]
            for prop in test_filter_props:
                if prop.type == "person":
                    self._should_join_distinct_ids = True
                    self._should_join_persons = True
                    return

    def _does_cohort_need_persons(self, prop: Property) -> bool:
        """Return True when the cohort referenced by ``prop`` requires the
        person table (precalculated cohorts or property-based groups).

        A missing cohort is treated as not needing persons.
        """
        try:
            cohort = Cohort.objects.get(pk=prop.value, team_id=self._team_id)
        except Cohort.DoesNotExist:
            return False
        if is_precalculated_query(cohort):
            return True
        for group in cohort.groups:
            if group.get("properties"):
                return True
        return False

    def _get_person_query(self) -> str:
        """Return the JOIN clause for the latest, non-deleted person rows.

        For each person id, takes the properties of the row with the
        greatest ``_timestamp`` and drops persons whose latest state is
        deleted. Returns an empty string when the join is not needed.
        """
        if self._should_join_persons:
            return f"""
            INNER JOIN (
                SELECT id, properties as person_props
                FROM (
                    SELECT id,
                        argMax(properties, person._timestamp) as properties,
                        max(is_deleted) as is_deleted
                    FROM person
                    WHERE team_id = %(team_id)s
                    GROUP BY id
                    HAVING is_deleted = 0
                )
            ) {self.PERSON_TABLE_ALIAS}
            ON {self.PERSON_TABLE_ALIAS}.id = {self.DISTINCT_ID_TABLE_ALIAS}.person_id
            """
        else:
            return ""

    def _get_date_filter(self) -> Tuple[str, Dict]:
        """Return the date-range WHERE fragment and its bind parameters."""
        parsed_date_from, parsed_date_to, date_params = parse_timestamps(filter=self._filter, team_id=self._team_id)

        query = f"""
        {parsed_date_from}
        {parsed_date_to}
        """

        return query, date_params

    def _get_props(self, filters: List[Property], allow_denormalized_props: bool = False) -> Tuple[str, Dict]:
        """Build the "AND ..." WHERE fragment for a list of property filters.

        Handles cohort, person, element, and event properties; when the
        filter requests it, the team's test-account filters are applied
        as well. Returns the clause string and its bind parameters.

        Unlike the previous implementation, this does NOT mutate the
        ``filters`` argument (it used to ``extend`` it in place, leaking
        the test-account properties back to the caller).
        """
        team_id = self._team_id
        prepend = "global"

        final = []
        params: Dict[str, Any] = {}

        # Work on a copy so appending test-account filters cannot mutate
        # the caller's list.
        props = list(filters)
        if self._filter.filter_test_accounts:
            test_account_filters = Team.objects.only("test_account_filters").get(id=team_id).test_account_filters
            props.extend([Property(**prop) for prop in test_account_filters])

        for idx, prop in enumerate(props):
            if prop.type == "cohort":
                person_id_query, cohort_filter_params = self._get_cohort_subquery(prop)
                params = {**params, **cohort_filter_params}
                final.append(f"AND {person_id_query}")

            elif prop.type == "person":
                filter_query, filter_params = prop_filter_json_extract(
                    prop,
                    idx,
                    "{}person".format(prepend),
                    allow_denormalized_props=allow_denormalized_props,
                    prop_var=self._PERSON_PROPERTIES_ALIAS,
                )
                final.append(filter_query)
                params.update(filter_params)
            elif prop.type == "element":
                query, filter_params = filter_element({prop.key: prop.value}, prepend="{}_".format(idx))
                # filter_element returns a list of clauses; only the first
                # is used here, matching prior behavior.
                final.append("AND {}".format(query[0]))
                params.update(filter_params)
            else:
                filter_query, filter_params = prop_filter_json_extract(
                    prop, idx, prepend, prop_var="properties", allow_denormalized_props=allow_denormalized_props,
                )

                final.append(filter_query)
                params.update(filter_params)
        return " ".join(final), params

    def _get_cohort_subquery(self, prop) -> Tuple[str, Dict[str, Any]]:
        """Return a person-id match expression for a cohort property.

        Uses the precalculated cohort table when available, otherwise
        formats the cohort's groups into a subquery. A missing cohort
        yields the always-false clause "0 = 1".
        """
        try:
            cohort: Cohort = Cohort.objects.get(pk=prop.value, team_id=self._team_id)
        except Cohort.DoesNotExist:
            return "0 = 1", {}  # If cohort doesn't exist, nothing can match

        is_precalculated = is_precalculated_query(cohort)

        person_id_query, cohort_filter_params = (
            get_precalculated_query(cohort, custom_match_field=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id")
            if is_precalculated
            else format_person_query(cohort, custom_match_field=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id")
        )

        return person_id_query, cohort_filter_params