0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-28 18:26:15 +01:00
posthog/ee/clickhouse/queries/retention/retention_event_query.py
Harry Waye c595976779
fix(retention): fix breakdown people urls (#7642)
* fix(retention): fix breakdown people urls

This change returns people_url for each breakdown cohort in the
response. We also merge the initial and returning queries together,
as this makes it easier to align the people query also.

Note that I'm talking about person_id as opposed to actor_type etc.
but perhaps that can be a followup.

* clean up clickhouse params

* tidy up a little

* remove import

* remove non-breakdown specific code

* make cohort by initial event date a special breakdown case

* keep date for backwards compat

* Remove unused sql

* make test stable

* wip

* Get most of the tests working

* test(retention): remove graph retention test

We no longer need this, we have all the information we need from the
table response for retention, and can construct this on the frontend.

* revert any changes to posthog/queries/retention.py

* revert any changes to ee/clickhouse/models/person.py

* Revert posthog/queries/retention.py to merge-base

* Ensure actor id is a str

* Add type for actor serialiser for type narrowing

* run black

* sort imports

* Remove retention_actors.py

* fix typings

* format

* reverse str type

* sort imports

* rename

* split out functions

* remove deuplicate logic

* working

* fix type

* don't stringify

* fix test

* ordering doesn't matter

* trigger ci

Co-authored-by: eric <eeoneric@gmail.com>
2021-12-15 18:20:56 +00:00

187 lines
8.5 KiB
Python

from typing import Any, Dict, Literal, Tuple, Union, cast
from ee.clickhouse.models.action import format_action_filter
from ee.clickhouse.models.group import get_aggregation_target_field
from ee.clickhouse.models.property import get_single_or_multi_property_string_expr
from ee.clickhouse.queries.event_query import ClickhouseEventQuery
from ee.clickhouse.queries.util import get_trunc_func_ch
from posthog.constants import (
PAGEVIEW_EVENT,
TREND_FILTER_TYPE_ACTIONS,
TREND_FILTER_TYPE_EVENTS,
TRENDS_LINEAR,
RetentionQueryType,
)
from posthog.models import Entity
from posthog.models.action import Action
from posthog.models.filters.retention_filter import RetentionFilter
class RetentionEventsQuery(ClickhouseEventQuery):
_filter: RetentionFilter
_event_query_type: RetentionQueryType
_trunc_func: str
def __init__(self, filter: RetentionFilter, event_query_type: RetentionQueryType, team_id: int):
self._event_query_type = event_query_type
super().__init__(filter=filter, team_id=team_id)
self._trunc_func = get_trunc_func_ch(self._filter.period)
def get_query(self) -> Tuple[str, Dict[str, Any]]:
_fields = [
self.get_timestamp_field(),
f"{get_aggregation_target_field(self._filter.aggregation_group_type_index, self.EVENT_TABLE_ALIAS, self.DISTINCT_ID_TABLE_ALIAS)} as target",
(
f"argMin(e.uuid, {self._trunc_func}(e.timestamp)) as min_uuid"
if self._event_query_type == RetentionQueryType.TARGET_FIRST_TIME
else f"{self.EVENT_TABLE_ALIAS}.uuid AS uuid"
),
(
f"argMin(e.event, {self._trunc_func}(e.timestamp)) as min_event"
if self._event_query_type == RetentionQueryType.TARGET_FIRST_TIME
else f"{self.EVENT_TABLE_ALIAS}.event AS event"
),
]
if self._filter.breakdowns and self._filter.breakdown_type:
# NOTE: `get_single_or_multi_property_string_expr` doesn't
# support breakdowns with different types e.g. a person property
# then an event property, so for now we just take the type of
# the self._filter.breakdown_type.
# TODO: update 'get_single_or_multi_property_string_expr` to take
# `Breakdown` type
breakdown_type = self._filter.breakdown_type
table = "events"
if breakdown_type == "person":
table = "person"
breakdown_values_expression = get_single_or_multi_property_string_expr(
breakdown=[breakdown["property"] for breakdown in self._filter.breakdowns],
table=cast(Union[Literal["events"], Literal["person"]], table),
query_alias=None,
)
if self._event_query_type == RetentionQueryType.TARGET_FIRST_TIME:
_fields += [f"argMin({breakdown_values_expression}, e.timestamp) AS breakdown_values"]
else:
_fields += [f"{breakdown_values_expression} AS breakdown_values"]
else:
# If we didn't have a breakdown specified, we default to the
# initial event interval
# NOTE: we wrap as an array to maintain the same structure as
# for typical breakdowns
# NOTE: we could add support for specifying expressions to
# `get_single_or_multi_property_string_expr` or an abstraction
# over the top somehow
# NOTE: we use the datediff rather than the date to make our
# lives easier when zero filling the response. We could however
# handle this WITH FILL within the query.
if self._event_query_type == RetentionQueryType.TARGET_FIRST_TIME:
_fields += [
f"""
[
dateDiff(
%(period)s,
{self._trunc_func}(toDateTime(%(start_date)s)),
{self._trunc_func}(min(e.timestamp))
)
] as breakdown_values
"""
]
elif self._event_query_type == RetentionQueryType.TARGET:
_fields += [
f"""
[
dateDiff(
%(period)s,
{self._trunc_func}(toDateTime(%(start_date)s)),
{self._trunc_func}(e.timestamp)
)
] as breakdown_values
"""
]
self.params.update({"start_date": self._filter.date_from, "period": self._filter.period})
date_query, date_params = self._get_date_filter()
self.params.update(date_params)
prop_filters = [*self._filter.properties]
prop_query, prop_params = self._get_props(prop_filters)
self.params.update(prop_params)
entity_query, entity_params = self._get_entity_query(
entity=self._filter.target_entity
if self._event_query_type == RetentionQueryType.TARGET
or self._event_query_type == RetentionQueryType.TARGET_FIRST_TIME
else self._filter.returning_entity
)
self.params.update(entity_params)
person_query, person_params = self._get_person_query()
self.params.update(person_params)
groups_query, groups_params = self._get_groups_query()
self.params.update(groups_params)
query = f"""
SELECT {','.join(_fields)} FROM events {self.EVENT_TABLE_ALIAS}
{self._get_distinct_id_query()}
{person_query}
{groups_query}
WHERE team_id = %(team_id)s
{f"AND {entity_query}"}
{f"AND {date_query}" if self._event_query_type != RetentionQueryType.TARGET_FIRST_TIME else ''}
{prop_query}
{f"GROUP BY target HAVING {date_query}" if self._event_query_type == RetentionQueryType.TARGET_FIRST_TIME else ''}
"""
return query, self.params
def get_timestamp_field(self) -> str:
if self._event_query_type == RetentionQueryType.TARGET:
return f"DISTINCT {self._trunc_func}({self.EVENT_TABLE_ALIAS}.timestamp) AS event_date"
elif self._event_query_type == RetentionQueryType.TARGET_FIRST_TIME:
return f"min({self._trunc_func}(e.timestamp)) as event_date"
else:
return f"{self._trunc_func}({self.EVENT_TABLE_ALIAS}.timestamp) AS event_date"
def _determine_should_join_distinct_ids(self) -> None:
if self._filter.aggregation_group_type_index is not None:
self._should_join_distinct_ids = False
else:
self._should_join_distinct_ids = True
def _get_entity_query(self, entity: Entity):
prepend = self._event_query_type
if entity.type == TREND_FILTER_TYPE_ACTIONS:
action = Action.objects.get(pk=entity.id)
action_query, params = format_action_filter(action, prepend=prepend, use_loop=False)
condition = action_query
elif entity.type == TREND_FILTER_TYPE_EVENTS:
condition = f"{self.EVENT_TABLE_ALIAS}.event = %({prepend}_event)s"
params = {f"{prepend}_event": entity.id}
else:
condition = f"{self.EVENT_TABLE_ALIAS}.event = %({prepend}_event)s"
params = {f"{prepend}_event": PAGEVIEW_EVENT}
return condition, params
def _get_date_filter(self):
query = (
f"event_date >= toDateTime(%({self._event_query_type}_start_date)s) AND event_date <= toDateTime(%({self._event_query_type}_end_date)s)"
if self._event_query_type == RetentionQueryType.TARGET_FIRST_TIME
else f"toDateTime({self.EVENT_TABLE_ALIAS}.timestamp) >= toDateTime(%({self._event_query_type}_start_date)s) AND toDateTime({self.EVENT_TABLE_ALIAS}.timestamp) <= toDateTime(%({self._event_query_type}_end_date)s)"
)
params = {
f"{self._event_query_type}_start_date": self._filter.date_from.strftime(
"%Y-%m-%d{}".format(" %H:%M:%S" if self._filter.period == "Hour" else " 00:00:00")
),
f"{self._event_query_type}_end_date": (
(self._filter.date_from + self._filter.period_increment)
if self._filter.display == TRENDS_LINEAR and self._event_query_type == RetentionQueryType.TARGET
else self._filter.date_to
).strftime("%Y-%m-%d{}".format(" %H:%M:%S" if self._filter.period == "Hour" else " 00:00:00")),
}
return query, params