mirror of
https://github.com/PostHog/posthog.git
synced 2024-12-01 04:12:23 +01:00
c595976779
* fix(retention): fix breakdown people urls

  This change returns people_url for each breakdown cohort in the response. We also merge the initial and returning queries together, as this makes it easier to align the people query also. Note that I'm talking about person_id as opposed to actor_type etc. but perhaps that can be a followup.
* clean up clickhouse params
* tidy up a little
* remove import
* remove non-breakdown specific code
* make cohort by initial event date a special breakdown case
* keep date for backwards compat
* Remove unused sql
* make test stable
* wip
* Get most of the tests working
* test(retention): remove graph retention test

  We no longer need this, we have all the information we need from the table response for retention, and can construct this on the frontend.
* revert any changes to posthog/queries/retention.py
* revert any changes to ee/clickhouse/models/person.py
* Revert posthog/queries/retention.py to merge-base
* Ensure actor id is a str
* Add type for actor serialiser for type narrowing
* run black
* sort imports
* Remove retention_actors.py
* fix typings
* format
* reverse str type
* sort imports
* rename
* split out functions
* remove duplicate logic
* working
* fix type
* don't stringify
* fix test
* ordering doesn't matter
* trigger ci

Co-authored-by: eric <eeoneric@gmail.com>
168 lines
6.6 KiB
Python
168 lines
6.6 KiB
Python
from typing import Any, Dict, List, NamedTuple, Tuple, Union
|
|
from urllib.parse import urlencode
|
|
|
|
from ee.clickhouse.client import substitute_params, sync_execute
|
|
from ee.clickhouse.queries.retention.retention_event_query import RetentionEventsQuery
|
|
from ee.clickhouse.sql.retention.retention import RETENTION_BREAKDOWN_SQL
|
|
from posthog.constants import RETENTION_FIRST_TIME, RetentionQueryType
|
|
from posthog.models.filters import RetentionFilter
|
|
from posthog.models.filters.retention_filter import RetentionFilter
|
|
from posthog.models.team import Team
|
|
|
|
# One entry per breakdown dimension; each entry is a string or an integer.
BreakdownValues = Tuple[Union[str, int], ...]


class CohortKey(NamedTuple):
    """Dictionary key pairing a cohort's breakdown values with a period index."""

    breakdown_values: BreakdownValues
    period: int
|
|
|
|
|
|
class ClickhouseRetention:
    """Runs the ClickHouse retention query and shapes the rows for the REST API."""

    def __init__(self, base_uri="/"):
        # Prefix used when building the per-cell people URLs.
        self._base_uri = base_uri

    def run(self, filter: RetentionFilter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
        """
        Execute the retention query and return the table response.

        The breakdown-shaped response is used when the filter carries
        breakdowns; otherwise the zero-filled, per-cohort table is returned.
        Extra positional and keyword arguments are accepted and ignored.
        """
        cohorts = self._get_retention_by_breakdown_values(filter, team)

        if not filter.breakdowns:
            return self.process_table_result(cohorts, filter)
        return self.process_breakdown_table_result(cohorts, filter)

    def _get_retention_by_breakdown_values(
        self, filter: RetentionFilter, team: Team,
    ) -> Dict[CohortKey, Dict[str, Any]]:
        """
        Query ClickHouse and index the rows by (breakdown values, interval).

        Each value holds the row's count, an empty "people" list and the URL
        from which the matching people can be fetched.
        """
        from ee.clickhouse.queries.retention.retention_actors import build_actor_activity_query

        actor_query = build_actor_activity_query(filter=filter, team=team)
        rows = sync_execute(RETENTION_BREAKDOWN_SQL.format(actor_query=actor_query,))

        retention: Dict[CohortKey, Dict[str, Any]] = {}
        for breakdown_values, intervals_from_base, count in rows:
            cohort_key = CohortKey(tuple(breakdown_values), intervals_from_base)
            retention[cohort_key] = {
                "count": count,
                "people": [],
                "people_url": self._construct_people_url_for_trend_breakdown_interval(
                    filter=filter, breakdown_values=breakdown_values, selected_interval=intervals_from_base,
                ),
            }

        return retention

    def _construct_people_url_for_trend_breakdown_interval(
        self, filter: RetentionFilter, selected_interval: int, breakdown_values: BreakdownValues,
    ):
        """Build the people URL for one (breakdown values, interval) cell, rooted at the base URI."""
        overrides = {"breakdown_values": breakdown_values, "selected_interval": selected_interval}
        cell_filter = RetentionFilter({**filter._data, **overrides})
        query_string = urlencode(cell_filter.to_params())
        return f"{self._base_uri}api/person/retention/?{query_string}"

    def process_breakdown_table_result(
        self, resultset: Dict[CohortKey, Dict[str, Any]], filter: RetentionFilter,
    ):
        """
        Construct the REST response for the breakdown case.

        One row is emitted per distinct breakdown value tuple found in
        `resultset`; intervals with no data get a zero-count placeholder.
        Rows come out in set iteration order, so their order is unspecified.
        """
        distinct_breakdowns = {cohort_key.breakdown_values for cohort_key in resultset}

        rows = []
        for breakdown_values in distinct_breakdowns:
            values = [
                resultset.get(CohortKey(breakdown_values, interval), {"count": 0, "people": []})
                for interval in range(filter.total_intervals)
            ]
            label = "::".join(str(value) for value in breakdown_values)
            row_filter = RetentionFilter(
                {**filter._data, "display": "ActionsTable", "breakdown_values": breakdown_values}
            )
            # NOTE(review): this prefix is hard-coded rather than derived from
            # self._base_uri like the per-cell URLs -- confirm that is intended.
            people_url = "/api/person/retention/?" + urlencode(row_filter.to_params())

            rows.append(
                {
                    "values": values,
                    "label": label,
                    "breakdown_values": breakdown_values,
                    "people_url": people_url,
                }
            )

        return rows

    def process_table_result(
        self, resultset: Dict[CohortKey, Dict[str, Any]], filter: RetentionFilter,
    ):
        """
        Construct the REST response when no breakdown is specified.

        The non-breakdown case is handled separately from the breakdown case
        so compatibility with the pre-breakdown response is easy to maintain.
        The key difference is that cohorts are "zero filled": there is a row
        for every cohort in the requested range, whether or not it has data.
        """
        rows = []
        for first_day in range(filter.total_intervals):
            # Later cohorts have fewer intervals left inside the range.
            values = [
                resultset.get(CohortKey((first_day,), day), {"count": 0, "people": []})
                for day in range(filter.total_intervals - first_day)
            ]
            label = f"{filter.period} {first_day}"
            date = filter.date_from + RetentionFilter.determine_time_delta(first_day, filter.period)[0]

            cohort_filter = RetentionFilter(
                {**filter._data, "display": "ActionsTable", "breakdown_values": [first_day]}
            )
            # NOTE(review): hard-coded prefix, self._base_uri is not used here -- confirm intended.
            people_url = "/api/person/retention/?" + urlencode(cohort_filter.to_params())

            rows.append({"values": values, "label": label, "date": date, "people_url": people_url})

        return rows

    def actors(self, filter: RetentionFilter, team: Team):
        """Return the serialized actors produced by ClickhouseRetentionActors for this filter."""
        from ee.clickhouse.queries.retention.retention_actors import ClickhouseRetentionActors

        actors_query = ClickhouseRetentionActors(team=team, filter=filter)
        # get_actors() returns a pair; only the second (serialized) element is used.
        _, serialized_actors = actors_query.get_actors()
        return serialized_actors

    def actors_in_period(self, filter: RetentionFilter, team: Team):
        """
        Creates a response of the form

        ```
        [
            {
                "person": {"distinct_id": ..., ...},
                "appearance_count": 3,
                "appearances": [1, 0, 1, 1, 0, 0]
            }
            ...
        ]
        ```

        where each value in `appearances` says whether the person was active
        in an interval; the list index is the interval it refers to.
        """
        from ee.clickhouse.queries.retention.retention_actors import ClickhouseRetentionActorsByPeriod

        by_period = ClickhouseRetentionActorsByPeriod(team=team, filter=filter)
        return by_period.actors()
|
|
|
|
|
|
def build_returning_event_query(filter: RetentionFilter, team: Team):
    """
    Return the "returning event" query with its parameters substituted in.

    The query is built from a copy of `filter` whose breakdowns are cleared.
    """
    # Avoid pulling in breakdown values from returning event query
    filter_without_breakdowns = filter.with_data({"breakdowns": []})

    events_query = RetentionEventsQuery(
        filter=filter_without_breakdowns, team_id=team.pk, event_query_type=RetentionQueryType.RETURNING,
    )
    templated_sql, params = events_query.get_query()

    return substitute_params(templated_sql, params)
|
|
|
|
|
|
def build_target_event_query(filter: RetentionFilter, team: Team):
    """
    Return the "target event" query with its parameters substituted in.

    The TARGET_FIRST_TIME query type is used when the filter's retention type
    is RETENTION_FIRST_TIME; every other retention type uses TARGET.
    """
    if filter.retention_type == RETENTION_FIRST_TIME:
        query_type = RetentionQueryType.TARGET_FIRST_TIME
    else:
        query_type = RetentionQueryType.TARGET

    events_query = RetentionEventsQuery(filter=filter, team_id=team.pk, event_query_type=query_type)
    templated_sql, params = events_query.get_query()

    return substitute_params(templated_sql, params)
|