mirror of
https://github.com/PostHog/posthog.git
synced 2024-12-01 12:21:02 +01:00
c595976779
* fix(retention): fix breakdown people urls This change returns people_url for each breakdown cohort in the response. We also merge the initial and returning queries together, as this makes it easier to align the people query also. Note that I'm talking about person_id as opposed to actor_type etc. but perhaps that can be a followup. * clean up clickhouse params * tidy up a little * remove import * remove non-breakdown specific code * make cohort by initial event date a special breakdown case * keep date for backwards compat * Remove unused sql * make test stable * wip * Get most of the tests working * test(retention): remove graph retention test We no longer need this, we have all the information we need from the table response for retention, and can construct this on the frontend. * revert any changes to posthog/queries/retention.py * revert any changes to ee/clickhouse/models/person.py * Revert posthog/queries/retention.py to merge-base * Ensure actor id is a str * Add type for actor serialiser for type narrowing * run black * sort imports * Remove retention_actors.py * fix typings * format * reverse str type * sort imports * rename * split out functions * remove deuplicate logic * working * fix type * don't stringify * fix test * ordering doesn't matter * trigger ci Co-authored-by: eric <eeoneric@gmail.com>
124 lines
4.2 KiB
Python
124 lines
4.2 KiB
Python
import uuid
|
|
from typing import (
|
|
Any,
|
|
Dict,
|
|
List,
|
|
Literal,
|
|
Optional,
|
|
Tuple,
|
|
TypedDict,
|
|
Union,
|
|
)
|
|
|
|
from django.db.models.query import QuerySet
|
|
|
|
from ee.clickhouse.client import sync_execute
|
|
from posthog.models import Entity, Filter, Team
|
|
from posthog.models.filters.mixins.utils import cached_property
|
|
from posthog.models.filters.retention_filter import RetentionFilter
|
|
from posthog.models.filters.stickiness_filter import StickinessFilter
|
|
from posthog.models.group import Group
|
|
from posthog.models.person import Person
|
|
|
|
|
|
class CommonAttributes(TypedDict):
|
|
id: Union[uuid.UUID, str]
|
|
created_at: Optional[str]
|
|
properties: Dict[str, Any]
|
|
|
|
|
|
class SerializedPerson(CommonAttributes):
|
|
type: Literal["person"]
|
|
is_identified: Optional[bool]
|
|
name: str
|
|
distinct_ids: List[str]
|
|
|
|
|
|
class SerializedGroup(CommonAttributes):
|
|
type: Literal["group"]
|
|
group_key: str
|
|
group_type_index: int
|
|
|
|
|
|
SerializedActor = Union[SerializedGroup, SerializedPerson]
|
|
|
|
|
|
class ActorBaseQuery:
|
|
aggregating_by_groups = False
|
|
entity: Optional[Entity] = None
|
|
|
|
def __init__(
|
|
self, team: Team, filter: Union[Filter, StickinessFilter, RetentionFilter], entity: Optional[Entity] = None
|
|
):
|
|
self._team = team
|
|
self.entity = entity
|
|
self._filter = filter
|
|
|
|
def actor_query(self) -> Tuple[str, Dict]:
|
|
""" Implemented by subclasses. Must provide query and params. The query must return list of uuids. Can be group uuids (group_key) or person uuids """
|
|
raise NotImplementedError()
|
|
|
|
@cached_property
|
|
def is_aggregating_by_groups(self) -> bool:
|
|
"""Override in child class with insight specific logic to determine group aggregation"""
|
|
return False
|
|
|
|
def get_actors(
|
|
self,
|
|
) -> Tuple[Union[QuerySet[Person], QuerySet[Group]], Union[List[SerializedGroup], List[SerializedPerson]]]:
|
|
""" Get actors in data model and dict formats. Builds query and executes """
|
|
query, params = self.actor_query()
|
|
raw_result = sync_execute(query, params)
|
|
return self.get_actors_from_result(raw_result)
|
|
|
|
def get_actors_from_result(
|
|
self, raw_result
|
|
) -> Tuple[Union[QuerySet[Person], QuerySet[Group]], Union[List[SerializedGroup], List[SerializedPerson]]]:
|
|
actors: Union[QuerySet[Person], QuerySet[Group]]
|
|
serialized_actors: Union[List[SerializedGroup], List[SerializedPerson]]
|
|
|
|
if self.is_aggregating_by_groups:
|
|
actors, serialized_actors = self._get_groups(raw_result)
|
|
else:
|
|
actors, serialized_actors = self._get_people(raw_result)
|
|
return actors, serialized_actors
|
|
|
|
def _get_groups(self, results) -> Tuple[QuerySet[Group], List[SerializedGroup]]:
|
|
""" Get groups from raw SQL results in data model and dict formats """
|
|
groups: QuerySet[Group] = Group.objects.filter(team_id=self._team.pk, group_key__in=[val[0] for val in results])
|
|
return groups, self._serialize_groups(groups)
|
|
|
|
def _get_people(self, results) -> Tuple[QuerySet[Person], List[SerializedPerson]]:
|
|
""" Get people from raw SQL results in data model and dict formats """
|
|
persons: QuerySet[Person] = Person.objects.filter(team_id=self._team.pk, uuid__in=[val[0] for val in results])
|
|
return persons, self._serialize_people(persons)
|
|
|
|
def _serialize_people(self, data: QuerySet[Person]) -> List[SerializedPerson]:
|
|
from posthog.api.person import get_person_name
|
|
|
|
return [
|
|
SerializedPerson(
|
|
type="person",
|
|
id=person.uuid,
|
|
created_at=person.created_at,
|
|
properties=person.properties,
|
|
is_identified=person.is_identified,
|
|
name=get_person_name(person),
|
|
distinct_ids=person.distinct_ids,
|
|
)
|
|
for person in data
|
|
]
|
|
|
|
def _serialize_groups(self, data: QuerySet[Group]) -> List[SerializedGroup]:
|
|
return [
|
|
SerializedGroup(
|
|
id=group.group_key,
|
|
type="group",
|
|
group_type_index=group.group_type_index,
|
|
group_key=group.group_key,
|
|
created_at=group.created_at,
|
|
properties=group.group_properties,
|
|
)
|
|
for group in data
|
|
]
|