0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-28 18:26:15 +01:00
posthog/ee/clickhouse/queries/actor_base_query.py
Eric Duong 7979f52e8a
[groups persons] API for returning groups based on trend results (#7144)
* working for unique_groups math

* fix types

* add null check

* update snapshots

* update payload

* update snapshots

* use constructor

* adjust queries

* introduce base class

* consolidate querying

* shared serializer and typed

* sort imports

* snapshots

* typing

* change name

* Add group model

```sql
BEGIN;
--
-- Create model Group
--
CREATE TABLE "posthog_group" ("id" serial NOT NULL PRIMARY KEY, "group_key" varchar(400) NOT NULL, "group_type_index" integer NOT NULL, "group_properties" jsonb NOT NULL, "created_at" timestamp with time zone NOT NULL, "properties_last_updated_at" jsonb NOT NULL, "properties_last_operation" jsonb NOT NULL, "version" bigint NOT NULL, "team_id" integer NOT NULL);
--
-- Create constraint unique team_id/group_key/group_type_index combo on model group
--
ALTER TABLE "posthog_group" ADD CONSTRAINT "unique team_id/group_key/group_type_index combo" UNIQUE ("team_id", "group_key", "group_type_index");
ALTER TABLE "posthog_group" ADD CONSTRAINT "posthog_group_team_id_b3aed896_fk_posthog_team_id" FOREIGN KEY ("team_id") REFERENCES "posthog_team" ("id") DEFERRABLE INITIALLY DEFERRED;
CREATE INDEX "posthog_group_team_id_b3aed896" ON "posthog_group" ("team_id");
COMMIT;
```

* Remove a dead import

* Improve typing for groups

* Make groups updating more generic, avoid mutation

This simplifies using the same logic for groups

Note there's a behavioral change: we no longer produce a new Kafka message
if nothing has been updated.

* Rename a function

* WIP: Handle group property updates

... by storing them in postgres

Uses identical pattern to person property updates, except we handle
first-seen case within updates as well.

* Get rid of boolean option

* WIP continued

* fetchGroup() and upsertGroup()

* Test more edge cases

* Add tests for upsertGroup() in properties-updater

* Rename to PropertyUpdateOperation

* Followup

* Solve typing issues

* changed implementation to use pg

* unused

* update type

* update snapshots

* rename and remove inlining

* restore bad merge code

* adjust types

* add flag

* remove var

* misnamed

* change to uuid

* make sure to use string when passing result

* remove from columnoptimizer logic and have group join logic implemented by event query classes per insight

* remove unnecessary logic

* typing

* remove dead imports

* remove verbosity

* update snapshots

* typos

* remove signals

* remove plugin excess

Co-authored-by: Karl-Aksel Puulmann <oxymaccy@gmail.com>
2021-11-18 11:58:48 -05:00

107 lines
3.5 KiB
Python

import uuid
from typing import (
Any,
Dict,
List,
Literal,
Optional,
Tuple,
TypedDict,
Union,
)
from django.db.models.query import QuerySet
from ee.clickhouse.client import sync_execute
from posthog.models import Entity, Filter, Team
from posthog.models.filters.mixins.utils import cached_property
from posthog.models.group import Group
from posthog.models.person import Person
class SerializedPerson(TypedDict):
    """Dict representation of a ``Person`` as produced by ``ActorBaseQuery._serialize_people``."""

    # Discriminator distinguishing person payloads from group payloads.
    type: Literal["person"]
    # Taken from the model's ``uuid`` attribute.
    id: uuid.UUID
    # NOTE(review): populated straight from the model's ``created_at`` - confirm it is a str at this point.
    created_at: Optional[str]
    properties: Dict[str, Any]
    is_identified: Optional[bool]
    # Display name computed by ``get_person_name``.
    name: str
    distinct_ids: List[str]
class SerializedGroup(TypedDict):
    """Dict representation of a ``Group`` as produced by ``ActorBaseQuery._serialize_groups``."""

    # Discriminator distinguishing group payloads from person payloads.
    type: Literal["group"]
    group_key: str
    # NOTE(review): populated straight from the model's ``created_at`` - confirm it is a str at this point.
    created_at: Optional[str]
    # Taken from the model's ``group_properties`` attribute.
    properties: Dict[str, Any]
# Serialized (dict) form of an actor: either a person payload or a group payload.
SerializedActor = Union[SerializedPerson, SerializedGroup]
# Model instance of an actor: either a Person or a Group.
Actor = Union[Person, Group]
class ActorBaseQuery:
    """Base class for queries that resolve insight results into actors.

    An actor is either a ``Person`` or a ``Group``. Subclasses provide the SQL
    through ``actor_query``; this class executes it via ``sync_execute`` and
    loads the matching Django models together with their serialized dicts.
    """

    aggregating_by_groups = False

    def __init__(self, team: Team, filter: Filter, entity: Optional[Entity] = None):
        self.team = team
        self.entity = entity
        self.filter = filter

    def actor_query(self) -> Tuple[str, Dict]:
        """Return the ``(query, params)`` pair that selects actor ids.

        Implemented by subclasses. The first column of every result row is
        used as the actor id: a person uuid, or a group_key when aggregating
        by groups.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    @cached_property
    def is_aggregating_by_groups(self) -> bool:
        """Whether the entity counts unique groups rather than persons."""
        return bool(self.entity and self.entity.math == "unique_group")

    def get_actors(self) -> Tuple[QuerySet[Actor], List[SerializedActor]]:
        """Build and execute the actor query.

        Returns:
            The matching actors as a queryset and as a list of serialized dicts.
        """
        query, params = self.actor_query()
        raw_result = sync_execute(query, params)
        if self.is_aggregating_by_groups:
            return self._get_groups(raw_result)
        return self._get_people(raw_result)

    def _get_groups(self, results) -> Tuple[QuerySet[Actor], List[SerializedActor]]:
        """Get groups from raw SQL results in data model and dict formats."""
        group_keys = [row[0] for row in results]
        groups: QuerySet[Group] = Group.objects.filter(team_id=self.team.pk, group_key__in=group_keys)
        # A group_key is only unique together with its group type, so matching on
        # the key alone can pull in same-named groups of another type.
        # NOTE(review): assumes the entity exposes ``math_group_type_index`` - confirm.
        # When it is absent or None the lookup stays key-only, as before.
        group_type_index = getattr(self.entity, "math_group_type_index", None)
        if group_type_index is not None:
            groups = groups.filter(group_type_index=group_type_index)
        return groups, self._serialize_groups(groups)

    def _get_people(self, results) -> Tuple[QuerySet[Actor], List[SerializedActor]]:
        """Get people from raw SQL results in data model and dict formats."""
        person_uuids = [row[0] for row in results]
        persons: QuerySet[Person] = Person.objects.filter(team_id=self.team.pk, uuid__in=person_uuids)
        return persons, self._serialize_people(persons)

    def _serialize_people(self, data: QuerySet[Person]) -> List[SerializedActor]:
        """Convert ``Person`` models into ``SerializedPerson`` dicts."""
        # Imported locally, presumably to avoid a circular import - TODO confirm.
        from posthog.api.person import get_person_name

        return [
            SerializedPerson(
                type="person",
                id=person.uuid,
                created_at=person.created_at,
                properties=person.properties,
                is_identified=person.is_identified,
                name=get_person_name(person),
                distinct_ids=person.distinct_ids,
            )
            for person in data
        ]

    def _serialize_groups(self, data: QuerySet[Group]) -> List[SerializedActor]:
        """Convert ``Group`` models into ``SerializedGroup`` dicts."""
        return [
            SerializedGroup(
                type="group",
                group_key=group.group_key,
                created_at=group.created_at,
                properties=group.group_properties,
            )
            for group in data
        ]