Mirror of https://github.com/PostHog/posthog.git, synced 2024-11-28 18:26:15 +01:00
7979f52e8a
* working for unique_groups math
* fix types
* add null check
* update snapshots
* update payload
* update snapshots
* use constructor
* adjust queries
* introduce base class
* consolidate querying
* shared serializer and typed
* sort imports
* snapshots
* typing
* change name
* Add group model

  ```sql
  BEGIN;
  --
  -- Create model Group
  --
  CREATE TABLE "posthog_group" (
      "id" serial NOT NULL PRIMARY KEY,
      "group_key" varchar(400) NOT NULL,
      "group_type_index" integer NOT NULL,
      "group_properties" jsonb NOT NULL,
      "created_at" timestamp with time zone NOT NULL,
      "properties_last_updated_at" jsonb NOT NULL,
      "properties_last_operation" jsonb NOT NULL,
      "version" bigint NOT NULL,
      "team_id" integer NOT NULL
  );
  --
  -- Create constraint unique team_id/group_key/group_type_index combo on model group
  --
  ALTER TABLE "posthog_group" ADD CONSTRAINT "unique team_id/group_key/group_type_index combo" UNIQUE ("team_id", "group_key", "group_type_index");
  ALTER TABLE "posthog_group" ADD CONSTRAINT "posthog_group_team_id_b3aed896_fk_posthog_team_id" FOREIGN KEY ("team_id") REFERENCES "posthog_team" ("id") DEFERRABLE INITIALLY DEFERRED;
  CREATE INDEX "posthog_group_team_id_b3aed896" ON "posthog_group" ("team_id");
  COMMIT;
  ```

* Remove a dead import
* Improve typing for groups
* Make groups updating more generic, avoid mutation

  This simplifies using the same logic for groups.

  Note there's a behavioral change: we no longer produce a new Kafka message if nothing has been updated.
* Rename a function
* WIP: Handle group property updates ... by storing them in Postgres

  Uses an identical pattern to person property updates, except we handle the first-seen case within updates as well.
* Get rid of boolean option
* WIP continued
* fetchGroup() and upsertGroup()
* Test more edge cases
* Add tests for upsertGroup() in properties-updater
* Rename to PropertyUpdateOperation
* Followup
* Solve typing issues
* changed implementation to use pg
* unused
* update type
* update snapshots
* rename and remove inlining
* restore bad merge code
* adjust types
* add flag
* remove var
* misnamed
* change to uuid
* make sure to use string when passing result
* remove from ColumnOptimizer logic and have group join logic implemented by event query classes per insight
* remove unnecessary logic
* typing
* remove dead imports
* remove verbosity
* update snapshots
* typos
* remove signals
* remove plugin excess

Co-authored-by: Karl-Aksel Puulmann <oxymaccy@gmail.com>
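The SQL above is the auto-generated migration for the new `Group` model. For the Django-side picture, here is a minimal sketch of a model that would produce roughly that table; field names and types are read off the SQL, while `on_delete`, defaults, and the exact field classes (e.g. `models.JSONField`, which assumes Django 3.1+) are assumptions for illustration, not the PR's actual definition.

```python
# Hypothetical reconstruction of the Group model implied by the migration SQL above.
from django.db import models


class Group(models.Model):
    team = models.ForeignKey("Team", on_delete=models.CASCADE)  # "team_id" FK to posthog_team (on_delete assumed)
    group_key = models.CharField(max_length=400)                # varchar(400) NOT NULL
    group_type_index = models.IntegerField()                    # integer NOT NULL
    group_properties = models.JSONField()                       # jsonb NOT NULL
    created_at = models.DateTimeField()                         # timestamptz NOT NULL (default omitted here)
    properties_last_updated_at = models.JSONField()             # jsonb NOT NULL
    properties_last_operation = models.JSONField()              # jsonb NOT NULL
    version = models.BigIntegerField()                          # bigint NOT NULL

    class Meta:
        constraints = [
            # Mirrors the UNIQUE ("team_id", "group_key", "group_type_index") constraint in the SQL.
            models.UniqueConstraint(
                fields=["team", "group_key", "group_type_index"],
                name="unique team_id/group_key/group_type_index combo",
            )
        ]
```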
107 lines
3.5 KiB
Python
import uuid
from typing import (
    Any,
    Dict,
    List,
    Literal,
    Optional,
    Tuple,
    TypedDict,
    Union,
)

from django.db.models.query import QuerySet

from ee.clickhouse.client import sync_execute
from posthog.models import Entity, Filter, Team
from posthog.models.filters.mixins.utils import cached_property
from posthog.models.group import Group
from posthog.models.person import Person


class SerializedPerson(TypedDict):
    type: Literal["person"]
    id: uuid.UUID
    created_at: Optional[str]
    properties: Dict[str, Any]
    is_identified: Optional[bool]
    name: str
    distinct_ids: List[str]


class SerializedGroup(TypedDict):
    type: Literal["group"]
    group_key: str
    created_at: Optional[str]
    properties: Dict[str, Any]


SerializedActor = Union[SerializedPerson, SerializedGroup]
Actor = Union[Person, Group]


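# Subclasses build a ClickHouse query that selects actor identifiers (person uuids or
# group keys); this base class executes it, then hydrates and serializes the matching
# Person or Group rows from Postgres.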
class ActorBaseQuery:
    aggregating_by_groups = False

    def __init__(self, team: Team, filter: Filter, entity: Optional[Entity] = None):
        self.team = team
        self.entity = entity
        self.filter = filter

    def actor_query(self) -> Tuple[str, Dict]:
        """ Implemented by subclasses. Must return list of uuids. Can be group uuids (group_key) or person uuids """
        raise NotImplementedError()

    @cached_property
    def is_aggregating_by_groups(self) -> bool:
        if self.entity and self.entity.math == "unique_group":
            return True
        else:
            return False

    def get_actors(self) -> Tuple[QuerySet[Actor], List[SerializedActor]]:
        """ Get actors in data model and dict formats. Builds query and executes """
        query, params = self.actor_query()
        raw_result = sync_execute(query, params)
        actors: QuerySet[Actor]
        serialized_actors: List[SerializedActor] = []
        if self.is_aggregating_by_groups:
            actors, serialized_actors = self._get_groups(raw_result)
        else:
            actors, serialized_actors = self._get_people(raw_result)
        return actors, serialized_actors

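    # Each row in the raw ClickHouse result is a positional tuple; only the first
    # column (a person uuid or a group key) is used to hydrate actors from Postgres.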
    def _get_groups(self, results) -> Tuple[QuerySet[Actor], List[SerializedActor]]:
        """ Get groups from raw SQL results in data model and dict formats """
        groups: QuerySet[Group] = Group.objects.filter(team_id=self.team.pk, group_key__in=[val[0] for val in results])
        return groups, self._serialize_groups(groups)

    def _get_people(self, results) -> Tuple[QuerySet[Actor], List[SerializedActor]]:
        """ Get people from raw SQL results in data model and dict formats """
        persons: QuerySet[Person] = Person.objects.filter(team_id=self.team.pk, uuid__in=[val[0] for val in results])
        return persons, self._serialize_people(persons)

    def _serialize_people(self, data: QuerySet[Person]) -> List[SerializedActor]:
        from posthog.api.person import get_person_name

        return [
            SerializedPerson(
                type="person",
                id=person.uuid,
                created_at=person.created_at,
                properties=person.properties,
                is_identified=person.is_identified,
                name=get_person_name(person),
                distinct_ids=person.distinct_ids,
            )
            for person in data
        ]

    def _serialize_groups(self, data: QuerySet[Group]) -> List[SerializedActor]:
        return [
            SerializedGroup(
                type="group", group_key=group.group_key, created_at=group.created_at, properties=group.group_properties
            )
            for group in data
        ]
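For context on how this base class is used: per-insight subclasses supply `actor_query()`, and `get_actors()` runs it against ClickHouse via `sync_execute`, then hydrates and serializes the matching Person or Group rows from Postgres. Below is a minimal, hypothetical sketch of that contract; the subclass name and the SQL are invented for illustration and are not part of this file or the PR.

```python
from typing import Dict, Tuple


class ExampleActorQuery(ActorBaseQuery):
    """Toy subclass: actor_query() must return (sql, params), where the first column
    of each result row is a person uuid (or a group key when aggregating by groups)."""

    def actor_query(self) -> Tuple[str, Dict]:
        # Illustrative ClickHouse SQL only; real subclasses build this per insight,
        # applying the filter/entity passed to the constructor.
        return (
            "SELECT DISTINCT person_id FROM person_distinct_id WHERE team_id = %(team_id)s",
            {"team_id": self.team.pk},
        )


# Usage (assuming existing `team` and `filter` objects):
#   actors, serialized_actors = ExampleActorQuery(team, filter).get_actors()
# `actors` is a Django QuerySet of Person rows; `serialized_actors` is a list of
# SerializedPerson dicts ready for an API response.
```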