Mirror of https://github.com/PostHog/posthog.git (synced 2024-12-01 12:21:02 +01:00)
Commit 7979f52e8a
* working for unique_groups math
* fix types
* add null check
* update snapshots
* update payload
* update snapshots
* use constructor
* adjust queries
* introduce base class
* consolidate querying
* shared serializer and typed
* sort imports
* snapshots
* typing
* change name
* Add group model:

  ```sql
  BEGIN;
  --
  -- Create model Group
  --
  CREATE TABLE "posthog_group" (
      "id" serial NOT NULL PRIMARY KEY,
      "group_key" varchar(400) NOT NULL,
      "group_type_index" integer NOT NULL,
      "group_properties" jsonb NOT NULL,
      "created_at" timestamp with time zone NOT NULL,
      "properties_last_updated_at" jsonb NOT NULL,
      "properties_last_operation" jsonb NOT NULL,
      "version" bigint NOT NULL,
      "team_id" integer NOT NULL
  );
  --
  -- Create constraint unique team_id/group_key/group_type_index combo on model group
  --
  ALTER TABLE "posthog_group"
      ADD CONSTRAINT "unique team_id/group_key/group_type_index combo"
      UNIQUE ("team_id", "group_key", "group_type_index");
  ALTER TABLE "posthog_group"
      ADD CONSTRAINT "posthog_group_team_id_b3aed896_fk_posthog_team_id"
      FOREIGN KEY ("team_id") REFERENCES "posthog_team" ("id") DEFERRABLE INITIALLY DEFERRED;
  CREATE INDEX "posthog_group_team_id_b3aed896" ON "posthog_group" ("team_id");
  COMMIT;
  ```

* Remove a dead import
* Improve typing for groups
* Make groups updating more generic, avoid mutation. This simplifies reusing the same logic for groups. Note the behavioral change: we no longer produce a new Kafka message when nothing has been updated.
* Rename a function
* WIP: Handle group property updates by storing them in Postgres. Uses the same pattern as person property updates, except the first-seen case is also handled within updates.
* Get rid of boolean option
* WIP continued
* fetchGroup() and upsertGroup()
* Test more edge cases
* Add tests for upsertGroup() in properties-updater
* Rename to PropertyUpdateOperation
* Followup
* Solve typing issues
* changed implementation to use pg
* remove unused code
* update type
* update snapshots
* rename and remove inlining
* restore bad merge code
* adjust types
* add flag
* remove var
* fix misnamed variable
* change to uuid
* make sure to use string when passing result
* remove group-join logic from the column optimizer; each insight's event query class now implements the group join itself
* remove unnecessary logic
* typing
* remove dead imports
* remove verbosity
* update snapshots
* typos
* remove signals
* remove plugin excess

Co-authored-by: Karl-Aksel Puulmann <oxymaccy@gmail.com>
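For reference, a minimal Django model sketch that would generate roughly the migration above (field names and the unique constraint are taken from the SQL; the actual model in the PostHog codebase may differ):

```python
from django.db import models


class Group(models.Model):
    team = models.ForeignKey("Team", on_delete=models.CASCADE)  # produces the team_id column
    group_key = models.CharField(max_length=400)
    group_type_index = models.IntegerField()
    group_properties = models.JSONField()
    created_at = models.DateTimeField()
    properties_last_updated_at = models.JSONField()
    properties_last_operation = models.JSONField()
    version = models.BigIntegerField()

    class Meta:
        constraints = [
            # Matches the constraint created in the migration above
            models.UniqueConstraint(
                fields=["team", "group_key", "group_type_index"],
                name="unique team_id/group_key/group_type_index combo",
            )
        ]
```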
90 lines · 3.4 KiB · Python
```python
from typing import Any, Dict, Tuple

from ee.clickhouse.models.group import get_aggregation_target_field
from ee.clickhouse.queries.event_query import ClickhouseEventQuery
from posthog.constants import TREND_FILTER_TYPE_ACTIONS

class FunnelEventQuery(ClickhouseEventQuery):
    def get_query(self, entities=None, entity_name="events", skip_entity_filter=False) -> Tuple[str, Dict[str, Any]]:
        # Base columns every funnel query selects; elements_chain is only
        # included when the column optimizer determines it is needed.
        _fields = [
            f"{self.EVENT_TABLE_ALIAS}.event as event",
            f"{self.EVENT_TABLE_ALIAS}.team_id as team_id",
            f"{self.EVENT_TABLE_ALIAS}.distinct_id as distinct_id",
            f"{self.EVENT_TABLE_ALIAS}.timestamp as timestamp",
            (
                f"{self.EVENT_TABLE_ALIAS}.elements_chain as elements_chain"
                if self._column_optimizer.should_query_elements_chain_column
                else ""
            ),
            f"{get_aggregation_target_field(self._filter.aggregation_group_type_index, self.EVENT_TABLE_ALIAS, self.DISTINCT_ID_TABLE_ALIAS)} as aggregation_target",
        ]
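        # Note (an assumption about get_aggregation_target_field, which lives
        # in ee.clickhouse.models.group, not in this file): with a group type
        # index set, aggregation_target is expected to be a group column on
        # the events table (e.g. e."$group_0"); with no group index it falls
        # back to the person id resolved through the distinct id join
        # (e.g. pdi.person_id).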

        # Extra event columns the column optimizer determined the filters need.
        _fields.extend(
            f"{self.EVENT_TABLE_ALIAS}.{column_name} as {column_name}"
            for column_name in self._column_optimizer.event_columns_to_query
        )

        # One group_properties_N column per group type referenced by the query.
        _fields.extend(
            f"groups_{group_index}.group_properties_{group_index} as group_properties_{group_index}"
            for group_index in self._column_optimizer.group_types_to_query
        )

        if self._should_join_persons:
            _fields.extend(
                f"{self.PERSON_TABLE_ALIAS}.{column_name} as {column_name}" for column_name in self._person_query.fields
            )

        # Drop the empty string left behind when elements_chain was not selected.
        _fields = list(filter(None, _fields))
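
        # Below, each WHERE-clause fragment (date, property, entity filters) is
        # built separately and its bound parameters are merged into self.params.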
        date_query, date_params = self._get_date_filter()
        self.params.update(date_params)

        prop_filters = self._filter.properties
        prop_query, prop_params = self._get_props(prop_filters)
        self.params.update(prop_params)

        if skip_entity_filter:
            entity_query = ""
            entity_params: Dict[str, Any] = {}
        else:
            entity_query, entity_params = self._get_entity_query(entities, entity_name)

        self.params.update(entity_params)

        person_query, person_params = self._get_person_query()
        self.params.update(person_params)

        groups_query, groups_params = self._get_groups_query()
        self.params.update(groups_params)
query = f"""
|
|
SELECT {', '.join(_fields)} FROM events {self.EVENT_TABLE_ALIAS}
|
|
{self._get_distinct_id_query()}
|
|
{person_query}
|
|
{groups_query}
|
|
WHERE team_id = %(team_id)s
|
|
{entity_query}
|
|
{date_query}
|
|
{prop_query}
|
|
"""
|
|
|
|
return query, self.params
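
    # The rendered SQL has roughly this shape (illustrative only; person and
    # groups joins appear only when required):
    #
    #   SELECT e.event as event, ..., pdi.person_id as aggregation_target
    #   FROM events e
    #   <distinct id join> <person join> <groups joins>
    #   WHERE team_id = %(team_id)s AND event IN %(events)s
    #     AND <date filter> AND <property filters>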

    def _determine_should_join_distinct_ids(self) -> None:
        # Funnels always aggregate events per person (or group), so the
        # distinct_id -> person join is unconditionally required.
        self._should_join_distinct_ids = True

    def _get_entity_query(self, entities=None, entity_name="events") -> Tuple[str, Dict[str, Any]]:
        events = set()
        entities_to_use = entities or self._filter.entities

        for entity in entities_to_use:
            if entity.type == TREND_FILTER_TYPE_ACTIONS:
                # Actions expand to the set of events referenced by their steps.
                action = entity.get_action()
                for action_step in action.steps.all():
                    events.add(action_step.event)
            else:
                events.add(entity.id)

        return f"AND event IN %({entity_name})s", {entity_name: sorted(list(events))}
```
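
For context, a hypothetical usage sketch. The constructor arguments and the `sync_execute` helper are assumptions based on the `ClickhouseEventQuery` base class and the surrounding PostHog EE codebase, neither of which is shown in this file:

```python
# Hypothetical wiring; FunnelEventQuery's constructor signature comes from the
# ClickhouseEventQuery base class, which is not part of this file.
from ee.clickhouse.client import sync_execute
from posthog.models.filters import Filter

filter = Filter(data={"events": [{"id": "$pageview"}], "date_from": "-14d"})
event_query = FunnelEventQuery(filter=filter, team_id=1)

sql, params = event_query.get_query()  # sql is the SELECT above; params holds the bound values
rows = sync_execute(sql, params)  # execute against ClickHouse
```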