0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-12-01 12:21:02 +01:00
posthog/ee/clickhouse/queries/groups_join_query.py

54 lines
1.9 KiB
Python
Raw Normal View History

[groups persons] API for returning groups based on trend results (#7144) * working for unique_groups math * fix types * add null check * update snapshots * update payload * update snapshots * use constructor * adjust queries * introduce base class * consolidate querying * shared serializer and typed * sort imports * snapshots * typing * change name * Add group model ```sql BEGIN; -- -- Create model Group -- CREATE TABLE "posthog_group" ("id" serial NOT NULL PRIMARY KEY, "group_key" varchar(400) NOT NULL, "group_type_index" integer NOT NULL, "group_properties" jsonb NOT NULL, "created_at" timestamp with time zone NOT NULL, "properties_last_updated_at" jsonb NOT NULL, "properties_last_operation" jsonb NOT NULL, "version" bigint NOT NULL, "team_id" integer NOT NULL); -- -- Create constraint unique team_id/group_key/group_type_index combo on model group -- ALTER TABLE "posthog_group" ADD CONSTRAINT "unique team_id/group_key/group_type_index combo" UNIQUE ("team_id", "group_key", "group_type_index"); ALTER TABLE "posthog_group" ADD CONSTRAINT "posthog_group_team_id_b3aed896_fk_posthog_team_id" FOREIGN KEY ("team_id") REFERENCES "posthog_team" ("id") DEFERRABLE INITIALLY DEFERRED; CREATE INDEX "posthog_group_team_id_b3aed896" ON "posthog_group" ("team_id"); COMMIT; ``` * Remove a dead import * Improve typing for groups * Make groups updating more generic, avoid mutation This simplifies using the same logic for groups Note there's a behavioral change: We don't produce a new kafka message if nothing has been updated anymore. * Rename a function * WIP: Handle group property updates ... by storing them in postgres Uses identical pattern to person property updates, except we handle first-seen case within updates as well. 
* Get rid of boolean option * WIP continued * fetchGroup() and upsertGroup() * Test more edge cases * Add tests for upsertGroup() in properties-updater * Rename to PropertyUpdateOperation * Followup * Solve typing issues * changed implementation to use pg * unusd * update type * update snapshots * rename and remove inlining * restore bad merge code * adjust types * add flag * remove var * misnamed * change to uuid * make sure to use string when passing result * remove from columnoptimizer logic and have group join logic implemented by event query classes per insight * remove unnecessary logic * typing * remove dead imports * remove verbosity * update snapshots * typos * remove signals * remove plugin excess Co-authored-by: Karl-Aksel Puulmann <oxymaccy@gmail.com>
2021-11-18 17:58:48 +01:00
from typing import Dict, Optional, Tuple, Union
from ee.clickhouse.queries.column_optimizer import ColumnOptimizer
from posthog.models import Filter
from posthog.models.filters.path_filter import PathFilter
from posthog.models.filters.retention_filter import RetentionFilter
class GroupsJoinQuery:
    """
    Query class responsible for joining with `groups` clickhouse table based on filters

    One ``INNER JOIN`` clause is generated for every group type index listed in
    the column optimizer's ``group_types_to_query``. Each clause joins against a
    subquery that picks, per ``group_key``, the ``group_properties`` value with
    the greatest ``_timestamp`` and exposes it as ``group_properties_<index>``.
    """

    _filter: Union[Filter, PathFilter, RetentionFilter]
    _team_id: int
    _column_optimizer: ColumnOptimizer
    # Declared here for consistency with the other instance attributes; it is
    # always assigned in ``__init__``.
    _join_key: Optional[str]

    def __init__(
        self,
        filter: Union[Filter, PathFilter, RetentionFilter],
        team_id: int,
        column_optimizer: Optional[ColumnOptimizer] = None,
        join_key: Optional[str] = None,
    ) -> None:
        """
        Store the inputs needed to build the join clauses.

        :param filter: Filter stored on the instance; it is handed to a newly
            built ``ColumnOptimizer`` when no ``column_optimizer`` is supplied.
        :param team_id: Value bound to the ``team_id`` query parameter.
        :param column_optimizer: Source of ``group_types_to_query``. A falsy
            value (including the default ``None``) causes a new
            ``ColumnOptimizer(filter, team_id)`` to be created.
        :param join_key: Optional left-hand side of every ``ON`` condition. When
            falsy, ``$group_<index>`` is used for each group type instead.
            NOTE: the value is interpolated into the SQL text, not passed as a
            query parameter, so callers must only pass trusted expressions.
        """
        self._filter = filter
        self._team_id = team_id
        self._column_optimizer = column_optimizer or ColumnOptimizer(self._filter, self._team_id)
        self._join_key = join_key

    def get_join_query(self) -> Tuple[str, Dict]:
        """
        Build the SQL join clauses together with their query parameters.

        :return: A ``(sql, params)`` tuple. ``sql`` holds one ``INNER JOIN``
            clause per group type, joined with newlines. ``params`` holds
            ``team_id`` plus one ``group_index_<index>`` entry per group type.
            When there are no group types to query the result is ``("", {})``.
        """
        join_queries = []
        params: Dict = {}

        for group_type_index in self._column_optimizer.group_types_to_query:
            var = f"group_index_{group_type_index}"
            # An explicit join key applies to every group type; otherwise each
            # group type joins on its own `$group_<index>` expression.
            group_join_key = self._join_key or f"$group_{group_type_index}"
            join_queries.append(
                f"""
                INNER JOIN (
                    SELECT
                        group_key,
                        argMax(group_properties, _timestamp) AS group_properties_{group_type_index}
                    FROM groups
                    WHERE team_id = %(team_id)s AND group_type_index = %({var})s
                    GROUP BY group_key
                ) groups_{group_type_index}
                ON {group_join_key} == groups_{group_type_index}.group_key
                """
            )

            # Assigned inside the loop on purpose: `params` must stay empty
            # when no join is emitted.
            params["team_id"] = self._team_id
            params[var] = group_type_index

        return "\n".join(join_queries), params