[groups persons] API for returning groups based on trend results (#7144)
* working for unique_groups math
* fix types
* add null check
* update snapshots
* update payload
* update snapshots
* use constructor
* adjust queries
* introduce base class
* consolidate querying
* shared serializer and typed
* sort imports
* snapshots
* typing
* change name
* Add group model
```sql
BEGIN;
--
-- Create model Group
--
CREATE TABLE "posthog_group" ("id" serial NOT NULL PRIMARY KEY, "group_key" varchar(400) NOT NULL, "group_type_index" integer NOT NULL, "group_properties" jsonb NOT NULL, "created_at" timestamp with time zone NOT NULL, "properties_last_updated_at" jsonb NOT NULL, "properties_last_operation" jsonb NOT NULL, "version" bigint NOT NULL, "team_id" integer NOT NULL);
--
-- Create constraint unique team_id/group_key/group_type_index combo on model group
--
ALTER TABLE "posthog_group" ADD CONSTRAINT "unique team_id/group_key/group_type_index combo" UNIQUE ("team_id", "group_key", "group_type_index");
ALTER TABLE "posthog_group" ADD CONSTRAINT "posthog_group_team_id_b3aed896_fk_posthog_team_id" FOREIGN KEY ("team_id") REFERENCES "posthog_team" ("id") DEFERRABLE INITIALLY DEFERRED;
CREATE INDEX "posthog_group_team_id_b3aed896" ON "posthog_group" ("team_id");
COMMIT;
```
* Remove a dead import
* Improve typing for groups
* Make groups updating more generic, avoid mutation
This simplifies using the same logic for groups
Note there's a behavioral change: We don't produce a new kafka message
if nothing has been updated anymore.
* Rename a function
* WIP: Handle group property updates
... by storing them in postgres
Uses identical pattern to person property updates, except we handle
first-seen case within updates as well.
* Get rid of boolean option
* WIP continued
* fetchGroup() and upsertGroup()
* Test more edge cases
* Add tests for upsertGroup() in properties-updater
* Rename to PropertyUpdateOperation
* Followup
* Solve typing issues
* changed implementation to use pg
* unused
* update type
* update snapshots
* rename and remove inlining
* restore bad merge code
* adjust types
* add flag
* remove var
* misnamed
* change to uuid
* make sure to use string when passing result
* remove from columnoptimizer logic and have group join logic implemented by event query classes per insight
* remove unnecessary logic
* typing
* remove dead imports
* remove verbosity
* update snapshots
* typos
* remove signals
* remove plugin excess
Co-authored-by: Karl-Aksel Puulmann <oxymaccy@gmail.com>
2021-11-18 17:58:48 +01:00
|
|
|
from typing import Dict, Optional, Tuple, Union
|
2021-11-05 12:47:41 +01:00
|
|
|
|
|
|
|
from ee.clickhouse.queries.column_optimizer import ColumnOptimizer
|
|
|
|
from posthog.models import Filter
|
|
|
|
from posthog.models.filters.path_filter import PathFilter
|
|
|
|
from posthog.models.filters.retention_filter import RetentionFilter
|
|
|
|
|
|
|
|
|
|
|
|
class GroupsJoinQuery:
    """
    Builds the SQL for joining against the `groups` ClickHouse table.

    The set of group types to join is derived from the filter (through
    ColumnOptimizer); one INNER JOIN subquery is produced per group type index.
    """

    _filter: Union[Filter, PathFilter, RetentionFilter]
    _team_id: int
    _column_optimizer: ColumnOptimizer

    def __init__(
        self,
        filter: Union[Filter, PathFilter, RetentionFilter],
        team_id: int,
        column_optimizer: Optional[ColumnOptimizer] = None,
        join_key: Optional[str] = None,
    ) -> None:
        self._filter = filter
        self._team_id = team_id
        # Reuse a caller-supplied optimizer when available; otherwise derive one
        # from the filter so group_types_to_query can be computed.
        self._column_optimizer = column_optimizer or ColumnOptimizer(self._filter, self._team_id)
        # Optional override for the left-hand side of the ON clause; when unset,
        # each join falls back to the $group_<index> event column.
        self._join_key = join_key

    def get_join_query(self) -> Tuple[str, Dict]:
        """Return (join_sql, params) covering every group type the filter references.

        Each group type index contributes one INNER JOIN on a subquery that
        collapses the `groups` table to its latest properties per group_key
        (via argMax over _timestamp). When no group types are referenced,
        returns an empty string and empty params.
        """
        clauses = []
        query_params: Dict = {}

        for type_index in self._column_optimizer.group_types_to_query:
            index_param = f"group_index_{type_index}"
            join_column = self._join_key or f"$group_{type_index}"
            clauses.append(
                f"""
                INNER JOIN (
                    SELECT
                        group_key,
                        argMax(group_properties, _timestamp) AS group_properties_{type_index}
                    FROM groups
                    WHERE team_id = %(team_id)s AND group_type_index = %({index_param})s
                    GROUP BY group_key
                ) groups_{type_index}
                ON {join_column} == groups_{type_index}.group_key
                """
            )
            # team_id is (re)assigned per iteration on purpose: params stay
            # empty whenever no group joins are emitted.
            query_params["team_id"] = self._team_id
            query_params[index_param] = type_index

        return "\n".join(clauses), query_params