0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-28 09:16:49 +01:00

Related groups query refactor (#7978)

* Resolve weird SQL formatting issue

* Use a discriminated union for ActorType

* Use standard response types for related groups

* Update typing

* Always filter related actors by group type index

* Update snapshots & typing
This commit is contained in:
Karl-Aksel Puulmann 2022-01-12 13:15:43 +02:00 committed by GitHub
parent 1ccb45f678
commit 784c7d3b08
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 181 additions and 167 deletions

View File

@ -79,9 +79,13 @@ class ActorBaseQuery:
raise NotImplementedError()
@cached_property
def is_aggregating_by_groups(self) -> bool:
def aggregation_group_type_index(self) -> Optional[int]:
"""Override in child class with insight specific logic to determine group aggregation"""
return False
return None
@property
def is_aggregating_by_groups(self) -> bool:
return self.aggregation_group_type_index is not None
@cached_property
def should_include_recordings(self) -> bool:
@ -108,8 +112,7 @@ class ActorBaseQuery:
def query_for_session_ids_with_recordings(self, session_ids: Set[str]) -> Set[str]:
""" Filters a list of session_ids to those that actually have recordings """
query = """
SELECT
distinct session_id
SELECT DISTINCT session_id
FROM session_recording_events
WHERE
team_id = %(team_id)s
@ -166,47 +169,57 @@ class ActorBaseQuery:
actor_ids = [row[0] for row in raw_result]
if self.is_aggregating_by_groups:
actors, serialized_actors = self._get_groups(actor_ids)
actors, serialized_actors = get_groups(
self._team.pk, cast(int, self.aggregation_group_type_index), actor_ids
)
else:
actors, serialized_actors = self._get_people(actor_ids)
actors, serialized_actors = get_people(self._team.pk, actor_ids)
return actors, serialized_actors
def _get_groups(self, group_ids) -> Tuple[QuerySet[Group], List[SerializedGroup]]:
""" Get groups from raw SQL results in data model and dict formats """
groups: QuerySet[Group] = Group.objects.filter(team_id=self._team.pk, group_key__in=group_ids)
return groups, self._serialize_groups(groups)
def _get_people(self, people_ids) -> Tuple[QuerySet[Person], List[SerializedPerson]]:
""" Get people from raw SQL results in data model and dict formats """
persons: QuerySet[Person] = Person.objects.filter(team_id=self._team.pk, uuid__in=people_ids)
return persons, self._serialize_people(persons)
def get_groups(
team_id: int, group_type_index: int, group_ids: List[Any]
) -> Tuple[QuerySet[Group], List[SerializedGroup]]:
""" Get groups from raw SQL results in data model and dict formats """
groups: QuerySet[Group] = Group.objects.filter(
team_id=team_id, group_type_index=group_type_index, group_key__in=group_ids
)
return groups, serialize_groups(groups)
def _serialize_people(self, data: QuerySet[Person]) -> List[SerializedPerson]:
from posthog.api.person import get_person_name
return [
SerializedPerson(
type="person",
id=person.uuid,
created_at=person.created_at,
properties=person.properties,
is_identified=person.is_identified,
name=get_person_name(person),
distinct_ids=person.distinct_ids,
)
for person in data
]
def get_people(team_id: int, people_ids: List[Any]) -> Tuple[QuerySet[Person], List[SerializedPerson]]:
""" Get people from raw SQL results in data model and dict formats """
persons: QuerySet[Person] = Person.objects.filter(team_id=team_id, uuid__in=people_ids)
return persons, serialize_people(persons)
def _serialize_groups(self, data: QuerySet[Group]) -> List[SerializedGroup]:
return [
SerializedGroup(
id=group.group_key,
type="group",
group_type_index=group.group_type_index,
group_key=group.group_key,
created_at=group.created_at,
properties=group.group_properties,
)
for group in data
]
def serialize_people(data: QuerySet[Person]) -> List[SerializedPerson]:
from posthog.api.person import get_person_name
return [
SerializedPerson(
type="person",
id=person.uuid,
created_at=person.created_at,
properties=person.properties,
is_identified=person.is_identified,
name=get_person_name(person),
distinct_ids=person.distinct_ids,
)
for person in data
]
def serialize_groups(data: QuerySet[Group]) -> List[SerializedGroup]:
return [
SerializedGroup(
id=group.group_key,
type="group",
group_type_index=group.group_type_index,
group_key=group.group_key,
created_at=group.created_at,
properties=group.group_properties,
)
for group in data
]

View File

@ -30,8 +30,8 @@ class FunnelCorrelationActors(ActorBaseQuery):
self._filter = self._filter.with_data({FUNNEL_CORRELATION_PERSON_LIMIT: 100})
@cached_property
def is_aggregating_by_groups(self) -> bool:
return self._filter.aggregation_group_type_index is not None
def aggregation_group_type_index(self):
return self._filter.aggregation_group_type_index
def actor_query(self, limit_actors: Optional[bool] = True):
if self._filter.correlation_type == FunnelCorrelationType.PROPERTIES:
@ -60,8 +60,8 @@ class _FunnelEventsCorrelationActors(ActorBaseQuery):
super().__init__(team, filter)
@cached_property
def is_aggregating_by_groups(self) -> bool:
return self._filter.aggregation_group_type_index is not None
def aggregation_group_type_index(self):
return self._filter.aggregation_group_type_index
def actor_query(self, limit_actors: Optional[bool] = True):
@ -123,8 +123,8 @@ class _FunnelPropertyCorrelationActors(ActorBaseQuery):
super().__init__(team, filter)
@cached_property
def is_aggregating_by_groups(self) -> bool:
return self._filter.aggregation_group_type_index is not None
def aggregation_group_type_index(self):
return self._filter.aggregation_group_type_index
def actor_query(self, limit_actors: Optional[bool] = True, extra_fields: Optional[List[str]] = None):
if not self._filter.correlation_property_values:

View File

@ -12,8 +12,8 @@ class ClickhouseFunnelActors(ClickhouseFunnel, ActorBaseQuery):
_filter: Filter
@cached_property
def is_aggregating_by_groups(self) -> bool:
return self._filter.aggregation_group_type_index is not None
def aggregation_group_type_index(self):
return self._filter.aggregation_group_type_index
def actor_query(self, limit_actors: Optional[bool] = True, extra_fields: Optional[List[str]] = None):
extra_fields_string = ", ".join([self._get_timestamp_outer_select()] + (extra_fields or []))

View File

@ -11,8 +11,8 @@ class ClickhouseFunnelStrictActors(ClickhouseFunnelStrict, ActorBaseQuery):
_filter: Filter
@cached_property
def is_aggregating_by_groups(self) -> bool:
return self._filter.aggregation_group_type_index is not None
def aggregation_group_type_index(self):
return self._filter.aggregation_group_type_index
def actor_query(self, limit_actors: Optional[bool] = True, extra_fields: Optional[List[str]] = None):
extra_fields_string = ", ".join([self._get_timestamp_outer_select()] + (extra_fields or []))

View File

@ -14,8 +14,8 @@ class ClickhouseFunnelTrendsActors(ClickhouseFunnelTrends, ActorBaseQuery):
_filter: Filter
@cached_property
def is_aggregating_by_groups(self) -> bool:
return self._filter.aggregation_group_type_index is not None
def aggregation_group_type_index(self):
return self._filter.aggregation_group_type_index
def actor_query(self, limit_actors: Optional[bool] = True):
drop_off = self._filter.drop_off

View File

@ -11,8 +11,8 @@ class ClickhouseFunnelUnorderedActors(ClickhouseFunnelUnordered, ActorBaseQuery)
_filter: Filter
@cached_property
def is_aggregating_by_groups(self) -> bool:
return self._filter.aggregation_group_type_index is not None
def aggregation_group_type_index(self):
return self._filter.aggregation_group_type_index
def actor_query(self, limit_actors: Optional[bool] = True, extra_fields: Optional[List[str]] = None):
extra_fields_string = ", ".join([self._get_timestamp_outer_select()] + (extra_fields or []))

View File

@ -26,10 +26,6 @@ class ClickhousePathsActors(ClickhousePaths, ActorBaseQuery): # type: ignore
other path item between start and end key.
"""
@cached_property
def is_aggregating_by_groups(self) -> bool:
return False
def actor_query(self, limit_actors: Optional[bool] = True) -> Tuple[str, Dict]:
paths_per_person_query = self.get_paths_per_person_query()
person_path_filter = self.get_person_path_filter()

View File

@ -1,24 +1,23 @@
import json
from datetime import timedelta
from functools import cached_property
from typing import Dict, List, Literal, Optional, TypedDict, Union
from typing import List, Optional, Union
from django.utils.timezone import now
from ee.clickhouse.client import sync_execute
from ee.clickhouse.queries.actor_base_query import (
SerializedActor,
SerializedGroup,
SerializedPerson,
get_groups,
get_people,
)
from ee.clickhouse.queries.person_distinct_id_query import get_team_distinct_ids_query
from posthog.models.filters.utils import validate_group_type_index
from posthog.models.group_type_mapping import GroupTypeMapping
from posthog.models.property import GroupTypeIndex
class RelatedActorsResponse(TypedDict):
type: Literal["person", "group"]
group_type_index: Optional[GroupTypeIndex]
id: str
person: Optional[Dict]
class RelatedActorsQuery:
"""
This query calculates other groups and persons that are related to a person or a group.
@ -31,8 +30,9 @@ class RelatedActorsQuery:
self.group_type_index = validate_group_type_index("group_type_index", group_type_index)
self.id = id
def run(self) -> List[RelatedActorsResponse]:
results = self._query_related_people()
def run(self) -> List[SerializedActor]:
results: List[SerializedActor] = []
results.extend(self._query_related_people())
for group_type_mapping in GroupTypeMapping.objects.filter(team_id=self.team_id):
results.extend(self._query_related_groups(group_type_mapping.group_type_index))
return results
@ -41,48 +41,36 @@ class RelatedActorsQuery:
def is_aggregating_by_groups(self) -> bool:
return self.group_type_index is not None
def _query_related_people(self) -> List[RelatedActorsResponse]:
def _query_related_people(self) -> List[SerializedPerson]:
if not self.is_aggregating_by_groups:
return []
# :KLUDGE: We need to fetch distinct_id + person properties to be able to link to user properly.
rows = sync_execute(
f"""
SELECT person_id, any(e.distinct_id), any(person_props)
person_ids = self._take_first(
sync_execute(
f"""
SELECT DISTINCT person_id
FROM events e
{self._distinct_ids_join}
JOIN (
SELECT id, any(properties) as person_props
FROM person
WHERE team_id = %(team_id)s
GROUP BY id
HAVING max(is_deleted) = 0
) person ON pdi.person_id = person.id
WHERE team_id = %(team_id)s
AND timestamp > %(after)s
AND timestamp < %(before)s
AND {self._filter_clause}
GROUP BY person_id
""",
self._params,
self._params,
)
)
return [
RelatedActorsResponse(
type="person",
group_type_index=None,
id=person_id,
person={"distinct_ids": [distinct_id], "properties": json.loads(person_props)},
)
for (person_id, distinct_id, person_props) in rows
]
_, serialized_people = get_people(self.team_id, person_ids)
return serialized_people
def _query_related_groups(self, group_type_index: GroupTypeIndex) -> List[RelatedActorsResponse]:
def _query_related_groups(self, group_type_index: GroupTypeIndex) -> List[SerializedGroup]:
if group_type_index == self.group_type_index:
return []
rows = sync_execute(
f"""
group_ids = self._take_first(
sync_execute(
f"""
SELECT DISTINCT $group_{group_type_index} AS group_key
FROM events e
{'' if self.is_aggregating_by_groups else self._distinct_ids_join}
@ -99,12 +87,15 @@ class RelatedActorsQuery:
AND {self._filter_clause}
ORDER BY group_key
""",
{**self._params, "group_type_index": group_type_index},
{**self._params, "group_type_index": group_type_index},
)
)
return [
RelatedActorsResponse(type="group", group_type_index=group_type_index, id=group_key, person=None)
for (group_key,) in rows
]
_, serialize_groups = get_groups(self.team_id, group_type_index, group_ids)
return serialize_groups
def _take_first(self, rows: List) -> List:
return [row[0] for row in rows]
@property
def _filter_clause(self):

View File

@ -22,8 +22,8 @@ class ClickhouseRetentionActors(ActorBaseQuery):
super().__init__(team, filter)
@cached_property
def is_aggregating_by_groups(self) -> bool:
return self._filter.aggregation_group_type_index is not None
def aggregation_group_type_index(self):
return self._filter.aggregation_group_type_index
def actor_query(self, limit_actors: Optional[bool] = True) -> Tuple[str, Dict]:
actor_query = _build_actor_query(
@ -44,8 +44,8 @@ class ClickhouseRetentionActorsByPeriod(ActorBaseQuery):
super().__init__(team, filter)
@cached_property
def is_aggregating_by_groups(self) -> bool:
return self._filter.aggregation_group_type_index is not None
def aggregation_group_type_index(self):
return self._filter.aggregation_group_type_index
def actors(self):
"""
@ -152,7 +152,7 @@ def _build_actor_query(
-- NOTE: relies on ids being monotonic
ORDER BY actor_id
LIMIT 100
LIMIT 100
OFFSET %(offset)s
"""

View File

@ -16,8 +16,10 @@ class ClickhouseStickinessActors(ActorBaseQuery):
super().__init__(team, filter, entity, **kwargs)
@cached_property
def is_aggregating_by_groups(self) -> bool:
return self.entity.math == "unique_group"
def aggregation_group_type_index(self):
if self.entity.math == "unique_group":
return self.entity.math_group_type_index
return None
def actor_query(self, limit_actors: Optional[bool] = True) -> Tuple[str, Dict]:
events_query, event_params = StickinessEventsQuery(

View File

@ -47,8 +47,10 @@ class ClickhouseTrendsActors(ActorBaseQuery):
super().__init__(team, filter, entity, **kwargs)
@cached_property
def is_aggregating_by_groups(self) -> bool:
return self.entity.math == "unique_group"
def aggregation_group_type_index(self):
if self.entity.math == "unique_group":
return self.entity.math_group_type_index
return None
def actor_query(self, limit_actors: Optional[bool] = True) -> Tuple[str, Dict]:
if self._filter.breakdown_type == "cohort" and self._filter.breakdown_value != "all":

View File

@ -27,7 +27,7 @@
# name: TestPerson.test_group_query_includes_recording_events.1
'
SELECT distinct session_id
SELECT DISTINCT session_id
FROM session_recording_events
WHERE team_id = 2
and has_full_snapshot = 1

View File

@ -45,7 +45,7 @@
# name: TestActionPeople.test_trends_people_endpoint_includes_recordings.1
'
/* request:api_projects_(?P<parent_lookup_team_id>[^_.]+)_actions_people_?$ (ClickhouseActionsViewSet) */
SELECT distinct session_id
SELECT DISTINCT session_id
FROM session_recording_events
WHERE team_id = 2
and has_full_snapshot = 1

View File

@ -1,9 +1,7 @@
# name: ClickhouseTestGroupsApi.test_related_groups
'
/* request:api_projects_(?P<parent_lookup_team_id>[^_.]+)_groups_related_?$ (ClickhouseGroupsView) */
SELECT person_id,
any(e.distinct_id),
any(person_props)
SELECT DISTINCT person_id
FROM events e
JOIN
(SELECT distinct_id,
@ -12,18 +10,10 @@
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) pdi on e.distinct_id = pdi.distinct_id
JOIN
(SELECT id,
any(properties) as person_props
FROM person
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0) person ON pdi.person_id = person.id
WHERE team_id = 2
AND timestamp > '2021-02-09T00:00:00.000000'
AND timestamp < '2021-05-10T00:00:00.000000'
AND $group_0 = '0::0'
GROUP BY person_id
'
---
# name: ClickhouseTestGroupsApi.test_related_groups.1

View File

@ -105,13 +105,30 @@ class ClickhouseTestGroupsApi(ClickhouseTestMixin, APIBaseTest):
response,
[
{
"created_at": "2021-05-10T00:00:00Z",
"distinct_ids": ["1", "2"],
"id": "01795392-cc00-0003-7dc7-67a694604d72",
"is_identified": False,
"name": "1",
"properties": {},
"type": "person",
"id": str(uuid),
"group_type_index": None,
"person": {"distinct_ids": ["1"], "properties": {}},
},
{"type": "group", "id": "1::2", "group_type_index": 1, "person": None},
{"type": "group", "id": "1::3", "group_type_index": 1, "person": None},
{
"created_at": "2021-05-10T00:00:00Z",
"group_key": "1::2",
"group_type_index": 1,
"id": "1::2",
"properties": {},
"type": "group",
},
{
"created_at": "2021-05-10T00:00:00Z",
"group_key": "1::3",
"group_type_index": 1,
"id": "1::3",
"properties": {},
"type": "group",
},
],
)
@ -124,10 +141,38 @@ class ClickhouseTestGroupsApi(ClickhouseTestMixin, APIBaseTest):
self.assertEqual(
response,
[
{"type": "group", "id": "0::0", "group_type_index": 0, "person": None},
{"type": "group", "id": "0::1", "group_type_index": 0, "person": None},
{"type": "group", "id": "1::2", "group_type_index": 1, "person": None},
{"type": "group", "id": "1::3", "group_type_index": 1, "person": None},
{
"created_at": "2021-05-10T00:00:00Z",
"group_key": "0::0",
"group_type_index": 0,
"id": "0::0",
"properties": {},
"type": "group",
},
{
"created_at": "2021-05-10T00:00:00Z",
"group_key": "0::1",
"group_type_index": 0,
"id": "0::1",
"properties": {},
"type": "group",
},
{
"created_at": "2021-05-10T00:00:00Z",
"group_key": "1::2",
"group_type_index": 1,
"id": "1::2",
"properties": {},
"type": "group",
},
{
"created_at": "2021-05-10T00:00:00Z",
"group_key": "1::3",
"group_type_index": 1,
"id": "1::3",
"properties": {},
"type": "group",
},
],
)

View File

@ -1,15 +1,14 @@
import React from 'react'
import { useValues } from 'kea'
import { LemonTable, LemonTableColumns } from 'lib/components/LemonTable'
import { RelatedActor } from '~/types'
import { ActorType } from '~/types'
import { Skeleton } from 'antd'
import { groupsModel } from '~/models/groupsModel'
import UserOutlined from '@ant-design/icons/lib/icons/UserOutlined'
import { capitalizeFirstLetter } from 'lib/utils'
import { urls } from 'scenes/urls'
import { Link } from 'lib/components/Link'
import { asDisplay } from 'scenes/persons/PersonHeader'
import { PersonHeader } from 'scenes/persons/PersonHeader'
import { relatedGroupsLogic } from 'scenes/groups/relatedGroupsLogic'
import { GroupActorHeader } from 'scenes/persons/GroupActorHeader'
interface Props {
groupTypeIndex: number | null
@ -24,11 +23,11 @@ export function RelatedGroups({ groupTypeIndex, id }: Props): JSX.Element {
return <Skeleton paragraph={{ rows: 2 }} active />
}
const columns: LemonTableColumns<RelatedActor> = [
const columns: LemonTableColumns<ActorType> = [
{
title: 'Type',
key: 'type',
render: function RenderCount(_, actor: RelatedActor) {
render: function RenderActor(_, actor: ActorType) {
if (actor.type === 'group') {
return <>{capitalizeFirstLetter(aggregationLabel(actor.group_type_index).singular)}</>
} else {
@ -43,22 +42,11 @@ export function RelatedGroups({ groupTypeIndex, id }: Props): JSX.Element {
{
title: 'id',
key: 'id',
render: function RenderCount(_, actor: RelatedActor) {
let url: string
render: function RenderActor(_, actor: ActorType) {
if (actor.type == 'group') {
url = urls.group(actor.group_type_index, actor.id)
return (
<Link to={url} data-attr="related-group-link">
{actor.id}
</Link>
)
return <GroupActorHeader actor={actor} />
} else {
url = urls.person(actor.person.distinct_ids[0])
return (
<Link to={url} data-attr="related-person-link">
{asDisplay(actor.person)}
</Link>
)
return <PersonHeader person={actor} withIcon={false} />
}
},
},

View File

@ -2,7 +2,7 @@ import { kea } from 'kea'
import api from 'lib/api'
import { toParams } from 'lib/utils'
import { teamLogic } from 'scenes/teamLogic'
import { RelatedActor } from '~/types'
import { ActorType } from '~/types'
import { relatedGroupsLogicType } from './relatedGroupsLogicType'
export const relatedGroupsLogic = kea<relatedGroupsLogicType>({
@ -20,7 +20,7 @@ export const relatedGroupsLogic = kea<relatedGroupsLogicType>({
}),
loaders: ({ values, props }) => ({
relatedActors: [
[] as RelatedActor[],
[] as ActorType[],
{
loadRelatedActors: async () => {
const url = `api/projects/${values.currentTeamId}/groups/related?${toParams({

View File

@ -499,7 +499,6 @@ export interface MatchedRecording {
}
interface CommonActorType {
type: 'group' | 'person'
id?: string | number
properties: Record<string, any>
created_at?: string
@ -507,6 +506,7 @@ interface CommonActorType {
}
export interface PersonActorType extends CommonActorType {
type: 'person'
uuid?: string
name?: string
distinct_ids: string[]
@ -514,6 +514,7 @@ export interface PersonActorType extends CommonActorType {
}
export interface GroupActorType extends CommonActorType {
type: 'group'
group_key: string
group_type_index: number
}
@ -1401,20 +1402,6 @@ export interface ExperimentResults {
noData?: boolean
}
interface RelatedPerson {
type: 'person'
id: string
person: Pick<PersonType, 'distinct_ids' | 'properties'>
}
interface RelatedGroup {
type: 'group'
group_type_index: number
id: string
}
export type RelatedActor = RelatedPerson | RelatedGroup
export interface SelectOption {
value: string
label?: string