0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-28 18:26:15 +01:00
posthog/ee/clickhouse/queries/breakdown_props.py
Tim Glaser f47b1308b5
[Proposal] Add automatic swagger doc generation (#8148)
* Add automatic swagger doc generation

* Fix test and mypy

* formatting

* fix formatting

* formatting

* update

* fix mypy
2022-01-24 17:21:56 +00:00

183 lines
6.7 KiB
Python

from typing import (
Any,
Dict,
List,
Literal,
Optional,
Tuple,
Union,
cast,
)
from ee.clickhouse.client import sync_execute
from ee.clickhouse.models.cohort import format_filter_query
from ee.clickhouse.models.entity import get_entity_filtering_params
from ee.clickhouse.models.property import (
get_property_string_expr,
get_single_or_multi_property_string_expr,
parse_prop_clauses,
)
from ee.clickhouse.models.util import PersonPropertiesMode
from ee.clickhouse.queries.column_optimizer import ColumnOptimizer
from ee.clickhouse.queries.groups_join_query import GroupsJoinQuery
from ee.clickhouse.queries.person_distinct_id_query import get_team_distinct_ids_query
from ee.clickhouse.queries.person_query import ClickhousePersonQuery
from ee.clickhouse.queries.util import parse_timestamps
from ee.clickhouse.sql.trends.top_elements import TOP_ELEMENTS_ARRAY_OF_KEY_SQL
from posthog.constants import BREAKDOWN_TYPES
from posthog.models.cohort import Cohort
from posthog.models.entity import Entity
from posthog.models.filters.filter import Filter
from posthog.models.filters.utils import GroupTypeIndex
# Sentinel id standing in for the "all users" pseudo-cohort in cohort breakdowns:
# _format_all_query emits it as the `value` column and get_breakdown_cohort_name
# maps it to the label "all users" instead of looking up a Cohort row.
ALL_USERS_COHORT_ID = 0
def get_breakdown_prop_values(
    filter: Filter,
    entity: Entity,
    aggregate_operation: str,
    team_id: int,
    limit: int = 25,
    extra_params: Optional[Dict[str, Any]] = None,
    column_optimizer: Optional[ColumnOptimizer] = None,
):
    """
    Returns the top N breakdown prop values for event/person breakdown

    e.g. for Browser with limit 3 might return ['Chrome', 'Safari', 'Firefox', 'Other']

    :param filter: Supplies the breakdown key and type, the date range, the
        property filters and the offset bound into the query.
    :param entity: Its properties are filtered on together with the filter's,
        and it provides the entity filtering params for the query.
    :param aggregate_operation: Substituted into the top-elements SQL template.
    :param team_id: Team the query is scoped to.
    :param limit: Bound as the ``limit`` query parameter.
    :param extra_params: Additional query parameters merged into the ones built
        here. They override the property/entity/person/groups params on key
        clashes and are themselves overridden by the date params. ``None``
        (the default) means no extra parameters.
    :param column_optimizer: Passed through to the person and groups join
        query builders.
    :return: The first column of the first row returned by the query.
    """
    # A literal `{}` default would be a single dict shared by every call;
    # normalise here so each call gets its own mapping.
    extra_params = {} if extra_params is None else extra_params

    parsed_date_from, parsed_date_to, date_params = parse_timestamps(filter=filter, team_id=team_id)

    # Person properties are excluded here; when needed they are handled by the
    # person join built below.
    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties + entity.properties,
        table_name="e",
        prepend="e_brkdwn",
        person_properties_mode=PersonPropertiesMode.EXCLUDE,
        allow_denormalized_props=True,
    )

    entity_params, entity_format_params = get_entity_filtering_params(entity, team_id, table_name="e")

    value_expression = _to_value_expression(filter.breakdown_type, filter.breakdown, filter.breakdown_group_type_index)

    # Only join persons when the person query reports that it is needed.
    person_join_clauses = ""
    person_join_params: Dict = {}
    person_query = ClickhousePersonQuery(filter, team_id, column_optimizer=column_optimizer, entity=entity)
    if person_query.is_used:
        person_subquery, person_join_params = person_query.get_query()
        person_join_clauses = f"""
INNER JOIN ({get_team_distinct_ids_query(team_id)}) AS pdi ON e.distinct_id = pdi.distinct_id
INNER JOIN ({person_subquery}) person ON pdi.person_id = person.id
"""

    groups_join_condition, groups_join_params = GroupsJoinQuery(filter, team_id, column_optimizer).get_join_query()

    elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
        value_expression=value_expression,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        prop_filters=prop_filters,
        aggregate_operation=aggregate_operation,
        person_join_clauses=person_join_clauses,
        groups_join_clauses=groups_join_condition,
        **entity_format_params,
    )

    # Later entries win on duplicate keys, so the merge order below is significant.
    query_params = {
        "key": filter.breakdown,
        "limit": limit,
        "team_id": team_id,
        "offset": filter.offset,
        **prop_filter_params,
        **entity_params,
        **person_join_params,
        **groups_join_params,
        **extra_params,
        **date_params,
    }

    # The values of interest are in the first column of the first result row.
    return sync_execute(elements_query, query_params)[0][0]
def _to_value_expression(
    breakdown_type: Optional[BREAKDOWN_TYPES],
    breakdown: Union[str, List[Union[str, int]], None],
    breakdown_group_type_index: Optional[GroupTypeIndex],
) -> str:
    """
    Build the SQL expression that selects the breakdown value.

    Group breakdowns read the property from the
    ``group_properties_<breakdown_group_type_index>`` column and are suffixed
    with ``AS value``. Person breakdowns read from the ``person`` table, and
    every other breakdown type reads from the ``events`` table; both request
    the ``value`` alias from the property-expression helper.
    """
    if breakdown_type == "group":
        group_expression, _ = get_property_string_expr(
            table="groups",
            property_name=cast(str, breakdown),
            var="%(key)s",
            column=f"group_properties_{breakdown_group_type_index}",
        )
        return f"{group_expression} AS value"

    # Anything that is not a person breakdown falls back to event properties.
    source_table = "person" if breakdown_type == "person" else "events"
    return get_single_or_multi_property_string_expr(breakdown, table=source_table, query_alias="value")
def _format_all_query(team_id: int, filter: Filter, **kwargs) -> Tuple[str, Dict]:
    """
    Build the sub-query backing the "all users" entry of a cohort breakdown.

    Selects every distinct ``distinct_id`` from the team's events within the
    filter's date range and property filters, tagging each row with
    ``ALL_USERS_COHORT_ID`` as ``value``. If an ``entity`` keyword argument
    holding an ``Entity`` is supplied, its properties are filtered on as well.

    Returns the SQL text and the parameters it needs.
    """
    entity = kwargs.pop("entity", None)

    date_from_clause, date_to_clause, date_params = parse_timestamps(
        filter=filter, team_id=team_id, table="all_events."
    )

    # Work on a copy so the filter's own property list is never modified.
    properties = list(filter.properties)
    if entity and isinstance(entity, Entity):
        properties.extend(entity.properties)

    property_clause, property_params = parse_prop_clauses(
        properties, prepend="all_cohort_", table_name="all_events"
    )

    query = f"""
SELECT DISTINCT distinct_id, {ALL_USERS_COHORT_ID} as value
FROM events all_events
WHERE team_id = {team_id}
{date_from_clause}
{date_to_clause}
{property_clause}
"""
    return query, {**date_params, **property_params}
def format_breakdown_cohort_join_query(team_id: int, filter: Filter, **kwargs) -> Tuple[str, List, Dict]:
    """
    Build the UNION ALL query mapping distinct ids to the cohorts being broken down by.

    ``filter.breakdown`` may be a single cohort id or a list of ids; a list
    may also contain the string ``"all"``, which adds the "all users" sub-query
    and ``ALL_USERS_COHORT_ID`` to the result.

    Returns a tuple of (SQL text, cohort ids covered by the query, query params).
    """
    entity = kwargs.pop("entity", None)
    breakdown = filter.breakdown
    is_multi = isinstance(breakdown, list)

    if is_multi:
        # "all" is not a real cohort pk, so keep it out of the lookup.
        requested_pks = [item for item in breakdown if item != "all"]
        cohort_qs = Cohort.objects.filter(team_id=team_id, pk__in=requested_pks)
    else:
        cohort_qs = Cohort.objects.filter(team_id=team_id, pk=breakdown)

    matched_cohorts = list(cohort_qs)
    cohort_queries, params = _parse_breakdown_cohorts(matched_cohorts)
    ids = [cohort.pk for cohort in matched_cohorts]

    if is_multi and "all" in breakdown:
        all_query, all_params = _format_all_query(team_id, filter, entity=entity)
        cohort_queries.append(all_query)
        params = {**params, **all_params}
        ids.append(ALL_USERS_COHORT_ID)

    union_sql = " UNION ALL ".join(cohort_queries)
    return union_sql, ids, params
def _parse_breakdown_cohorts(cohorts: List[Cohort]) -> Tuple[List[str], Dict]:
    """
    Turn each cohort into a query that yields ``distinct_id`` plus the cohort pk as ``value``.

    Returns the list of per-cohort queries (in input order) and the merged
    query parameters of all of them.
    """
    queries: List[str] = []
    params: Dict[str, Any] = {}

    for position, cohort in enumerate(cohorts):
        person_id_query, cohort_params = format_filter_query(cohort, position)
        params.update(cohort_params)
        # count=1: only the first (top-level) SELECT gets the extra column.
        queries.append(
            person_id_query.replace("SELECT distinct_id", f"SELECT distinct_id, {cohort.pk} as value", 1)
        )

    return queries, params
def get_breakdown_cohort_name(cohort_id: int) -> str:
    """
    Return the display name for a breakdown cohort id.

    ``ALL_USERS_COHORT_ID`` maps to the fixed label "all users"; any other id
    is looked up as a ``Cohort`` primary key and that cohort's name is returned.
    """
    if cohort_id != ALL_USERS_COHORT_ID:
        return Cohort.objects.get(pk=cohort_id).name
    return "all users"