# Mirror of https://github.com/PostHog/posthog.git (synced 2024-11-28 18:26:15 +01:00)
# File: posthog/ee/clickhouse/queries/column_optimizer.py (86 lines, 4.2 KiB, Python)

from typing import Counter, List, Set, cast
from posthog.constants import TREND_FILTER_TYPE_ACTIONS, FunnelCorrelationType
from posthog.models.action.util import get_action_tables_and_properties
from posthog.models.entity import Entity
from posthog.models.filters.mixins.utils import cached_property
from posthog.models.filters.stickiness_filter import StickinessFilter
from posthog.models.filters.utils import GroupTypeIndex
from posthog.models.property import PropertyIdentifier
from posthog.models.property.util import box_value, extract_tables_and_properties
from posthog.queries.column_optimizer.foss_column_optimizer import FOSSColumnOptimizer
class EnterpriseColumnOptimizer(FOSSColumnOptimizer):
    """Column optimizer that extends ``FOSSColumnOptimizer``.

    Adds the set of group types a query touches and a counter of every
    property the filter reads (breakdowns, entity/exclusion filters, math
    properties, action filters and funnel correlation properties).
    """

    @cached_property
    def group_types_to_query(self) -> Set[GroupTypeIndex]:
        """Return the group type indexes of all "group" properties in use.

        Takes the third element of every identifier returned by
        ``self._used_properties_with_type("group")``.
        """
        used_properties = self._used_properties_with_type("group")
        return {cast(GroupTypeIndex, group_type_index) for _, _, group_type_index in used_properties}

    @cached_property
    def properties_used_in_filter(self) -> Counter[PropertyIdentifier]:
        """Return the collection of properties + types that this query would use.

        Returns:
            A counter keyed by ``(property name, property type, group type
            index)`` tuples; each value is the number of times that property
            was seen while walking the filter.
        """
        counter: Counter[PropertyIdentifier] = extract_tables_and_properties(self.filter.property_groups.flat)

        if not isinstance(self.filter, StickinessFilter):
            # Some breakdown types read properties
            #
            # See ee/clickhouse/queries/trends/breakdown.py#get_query or
            # ee/clickhouse/queries/breakdown_props.py#get_breakdown_prop_values
            if self.filter.breakdown_type in ["event", "person"]:
                boxed_breakdown = box_value(self.filter.breakdown)
                for b in boxed_breakdown:
                    # Only string breakdown values are counted as property names.
                    if isinstance(b, str):
                        counter[(b, self.filter.breakdown_type, self.filter.breakdown_group_type_index)] += 1
            elif self.filter.breakdown_type == "group":
                # :TRICKY: We only support string breakdown for group properties
                assert isinstance(self.filter.breakdown, str)
                counter[
                    (self.filter.breakdown, self.filter.breakdown_type, self.filter.breakdown_group_type_index)
                ] += 1

            # If we have a breakdowns attribute then make sure we pull in everything we
            # need to calculate it
            for breakdown in self.filter.breakdowns or []:
                counter[(breakdown["property"], breakdown["type"], self.filter.breakdown_group_type_index)] += 1

        # Both entities and funnel exclusions can contain nested property filters
        for entity in self.filter.entities + cast(List[Entity], self.filter.exclusions):
            counter += extract_tables_and_properties(entity.property_groups.flat)

            # Math properties are also implicitly used.
            #
            # See ee/clickhouse/queries/trends/util.py#process_math
            if entity.math_property:
                counter[(entity.math_property, "event", None)] += 1

            # If groups are involved, they're also used
            #
            # See ee/clickhouse/queries/trends/util.py#process_math
            if entity.math == "unique_group":
                counter[(f"$group_{entity.math_group_type_index}", "event", None)] += 1

            if entity.math == "unique_session":
                counter[("$session_id", "event", None)] += 1

            # :TRICKY: If action contains property filters, these need to be included
            #
            # See ee/clickhouse/models/action.py#format_action_filter for an example
            if entity.type == TREND_FILTER_TYPE_ACTIONS:
                counter += get_action_tables_and_properties(entity.get_action())

        if (
            not isinstance(self.filter, StickinessFilter)
            and self.filter.correlation_type == FunnelCorrelationType.PROPERTIES
            and self.filter.correlation_property_names
        ):
            # Correlation properties are counted as group properties when an
            # aggregation group type is set, and as person properties otherwise.
            if self.filter.aggregation_group_type_index is not None:
                for prop_value in self.filter.correlation_property_names:
                    counter[(prop_value, "group", self.filter.aggregation_group_type_index)] += 1
            else:
                for prop_value in self.filter.correlation_property_names:
                    counter[(prop_value, "person", None)] += 1

        return counter