mirror of
https://github.com/PostHog/posthog.git
synced 2024-12-01 12:21:02 +01:00
b94d02bc10
* test(retention): add test for retention breakdown with materialized property * remove unused imports * Join tables based on breakdowns properties
149 lines
7.0 KiB
Python
149 lines
7.0 KiB
Python
from typing import Counter, List, Set, Union, cast
|
|
|
|
from ee.clickhouse.materialized_columns.columns import ColumnName, get_materialized_columns
|
|
from ee.clickhouse.models.action import get_action_tables_and_properties, uses_elements_chain
|
|
from ee.clickhouse.models.property import box_value, extract_tables_and_properties
|
|
from posthog.constants import TREND_FILTER_TYPE_ACTIONS, FunnelCorrelationType
|
|
from posthog.models.entity import Entity
|
|
from posthog.models.filters import Filter
|
|
from posthog.models.filters.mixins.utils import cached_property
|
|
from posthog.models.filters.path_filter import PathFilter
|
|
from posthog.models.filters.retention_filter import RetentionFilter
|
|
from posthog.models.filters.stickiness_filter import StickinessFilter
|
|
from posthog.models.filters.utils import GroupTypeIndex
|
|
from posthog.models.property import PropertyIdentifier, PropertyType, TableWithProperties
|
|
|
|
|
|
class ColumnOptimizer:
    """
    Works out which table columns a query has to read, based on the query filter.

    Every property the filter touches is collected once (see
    `properties_used_in_filter`) and then mapped either to its materialized
    column or, when none exists, to the generic `properties` column.

    This speeds up queries since clickhouse ends up selecting less data.
    """

    def __init__(self, filter: Union[Filter, PathFilter, RetentionFilter, StickinessFilter], team_id: int):
        """Store the filter to analyse and the id of the team the query runs for."""
        self.filter = filter
        self.team_id = team_id

    @cached_property
    def event_columns_to_query(self) -> Set[ColumnName]:
        """Returns the set of event table columns needed for the event properties this query uses."""

        return self.columns_to_query("events", set(self._used_properties_with_type("event")))

    @cached_property
    def person_columns_to_query(self) -> Set[ColumnName]:
        """Returns the set of person table columns needed for the person properties this query uses."""

        return self.columns_to_query("person", set(self._used_properties_with_type("person")))

    def columns_to_query(self, table: TableWithProperties, used_properties: Set[PropertyIdentifier]) -> Set[ColumnName]:
        """
        Transforms a set of property identifiers into the columns needed to read them from `table`.

        A property with a materialized column maps to that column; any other
        property maps to the catch-all "properties" column.
        """

        materialized_columns = get_materialized_columns(table)
        return {materialized_columns.get(property_name, "properties") for property_name, _, _ in used_properties}

    @cached_property
    def is_using_person_properties(self) -> bool:
        """Returns whether the query uses at least one person property."""
        return bool(self._used_properties_with_type("person"))

    @cached_property
    def group_types_to_query(self) -> Set[GroupTypeIndex]:
        """Returns the group type indexes of all group properties this query uses."""
        used_properties = self._used_properties_with_type("group")
        return {cast(GroupTypeIndex, group_type_index) for _, _, group_type_index in used_properties}

    @cached_property
    def should_query_elements_chain_column(self) -> bool:
        """Returns whether this query uses elements_chain"""

        def has_element_type_property(properties) -> bool:
            return any(prop.type == "element" for prop in properties)

        if has_element_type_property(self.filter.properties):
            return True

        # Both entities and funnel exclusions can contain nested elements_chain inclusions
        for entity in self.filter.entities + cast(List[Entity], self.filter.exclusions):
            if has_element_type_property(entity.properties):
                return True

            # :TRICKY: Action definition may contain elements_chain usage
            #
            # See ee/clickhouse/models/action.py#format_action_filter for an example
            if entity.type == TREND_FILTER_TYPE_ACTIONS:
                if uses_elements_chain(entity.get_action()):
                    return True

        return False

    @cached_property
    def properties_used_in_filter(self) -> Counter[PropertyIdentifier]:
        """
        Returns collection of properties + types that this query would use.

        Keys are (property name, property type, group type index) tuples and
        values count how many places in the filter reference that property.
        """
        counter: Counter[PropertyIdentifier] = extract_tables_and_properties(self.filter.properties)

        if not isinstance(self.filter, StickinessFilter):
            # Some breakdown types read properties
            #
            # See ee/clickhouse/queries/trends/breakdown.py#get_query or
            # ee/clickhouse/queries/breakdown_props.py#get_breakdown_prop_values
            if self.filter.breakdown_type in ["event", "person"]:
                boxed_breakdown = box_value(self.filter.breakdown)
                for b in boxed_breakdown:
                    # Only string entries are counted as property names
                    if isinstance(b, str):
                        counter[(b, self.filter.breakdown_type, self.filter.breakdown_group_type_index)] += 1
            elif self.filter.breakdown_type == "group":
                # :TRICKY: We only support string breakdown for group properties
                assert isinstance(self.filter.breakdown, str)
                counter[
                    (self.filter.breakdown, self.filter.breakdown_type, self.filter.breakdown_group_type_index)
                ] += 1

            # If we have a breakdowns attribute then make sure we pull in everything we
            # need to calculate it
            for breakdown in self.filter.breakdowns or []:
                counter[(breakdown["property"], breakdown["type"], self.filter.breakdown_group_type_index)] += 1

        # Both entities and funnel exclusions can contain nested property filters
        for entity in self.filter.entities + cast(List[Entity], self.filter.exclusions):
            counter += extract_tables_and_properties(entity.properties)

            # Math properties are also implicitly used.
            #
            # See ee/clickhouse/queries/trends/util.py#process_math
            if entity.math_property:
                counter[(entity.math_property, "event", None)] += 1

            # If groups are involved, they're also used
            #
            # See ee/clickhouse/queries/trends/util.py#process_math
            if entity.math == "unique_group":
                counter[(f"$group_{entity.math_group_type_index}", "event", None)] += 1

            # :TRICKY: If action contains property filters, these need to be included
            #
            # See ee/clickhouse/models/action.py#format_action_filter for an example
            if entity.type == TREND_FILTER_TYPE_ACTIONS:
                counter += get_action_tables_and_properties(entity.get_action())

        if (
            not isinstance(self.filter, StickinessFilter)
            and self.filter.correlation_type == FunnelCorrelationType.PROPERTIES
            and self.filter.correlation_property_names
        ):
            # Correlation property names are counted as group properties when the
            # filter aggregates by a group type, and as person properties otherwise
            if self.filter.aggregation_group_type_index is not None:
                for prop_value in self.filter.correlation_property_names:
                    counter[(prop_value, "group", self.filter.aggregation_group_type_index)] += 1
            else:
                for prop_value in self.filter.correlation_property_names:
                    counter[(prop_value, "person", None)] += 1

        return counter

    def _used_properties_with_type(self, property_type: PropertyType) -> Counter[PropertyIdentifier]:
        """Returns the subset of `properties_used_in_filter` whose property type equals `property_type`."""
        return Counter(
            {
                (name, prop_type, group_type_index): count
                for (name, prop_type, group_type_index), count in self.properties_used_in_filter.items()
                if prop_type == property_type
            }
        )
|