0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-28 00:46:45 +01:00

fix(insights): breakdown with special properties (#19597)

This commit is contained in:
Marius Andra 2024-01-04 16:35:33 +01:00 committed by GitHub
parent 93fe15b0f8
commit 96a596274a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 63 additions and 35 deletions

View File

@ -1198,21 +1198,30 @@ TEST_BREAKDOWN_PROCESSING = [
"events",
"prop",
"properties",
"replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^\"|\"$', '') AS prop",
(
"replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', '') AS prop",
{"breakdown_param_1": "$browser"},
),
),
(
["$browser"],
"events",
"value",
"properties",
"array(replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^\"|\"$', '')) AS value",
(
"array(replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', '')) AS value",
{"breakdown_param_1": "$browser"},
),
),
(
["$browser", "$browser_version"],
"events",
"prop",
"properties",
"array(replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^\"|\"$', ''),replaceRegexpAll(JSONExtractRaw(properties, '$browser_version'), '^\"|\"$', '')) AS prop",
(
"array(replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', ''),replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_2)s), '^\"|\"$', '')) AS prop",
{"breakdown_param_1": "$browser", "breakdown_param_2": "$browser_version"},
),
),
]
@ -1239,8 +1248,11 @@ TEST_BREAKDOWN_PROCESSING_MATERIALIZED = [
"value",
"properties",
"person_properties",
"array(replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^\"|\"$', '')) AS value",
'array("mat_pp_$browser") AS value',
(
"array(replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', '')) AS value",
{"breakdown_param_1": "$browser"},
),
('array("mat_pp_$browser") AS value', {"breakdown_param_1": "$browser"}),
),
(
["$browser", "$browser_version"],
@ -1248,8 +1260,14 @@ TEST_BREAKDOWN_PROCESSING_MATERIALIZED = [
"prop",
"properties",
"group2_properties",
"array(replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^\"|\"$', ''),replaceRegexpAll(JSONExtractRaw(properties, '$browser_version'), '^\"|\"$', '')) AS prop",
"""array("mat_gp2_$browser",replaceRegexpAll(JSONExtractRaw(properties, '$browser_version'), '^\"|\"$', '')) AS prop""",
(
"array(replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', ''),replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_2)s), '^\"|\"$', '')) AS prop",
{"breakdown_param_1": "$browser", "breakdown_param_2": "$browser_version"},
),
(
"""array("mat_gp2_$browser",replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_2)s), '^\"|\"$', '')) AS prop""",
{"breakdown_param_1": "$browser", "breakdown_param_2": "$browser_version"},
),
),
]
@ -1282,7 +1300,6 @@ def test_breakdown_query_expression_materialised(
column,
materialised_table_column=materialise_column,
)
assert actual == expected_with
materialize(table, breakdown[0], table_column=materialise_column) # type: ignore

View File

@ -15,7 +15,6 @@ from typing import (
from rest_framework import exceptions
from posthog.clickhouse.client.escape import escape_param_for_clickhouse
from posthog.clickhouse.kafka_engine import trim_quotes_expr
from posthog.clickhouse.materialized_columns import (
TableWithProperties,
@ -652,7 +651,7 @@ def get_single_or_multi_property_string_expr(
allow_denormalized_props=True,
materialised_table_column: str = "properties",
normalize_url: bool = False,
):
) -> Tuple[str, Dict[str, Any]]:
"""
When querying for breakdown properties:
* If the breakdown provided is a string, we extract the JSON from the properties object stored in the DB
@ -666,12 +665,16 @@ def get_single_or_multi_property_string_expr(
no alias will be appended.
"""
breakdown_params: Dict[str, Any] = {}
if isinstance(breakdown, str) or isinstance(breakdown, int):
breakdown_key = f"breakdown_param_{len(breakdown_params) + 1}"
breakdown_key = f"breakdown_param_{len(breakdown_params) + 1}"
breakdown_params[breakdown_key] = breakdown
expression, _ = get_property_string_expr(
table,
str(breakdown),
escape_param_for_clickhouse(breakdown),
f"%({breakdown_key})s",
column,
allow_denormalized_props,
materialised_table_column=materialised_table_column,
@ -681,10 +684,12 @@ def get_single_or_multi_property_string_expr(
else:
expressions = []
for b in breakdown:
breakdown_key = f"breakdown_param_{len(breakdown_params) + 1}"
breakdown_params[breakdown_key] = b
expr, _ = get_property_string_expr(
table,
b,
escape_param_for_clickhouse(b),
f"%({breakdown_key})s",
column,
allow_denormalized_props,
materialised_table_column=materialised_table_column,
@ -694,9 +699,9 @@ def get_single_or_multi_property_string_expr(
expression = f"array({','.join(expressions)})"
if query_alias is None:
return expression
return expression, breakdown_params
return f"{expression} AS {query_alias}"
return f"{expression} AS {query_alias}", breakdown_params
def normalize_url_breakdown(breakdown_value, breakdown_normalize_url: bool = True):

View File

@ -164,7 +164,7 @@ def get_breakdown_prop_values(
hogql_context=filter.hogql_context,
)
value_expression = _to_value_expression(
breakdown_expression, breakdown_params = _to_value_expression(
filter.breakdown_type,
filter.breakdown,
filter.breakdown_group_type_index,
@ -185,7 +185,7 @@ def get_breakdown_prop_values(
bucketing_expression = _to_bucketing_expression(cast(int, filter.breakdown_histogram_bin_count))
elements_query = HISTOGRAM_ELEMENTS_ARRAY_OF_KEY_SQL.format(
bucketing_expression=bucketing_expression,
value_expression=value_expression,
breakdown_expression=breakdown_expression,
parsed_date_from=parsed_date_from,
parsed_date_to=parsed_date_to,
prop_filters=prop_filters,
@ -199,7 +199,7 @@ def get_breakdown_prop_values(
)
else:
elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
value_expression=value_expression,
breakdown_expression=breakdown_expression,
parsed_date_from=parsed_date_from,
parsed_date_to=parsed_date_to,
prop_filters=prop_filters,
@ -222,6 +222,7 @@ def get_breakdown_prop_values(
"timezone": team.timezone,
**prop_filter_params,
**entity_params,
**breakdown_params,
**person_join_params,
**groups_join_params,
**sessions_join_params,
@ -251,7 +252,8 @@ def _to_value_expression(
breakdown_normalize_url: bool = False,
direct_on_events: bool = False,
cast_as_float: bool = False,
) -> str:
) -> Tuple[str, Dict]:
params: Dict[str, Any] = {}
if breakdown_type == "session":
if breakdown == "$session_duration":
# Return the session duration expression right away because it's already an number,
@ -260,7 +262,7 @@ def _to_value_expression(
else:
raise ValidationError(f'Invalid breakdown "{breakdown}" for breakdown type "session"')
elif breakdown_type == "person":
value_expression = get_single_or_multi_property_string_expr(
value_expression, params = get_single_or_multi_property_string_expr(
breakdown,
query_alias=None,
table="events" if direct_on_events else "person",
@ -289,7 +291,7 @@ def _to_value_expression(
else:
value_expression = translate_hogql(cast(str, breakdown), hogql_context)
else:
value_expression = get_single_or_multi_property_string_expr(
value_expression, params = get_single_or_multi_property_string_expr(
breakdown,
table="events",
query_alias=None,
@ -300,7 +302,7 @@ def _to_value_expression(
if cast_as_float:
value_expression = f"toFloat64OrNull(toString({value_expression}))"
return f"{value_expression} AS value"
return f"{value_expression} AS value", params
def _to_bucketing_expression(bin_count: int) -> str:

View File

@ -459,7 +459,8 @@ class ClickhouseFunnelBase(ABC):
# where i is the starting step for exclusion on that entity
all_step_cols.extend(step_cols)
breakdown_select_prop = self._get_breakdown_select_prop()
breakdown_select_prop, breakdown_select_prop_params = self._get_breakdown_select_prop()
self.params.update(breakdown_select_prop_params)
if breakdown_select_prop:
all_step_cols.append(breakdown_select_prop)
@ -718,15 +719,16 @@ class ClickhouseFunnelBase(ABC):
def get_step_counts_without_aggregation_query(self) -> str:
raise NotImplementedError()
def _get_breakdown_select_prop(self) -> str:
def _get_breakdown_select_prop(self) -> Tuple[str, Dict[str, Any]]:
basic_prop_selector = ""
basic_prop_params: Dict[str, Any] = {}
if not self._filter.breakdown:
return basic_prop_selector
return basic_prop_selector, basic_prop_params
self.params.update({"breakdown": self._filter.breakdown})
if self._filter.breakdown_type == "person":
if self._team.person_on_events_mode != PersonOnEventsMode.DISABLED:
basic_prop_selector = get_single_or_multi_property_string_expr(
basic_prop_selector, basic_prop_params = get_single_or_multi_property_string_expr(
self._filter.breakdown,
table="events",
query_alias="prop_basic",
@ -735,14 +737,14 @@ class ClickhouseFunnelBase(ABC):
materialised_table_column="person_properties",
)
else:
basic_prop_selector = get_single_or_multi_property_string_expr(
basic_prop_selector, basic_prop_params = get_single_or_multi_property_string_expr(
self._filter.breakdown,
table="person",
query_alias="prop_basic",
column="person_props",
)
elif self._filter.breakdown_type == "event":
basic_prop_selector = get_single_or_multi_property_string_expr(
basic_prop_selector, basic_prop_params = get_single_or_multi_property_string_expr(
self._filter.breakdown,
table="events",
query_alias="prop_basic",
@ -799,7 +801,7 @@ class ClickhouseFunnelBase(ABC):
prop_window = "groupUniqArray(prop) over (PARTITION by aggregation_target) as prop_vals"
return ",".join([basic_prop_selector, *select_columns, final_select, prop_window])
return ",".join([basic_prop_selector, *select_columns, final_select, prop_window]), basic_prop_params
elif self._filter.breakdown_attribution_type in [
BreakdownAttributionType.FIRST_TOUCH,
BreakdownAttributionType.LAST_TOUCH,
@ -818,10 +820,10 @@ class ClickhouseFunnelBase(ABC):
breakdown_window_selector = f"{aggregate_operation}(prop, timestamp, {prop_conditional})"
prop_window = f"{breakdown_window_selector} over (PARTITION by aggregation_target) as prop_vals"
return ",".join([basic_prop_selector, "prop_basic as prop", prop_window])
return ",".join([basic_prop_selector, "prop_basic as prop", prop_window]), basic_prop_params
else:
# ALL_EVENTS
return ",".join([basic_prop_selector, "prop_basic as prop"])
return ",".join([basic_prop_selector, "prop_basic as prop"]), basic_prop_params
def _get_cohort_breakdown_join(self) -> str:
cohort_queries, ids, cohort_params = format_breakdown_cohort_join_query(self._team, self._filter)

View File

@ -104,7 +104,7 @@ class PropertiesTimeline:
event_query_sql, event_query_params = event_query.get_query()
crucial_property_keys = self.extract_crucial_property_keys(filter)
crucial_property_columns = get_single_or_multi_property_string_expr(
crucial_property_columns, crucial_property_params = get_single_or_multi_property_string_expr(
crucial_property_keys,
query_alias=None,
table="events",
@ -127,6 +127,7 @@ class PropertiesTimeline:
params = {
**event_query_params,
**crucial_property_params,
"actor_id": actor.uuid if isinstance(actor, Person) else actor.group_key,
}
raw_query_result = insight_sync_execute(

View File

@ -66,13 +66,14 @@ class RetentionEventsQuery(EventQuery):
"properties" if self._person_on_events_mode == PersonOnEventsMode.DISABLED else "person_properties"
)
breakdown_values_expression = get_single_or_multi_property_string_expr(
breakdown_values_expression, breakdown_values_params = get_single_or_multi_property_string_expr(
breakdown=[breakdown["property"] for breakdown in self._filter.breakdowns],
table=cast(Union[Literal["events"], Literal["person"]], table),
query_alias=None,
column=column,
materialised_table_column=materalised_table_column,
)
self.params.update(breakdown_values_params)
if self._event_query_type == RetentionQueryType.TARGET_FIRST_TIME:
_fields += [f"argMin({breakdown_values_expression}, e.timestamp) AS breakdown_values"]

View File

@ -99,7 +99,7 @@ GROUP BY actor_id
TOP_ELEMENTS_ARRAY_OF_KEY_SQL = """
SELECT
{value_expression},
{breakdown_expression},
{aggregate_operation} as count
FROM events e
{sample_clause}
@ -116,7 +116,7 @@ TOP_ELEMENTS_ARRAY_OF_KEY_SQL = """
HISTOGRAM_ELEMENTS_ARRAY_OF_KEY_SQL = """
SELECT {bucketing_expression} FROM (
SELECT
{value_expression},
{breakdown_expression},
{aggregate_operation} as count
FROM events e
{sample_clause}