diff --git a/ee/clickhouse/models/test/test_property.py b/ee/clickhouse/models/test/test_property.py index f55578878f9..d12d4daf3ee 100644 --- a/ee/clickhouse/models/test/test_property.py +++ b/ee/clickhouse/models/test/test_property.py @@ -1198,21 +1198,30 @@ TEST_BREAKDOWN_PROCESSING = [ "events", "prop", "properties", - "replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^\"|\"$', '') AS prop", + ( + "replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', '') AS prop", + {"breakdown_param_1": "$browser"}, + ), ), ( ["$browser"], "events", "value", "properties", - "array(replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^\"|\"$', '')) AS value", + ( + "array(replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', '')) AS value", + {"breakdown_param_1": "$browser"}, + ), ), ( ["$browser", "$browser_version"], "events", "prop", "properties", - "array(replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^\"|\"$', ''),replaceRegexpAll(JSONExtractRaw(properties, '$browser_version'), '^\"|\"$', '')) AS prop", + ( + "array(replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', ''),replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_2)s), '^\"|\"$', '')) AS prop", + {"breakdown_param_1": "$browser", "breakdown_param_2": "$browser_version"}, + ), ), ] @@ -1239,8 +1248,11 @@ TEST_BREAKDOWN_PROCESSING_MATERIALIZED = [ "value", "properties", "person_properties", - "array(replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^\"|\"$', '')) AS value", - 'array("mat_pp_$browser") AS value', + ( + "array(replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', '')) AS value", + {"breakdown_param_1": "$browser"}, + ), + ('array("mat_pp_$browser") AS value', {"breakdown_param_1": "$browser"}), ), ( ["$browser", "$browser_version"], @@ -1248,8 +1260,14 @@ TEST_BREAKDOWN_PROCESSING_MATERIALIZED = [ "prop", "properties", "group2_properties", - "array(replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^\"|\"$', ''),replaceRegexpAll(JSONExtractRaw(properties, '$browser_version'), '^\"|\"$', '')) AS prop", - """array("mat_gp2_$browser",replaceRegexpAll(JSONExtractRaw(properties, '$browser_version'), '^\"|\"$', '')) AS prop""", + ( + "array(replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', ''),replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_2)s), '^\"|\"$', '')) AS prop", + {"breakdown_param_1": "$browser", "breakdown_param_2": "$browser_version"}, + ), + ( + """array("mat_gp2_$browser",replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_2)s), '^\"|\"$', '')) AS prop""", + {"breakdown_param_1": "$browser", "breakdown_param_2": "$browser_version"}, + ), ), ] @@ -1282,7 +1300,6 @@ def test_breakdown_query_expression_materialised( column, materialised_table_column=materialise_column, ) - assert actual == expected_with materialize(table, breakdown[0], table_column=materialise_column) # type: ignore diff --git a/posthog/models/property/util.py b/posthog/models/property/util.py index c67a3d8cf31..cfefbb94dc1 100644 --- a/posthog/models/property/util.py +++ b/posthog/models/property/util.py @@ -15,7 +15,6 @@ from typing import ( from rest_framework import exceptions -from posthog.clickhouse.client.escape import escape_param_for_clickhouse from posthog.clickhouse.kafka_engine import trim_quotes_expr from posthog.clickhouse.materialized_columns import ( TableWithProperties, @@ -652,7 +651,7 @@ def get_single_or_multi_property_string_expr( allow_denormalized_props=True, materialised_table_column: str = "properties", normalize_url: bool = False, -): +) -> Tuple[str, Dict[str, Any]]: """ When querying for breakdown properties: * If the breakdown provided is a string, we extract the JSON from the properties object stored in the DB @@ -666,12 +665,16 @@ def get_single_or_multi_property_string_expr( no alias will be appended. """ - + breakdown_params: Dict[str, Any] = {} if isinstance(breakdown, str) or isinstance(breakdown, int): + breakdown_key = f"breakdown_param_{len(breakdown_params) + 1}" + breakdown_key = f"breakdown_param_{len(breakdown_params) + 1}" + breakdown_params[breakdown_key] = breakdown + expression, _ = get_property_string_expr( table, str(breakdown), - escape_param_for_clickhouse(breakdown), + f"%({breakdown_key})s", column, allow_denormalized_props, materialised_table_column=materialised_table_column, @@ -681,10 +684,12 @@ def get_single_or_multi_property_string_expr( else: expressions = [] for b in breakdown: + breakdown_key = f"breakdown_param_{len(breakdown_params) + 1}" + breakdown_params[breakdown_key] = b expr, _ = get_property_string_expr( table, b, - escape_param_for_clickhouse(b), + f"%({breakdown_key})s", column, allow_denormalized_props, materialised_table_column=materialised_table_column, @@ -694,9 +699,9 @@ def get_single_or_multi_property_string_expr( expression = f"array({','.join(expressions)})" if query_alias is None: - return expression + return expression, breakdown_params - return f"{expression} AS {query_alias}" + return f"{expression} AS {query_alias}", breakdown_params def normalize_url_breakdown(breakdown_value, breakdown_normalize_url: bool = True): diff --git a/posthog/queries/breakdown_props.py b/posthog/queries/breakdown_props.py index 059e1998f67..64a550edf54 100644 --- a/posthog/queries/breakdown_props.py +++ b/posthog/queries/breakdown_props.py @@ -164,7 +164,7 @@ def get_breakdown_prop_values( hogql_context=filter.hogql_context, ) - value_expression = _to_value_expression( + breakdown_expression, breakdown_params = _to_value_expression( filter.breakdown_type, filter.breakdown, filter.breakdown_group_type_index, @@ -185,7 +185,7 @@ def get_breakdown_prop_values( bucketing_expression = _to_bucketing_expression(cast(int, filter.breakdown_histogram_bin_count)) elements_query = HISTOGRAM_ELEMENTS_ARRAY_OF_KEY_SQL.format( bucketing_expression=bucketing_expression, - value_expression=value_expression, + breakdown_expression=breakdown_expression, parsed_date_from=parsed_date_from, parsed_date_to=parsed_date_to, prop_filters=prop_filters, @@ -199,7 +199,7 @@ def get_breakdown_prop_values( ) else: elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format( - value_expression=value_expression, + breakdown_expression=breakdown_expression, parsed_date_from=parsed_date_from, parsed_date_to=parsed_date_to, prop_filters=prop_filters, @@ -222,6 +222,7 @@ def get_breakdown_prop_values( "timezone": team.timezone, **prop_filter_params, **entity_params, + **breakdown_params, **person_join_params, **groups_join_params, **sessions_join_params, @@ -251,7 +252,8 @@ def _to_value_expression( breakdown_normalize_url: bool = False, direct_on_events: bool = False, cast_as_float: bool = False, -) -> str: +) -> Tuple[str, Dict]: + params: Dict[str, Any] = {} if breakdown_type == "session": if breakdown == "$session_duration": # Return the session duration expression right away because it's already an number, @@ -260,7 +262,7 @@ def _to_value_expression( else: raise ValidationError(f'Invalid breakdown "{breakdown}" for breakdown type "session"') elif breakdown_type == "person": - value_expression = get_single_or_multi_property_string_expr( + value_expression, params = get_single_or_multi_property_string_expr( breakdown, query_alias=None, table="events" if direct_on_events else "person", @@ -289,7 +291,7 @@ def _to_value_expression( else: value_expression = translate_hogql(cast(str, breakdown), hogql_context) else: - value_expression = get_single_or_multi_property_string_expr( + value_expression, params = get_single_or_multi_property_string_expr( breakdown, table="events", query_alias=None, @@ -300,7 +302,7 @@ def _to_value_expression( if cast_as_float: value_expression = f"toFloat64OrNull(toString({value_expression}))" - return f"{value_expression} AS value" + return f"{value_expression} AS value", params def _to_bucketing_expression(bin_count: int) -> str: diff --git a/posthog/queries/funnels/base.py b/posthog/queries/funnels/base.py index 482e821fd5d..03702bf2632 100644 --- a/posthog/queries/funnels/base.py +++ b/posthog/queries/funnels/base.py @@ -459,7 +459,8 @@ class ClickhouseFunnelBase(ABC): # where i is the starting step for exclusion on that entity all_step_cols.extend(step_cols) - breakdown_select_prop = self._get_breakdown_select_prop() + breakdown_select_prop, breakdown_select_prop_params = self._get_breakdown_select_prop() + self.params.update(breakdown_select_prop_params) if breakdown_select_prop: all_step_cols.append(breakdown_select_prop) @@ -718,15 +719,16 @@ class ClickhouseFunnelBase(ABC): def get_step_counts_without_aggregation_query(self) -> str: raise NotImplementedError() - def _get_breakdown_select_prop(self) -> str: + def _get_breakdown_select_prop(self) -> Tuple[str, Dict[str, Any]]: basic_prop_selector = "" + basic_prop_params: Dict[str, Any] = {} if not self._filter.breakdown: - return basic_prop_selector + return basic_prop_selector, basic_prop_params self.params.update({"breakdown": self._filter.breakdown}) if self._filter.breakdown_type == "person": if self._team.person_on_events_mode != PersonOnEventsMode.DISABLED: - basic_prop_selector = get_single_or_multi_property_string_expr( + basic_prop_selector, basic_prop_params = get_single_or_multi_property_string_expr( self._filter.breakdown, table="events", query_alias="prop_basic", @@ -735,14 +737,14 @@ class ClickhouseFunnelBase(ABC): materialised_table_column="person_properties", ) else: - basic_prop_selector = get_single_or_multi_property_string_expr( + basic_prop_selector, basic_prop_params = get_single_or_multi_property_string_expr( self._filter.breakdown, table="person", query_alias="prop_basic", column="person_props", ) elif self._filter.breakdown_type == "event": - basic_prop_selector = get_single_or_multi_property_string_expr( + basic_prop_selector, basic_prop_params = get_single_or_multi_property_string_expr( self._filter.breakdown, table="events", query_alias="prop_basic", @@ -799,7 +801,7 @@ class ClickhouseFunnelBase(ABC): prop_window = "groupUniqArray(prop) over (PARTITION by aggregation_target) as prop_vals" - return ",".join([basic_prop_selector, *select_columns, final_select, prop_window]) + return ",".join([basic_prop_selector, *select_columns, final_select, prop_window]), basic_prop_params elif self._filter.breakdown_attribution_type in [ BreakdownAttributionType.FIRST_TOUCH, BreakdownAttributionType.LAST_TOUCH, @@ -818,10 +820,10 @@ class ClickhouseFunnelBase(ABC): breakdown_window_selector = f"{aggregate_operation}(prop, timestamp, {prop_conditional})" prop_window = f"{breakdown_window_selector} over (PARTITION by aggregation_target) as prop_vals" - return ",".join([basic_prop_selector, "prop_basic as prop", prop_window]) + return ",".join([basic_prop_selector, "prop_basic as prop", prop_window]), basic_prop_params else: # ALL_EVENTS - return ",".join([basic_prop_selector, "prop_basic as prop"]) + return ",".join([basic_prop_selector, "prop_basic as prop"]), basic_prop_params def _get_cohort_breakdown_join(self) -> str: cohort_queries, ids, cohort_params = format_breakdown_cohort_join_query(self._team, self._filter) diff --git a/posthog/queries/properties_timeline/properties_timeline.py b/posthog/queries/properties_timeline/properties_timeline.py index 578a9dee856..fbf7cff2402 100644 --- a/posthog/queries/properties_timeline/properties_timeline.py +++ b/posthog/queries/properties_timeline/properties_timeline.py @@ -104,7 +104,7 @@ class PropertiesTimeline: event_query_sql, event_query_params = event_query.get_query() crucial_property_keys = self.extract_crucial_property_keys(filter) - crucial_property_columns = get_single_or_multi_property_string_expr( + crucial_property_columns, crucial_property_params = get_single_or_multi_property_string_expr( crucial_property_keys, query_alias=None, table="events", @@ -127,6 +127,7 @@ class PropertiesTimeline: params = { **event_query_params, + **crucial_property_params, "actor_id": actor.uuid if isinstance(actor, Person) else actor.group_key, } raw_query_result = insight_sync_execute( diff --git a/posthog/queries/retention/retention_events_query.py b/posthog/queries/retention/retention_events_query.py index f003e194958..eef0d101452 100644 --- a/posthog/queries/retention/retention_events_query.py +++ b/posthog/queries/retention/retention_events_query.py @@ -66,13 +66,14 @@ class RetentionEventsQuery(EventQuery): "properties" if self._person_on_events_mode == PersonOnEventsMode.DISABLED else "person_properties" ) - breakdown_values_expression = get_single_or_multi_property_string_expr( + breakdown_values_expression, breakdown_values_params = get_single_or_multi_property_string_expr( breakdown=[breakdown["property"] for breakdown in self._filter.breakdowns], table=cast(Union[Literal["events"], Literal["person"]], table), query_alias=None, column=column, materialised_table_column=materalised_table_column, ) + self.params.update(breakdown_values_params) if self._event_query_type == RetentionQueryType.TARGET_FIRST_TIME: _fields += [f"argMin({breakdown_values_expression}, e.timestamp) AS breakdown_values"] diff --git a/posthog/queries/trends/sql.py b/posthog/queries/trends/sql.py index be4a56cc1de..0f9d08040b3 100644 --- a/posthog/queries/trends/sql.py +++ b/posthog/queries/trends/sql.py @@ -99,7 +99,7 @@ GROUP BY actor_id TOP_ELEMENTS_ARRAY_OF_KEY_SQL = """ SELECT - {value_expression}, + {breakdown_expression}, {aggregate_operation} as count FROM events e {sample_clause} @@ -116,7 +116,7 @@ TOP_ELEMENTS_ARRAY_OF_KEY_SQL = """ HISTOGRAM_ELEMENTS_ARRAY_OF_KEY_SQL = """ SELECT {bucketing_expression} FROM ( SELECT - {value_expression}, + {breakdown_expression}, {aggregate_operation} as count FROM events e {sample_clause}