From 01c6daecdbfaf77602b0bf96e8de4c17d871cdb6 Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Wed, 20 Nov 2024 13:00:59 -0800 Subject: [PATCH 1/5] Abstract --- posthog/hogql/property.py | 116 +++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py index 9536b70fa5f..70c7a7bf849 100644 --- a/posthog/hogql/property.py +++ b/posthog/hogql/property.py @@ -238,6 +238,44 @@ def _expr_to_compare_op( raise NotImplementedError(f"PropertyOperator {operator} not implemented") +def _handle_property_values( + property: Property, + value: list, + team: Team, + scope: str, + field: ast.Expr, + operator: PropertyOperator, + is_json_field: bool, +) -> ast.Expr: + if len(value) == 0: + return ast.Constant(value=1) + elif len(value) == 1: + return _expr_to_compare_op( + expr=field, + value=value[0], + operator=operator, + team=team, + property=property, + is_json_field=is_json_field, + ) + else: + # Using an AND here instead of `in()` or `notIn()`, due to Clickhouses poor handling of `null` values + exprs = [ + _expr_to_compare_op( + expr=field, + value=v, + operator=operator, + team=team, + property=property, + is_json_field=is_json_field, + ) + for v in value + ] + if operator in (PropertyOperator.NOT_ICONTAINS, PropertyOperator.NOT_REGEX, PropertyOperator.IS_NOT): + return ast.And(exprs=exprs) + return ast.Or(exprs=exprs) + + def property_to_expr( property: ( list @@ -354,38 +392,18 @@ def property_to_expr( else: raise QueryError("Data warehouse property filter value must be a string") - # For data warehouse properties, we split the key on dots to get the table chain and property key - # e.g. "foobars.properties.$feature/flag" becomes chain=["foobars", "properties"] and property_key="$feature/flag" - # This allows us to properly reference nested fields in data warehouse tables while maintaining consistent - # property filtering behavior. The chain represents the path to traverse through nested table structures, - # while the property_key is the actual field name we want to filter on. - if isinstance(value, list): - if len(value) == 0: - return ast.Constant(value=1) - elif len(value) == 1: - value = value[0] - else: - field = ast.Field(chain=[*chain, property_key]) - exprs = [ - _expr_to_compare_op( - expr=field, - value=v, - operator=operator, - team=team, - property=property, - is_json_field=False, - ) - for v in value - ] - if ( - operator == PropertyOperator.NOT_ICONTAINS - or operator == PropertyOperator.NOT_REGEX - or operator == PropertyOperator.IS_NOT - ): - return ast.And(exprs=exprs) - return ast.Or(exprs=exprs) - field = ast.Field(chain=[*chain, property_key]) + if isinstance(value, list): + return _handle_property_values( + property=property, + value=value, + team=team, + scope=scope, + field=field, + operator=operator, + is_json_field=False, + ) + return _expr_to_compare_op( expr=field, value=value, @@ -420,33 +438,15 @@ def property_to_expr( expr = ast.Call(name="argMinMerge", args=[field]) if isinstance(value, list): - if len(value) == 0: - return ast.Constant(value=1) - elif len(value) == 1: - value = value[0] - else: - # Using an AND here instead of `in()` or `notIn()`, due to Clickhouses poor handling of `null` values - exprs = [ - property_to_expr( - Property( - type=property.type, - key=property.key, - operator=property.operator, - group_type_index=property.group_type_index, - value=v, - ), - team, - scope, - ) - for v in value - ] - if ( - operator == PropertyOperator.NOT_ICONTAINS - or operator == PropertyOperator.NOT_REGEX - or operator == PropertyOperator.IS_NOT - ): - return ast.And(exprs=exprs) - return ast.Or(exprs=exprs) + return _handle_property_values( + property=property, + value=value, + team=team, + scope=scope, + field=expr, + operator=operator, + is_json_field=property.type != "session", + ) return _expr_to_compare_op( expr=expr, From becfb7fefbab29418d5a3d09f6cecef492091569 Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Wed, 20 Nov 2024 13:04:11 -0800 Subject: [PATCH 2/5] Revert "Abstract" This reverts commit 01c6daecdbfaf77602b0bf96e8de4c17d871cdb6. --- posthog/hogql/property.py | 114 +++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py index 70c7a7bf849..9536b70fa5f 100644 --- a/posthog/hogql/property.py +++ b/posthog/hogql/property.py @@ -238,44 +238,6 @@ def _expr_to_compare_op( raise NotImplementedError(f"PropertyOperator {operator} not implemented") -def _handle_property_values( - property: Property, - value: list, - team: Team, - scope: str, - field: ast.Expr, - operator: PropertyOperator, - is_json_field: bool, -) -> ast.Expr: - if len(value) == 0: - return ast.Constant(value=1) - elif len(value) == 1: - return _expr_to_compare_op( - expr=field, - value=value[0], - operator=operator, - team=team, - property=property, - is_json_field=is_json_field, - ) - else: - # Using an AND here instead of `in()` or `notIn()`, due to Clickhouses poor handling of `null` values - exprs = [ - _expr_to_compare_op( - expr=field, - value=v, - operator=operator, - team=team, - property=property, - is_json_field=is_json_field, - ) - for v in value - ] - if operator in (PropertyOperator.NOT_ICONTAINS, PropertyOperator.NOT_REGEX, PropertyOperator.IS_NOT): - return ast.And(exprs=exprs) - return ast.Or(exprs=exprs) - - def property_to_expr( property: ( list @@ -392,18 +354,38 @@ def property_to_expr( else: raise QueryError("Data warehouse property filter value must be a string") - field = ast.Field(chain=[*chain, property_key]) + # For data warehouse properties, we split the key on dots to get the table chain and property key + # e.g. "foobars.properties.$feature/flag" becomes chain=["foobars", "properties"] and property_key="$feature/flag" + # This allows us to properly reference nested fields in data warehouse tables while maintaining consistent + # property filtering behavior. The chain represents the path to traverse through nested table structures, + # while the property_key is the actual field name we want to filter on. if isinstance(value, list): - return _handle_property_values( - property=property, - value=value, - team=team, - scope=scope, - field=field, - operator=operator, - is_json_field=False, - ) + if len(value) == 0: + return ast.Constant(value=1) + elif len(value) == 1: + value = value[0] + else: + field = ast.Field(chain=[*chain, property_key]) + exprs = [ + _expr_to_compare_op( + expr=field, + value=v, + operator=operator, + team=team, + property=property, + is_json_field=False, + ) + for v in value + ] + if ( + operator == PropertyOperator.NOT_ICONTAINS + or operator == PropertyOperator.NOT_REGEX + or operator == PropertyOperator.IS_NOT + ): + return ast.And(exprs=exprs) + return ast.Or(exprs=exprs) + field = ast.Field(chain=[*chain, property_key]) return _expr_to_compare_op( expr=field, value=value, @@ -438,15 +420,33 @@ def property_to_expr( expr = ast.Call(name="argMinMerge", args=[field]) if isinstance(value, list): - return _handle_property_values( - property=property, - value=value, - team=team, - scope=scope, - field=expr, - operator=operator, - is_json_field=property.type != "session", - ) + if len(value) == 0: + return ast.Constant(value=1) + elif len(value) == 1: + value = value[0] + else: + # Using an AND here instead of `in()` or `notIn()`, due to Clickhouses poor handling of `null` values + exprs = [ + property_to_expr( + Property( + type=property.type, + key=property.key, + operator=property.operator, + group_type_index=property.group_type_index, + value=v, + ), + team, + scope, + ) + for v in value + ] + if ( + operator == PropertyOperator.NOT_ICONTAINS + or operator == PropertyOperator.NOT_REGEX + or operator == PropertyOperator.IS_NOT + ): + return ast.And(exprs=exprs) + return ast.Or(exprs=exprs) return _expr_to_compare_op( expr=expr, From 1d496ba493c0394f1f99cb342a0556b2283fe408 Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Wed, 20 Nov 2024 13:05:15 -0800 Subject: [PATCH 3/5] Revert "Revert "Abstract"" This reverts commit becfb7fefbab29418d5a3d09f6cecef492091569. --- posthog/hogql/property.py | 116 +++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py index 9536b70fa5f..70c7a7bf849 100644 --- a/posthog/hogql/property.py +++ b/posthog/hogql/property.py @@ -238,6 +238,44 @@ def _expr_to_compare_op( raise NotImplementedError(f"PropertyOperator {operator} not implemented") +def _handle_property_values( + property: Property, + value: list, + team: Team, + scope: str, + field: ast.Expr, + operator: PropertyOperator, + is_json_field: bool, +) -> ast.Expr: + if len(value) == 0: + return ast.Constant(value=1) + elif len(value) == 1: + return _expr_to_compare_op( + expr=field, + value=value[0], + operator=operator, + team=team, + property=property, + is_json_field=is_json_field, + ) + else: + # Using an AND here instead of `in()` or `notIn()`, due to Clickhouses poor handling of `null` values + exprs = [ + _expr_to_compare_op( + expr=field, + value=v, + operator=operator, + team=team, + property=property, + is_json_field=is_json_field, + ) + for v in value + ] + if operator in (PropertyOperator.NOT_ICONTAINS, PropertyOperator.NOT_REGEX, PropertyOperator.IS_NOT): + return ast.And(exprs=exprs) + return ast.Or(exprs=exprs) + + def property_to_expr( property: ( list @@ -354,38 +392,18 @@ def property_to_expr( else: raise QueryError("Data warehouse property filter value must be a string") - # For data warehouse properties, we split the key on dots to get the table chain and property key - # e.g. "foobars.properties.$feature/flag" becomes chain=["foobars", "properties"] and property_key="$feature/flag" - # This allows us to properly reference nested fields in data warehouse tables while maintaining consistent - # property filtering behavior. The chain represents the path to traverse through nested table structures, - # while the property_key is the actual field name we want to filter on. - if isinstance(value, list): - if len(value) == 0: - return ast.Constant(value=1) - elif len(value) == 1: - value = value[0] - else: - field = ast.Field(chain=[*chain, property_key]) - exprs = [ - _expr_to_compare_op( - expr=field, - value=v, - operator=operator, - team=team, - property=property, - is_json_field=False, - ) - for v in value - ] - if ( - operator == PropertyOperator.NOT_ICONTAINS - or operator == PropertyOperator.NOT_REGEX - or operator == PropertyOperator.IS_NOT - ): - return ast.And(exprs=exprs) - return ast.Or(exprs=exprs) - field = ast.Field(chain=[*chain, property_key]) + if isinstance(value, list): + return _handle_property_values( + property=property, + value=value, + team=team, + scope=scope, + field=field, + operator=operator, + is_json_field=False, + ) + return _expr_to_compare_op( expr=field, value=value, @@ -420,33 +438,15 @@ def property_to_expr( expr = ast.Call(name="argMinMerge", args=[field]) if isinstance(value, list): - if len(value) == 0: - return ast.Constant(value=1) - elif len(value) == 1: - value = value[0] - else: - # Using an AND here instead of `in()` or `notIn()`, due to Clickhouses poor handling of `null` values - exprs = [ - property_to_expr( - Property( - type=property.type, - key=property.key, - operator=property.operator, - group_type_index=property.group_type_index, - value=v, - ), - team, - scope, - ) - for v in value - ] - if ( - operator == PropertyOperator.NOT_ICONTAINS - or operator == PropertyOperator.NOT_REGEX - or operator == PropertyOperator.IS_NOT - ): - return ast.And(exprs=exprs) - return ast.Or(exprs=exprs) + return _handle_property_values( + property=property, + value=value, + team=team, + scope=scope, + field=expr, + operator=operator, + is_json_field=property.type != "session", + ) return _expr_to_compare_op( expr=expr, From cc53bb60c9017d555b68d009a1bb8e17a9b10209 Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Wed, 20 Nov 2024 13:19:29 -0800 Subject: [PATCH 4/5] Consolidated implementation --- posthog/hogql/property.py | 123 ++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 72 deletions(-) diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py index 70c7a7bf849..e372b446b5c 100644 --- a/posthog/hogql/property.py +++ b/posthog/hogql/property.py @@ -238,44 +238,6 @@ def _expr_to_compare_op( raise NotImplementedError(f"PropertyOperator {operator} not implemented") -def _handle_property_values( - property: Property, - value: list, - team: Team, - scope: str, - field: ast.Expr, - operator: PropertyOperator, - is_json_field: bool, -) -> ast.Expr: - if len(value) == 0: - return ast.Constant(value=1) - elif len(value) == 1: - return _expr_to_compare_op( - expr=field, - value=value[0], - operator=operator, - team=team, - property=property, - is_json_field=is_json_field, - ) - else: - # Using an AND here instead of `in()` or `notIn()`, due to Clickhouses poor handling of `null` values - exprs = [ - _expr_to_compare_op( - expr=field, - value=v, - operator=operator, - team=team, - property=property, - is_json_field=is_json_field, - ) - for v in value - ] - if operator in (PropertyOperator.NOT_ICONTAINS, PropertyOperator.NOT_REGEX, PropertyOperator.IS_NOT): - return ast.And(exprs=exprs) - return ast.Or(exprs=exprs) - - def property_to_expr( property: ( list @@ -385,34 +347,33 @@ def property_to_expr( elif property.type == "session" and scope == "replay_entity": chain = ["events", "session"] elif property.type == "data_warehouse": - if isinstance(property.key, str): + if not isinstance(property.key, str): + raise QueryError("Data warehouse property filter value must be a string") + else: split = property.key.split(".") chain = split[:-1] - property_key = split[-1] - else: - raise QueryError("Data warehouse property filter value must be a string") - - field = ast.Field(chain=[*chain, property_key]) - if isinstance(value, list): - return _handle_property_values( - property=property, - value=value, - team=team, - scope=scope, - field=field, - operator=operator, - is_json_field=False, - ) - - return _expr_to_compare_op( - expr=field, - value=value, - operator=operator, - team=team, - property=property, - is_json_field=False, - ) + property.key = split[-1] + if isinstance(value, list) and len(value) > 1: + field = ast.Field(chain=[*chain, property.key]) + exprs = [ + _expr_to_compare_op( + expr=field, + value=v, + operator=operator, + team=team, + property=property, + is_json_field=False, + ) + for v in value + ] + if ( + operator == PropertyOperator.NOT_ICONTAINS + or operator == PropertyOperator.NOT_REGEX + or operator == PropertyOperator.IS_NOT + ): + return ast.And(exprs=exprs) + return ast.Or(exprs=exprs) elif property.type == "data_warehouse_person_property": if isinstance(property.key, str): table, key = property.key.split(".") @@ -438,15 +399,33 @@ def property_to_expr( expr = ast.Call(name="argMinMerge", args=[field]) if isinstance(value, list): - return _handle_property_values( - property=property, - value=value, - team=team, - scope=scope, - field=expr, - operator=operator, - is_json_field=property.type != "session", - ) + if len(value) == 0: + return ast.Constant(value=1) + elif len(value) == 1: + value = value[0] + else: + # Using an AND here instead of `in()` or `notIn()`, due to Clickhouses poor handling of `null` values + exprs = [ + property_to_expr( + Property( + type=property.type, + key=property.key, + operator=property.operator, + group_type_index=property.group_type_index, + value=v, + ), + team, + scope, + ) + for v in value + ] + if ( + operator == PropertyOperator.NOT_ICONTAINS + or operator == PropertyOperator.NOT_REGEX + or operator == PropertyOperator.IS_NOT + ): + return ast.And(exprs=exprs) + return ast.Or(exprs=exprs) return _expr_to_compare_op( expr=expr, From 8508e3bb38d9e369984e38c2b9fa6cc023ca2816 Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Wed, 20 Nov 2024 13:22:27 -0800 Subject: [PATCH 5/5] Fix linting issues --- posthog/hogql/test/test_property.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/posthog/hogql/test/test_property.py b/posthog/hogql/test/test_property.py index 56f342c3444..eee278e83bd 100644 --- a/posthog/hogql/test/test_property.py +++ b/posthog/hogql/test/test_property.py @@ -812,17 +812,17 @@ class TestProperty(BaseTest): ) self.assertIsInstance(expr, ast.Or) - self.assertEqual(len(expr.exprs), 2) + self.assertEqual(len(getattr(expr, 'exprs')), 2) # First expression - compare_op_1 = expr.exprs[0] + compare_op_1 = getattr(expr, 'exprs')[0] self.assertIsInstance(compare_op_1, ast.CompareOperation) self.assertIsInstance(compare_op_1.left, ast.Field) self.assertEqual(compare_op_1.left.chain, ["foobars", "properties", "$feature/test"]) self.assertEqual(compare_op_1.right.value, "control") # Second expression - compare_op_2 = expr.exprs[1] + compare_op_2 = getattr(expr, 'exprs')[1] self.assertIsInstance(compare_op_2, ast.CompareOperation) self.assertIsInstance(compare_op_2.left, ast.Field) self.assertEqual(compare_op_2.left.chain, ["foobars", "properties", "$feature/test"])