diff --git a/ee/clickhouse/models/property.py b/ee/clickhouse/models/property.py
index 5804e02de05..06a3627aea1 100644
--- a/ee/clickhouse/models/property.py
+++ b/ee/clickhouse/models/property.py
@@ -29,6 +29,8 @@ from ee.clickhouse.sql.person import (
     GET_DISTINCT_IDS_BY_PERSON_ID_FILTER,
     GET_DISTINCT_IDS_BY_PROPERTY_SQL,
     GET_TEAM_PERSON_DISTINCT_IDS,
+    SELECT_PERSON_PROP_VALUES_SQL,
+    SELECT_PERSON_PROP_VALUES_SQL_WITH_FILTER,
 )
 from posthog.models.cohort import Cohort
 from posthog.models.event import Selector
@@ -419,6 +421,14 @@ def get_property_values_for_key(key: str, team: Team, value: Optional[str] = Non
     )


+def get_person_property_values_for_key(key: str, team: Team, value: Optional[str] = None):
+    if value:
+        return sync_execute(
+            SELECT_PERSON_PROP_VALUES_SQL_WITH_FILTER, {"team_id": team.pk, "key": key, "value": "%{}%".format(value)},
+        )
+    return sync_execute(SELECT_PERSON_PROP_VALUES_SQL, {"team_id": team.pk, "key": key},)
+
+
 def filter_element(filters: Dict, *, operator: Optional[OperatorType] = None, prepend: str = "") -> Tuple[str, Dict]:
     if not operator:
         operator = "exact"
diff --git a/ee/clickhouse/sql/events.py b/ee/clickhouse/sql/events.py
index c7f58f81263..64fb283790a 100644
--- a/ee/clickhouse/sql/events.py
+++ b/ee/clickhouse/sql/events.py
@@ -114,11 +114,29 @@ FROM events WHERE team_id = %(team_id)s
 """

 SELECT_PROP_VALUES_SQL = """
-SELECT DISTINCT trim(BOTH '\"' FROM JSONExtractRaw(properties, %(key)s)) FROM events where JSONHas(properties, %(key)s) AND team_id = %(team_id)s {parsed_date_from} {parsed_date_to} LIMIT 10
+SELECT
+    DISTINCT trim(BOTH '\"' FROM JSONExtractRaw(properties, %(key)s))
+FROM
+    events
+WHERE
+    team_id = %(team_id)s AND
+    JSONHas(properties, %(key)s)
+    {parsed_date_from}
+    {parsed_date_to}
+LIMIT 10
 """

 SELECT_PROP_VALUES_SQL_WITH_FILTER = """
-SELECT DISTINCT trim(BOTH '\"' FROM JSONExtractRaw(properties, %(key)s)) FROM events where team_id = %(team_id)s AND trim(BOTH '\"' FROM JSONExtractRaw(properties, %(key)s)) ILIKE %(value)s {parsed_date_from} {parsed_date_to} LIMIT 10
+SELECT
+    DISTINCT trim(BOTH '\"' FROM JSONExtractRaw(properties, %(key)s))
+FROM
+    events
+WHERE
+    team_id = %(team_id)s AND
+    trim(BOTH '\"' FROM JSONExtractRaw(properties, %(key)s)) ILIKE %(value)s
+    {parsed_date_from}
+    {parsed_date_to}
+LIMIT 10
 """

 SELECT_EVENT_BY_TEAM_AND_CONDITIONS_SQL = """
diff --git a/ee/clickhouse/sql/person.py b/ee/clickhouse/sql/person.py
index de225bd79a0..2cc3c47b0bf 100644
--- a/ee/clickhouse/sql/person.py
+++ b/ee/clickhouse/sql/person.py
@@ -384,3 +384,44 @@ GROUP BY actor_id
 COMMENT_DISTINCT_ID_COLUMN_SQL = (
     lambda: f"ALTER TABLE person_distinct_id ON CLUSTER {CLICKHOUSE_CLUSTER} COMMENT COLUMN distinct_id 'skip_0003_fill_person_distinct_id2'"
 )
+
+
+SELECT_PERSON_PROP_VALUES_SQL = """
+SELECT
+    value,
+    count(value)
+FROM (
+    SELECT
+        trim(BOTH '\"' FROM JSONExtractRaw(properties, %(key)s)) as value
+    FROM
+        person
+    WHERE
+        team_id = %(team_id)s AND
+        JSONHas(properties, %(key)s)
+    ORDER BY id DESC
+    LIMIT 100000
+)
+GROUP BY value
+ORDER BY count(value) DESC
+LIMIT 20
+"""
+
+SELECT_PERSON_PROP_VALUES_SQL_WITH_FILTER = """
+SELECT
+    value,
+    count(value)
+FROM (
+    SELECT
+        trim(BOTH '\"' FROM JSONExtractRaw(properties, %(key)s)) as value
+    FROM
+        person
+    WHERE
+        team_id = %(team_id)s AND
+        trim(BOTH '\"' FROM JSONExtractRaw(properties, %(key)s)) ILIKE %(value)s
+    ORDER BY id DESC
+    LIMIT 100000
+)
+GROUP BY value
+ORDER BY count(value) DESC
+LIMIT 20
+"""
diff --git a/posthog/api/event.py b/posthog/api/event.py
index 097cc546712..7632b52318e 100644
--- a/posthog/api/event.py
+++ b/posthog/api/event.py
@@ -223,7 +223,6 @@ class EventViewSet(StructuredViewSetMixin, mixins.RetrieveModelMixin, mixins.Lis
     def values(self, request: request.Request, **kwargs) -> response.Response:
         key = request.GET.get("key")
         team = self.team
-        result = []
         flattened = []
         if key == "custom_event":
             events = sync_execute(GET_CUSTOM_EVENTS, {"team_id": team.pk})
diff --git a/posthog/api/person.py b/posthog/api/person.py
index a506ee6b972..a076cc6ea36 100644
--- a/posthog/api/person.py
+++ b/posthog/api/person.py
@@ -22,9 +22,11 @@ from rest_framework.permissions import IsAuthenticated
 from rest_framework.settings import api_settings
 from rest_framework.utils.serializer_helpers import ReturnDict
 from rest_framework_csv import renderers as csvrenderers
+from statshog.defaults.django import statsd

 from ee.clickhouse.client import sync_execute
 from ee.clickhouse.models.person import delete_person
+from ee.clickhouse.models.property import get_person_property_values_for_key
 from ee.clickhouse.queries.funnels import ClickhouseFunnelActors, ClickhouseFunnelTrendsActors
 from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase
 from ee.clickhouse.queries.funnels.funnel_correlation_persons import FunnelCorrelationActors
@@ -52,11 +54,14 @@ from posthog.models.filters.path_filter import PathFilter
 from posthog.models.filters.retention_filter import RetentionFilter
 from posthog.models.filters.stickiness_filter import StickinessFilter
 from posthog.permissions import ProjectMembershipNecessaryPermissions, TeamMemberAccessPermission
-from posthog.queries.lifecycle import LifecycleTrend
-from posthog.queries.retention import Retention
-from posthog.queries.stickiness import Stickiness
 from posthog.tasks.split_person import split_person
-from posthog.utils import convert_property_value, format_query_params_absolute_url, is_anonymous_id, relative_date_parse
+from posthog.utils import (
+    convert_property_value,
+    flatten,
+    format_query_params_absolute_url,
+    is_anonymous_id,
+    relative_date_parse,
+)


 class PersonCursorPagination(CursorPagination):
@@ -353,18 +358,33 @@ class PersonViewSet(StructuredViewSetMixin, viewsets.ModelViewSet):

     @action(methods=["GET"], detail=False)
     def values(self, request: request.Request, **kwargs) -> response.Response:
-        people = self.get_queryset()
-        key = "properties__{}".format(request.GET.get("key"))
-        people = people.values(key).annotate(count=Count("id")).filter(**{f"{key}__isnull": False}).order_by("-count")
+        key = request.GET.get("key")
+        value = request.GET.get("value")
+        flattened = []
+        if key:
+            timer = statsd.timer("get_person_property_values_for_key_timer").start()
+            try:
+                result = get_person_property_values_for_key(key, self.team, value)
+                statsd.incr(
+                    "get_person_property_values_for_key_success",
+                    tags={"key": key, "value": value, "team_id": self.team.id},
+                )
+            except Exception as e:
+                statsd.incr(
+                    "get_person_property_values_for_key_error",
+                    tags={"error": str(e), "key": key, "value": value, "team_id": self.team.id},
+                )
+                raise e
+            finally:
+                timer.stop()

-        if request.GET.get("value"):
-            people = people.extra(
-                where=["properties ->> %s LIKE %s"], params=[request.GET["key"], "%{}%".format(request.GET["value"])],
-            )
-
-        return response.Response(
-            [{"name": convert_property_value(event[key]), "count": event["count"]} for event in people[:50]]
-        )
+        for (value, count) in result:
+            try:
+                # Try loading as json for dicts or arrays
+                flattened.append({"name": convert_property_value(json.loads(value)), "count": count})  # type: ignore
+            except json.decoder.JSONDecodeError:
+                flattened.append({"name": convert_property_value(value), "count": count})
+        return response.Response(flattened)

     @action(methods=["POST"], detail=True)
     def merge(self, request: request.Request, pk=None, **kwargs) -> response.Response:
diff --git a/posthog/utils.py b/posthog/utils.py
index 463e600d810..39673f3f6ac 100644
--- a/posthog/utils.py
+++ b/posthog/utils.py
@@ -353,7 +353,7 @@ def dict_from_cursor_fetchall(cursor):
     return [dict(zip(columns, row)) for row in cursor.fetchall()]


-def convert_property_value(input: Union[str, bool, dict, list, int]) -> str:
+def convert_property_value(input: Union[str, bool, dict, list, int, Optional[str]]) -> str:
     if isinstance(input, bool):
         if input is True:
             return "true"
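
Usage sketch (not part of the diff above): a minimal illustration of how the new ClickHouse-backed helper might be exercised from a Django shell, assuming ClickHouse is reachable and at least one team exists. The team lookup and the "$browser" key are hypothetical examples, not anything introduced by this change.

    # Hypothetical example exercising get_person_property_values_for_key.
    from posthog.models import Team
    from ee.clickhouse.models.property import get_person_property_values_for_key

    team = Team.objects.first()  # assumption: some team already exists

    # Top 20 property values with their counts, sampled from the most
    # recent 100,000 person rows (see SELECT_PERSON_PROP_VALUES_SQL).
    for value, count in get_person_property_values_for_key("$browser", team):
        print(value, count)

    # The same query narrowed by a case-insensitive substring match
    # (SELECT_PERSON_PROP_VALUES_SQL_WITH_FILTER wraps the value in ILIKE '%...%').
    filtered = get_person_property_values_for_key("$browser", team, value="chro")

Over HTTP the same data comes from the values action on PersonViewSet (typically routed as GET /api/person/values/?key=$browser&value=chro), which wraps each row as {"name": ..., "count": ...} in the posthog/api/person.py hunk above.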