From d7d03b59856f4a92f2b1630b3dafbd68e11029a0 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Wed, 24 Apr 2024 13:39:43 +0200 Subject: [PATCH] feat(insights): HogQL calculation of saved legacy insights v4 (#21781) * Revert "revert(insights): HogQL calculation of saved legacy insights v3 (#21778)" This reverts commit c0be1d1412d7cbe7d1f4f1990b6ea3c9fdd93de1. * Move HogQL in insight serialization to its own flag --- ...build-schema.mjs => build-schema-json.mjs} | 0 bin/build-schema-python.sh | 28 +++ frontend/src/lib/constants.tsx | 1 + frontend/src/queries/schema.json | 3 +- frontend/src/queries/schema.ts | 1 - frontend/src/types.ts | 1 + mypy-baseline.txt | 33 ++-- package.json | 4 +- posthog/api/insight.py | 45 +++-- posthog/api/query.py | 9 +- posthog/api/services/query.py | 12 +- posthog/api/test/dashboards/test_dashboard.py | 20 +- posthog/api/test/test_insight.py | 183 ++++++++++++++++-- posthog/api/test/test_insight_query.py | 68 ++----- posthog/caching/calculate_results.py | 81 ++++---- posthog/caching/fetch_from_cache.py | 7 +- posthog/caching/insight_cache.py | 9 +- posthog/caching/test/test_insight_cache.py | 11 +- posthog/clickhouse/client/execute_async.py | 9 +- .../hogql_queries/apply_dashboard_filters.py | 3 + .../insights/test/test_paths_query_runner.py | 26 +++ .../test/test_paths_query_runner_ee.py | 56 ++++++ .../insights/trends/trends_query_runner.py | 8 +- .../legacy_compatibility/feature_flag.py | 26 +-- .../legacy_compatibility/process_insight.py | 51 ----- posthog/hogql_queries/query_runner.py | 98 ++++++++-- .../test/test_events_query_runner.py | 6 +- .../hogql_queries/test/test_query_runner.py | 19 +- posthog/models/insight.py | 24 +-- posthog/models/test/test_insight_model.py | 45 +---- posthog/schema.py | 2 +- 31 files changed, 556 insertions(+), 333 deletions(-) rename bin/{build-schema.mjs => build-schema-json.mjs} (100%) create mode 100755 bin/build-schema-python.sh delete mode 100644 
posthog/hogql_queries/legacy_compatibility/process_insight.py diff --git a/bin/build-schema.mjs b/bin/build-schema-json.mjs similarity index 100% rename from bin/build-schema.mjs rename to bin/build-schema-json.mjs diff --git a/bin/build-schema-python.sh b/bin/build-schema-python.sh new file mode 100755 index 00000000000..4d9f66616fb --- /dev/null +++ b/bin/build-schema-python.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -e + +# Generate schema.py from schema.json +datamodel-codegen \ + --class-name='SchemaRoot' --collapse-root-models --target-python-version 3.10 --disable-timestamp \ + --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum \ + --input frontend/src/queries/schema.json --input-file-type jsonschema \ + --output posthog/schema.py --output-model-type pydantic_v2.BaseModel + +# Format schema.py +ruff format posthog/schema.py + +# Check schema.py and autofix +ruff check --fix posthog/schema.py + +# HACK: Datamodel-codegen output for enum-type fields with a default is invalid – the default value is a plain string, +# and not the expected enum member. We fix this using sed, which is pretty hacky, but does the job. +# Specifically, we need to replace `Optional[PropertyOperator] = "exact"` +# with `Optional[PropertyOperator] = PropertyOperator("exact")` to make the default value valid. +# Remove this when https://github.com/koxudaxi/datamodel-code-generator/issues/1929 is resolved. 
+if [[ "$OSTYPE" == "darwin"* ]]; then + # sed needs `-i` to be followed by `''` on macOS + sed -i '' -e 's/Optional\[PropertyOperator\] = \("[A-Za-z_]*"\)/Optional[PropertyOperator] = PropertyOperator(\1)/g' posthog/schema.py +else + sed -i -e 's/Optional\[PropertyOperator\] = \("[A-Za-z_]*"\)/Optional[PropertyOperator] = PropertyOperator(\1)/g' posthog/schema.py +fi diff --git a/frontend/src/lib/constants.tsx b/frontend/src/lib/constants.tsx index 4525b3c83b1..0db68836f76 100644 --- a/frontend/src/lib/constants.tsx +++ b/frontend/src/lib/constants.tsx @@ -177,6 +177,7 @@ export const FEATURE_FLAGS = { HOGQL_INSIGHTS_STICKINESS: 'hogql-insights-stickiness', // owner: @Gilbert09 HOGQL_INSIGHTS_FUNNELS: 'hogql-insights-funnels', // owner: @thmsobrmlr HOGQL_INSIGHT_LIVE_COMPARE: 'hogql-insight-live-compare', // owner: @mariusandra + HOGQL_IN_INSIGHT_SERIALIZATION: 'hogql-in-insight-serialization', // owner: @Twixes BI_VIZ: 'bi_viz', // owner: @Gilbert09 WEBHOOKS_DENYLIST: 'webhooks-denylist', // owner: #team-pipeline PERSONS_HOGQL_QUERY: 'persons-hogql-query', // owner: @mariusandra diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 478f0707d1a..868156528ec 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -1200,7 +1200,8 @@ "type": "string" }, "operator": { - "$ref": "#/definitions/PropertyOperator" + "$ref": "#/definitions/PropertyOperator", + "default": "exact" }, "type": { "const": "event", diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 5c991e3fb1d..01708078b17 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -66,7 +66,6 @@ export enum NodeKind { SavedInsightNode = 'SavedInsightNode', InsightVizNode = 'InsightVizNode', - // New queries, not yet implemented TrendsQuery = 'TrendsQuery', FunnelsQuery = 'FunnelsQuery', RetentionQuery = 'RetentionQuery', diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 
d94b7c85359..0bdf702a82e 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -666,6 +666,7 @@ interface BasePropertyFilter { /** Sync with plugin-server/src/types.ts */ export interface EventPropertyFilter extends BasePropertyFilter { type: PropertyFilterType.Event + /** @default 'exact' */ operator: PropertyOperator } diff --git a/mypy-baseline.txt b/mypy-baseline.txt index 4945b638768..9b607b6222c 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -108,7 +108,6 @@ posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Argument posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "PathsFilter"; expected "str": "TrendsFilter" [dict-item] posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "LifecycleFilter"; expected "str": "TrendsFilter" [dict-item] posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "StickinessFilter"; expected "str": "TrendsFilter" [dict-item] -posthog/hogql_queries/legacy_compatibility/feature_flag.py:0: error: Item "AnonymousUser" of "User | AnonymousUser" has no attribute "email" [union-attr] posthog/api/utils.py:0: error: Incompatible types in assignment (expression has type "type[EventDefinition]", variable has type "type[EnterpriseEventDefinition]") [assignment] posthog/api/utils.py:0: error: Argument 1 to "UUID" has incompatible type "int | str"; expected "str | None" [arg-type] ee/billing/quota_limiting.py:0: error: List comprehension has incompatible type List[int]; expected List[str] [misc] @@ -301,7 +300,6 @@ posthog/queries/breakdown_props.py:0: error: Argument 1 to "translate_hogql" has posthog/queries/funnels/base.py:0: error: "HogQLContext" has no attribute "person_on_events_mode" [attr-defined] posthog/queries/funnels/base.py:0: error: Argument 1 to "translate_hogql" has incompatible type "str | int"; expected 
"str" [arg-type] ee/clickhouse/queries/funnels/funnel_correlation.py:0: error: Statement is unreachable [unreachable] -posthog/caching/calculate_results.py:0: error: Argument 3 to "process_query" has incompatible type "bool"; expected "LimitContext | None" [arg-type] posthog/api/person.py:0: error: Argument 1 to has incompatible type "*tuple[str, ...]"; expected "type[BaseRenderer]" [arg-type] posthog/api/person.py:0: error: Argument 1 to "loads" has incompatible type "str | None"; expected "str | bytes | bytearray" [arg-type] posthog/api/person.py:0: error: Argument "user" to "log_activity" has incompatible type "User | AnonymousUser"; expected "User | None" [arg-type] @@ -343,11 +341,6 @@ posthog/hogql_queries/insights/lifecycle_query_runner.py:0: note: Consider using posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Argument 1 to "sorted" has incompatible type "list[Any] | None"; expected "Iterable[Any]" [arg-type] posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery" has no attribute "select_from" [union-attr] posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Item "None" of "JoinExpr | Any | None" has no attribute "sample" [union-attr] -posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "PathFilter", variable has type "RetentionFilter") [assignment] -posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "StickinessFilter", variable has type "RetentionFilter") [assignment] -posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "Filter", variable has type "RetentionFilter") [assignment] -posthog/api/insight.py:0: error: Argument 1 to "is_insight_with_hogql_support" has incompatible type "Insight | DashboardTile"; expected "Insight" [arg-type] 
-posthog/api/insight.py:0: error: Argument 1 to "process_insight" has incompatible type "Insight | DashboardTile"; expected "Insight" [arg-type] posthog/api/insight.py:0: error: Argument 1 to has incompatible type "*tuple[str, ...]"; expected "type[BaseRenderer]" [arg-type] posthog/api/dashboards/dashboard.py:0: error: Metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases [misc] posthog/api/feature_flag.py:0: error: Metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases [misc] @@ -369,19 +362,6 @@ posthog/tasks/exports/test/test_export_utils.py:0: error: Function is missing a posthog/tasks/exports/test/test_csv_exporter_url_sanitising.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] posthog/tasks/exports/test/test_csv_exporter_url_sanitising.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] posthog/tasks/exports/test/test_csv_exporter_renders.py:0: error: Function is missing a type annotation [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a return type annotation [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: 
Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: error: Missing return statement [empty-body] posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: note: If the method is meant to be abstract, use @abc.abstractmethod posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: error: Missing return statement [empty-body] @@ -531,6 +511,19 @@ posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Argument 1 posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Argument 1 to "ensure_pendulum_datetime" has incompatible type "DateTime | Date | datetime | date | str | float | int | None"; expected "DateTime | Date | datetime | date | str | float | int" [arg-type] posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Item "None" of "DateTime | None" has no attribute "int_timestamp" [union-attr] posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Argument 1 to "ensure_pendulum_datetime" has incompatible type "str | None"; 
expected "DateTime | Date | datetime | date | str | float | int" [arg-type] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a return type annotation [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get" [attr-defined] posthog/queries/trends/test/test_person.py:0: 
error: Invalid index type "int" for "HttpResponse"; expected type "str | bytes" [index] posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get" [attr-defined] diff --git a/package.json b/package.json index fc5fc302402..6039d555ad9 100644 --- a/package.json +++ b/package.json @@ -33,8 +33,8 @@ "build": "pnpm copy-scripts && pnpm build:esbuild", "build:esbuild": "node frontend/build.mjs", "schema:build": "pnpm run schema:build:json && pnpm run schema:build:python", - "schema:build:json": "ts-node bin/build-schema.mjs && prettier --write frontend/src/queries/schema.json", - "schema:build:python": "datamodel-codegen --class-name='SchemaRoot' --collapse-root-models --target-python-version 3.10 --disable-timestamp --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum --input frontend/src/queries/schema.json --input-file-type jsonschema --output posthog/schema.py --output-model-type pydantic_v2.BaseModel && ruff format posthog/schema.py && ruff check --fix posthog/schema.py", + "schema:build:json": "ts-node bin/build-schema-json.mjs && prettier --write frontend/src/queries/schema.json", + "schema:build:python": "bash bin/build-schema-python.sh", "grammar:build": "npm run grammar:build:python && npm run grammar:build:cpp", "grammar:build:python": "cd posthog/hogql/grammar && antlr -Dlanguage=Python3 HogQLLexer.g4 && antlr -visitor -no-listener -Dlanguage=Python3 HogQLParser.g4", "grammar:build:cpp": "cd posthog/hogql/grammar && antlr -o ../../../hogql_parser -Dlanguage=Cpp HogQLLexer.g4 && antlr -o ../../../hogql_parser -visitor -no-listener -Dlanguage=Cpp HogQLParser.g4", diff --git a/posthog/api/insight.py b/posthog/api/insight.py index 528dc537679..36495a5469b 100644 --- a/posthog/api/insight.py +++ b/posthog/api/insight.py @@ -35,11 +35,7 @@ from posthog.api.shared import UserBasicSerializer from posthog.api.tagged_item import TaggedItemSerializerMixin, TaggedItemViewSetMixin from posthog.api.utils import 
format_paginated_url from posthog.auth import SharingAccessTokenAuthentication -from posthog.caching.fetch_from_cache import ( - InsightResult, - fetch_cached_insight_result, - synchronously_update_cache, -) +from posthog.caching.fetch_from_cache import InsightResult, fetch_cached_insight_result, synchronously_update_cache from posthog.caching.insights_api import should_refresh_insight from posthog.constants import ( INSIGHT, @@ -58,8 +54,8 @@ from posthog.helpers.multi_property_breakdown import ( from posthog.hogql.errors import ExposedHogQLError from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.apply_dashboard_filters import DATA_TABLE_LIKE_NODE_KINDS -from posthog.hogql_queries.legacy_compatibility.feature_flag import hogql_insights_enabled -from posthog.hogql_queries.legacy_compatibility.process_insight import is_insight_with_hogql_support, process_insight +from posthog.hogql_queries.legacy_compatibility.feature_flag import should_use_hogql_backend_in_insight_serialization +from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query from posthog.kafka_client.topics import KAFKA_METRICS_TIME_TO_SEE_DATA from posthog.models import DashboardTile, Filter, Insight, User from posthog.models.activity_logging.activity_log import ( @@ -510,7 +506,7 @@ class InsightSerializer(InsightBasicSerializer, UserPermissionsSerializerMixin): dashboard: Optional[Dashboard] = self.context.get("dashboard") representation["filters"] = instance.dashboard_filters(dashboard=dashboard) - representation["query"] = instance.dashboard_query(dashboard=dashboard) + representation["query"] = instance.get_effective_query(dashboard=dashboard) if "insight" not in representation["filters"] and not representation["query"]: representation["filters"]["insight"] = "TRENDS" @@ -521,14 +517,34 @@ class InsightSerializer(InsightBasicSerializer, UserPermissionsSerializerMixin): @lru_cache(maxsize=1) def insight_result(self, insight: Insight) -> 
InsightResult: + from posthog.caching.calculate_results import calculate_for_query_based_insight + dashboard = self.context.get("dashboard", None) dashboard_tile = self.dashboard_tile_from_context(insight, dashboard) - target = insight if dashboard is None else dashboard_tile - if hogql_insights_enabled(self.context.get("request", None).user) and is_insight_with_hogql_support( - target or insight + if insight.query: + try: + return calculate_for_query_based_insight( + insight, dashboard=dashboard, refresh_requested=refresh_requested_by_client(self.context["request"]) + ) + except ExposedHogQLError as e: + raise ValidationError(str(e)) + + if not self.context["request"].user.is_anonymous and should_use_hogql_backend_in_insight_serialization( + self.context["request"].user ): - return process_insight(target or insight, insight.team) + # TRICKY: As running `filters`-based insights on the HogQL-based engine is a transitional mechanism, + # we fake the insight being properly `query`-based. + # To prevent the lie from accidentally being saved to Postgres, we roll it back in the `finally` branch. 
+ insight.query = filter_to_query(insight.filters).model_dump() + try: + return calculate_for_query_based_insight( + insight, dashboard=dashboard, refresh_requested=refresh_requested_by_client(self.context["request"]) + ) + except ExposedHogQLError as e: + raise ValidationError(str(e)) + finally: + insight.query = None is_shared = self.context.get("is_shared", False) refresh_insight_now, refresh_frequency = should_refresh_insight( @@ -539,10 +555,9 @@ class InsightSerializer(InsightBasicSerializer, UserPermissionsSerializerMixin): ) if refresh_insight_now: INSIGHT_REFRESH_INITIATED_COUNTER.labels(is_shared=is_shared).inc() - return synchronously_update_cache(insight, dashboard, refresh_frequency) + return synchronously_update_cache(insight, dashboard, refresh_frequency=refresh_frequency) - # :TODO: Clear up if tile can be null or not - return fetch_cached_insight_result(target or insight, refresh_frequency) + return fetch_cached_insight_result(dashboard_tile or insight, refresh_frequency) @lru_cache(maxsize=1) # each serializer instance should only deal with one insight/tile combo def dashboard_tile_from_context(self, insight: Insight, dashboard: Optional[Dashboard]) -> Optional[DashboardTile]: diff --git a/posthog/api/query.py b/posthog/api/query.py index 5309a96459d..197fe79f18e 100644 --- a/posthog/api/query.py +++ b/posthog/api/query.py @@ -3,6 +3,7 @@ import uuid from django.http import JsonResponse from drf_spectacular.utils import OpenApiResponse +from posthog.hogql_queries.query_runner import ExecutionMode from rest_framework import viewsets from rest_framework.decorators import action from rest_framework.exceptions import ValidationError, NotAuthenticated @@ -75,7 +76,13 @@ class QueryViewSet(TeamAndOrgViewSetMixin, PydanticModelMixin, viewsets.ViewSet) tag_queries(query=request.data["query"]) try: - result = process_query_model(self.team, data.query, refresh_requested=data.refresh) + result = process_query_model( + self.team, + data.query, + 
execution_mode=ExecutionMode.CALCULATION_ALWAYS + if data.refresh + else ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE, + ) return Response(result) except (ExposedHogQLError, ExposedCHQueryError) as e: raise ValidationError(str(e), getattr(e, "code_name", None)) diff --git a/posthog/api/services/query.py b/posthog/api/services/query.py index 75d326afead..09d33759d02 100644 --- a/posthog/api/services/query.py +++ b/posthog/api/services/query.py @@ -11,7 +11,7 @@ from posthog.hogql.database.database import create_hogql_database, serialize_dat from posthog.hogql.autocomplete import get_hogql_autocomplete from posthog.hogql.metadata import get_hogql_metadata from posthog.hogql.modifiers import create_default_modifiers_for_team -from posthog.hogql_queries.query_runner import get_query_runner +from posthog.hogql_queries.query_runner import ExecutionMode, get_query_runner from posthog.models import Team from posthog.queries.time_to_see_data.serializers import SessionEventsQuerySerializer, SessionsQuerySerializer from posthog.queries.time_to_see_data.sessions import get_session_events, get_sessions @@ -59,8 +59,9 @@ QUERY_WITH_RUNNER_NO_CACHE = HogQLQuery | EventsQuery | ActorsQuery | SessionsTi def process_query( team: Team, query_json: dict, + *, limit_context: Optional[LimitContext] = None, - refresh_requested: Optional[bool] = False, + execution_mode: ExecutionMode = ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE, ) -> dict: model = QuerySchemaRoot.model_validate(query_json) tag_queries(query=query_json) @@ -68,21 +69,22 @@ def process_query( team, model.root, limit_context=limit_context, - refresh_requested=refresh_requested, + execution_mode=execution_mode, ) def process_query_model( team: Team, query: BaseModel, # mypy has problems with unions and isinstance + *, limit_context: Optional[LimitContext] = None, - refresh_requested: Optional[bool] = False, + execution_mode: ExecutionMode = ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE, ) -> dict: result: dict | BaseModel 
if isinstance(query, QUERY_WITH_RUNNER): # type: ignore query_runner = get_query_runner(query, team, limit_context=limit_context) - result = query_runner.run(refresh_requested=refresh_requested) + result = query_runner.run(execution_mode=execution_mode) elif isinstance(query, QUERY_WITH_RUNNER_NO_CACHE): # type: ignore query_runner = get_query_runner(query, team, limit_context=limit_context) result = query_runner.calculate() diff --git a/posthog/api/test/dashboards/test_dashboard.py b/posthog/api/test/dashboards/test_dashboard.py index 1f7cd4b533f..e4c91f45149 100644 --- a/posthog/api/test/dashboards/test_dashboard.py +++ b/posthog/api/test/dashboards/test_dashboard.py @@ -1232,7 +1232,14 @@ class TestDashboard(APIBaseTest, QueryMatchingTest): "tiles": [ { "type": "INSIGHT", - "query": {"kind": "a datatable"}, + "query": { + "kind": "DataTableNode", + "columns": ["person", "id", "created_at", "person.$delete"], + "source": { + "kind": "EventsQuery", + "select": ["*"], + }, + }, "filters": {"date_from": None}, "layouts": {}, } @@ -1277,8 +1284,15 @@ class TestDashboard(APIBaseTest, QueryMatchingTest): "name": None, "next_allowed_client_refresh": None, "order": None, - "query": {"kind": "a datatable"}, - "result": None, + "query": { + "kind": "DataTableNode", + "columns": ["person", "id", "created_at", "person.$delete"], + "source": { + "kind": "EventsQuery", + "select": ["*"], + }, + }, + "result": [], "saved": False, "short_id": ANY, "tags": [], diff --git a/posthog/api/test/test_insight.py b/posthog/api/test/test_insight.py index b13bf0d6f91..f707f0330b7 100644 --- a/posthog/api/test/test_insight.py +++ b/posthog/api/test/test_insight.py @@ -9,6 +9,7 @@ from zoneinfo import ZoneInfo from django.test import override_settings from django.utils import timezone from freezegun import freeze_time +from posthog.hogql.query import execute_hogql_query from rest_framework import status from posthog.api.test.dashboards import DashboardAPI @@ -27,7 +28,16 @@ from 
posthog.models import ( OrganizationMembership, Text, ) -from posthog.schema import DataTableNode, DataVisualizationNode, DateRange, HogQLFilters, HogQLQuery +from posthog.schema import ( + DataTableNode, + DataVisualizationNode, + DateRange, + EventPropertyFilter, + EventsNode, + HogQLFilters, + HogQLQuery, + TrendsQuery, +) from posthog.test.base import ( APIBaseTest, ClickhouseTestMixin, @@ -995,11 +1005,8 @@ class TestInsight(ClickhouseTestMixin, APIBaseTest, QueryMatchingTest): self.assertEqual(objects[0].filters["layout"], "horizontal") self.assertEqual(len(objects[0].short_id), 8) - @patch( - "posthog.api.insight.synchronously_update_cache", - wraps=synchronously_update_cache, - ) - def test_insight_refreshing(self, spy_update_insight_cache) -> None: + @patch("posthog.api.insight.synchronously_update_cache", wraps=synchronously_update_cache) + def test_insight_refreshing_legacy(self, spy_update_insight_cache) -> None: dashboard_id, _ = self.dashboard_api.create_dashboard({"filters": {"date_from": "-14d"}}) with freeze_time("2012-01-14T03:21:34.000Z"): @@ -1124,6 +1131,153 @@ class TestInsight(ClickhouseTestMixin, APIBaseTest, QueryMatchingTest): ], ) + @patch("posthog.hogql_queries.insights.trends.trends_query_runner.execute_hogql_query", wraps=execute_hogql_query) + def test_insight_refreshing_query(self, spy_execute_hogql_query) -> None: + dashboard_id, _ = self.dashboard_api.create_dashboard({"filters": {"date_from": "-14d"}}) + + with freeze_time("2012-01-14T03:21:34.000Z"): + _create_event( + team=self.team, + event="$pageview", + distinct_id="1", + properties={"prop": "val"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="2", + properties={"prop": "another_val"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="2", + properties={"prop": "val", "another": "never_return_this"}, + ) + + query_dict = TrendsQuery( + series=[ + EventsNode( + event="$pageview", + 
properties=[EventPropertyFilter(key="another", value="never_return_this", operator="is_not")], + ) + ] + ).model_dump() + + with freeze_time("2012-01-15T04:01:34.000Z"): + response = self.client.post( + f"/api/projects/{self.team.id}/insights", + data={ + "query": query_dict, + "dashboards": [dashboard_id], + }, + ).json() + self.assertNotIn("code", response) # Watching out for an error code + self.assertEqual(response["last_refresh"], None) + insight_id = response["id"] + + response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true").json() + self.assertNotIn("code", response) + self.assertEqual(spy_execute_hogql_query.call_count, 1) + self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 0]) + self.assertEqual(response["last_refresh"], "2012-01-15T04:01:34Z") + self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") + self.assertFalse(response["is_cached"]) + + with freeze_time("2012-01-15T05:01:34.000Z"): + _create_event(team=self.team, event="$pageview", distinct_id="1") + response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true").json() + self.assertNotIn("code", response) + self.assertEqual(spy_execute_hogql_query.call_count, 2) + self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1]) + self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") + self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change + self.assertFalse(response["is_cached"]) + + with freeze_time("2012-01-15T05:17:34.000Z"): + response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight_id}/").json() + self.assertNotIn("code", response) + self.assertEqual(spy_execute_hogql_query.call_count, 2) + self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1]) + self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") # Using cached result + self.assertEqual(response["last_modified_at"], 
"2012-01-15T04:01:34Z") # did not change + self.assertTrue(response["is_cached"]) + + with freeze_time("2012-01-15T05:17:39.000Z"): + # Make sure the /query/ endpoint reuses the same cached result + response = self.client.post(f"/api/projects/{self.team.id}/query/", {"query": query_dict}).json() + self.assertNotIn("code", response) + self.assertEqual(spy_execute_hogql_query.call_count, 2) + self.assertEqual(response["results"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1]) + self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") # Using cached result + self.assertTrue(response["is_cached"]) + + with freeze_time("2012-01-16T05:01:34.000Z"): + # load it in the context of the dashboard, so has last 14 days as filter + response = self.client.get( + f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true&from_dashboard={dashboard_id}" + ).json() + self.assertNotIn("code", response) + self.assertEqual(spy_execute_hogql_query.call_count, 3) + self.assertEqual( + response["result"][0]["data"], + [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 2.0, + 1.0, + 0.0, + ], + ) + self.assertEqual(response["last_refresh"], "2012-01-16T05:01:34Z") + self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change + self.assertFalse(response["is_cached"]) + + #  Test property filter + + dashboard = Dashboard.objects.get(pk=dashboard_id) + dashboard.filters = { + "properties": [{"key": "prop", "value": "val"}], + "date_from": "-14d", + } + dashboard.save() + with freeze_time("2012-01-16T05:01:34.000Z"): + response = self.client.get( + f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true&from_dashboard={dashboard_id}" + ).json() + self.assertNotIn("code", response) + self.assertEqual(spy_execute_hogql_query.call_count, 4) + self.assertEqual( + response["result"][0]["data"], + [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 0.0, + 0.0, + ], + ) + def 
test_dashboard_filters_applied_to_data_table_node(self): dashboard_id, _ = self.dashboard_api.create_dashboard( {"name": "the dashboard", "filters": {"date_from": "-180d"}} @@ -1973,7 +2127,7 @@ class TestInsight(ClickhouseTestMixin, APIBaseTest, QueryMatchingTest): "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", + "coalesce(properties.$current_url, properties.$screen_name)", "properties.$lib", "timestamp", ], @@ -2014,7 +2168,7 @@ class TestInsight(ClickhouseTestMixin, APIBaseTest, QueryMatchingTest): "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", + "coalesce(properties.$current_url, properties.$screen_name)", "properties.$lib", "timestamp", ], @@ -2953,22 +3107,19 @@ class TestInsight(ClickhouseTestMixin, APIBaseTest, QueryMatchingTest): def test_insight_with_filters_via_hogql(self) -> None: filter_dict = {"insight": "LIFECYCLE", "events": [{"id": "$pageview"}]} - Insight.objects.create( + insight = Insight.objects.create( filters=Filter(data=filter_dict).to_dict(), team=self.team, short_id="xyz123", ) # fresh response - response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123") + response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight.id}/?refresh=true") self.assertEqual(response.status_code, status.HTTP_200_OK) - - self.assertEqual(len(response.json()["results"]), 1) - self.assertEqual(response.json()["results"][0]["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) + self.assertEqual(response.json()["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) # cached response - response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123") + response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight.id}/?refresh=true") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(len(response.json()["results"]), 1) - 
self.assertEqual(response.json()["results"][0]["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) + self.assertEqual(response.json()["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) diff --git a/posthog/api/test/test_insight_query.py b/posthog/api/test/test_insight_query.py index 79a1785c284..6279999bbef 100644 --- a/posthog/api/test/test_insight_query.py +++ b/posthog/api/test/test_insight_query.py @@ -25,7 +25,7 @@ class TestInsight(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest, QueryMatc "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", + "coalesce(properties.$current_url, properties.$screen_name)", "properties.$lib", "timestamp", ], @@ -55,7 +55,7 @@ class TestInsight(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest, QueryMatc "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", + "coalesce(properties.$current_url, properties.$screen_name)", "properties.$lib", "timestamp", ], @@ -82,15 +82,8 @@ class TestInsight(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest, QueryMatc "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, @@ -105,15 +98,8 @@ class TestInsight(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest, QueryMatc "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, @@ -178,15 +164,6 @@ class TestInsight(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest, QueryMatc "name": "$pageview", "custom_name": "Views", "event": "$pageview", - "properties": [ - { - "type": "event", - 
"key": "$browser", - "operator": "exact", - "value": "Chrome", - }, - {"type": "cohort", "key": "id", "value": 2}, - ], "limit": 100, } ], @@ -212,15 +189,8 @@ class TestInsight(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest, QueryMatc "query": { "kind": "DataTableNode", "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, @@ -236,15 +206,8 @@ class TestInsight(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest, QueryMatc "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, @@ -266,15 +229,8 @@ class TestInsight(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest, QueryMatc "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, diff --git a/posthog/caching/calculate_results.py b/posthog/caching/calculate_results.py index 1dd9d805385..ae4d6d8104f 100644 --- a/posthog/caching/calculate_results.py +++ b/posthog/caching/calculate_results.py @@ -1,5 +1,6 @@ -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from posthog.api.services.query import ExecutionMode import structlog from sentry_sdk import capture_exception @@ -29,10 +30,7 @@ from posthog.models.filters import PathFilter from posthog.models.filters.stickiness_filter import StickinessFilter from posthog.models.filters.utils import get_filter from posthog.models.insight import generate_insight_cache_key 
-from posthog.queries.funnels import ( - ClickhouseFunnelTimeToConvert, - ClickhouseFunnelTrends, -) +from posthog.queries.funnels import ClickhouseFunnelTimeToConvert, ClickhouseFunnelTrends from posthog.queries.funnels.utils import get_funnel_order_class from posthog.queries.paths import Paths from posthog.queries.retention import Retention @@ -40,6 +38,9 @@ from posthog.queries.stickiness import Stickiness from posthog.queries.trends.trends import Trends from posthog.types import FilterType +if TYPE_CHECKING: + from posthog.caching.fetch_from_cache import InsightResult + CACHE_TYPE_TO_INSIGHT_CLASS = { CacheType.TRENDS: Trends, CacheType.STICKINESS: Stickiness, @@ -54,7 +55,7 @@ def calculate_cache_key(target: Union[DashboardTile, Insight]) -> Optional[str]: insight = target if isinstance(target, Insight) else target.insight dashboard = target.dashboard if isinstance(target, DashboardTile) else None - if insight is None or (not insight.filters and insight.query is None): + if insight is None or not insight.filters: return None return generate_insight_cache_key(insight, dashboard) @@ -106,57 +107,59 @@ def get_cache_type(cacheable: Optional[FilterType] | Optional[Dict]) -> CacheTyp raise Exception("Could not determine cache type. Must provide a filter or a query") -def calculate_result_by_insight( - team: Team, insight: Insight, dashboard: Optional[Dashboard] -) -> Tuple[str, str, List | Dict]: - """ - Calculates the result for an insight. If the insight is query based, - it will use the query to calculate the result. 
Even if there is a filter present on the insight - - Eventually there will be no filter-based insights left and calculate_for_query_based_insight will be - in-lined into this function - """ - if insight.query is not None: - return calculate_for_query_based_insight(team, insight, dashboard) - else: - return calculate_for_filter_based_insight(team, insight, dashboard) - - def calculate_for_query_based_insight( - team: Team, insight: Insight, dashboard: Optional[Dashboard] -) -> Tuple[str, str, List | Dict]: - cache_key = generate_insight_cache_key(insight, dashboard) - cache_type = get_cache_type(insight.query) + insight: Insight, *, dashboard: Optional[Dashboard] = None, refresh_requested: bool +) -> "InsightResult": + from posthog.api.services.query import process_query + from posthog.caching.fetch_from_cache import InsightResult, NothingInCacheResult - tag_queries( - team_id=team.pk, - insight_id=insight.pk, - cache_type=cache_type, - cache_key=cache_key, + tag_queries(team_id=insight.team_id, insight_id=insight.pk) + if dashboard: + tag_queries(dashboard_id=dashboard.pk) + + effective_query = insight.get_effective_query(dashboard=dashboard) + assert effective_query is not None + + response = process_query( + insight.team, + effective_query, + execution_mode=ExecutionMode.CALCULATION_ALWAYS + if refresh_requested + else ExecutionMode.CACHE_ONLY_NEVER_CALCULATE, ) - # local import to avoid circular reference - from posthog.api.services.query import process_query + if "results" not in response: + # Translating `CacheMissResponse` to legacy insights shape + return NothingInCacheResult(cache_key=response.get("cache_key")) - # TODO need to properly check that hogql is enabled? 
- return cache_key, cache_type, process_query(team, insight.query, True) + return InsightResult( + # Translating `QueryResponse` to legacy insights shape + # Only `results` is guaranteed even for non-insight queries, such as `EventsQueryResponse` + result=response["results"], + last_refresh=response.get("last_refresh"), + cache_key=response.get("cache_key"), + is_cached=response.get("is_cached", False), + timezone=response.get("timezone"), + next_allowed_client_refresh=response.get("next_allowed_client_refresh"), + timings=response.get("timings"), + ) def calculate_for_filter_based_insight( - team: Team, insight: Insight, dashboard: Optional[Dashboard] + insight: Insight, dashboard: Optional[Dashboard] ) -> Tuple[str, str, List | Dict]: - filter = get_filter(data=insight.dashboard_filters(dashboard), team=team) + filter = get_filter(data=insight.dashboard_filters(dashboard), team=insight.team) cache_key = generate_insight_cache_key(insight, dashboard) cache_type = get_cache_type(filter) tag_queries( - team_id=team.pk, + team_id=insight.team_id, insight_id=insight.pk, cache_type=cache_type, cache_key=cache_key, ) - return cache_key, cache_type, calculate_result_by_cache_type(cache_type, filter, team) + return cache_key, cache_type, calculate_result_by_cache_type(cache_type, filter, insight.team) def calculate_result_by_cache_type(cache_type: CacheType, filter: Filter, team: Team) -> List[Dict[str, Any]]: diff --git a/posthog/caching/fetch_from_cache.py b/posthog/caching/fetch_from_cache.py index 43e847e747a..fcbeb0b72e3 100644 --- a/posthog/caching/fetch_from_cache.py +++ b/posthog/caching/fetch_from_cache.py @@ -5,10 +5,7 @@ from typing import Any, List, Optional, Union from django.utils.timezone import now from prometheus_client import Counter -from posthog.caching.calculate_results import ( - calculate_cache_key, - calculate_result_by_insight, -) +from posthog.caching.calculate_results import calculate_cache_key, calculate_for_filter_based_insight from 
posthog.caching.insight_cache import update_cached_state from posthog.models import DashboardTile, Insight from posthog.models.dashboard import Dashboard @@ -83,7 +80,7 @@ def synchronously_update_cache( dashboard: Optional[Dashboard], refresh_frequency: Optional[timedelta] = None, ) -> InsightResult: - cache_key, cache_type, result = calculate_result_by_insight(team=insight.team, insight=insight, dashboard=dashboard) + cache_key, cache_type, result = calculate_for_filter_based_insight(insight, dashboard) timestamp = now() next_allowed_client_refresh = timestamp + refresh_frequency if refresh_frequency else None diff --git a/posthog/caching/insight_cache.py b/posthog/caching/insight_cache.py index b2f14eab178..d73486234df 100644 --- a/posthog/caching/insight_cache.py +++ b/posthog/caching/insight_cache.py @@ -12,8 +12,8 @@ from prometheus_client import Counter from sentry_sdk.api import capture_exception from statshog.defaults.django import statsd -from posthog.caching.calculate_results import calculate_result_by_insight -from posthog.models import Dashboard, Insight, InsightCachingState, Team +from posthog.caching.calculate_results import calculate_for_filter_based_insight +from posthog.models import Dashboard, Insight, InsightCachingState from posthog.models.instance_setting import get_instance_setting from posthog.tasks.tasks import update_cache_task @@ -90,13 +90,12 @@ def update_cache(caching_state_id: UUID): return insight, dashboard = _extract_insight_dashboard(caching_state) - team: Team = insight.team start_time = perf_counter() exception = cache_key = cache_type = None metadata = { - "team_id": team.pk, + "team_id": insight.team_id, "insight_id": insight.pk, "dashboard_id": dashboard.pk if dashboard else None, "last_refresh": caching_state.last_refresh, @@ -104,7 +103,7 @@ def update_cache(caching_state_id: UUID): } try: - cache_key, cache_type, result = calculate_result_by_insight(team=team, insight=insight, dashboard=dashboard) + cache_key, cache_type, 
result = calculate_for_filter_based_insight(insight=insight, dashboard=dashboard) except Exception as err: capture_exception(err, metadata) exception = err diff --git a/posthog/caching/test/test_insight_cache.py b/posthog/caching/test/test_insight_cache.py index 269aebf8878..9de2053f6c2 100644 --- a/posthog/caching/test/test_insight_cache.py +++ b/posthog/caching/test/test_insight_cache.py @@ -165,16 +165,15 @@ def test_update_cache_updates_identical_cache_keys(team: Team, user: User, cache @pytest.mark.django_db @freeze_time("2020-01-04T13:01:01Z") @patch("posthog.caching.insight_cache.update_cache_task") -@patch("posthog.caching.insight_cache.calculate_result_by_insight") +@patch("posthog.caching.insight_cache.calculate_for_filter_based_insight", side_effect=Exception()) def test_update_cache_when_calculation_fails( - spy_calculate_result_by_insight, + spy_calculate_for_filter_based_insight, spy_update_cache_task, team: Team, user: User, cache, ): caching_state = create_insight_caching_state(team, user, refresh_attempt=1) - spy_calculate_result_by_insight.side_effect = Exception() update_cache(caching_state.pk) @@ -190,8 +189,8 @@ def test_update_cache_when_calculation_fails( @pytest.mark.django_db @freeze_time("2020-01-04T13:01:01Z") -@patch("posthog.caching.insight_cache.calculate_result_by_insight") -def test_update_cache_when_recently_refreshed(spy_calculate_result_by_insight, team: Team, user: User): +@patch("posthog.caching.insight_cache.calculate_for_filter_based_insight") +def test_update_cache_when_recently_refreshed(spy_calculate_for_filter_based_insight, team: Team, user: User): caching_state = create_insight_caching_state( team, user, last_refresh=timedelta(hours=1), target_cache_age=timedelta(days=1) ) @@ -200,7 +199,7 @@ def test_update_cache_when_recently_refreshed(spy_calculate_result_by_insight, t updated_caching_state = InsightCachingState.objects.get(team=team) - assert spy_calculate_result_by_insight.call_count == 0 + assert 
spy_calculate_for_filter_based_insight.call_count == 0 assert updated_caching_state.last_refresh == caching_state.last_refresh diff --git a/posthog/clickhouse/client/execute_async.py b/posthog/clickhouse/client/execute_async.py index eeed13ce5ee..2a2e762d5aa 100644 --- a/posthog/clickhouse/client/execute_async.py +++ b/posthog/clickhouse/client/execute_async.py @@ -83,7 +83,7 @@ def execute_process_query( ): manager = QueryStatusManager(query_id, team_id) - from posthog.api.services.query import process_query + from posthog.api.services.query import process_query, ExecutionMode from posthog.models import Team team = Team.objects.get(pk=team_id) @@ -103,7 +103,12 @@ def execute_process_query( try: tag_queries(client_query_id=query_id, team_id=team_id, user_id=user_id) results = process_query( - team=team, query_json=query_json, limit_context=limit_context, refresh_requested=refresh_requested + team=team, + query_json=query_json, + limit_context=limit_context, + execution_mode=ExecutionMode.CALCULATION_ALWAYS + if refresh_requested + else ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE, ) logger.info("Got results for team %s query %s", team_id, query_id) query_status.complete = True diff --git a/posthog/hogql_queries/apply_dashboard_filters.py b/posthog/hogql_queries/apply_dashboard_filters.py index 9506c3704a4..2b1b6dc7b89 100644 --- a/posthog/hogql_queries/apply_dashboard_filters.py +++ b/posthog/hogql_queries/apply_dashboard_filters.py @@ -1,3 +1,4 @@ +from sentry_sdk import capture_exception from posthog.hogql_queries.query_runner import get_query_runner from posthog.models import Team from posthog.schema import DashboardFilter, NodeKind @@ -16,9 +17,11 @@ def apply_dashboard_filters(query: dict, filters: dict, team: Team) -> dict: try: query_runner = get_query_runner(query, team) except ValueError: + capture_exception() return query try: return query_runner.apply_dashboard_filters(DashboardFilter(**filters)).dict() except NotImplementedError: # TODO when we 
implement apply_dashboard_filters on more query runners, we can remove the try/catch + capture_exception() return query diff --git a/posthog/hogql_queries/insights/test/test_paths_query_runner.py b/posthog/hogql_queries/insights/test/test_paths_query_runner.py index 1dd96a36386..b74102ba705 100644 --- a/posthog/hogql_queries/insights/test/test_paths_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_paths_query_runner.py @@ -6,6 +6,7 @@ from django.utils.timezone import now from freezegun import freeze_time from posthog.hogql_queries.insights.paths_query_runner import PathsQueryRunner +from posthog.hogql_queries.query_runner import CachedQueryResponse from posthog.models import Team from posthog.test.base import ( APIBaseTest, @@ -153,6 +154,7 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(response[0]["source"], "1_/", response) @@ -183,6 +185,8 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 4) date_to = now() @@ -196,6 +200,8 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 4) date_from = now() + relativedelta(days=7) @@ -209,6 +215,8 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 0) date_to = now() - relativedelta(days=7) @@ -222,6 +230,8 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 0) date_from = now() - relativedelta(days=7) @@ -238,6 +248,7 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + assert 
isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 4) # Test account filter @@ -253,6 +264,8 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 3) date_from = now() + relativedelta(days=7) @@ -268,6 +281,8 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 0) def test_custom_event_paths(self): @@ -366,6 +381,7 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(response[0]["source"], "1_custom_event_1", response) @@ -481,6 +497,7 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(response[0]["source"], "1_custom_event_1!", response) @@ -587,6 +604,7 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(response[0]["source"], "1_/", response) @@ -707,6 +725,7 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(response[0]["source"], "1_/") @@ -850,6 +869,7 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(len(response), 5) @@ -870,6 +890,8 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(len(response), 5) @@ -889,6 +911,8 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, 
CachedQueryResponse) response = result.results self.assertEqual(len(response), 3) @@ -948,6 +972,8 @@ class TestPaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(response[0]["source"], "1_/") diff --git a/posthog/hogql_queries/insights/test/test_paths_query_runner_ee.py b/posthog/hogql_queries/insights/test/test_paths_query_runner_ee.py index 96ae1ab49eb..f7d01b9ec9d 100644 --- a/posthog/hogql_queries/insights/test/test_paths_query_runner_ee.py +++ b/posthog/hogql_queries/insights/test/test_paths_query_runner_ee.py @@ -16,6 +16,7 @@ from posthog.constants import ( ) from posthog.hogql_queries.actors_query_runner import ActorsQueryRunner from posthog.hogql_queries.insights.paths_query_runner import PathsQueryRunner +from posthog.hogql_queries.query_runner import CachedQueryResponse from posthog.models.filters import Filter, PathFilter from posthog.models.group.util import create_group from posthog.models.group_type_mapping import GroupTypeMapping @@ -152,6 +153,7 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): with freeze_time("2012-01-7T03:21:34.000Z"): filter = {"stepLimit": 2} result = PathsQueryRunner(query={"kind": "PathsQuery", "pathsFilter": filter}, team=self.team).run() + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -171,6 +173,7 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): with freeze_time("2012-01-7T03:21:34.000Z"): filter = {"stepLimit": 3} result = PathsQueryRunner(query={"kind": "PathsQuery", "pathsFilter": filter}, team=self.team).run() + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -195,6 +198,7 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): with freeze_time("2012-01-7T03:21:34.000Z"): filter = {"stepLimit": 4} result = PathsQueryRunner(query={"kind": "PathsQuery", "pathsFilter": filter}, 
team=self.team).run() + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -307,6 +311,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -388,6 +394,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -1840,6 +1848,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -1903,6 +1913,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2045,6 +2057,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2077,6 +2091,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2109,6 +2125,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2142,6 +2160,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2265,6 +2285,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2293,6 +2315,8 @@ class 
TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(len(response), 6) @@ -2390,6 +2414,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2458,6 +2484,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2509,6 +2537,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2604,6 +2634,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2712,6 +2744,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2876,6 +2910,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): query=paths_query.copy(), team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2898,6 +2934,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): query=paths_query, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3080,6 +3118,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3185,6 +3225,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert 
isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3467,6 +3509,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3701,6 +3745,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3751,6 +3797,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3801,6 +3849,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3939,6 +3989,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results with override_instance_config("PERSON_ON_EVENTS_ENABLED", True): @@ -3990,6 +4042,7 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -4040,6 +4093,7 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -4141,6 +4195,8 @@ class TestClickhousePaths(ClickhouseTestMixin, APIBaseTest): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index 3e62d51a621..8629d17ec92 100644 --- 
a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -818,8 +818,8 @@ class TrendsQueryRunner(QueryRunner): return TrendsDisplay(display) def apply_dashboard_filters(self, *args, **kwargs) -> RunnableQueryNode: + updated_query = super().apply_dashboard_filters(*args, **kwargs) # Remove any set breakdown limit for display on the dashboard - if self.query.breakdownFilter: - self.query.breakdownFilter.breakdown_limit = None - - return self.query + if updated_query.breakdownFilter: + updated_query.breakdownFilter.breakdown_limit = None + return updated_query diff --git a/posthog/hogql_queries/legacy_compatibility/feature_flag.py b/posthog/hogql_queries/legacy_compatibility/feature_flag.py index 2c1708223d9..69e08ea5aa9 100644 --- a/posthog/hogql_queries/legacy_compatibility/feature_flag.py +++ b/posthog/hogql_queries/legacy_compatibility/feature_flag.py @@ -1,26 +1,16 @@ -from typing import cast import posthoganalytics from django.conf import settings -from posthog.cloud_utils import is_cloud from posthog.models.user import User -from django.contrib.auth.models import AnonymousUser -def hogql_insights_enabled(user: User | AnonymousUser) -> bool: +def should_use_hogql_backend_in_insight_serialization(user: User) -> bool: if settings.HOGQL_INSIGHTS_OVERRIDE is not None: return settings.HOGQL_INSIGHTS_OVERRIDE - # on PostHog Cloud, use the feature flag - if is_cloud(): - if not hasattr(user, "distinct_id"): # exclude api endpoints that don't have auth from the flag - return False - - return posthoganalytics.feature_enabled( - "hogql-insights", - cast(str, user.distinct_id), - person_properties={"email": user.email}, - only_evaluate_locally=True, - send_feature_flag_events=False, - ) - else: - return False + return posthoganalytics.feature_enabled( + "hogql-in-insight-serialization", + user.distinct_id, + person_properties={"email": user.email}, + only_evaluate_locally=True, + 
send_feature_flag_events=False, + ) diff --git a/posthog/hogql_queries/legacy_compatibility/process_insight.py b/posthog/hogql_queries/legacy_compatibility/process_insight.py deleted file mode 100644 index 074128cf86b..00000000000 --- a/posthog/hogql_queries/legacy_compatibility/process_insight.py +++ /dev/null @@ -1,51 +0,0 @@ -from posthog.caching.fetch_from_cache import InsightResult -from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query -from posthog.hogql_queries.insights.lifecycle_query_runner import LifecycleQueryRunner -from posthog.hogql_queries.query_runner import CachedQueryResponse -from posthog.models.filters.filter import Filter as LegacyFilter -from posthog.models.filters.path_filter import PathFilter as LegacyPathFilter -from posthog.models.filters.retention_filter import RetentionFilter as LegacyRetentionFilter -from posthog.models.filters.stickiness_filter import StickinessFilter as LegacyStickinessFilter -from posthog.models.insight import Insight -from posthog.models.team.team import Team -from posthog.types import InsightQueryNode - - -# sync with frontend/src/queries/utils.ts -def is_insight_with_hogql_support(insight: Insight): - if insight.filters.get("insight") == "LIFECYCLE": - return True - else: - return False - - -def _insight_to_query(insight: Insight, team: Team) -> InsightQueryNode: - if insight.filters.get("insight") == "RETENTION": - filter = LegacyRetentionFilter(data=insight.filters, team=team) - elif insight.filters.get("insight") == "PATHS": - filter = LegacyPathFilter(data=insight.filters, team=team) - elif insight.filters.get("insight") == "STICKINESS": - filter = LegacyStickinessFilter(data=insight.filters, team=team) - else: - filter = LegacyFilter(data=insight.filters, team=team) - return filter_to_query(filter.to_dict()) - - -def _cached_response_to_insight_result(response: CachedQueryResponse) -> InsightResult: - response_dict = response.model_dump() - result_keys = 
InsightResult.__annotations__.keys() - - # replace 'result' with 'results' for schema compatibility - response_keys = ["results" if key == "result" else key for key in result_keys] - - # use only the keys of the response that are also present in the result - result = InsightResult( - **{result_key: response_dict[response_key] for result_key, response_key in zip(result_keys, response_keys)} - ) - return result - - -def process_insight(insight: Insight, team: Team) -> InsightResult: - query = _insight_to_query(insight, team) - response = LifecycleQueryRunner(query=query, team=team).run(refresh_requested=False) - return _cached_response_to_insight_result(response) diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index 25a903e8393..bb060e220e3 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -1,11 +1,13 @@ from abc import ABC, abstractmethod from datetime import datetime +from enum import IntEnum from typing import Any, Generic, List, Optional, Type, Dict, TypeVar, Union, Tuple, cast, TypeGuard from django.conf import settings from django.core.cache import cache from prometheus_client import Counter from pydantic import BaseModel, ConfigDict +from sentry_sdk import capture_exception, push_scope from posthog.clickhouse.query_tagging import tag_queries from posthog.hogql import ast @@ -17,9 +19,12 @@ from posthog.hogql.timings import HogQLTimings from posthog.metrics import LABEL_TEAM_ID from posthog.models import Team from posthog.schema import ( + DateRange, + FilterLogicalOperator, FunnelCorrelationActorsQuery, FunnelCorrelationQuery, FunnelsActorsQuery, + PropertyGroupFilter, TrendsQuery, FunnelsQuery, RetentionQuery, @@ -57,6 +62,15 @@ QUERY_CACHE_HIT_COUNTER = Counter( DataT = TypeVar("DataT") +class ExecutionMode(IntEnum): + CALCULATION_ALWAYS = 2 + """Always recalculate.""" + RECENT_CACHE_CALCULATE_IF_STALE = 1 + """Use cache, unless the results are missing or stale.""" + 
CACHE_ONLY_NEVER_CALCULATE = 0 + """Do not initiate calculation.""" + + class QueryResponse(BaseModel, Generic[DataT]): model_config = ConfigDict( extra="forbid", @@ -84,6 +98,13 @@ class CachedQueryResponse(QueryResponse): timezone: str +class CacheMissResponse(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + cache_key: str + + RunnableQueryNode = Union[ TrendsQuery, FunnelsQuery, @@ -266,9 +287,12 @@ def get_query_runner( raise ValueError(f"Can't get a runner for an unknown query kind: {kind}") -class QueryRunner(ABC): - query: RunnableQueryNode - query_type: Type[RunnableQueryNode] +Q = TypeVar("Q", bound=RunnableQueryNode) + + +class QueryRunner(ABC, Generic[Q]): + query: Q + query_type: Type[Q] team: Team timings: HogQLTimings modifiers: HogQLQueryModifiers @@ -276,7 +300,7 @@ class QueryRunner(ABC): def __init__( self, - query: RunnableQueryNode | BaseModel | Dict[str, Any], + query: Q | BaseModel | Dict[str, Any], team: Team, timings: Optional[HogQLTimings] = None, modifiers: Optional[HogQLQueryModifiers] = None, @@ -293,7 +317,7 @@ class QueryRunner(ABC): assert isinstance(query, self.query_type) self.query = query - def is_query_node(self, data) -> TypeGuard[RunnableQueryNode]: + def is_query_node(self, data) -> TypeGuard[Q]: return isinstance(data, self.query_type) @abstractmethod @@ -302,21 +326,48 @@ class QueryRunner(ABC): # Due to the way schema.py is generated, we don't have a good inheritance story here. 
raise NotImplementedError() - def run(self, refresh_requested: Optional[bool] = None) -> CachedQueryResponse: + def run( + self, execution_mode: ExecutionMode = ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE + ) -> CachedQueryResponse | CacheMissResponse: cache_key = f"{self._cache_key()}_{self.limit_context or LimitContext.QUERY}" tag_queries(cache_key=cache_key) - if not refresh_requested: - cached_response = get_safe_cache(cache_key) - if cached_response: + if execution_mode != ExecutionMode.CALCULATION_ALWAYS: + # Let's look in the cache first + cached_response: CachedQueryResponse | CacheMissResponse + cached_response_candidate = get_safe_cache(cache_key) + match cached_response_candidate: + case CachedQueryResponse(): + cached_response = cached_response_candidate + cached_response_candidate.is_cached = True + case None: + cached_response = CacheMissResponse(cache_key=cache_key) + case _: + # Whatever's in cache is malformed, so let's treat it as non-existent + cached_response = CacheMissResponse(cache_key=cache_key) + with push_scope() as scope: + scope.set_tag("cache_key", cache_key) + capture_exception( + ValueError(f"Cached response is of unexpected type {type(cached_response_candidate)}, ignoring it") + ) + + if isinstance(cached_response, CachedQueryResponse): if not self._is_stale(cached_response): QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="hit").inc() - cached_response.is_cached = True + # We have a valid result that's fresh enough, let's return it return cached_response else: QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="stale").inc() + # We have a stale result. If we aren't allowed to calculate, let's still return it + # – otherwise let's proceed to calculation + if execution_mode == ExecutionMode.CACHE_ONLY_NEVER_CALCULATE: + return cached_response else: QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="miss").inc() + # We have no cached result.
If we aren't allowed to calculate, let's return the cache miss + # – otherwise let's proceed to calculation + if execution_mode == ExecutionMode.CACHE_ONLY_NEVER_CALCULATE: + return cached_response fresh_response_dict = cast(QueryResponse, self.calculate()).model_dump() fresh_response_dict["is_cached"] = False @@ -369,5 +420,28 @@ class QueryRunner(ABC): def _refresh_frequency(self): raise NotImplementedError() - def apply_dashboard_filters(self, dashboard_filter: DashboardFilter) -> RunnableQueryNode: - raise NotImplementedError() + def apply_dashboard_filters(self, dashboard_filter: DashboardFilter) -> Q: + # The default logic below applies to all insights and a lot of other queries + # Notable exception: `HogQLQuery`, which has `properties` and `dateRange` within `HogQLFilters` + if hasattr(self.query, "properties") and hasattr(self.query, "dateRange"): + query_update: Dict[str, Any] = {} + if dashboard_filter.properties: + if self.query.properties: + query_update["properties"] = PropertyGroupFilter( + type=FilterLogicalOperator.AND, values=[self.query.properties, dashboard_filter.properties] + ) + else: + query_update["properties"] = dashboard_filter.properties + if dashboard_filter.date_from or dashboard_filter.date_to: + date_range_update = {} + if dashboard_filter.date_from: + date_range_update["date_from"] = dashboard_filter.date_from + if dashboard_filter.date_to: + date_range_update["date_to"] = dashboard_filter.date_to + if self.query.dateRange: + query_update["dateRange"] = self.query.dateRange.model_copy(update=date_range_update) + else: + query_update["dateRange"] = DateRange(**date_range_update) + return cast(Q, self.query.model_copy(update=query_update)) # Shallow copy! 
+ + raise NotImplementedError(f"{self.query.__class__.__name__} does not support dashboard filters out of the box") diff --git a/posthog/hogql_queries/test/test_events_query_runner.py b/posthog/hogql_queries/test/test_events_query_runner.py index 9aab4ee14a9..7c8c62c5fb0 100644 --- a/posthog/hogql_queries/test/test_events_query_runner.py +++ b/posthog/hogql_queries/test/test_events_query_runner.py @@ -5,6 +5,7 @@ from freezegun import freeze_time from posthog.hogql import ast from posthog.hogql.ast import CompareOperationOp from posthog.hogql_queries.events_query_runner import EventsQueryRunner +from posthog.hogql_queries.query_runner import CachedQueryResponse from posthog.models import Person, Team from posthog.models.organization import Organization from posthog.schema import ( @@ -84,7 +85,10 @@ class TestEventsQueryRunner(ClickhouseTestMixin, APIBaseTest): ) runner = EventsQueryRunner(query=query, team=self.team) - return runner.run().results + response = runner.run() + assert isinstance(response, CachedQueryResponse) + results = response.results + return results def test_is_not_set_boolean(self): # see https://github.com/PostHog/posthog/issues/18030 diff --git a/posthog/hogql_queries/test/test_query_runner.py b/posthog/hogql_queries/test/test_query_runner.py index 40c991ec1d0..4905feaa2ea 100644 --- a/posthog/hogql_queries/test/test_query_runner.py +++ b/posthog/hogql_queries/test/test_query_runner.py @@ -7,6 +7,9 @@ from freezegun import freeze_time from pydantic import BaseModel from posthog.hogql_queries.query_runner import ( + CacheMissResponse, + CachedQueryResponse, + ExecutionMode, QueryResponse, QueryRunner, ) @@ -125,23 +128,31 @@ class TestQueryRunner(BaseTest): runner = TestQueryRunner(query={"some_attr": "bla"}, team=self.team) with freeze_time(datetime(2023, 2, 4, 13, 37, 42)): + # in cache-only mode, returns cache miss response if uncached + response = runner.run(execution_mode=ExecutionMode.CACHE_ONLY_NEVER_CALCULATE) + 
self.assertIsInstance(response, CacheMissResponse) + # returns fresh response if uncached - response = runner.run(refresh_requested=False) + response = runner.run(execution_mode=ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE) + self.assertIsInstance(response, CachedQueryResponse) self.assertEqual(response.is_cached, False) self.assertEqual(response.last_refresh, "2023-02-04T13:37:42Z") self.assertEqual(response.next_allowed_client_refresh, "2023-02-04T13:41:42Z") # returns cached response afterwards - response = runner.run(refresh_requested=False) + response = runner.run(execution_mode=ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE) + self.assertIsInstance(response, CachedQueryResponse) self.assertEqual(response.is_cached, True) # return fresh response if refresh requested - response = runner.run(refresh_requested=True) + response = runner.run(execution_mode=ExecutionMode.CALCULATION_ALWAYS) + self.assertIsInstance(response, CachedQueryResponse) self.assertEqual(response.is_cached, False) with freeze_time(datetime(2023, 2, 4, 13, 37 + 11, 42)): # returns fresh response if stale - response = runner.run(refresh_requested=False) + response = runner.run(execution_mode=ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE) + self.assertIsInstance(response, CachedQueryResponse) self.assertEqual(response.is_cached, False) def test_modifier_passthrough(self): diff --git a/posthog/models/insight.py b/posthog/models/insight.py index a3057cdb11c..11af57bab3e 100644 --- a/posthog/models/insight.py +++ b/posthog/models/insight.py @@ -169,12 +169,11 @@ class Insight(models.Model): else: return self.filters - def dashboard_query(self, dashboard: Optional[Dashboard]) -> Optional[dict]: + def get_effective_query(self, *, dashboard: Optional[Dashboard]) -> Optional[dict]: + from posthog.hogql_queries.apply_dashboard_filters import apply_dashboard_filters + if not dashboard or not self.query: return self.query - from posthog.hogql_queries.apply_dashboard_filters import ( - 
apply_dashboard_filters, - ) return apply_dashboard_filters(self.query, dashboard.filters, self.team) @@ -197,23 +196,6 @@ class InsightViewed(models.Model): @timed("generate_insight_cache_key") def generate_insight_cache_key(insight: Insight, dashboard: Optional[Dashboard]) -> str: try: - if insight.query is not None: - dashboard_filters = dashboard.filters if dashboard else None - - if dashboard_filters: - from posthog.hogql_queries.apply_dashboard_filters import ( - apply_dashboard_filters, - ) - - q = apply_dashboard_filters(insight.query, dashboard_filters, insight.team) - else: - q = insight.query - - if q.get("source"): - q = q["source"] - - return generate_cache_key("{}_{}_{}".format(q, dashboard_filters, insight.team_id)) - dashboard_insight_filter = get_filter(data=insight.dashboard_filters(dashboard=dashboard), team=insight.team) candidate_filters_hash = generate_cache_key("{}_{}".format(dashboard_insight_filter.toJSON(), insight.team_id)) return candidate_filters_hash diff --git a/posthog/models/test/test_insight_model.py b/posthog/models/test/test_insight_model.py index cc0e52943ff..9933531b100 100644 --- a/posthog/models/test/test_insight_model.py +++ b/posthog/models/test/test_insight_model.py @@ -99,27 +99,6 @@ class TestInsightModel(BaseTest): assert filters_hash_one != filters_hash_two - def test_query_hash_matches_same_query_source(self) -> None: - insight_with_query_at_top_level = Insight.objects.create(team=self.team, query={"kind": "EventsQuery"}) - insight_with_query_in_source = Insight.objects.create( - team=self.team, - query={"kind": "DataTable", "source": {"kind": "EventsQuery"}}, - ) - - filters_hash_one = generate_insight_cache_key(insight_with_query_at_top_level, None) - filters_hash_two = generate_insight_cache_key(insight_with_query_in_source, None) - - assert filters_hash_one == filters_hash_two - - def test_query_hash_varies_with_query_content(self) -> None: - insight_one = Insight.objects.create(team=self.team, query={"kind": 
"EventsQuery"}) - insight_two = Insight.objects.create(team=self.team, query={"kind": "EventsQuery", "anything": "else"}) - - filters_hash_one = generate_insight_cache_key(insight_one, None) - filters_hash_two = generate_insight_cache_key(insight_two, None) - - assert filters_hash_one != filters_hash_two - def test_dashboard_with_query_insight_and_filters(self) -> None: browser_equals_firefox = { "key": "$browser", @@ -245,29 +224,7 @@ class TestInsightModel(BaseTest): ) dashboard = Dashboard.objects.create(team=self.team, filters=dashboard_filters) - data = query_insight.dashboard_query(dashboard) + data = query_insight.get_effective_query(dashboard=dashboard) assert data actual = data["source"]["filters"] assert expected_filters == actual - - def test_query_hash_varies_with_dashboard_filters(self) -> None: - query = { - "kind": "DataTableNode", - "source": { - "filters": {"dateRange": {"date_from": "-14d", "date_to": "-7d"}}, - "kind": "HogQLQuery", - "modifiers": None, - "query": "select * from events where {filters}", - "response": None, - "values": None, - }, - } - dashboard_filters = {"date_from": "-4d", "date_to": "-3d"} - - query_insight = Insight.objects.create(team=self.team, query=query) - dashboard = Dashboard.objects.create(team=self.team, filters=dashboard_filters) - - hash_sans_dashboard = generate_insight_cache_key(query_insight, None) - hash_with_dashboard = generate_insight_cache_key(query_insight, dashboard) - - assert hash_sans_dashboard != hash_with_dashboard diff --git a/posthog/schema.py b/posthog/schema.py index 68471d4510b..5673db2a3bf 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -1184,7 +1184,7 @@ class EventPropertyFilter(BaseModel): ) key: str label: Optional[str] = None - operator: PropertyOperator + operator: Optional[PropertyOperator] = PropertyOperator("exact") type: Literal["event"] = Field(default="event", description="Event properties") value: Optional[Union[str, float, List[Union[str, float]]]] = None