From 95670dc70b24dfccabec3efad0634d3ad67ba360 Mon Sep 17 00:00:00 2001 From: Yakko Majuri <38760734+yakkomajuri@users.noreply.github.com> Date: Fri, 24 Feb 2023 09:18:58 -0300 Subject: [PATCH] fix(sampling): ensure funnel correlation results get corrected for sampling (#14382) * fix(sampling): ensure funnel correlation results get corrected for sampling * adapt signature to round return * update --- ee/clickhouse/queries/funnels/funnel_correlation.py | 4 ++++ posthog/queries/util.py | 9 +++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ee/clickhouse/queries/funnels/funnel_correlation.py b/ee/clickhouse/queries/funnels/funnel_correlation.py index 55543bd7ace..5011bb9324c 100644 --- a/ee/clickhouse/queries/funnels/funnel_correlation.py +++ b/ee/clickhouse/queries/funnels/funnel_correlation.py @@ -29,6 +29,7 @@ from posthog.queries.funnels.utils import get_funnel_order_actor_class from posthog.queries.insight import insight_sync_execute from posthog.queries.person_distinct_id_query import get_team_distinct_ids_query from posthog.queries.person_query import PersonQuery +from posthog.queries.util import correct_result_for_sampling class EventDefinition(TypedDict): @@ -639,6 +640,9 @@ class FunnelCorrelation: event_contingency_tables, success_total, failure_total = self.get_partial_event_contingency_tables() + success_total = correct_result_for_sampling(success_total, self._filter.sampling_factor) + failure_total = correct_result_for_sampling(failure_total, self._filter.sampling_factor) + if not success_total or not failure_total: return [], True diff --git a/posthog/queries/util.py b/posthog/queries/util.py index c5f3ce77a47..ae4c4122142 100644 --- a/posthog/queries/util.py +++ b/posthog/queries/util.py @@ -1,6 +1,6 @@ import json from datetime import datetime, timedelta -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional import pytz from django.utils import timezone @@ -103,8 +103,9 @@ def convert_to_datetime_aware(date_obj): return date_obj -def correct_result_for_sampling(result: Union[int, float], sampling_factor: Optional[float]) -> float: +def correct_result_for_sampling(value: int, sampling_factor: Optional[float]) -> int: if not sampling_factor: - return result + return value - return result * (1 / sampling_factor) + result: int = round(value * (1 / sampling_factor)) + return result