From 674e8fe9e5c639c1a6a959a959f03a2cac536315 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Tue, 5 Nov 2024 14:29:31 +0100 Subject: [PATCH] fix(events): Account for project timezone in absolute range `EventsQuery` (#25832) Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- posthog/hogql_queries/events_query_runner.py | 11 +-- .../test_events_query_runner.ambr | 49 ++++++++++++ .../test/test_events_query_runner.py | 77 ++++++++++++++++--- 3 files changed, 119 insertions(+), 18 deletions(-) create mode 100644 posthog/hogql_queries/test/__snapshots__/test_events_query_runner.ambr diff --git a/posthog/hogql_queries/events_query_runner.py b/posthog/hogql_queries/events_query_runner.py index 72f843ef607..cf8435c0bb4 100644 --- a/posthog/hogql_queries/events_query_runner.py +++ b/posthog/hogql_queries/events_query_runner.py @@ -1,7 +1,6 @@ from datetime import timedelta from typing import Optional -from dateutil.parser import isoparse from django.db.models import Prefetch from django.utils.timezone import now import orjson @@ -133,10 +132,7 @@ class EventsQueryRunner(QueryRunner): with self.timings.measure("timestamps"): # prevent accidentally future events from being visible by default before = self.query.before or (now() + timedelta(seconds=5)).isoformat() - try: - parsed_date = isoparse(before) - except ValueError: - parsed_date = relative_date_parse(before, self.team.timezone_info) + parsed_date = relative_date_parse(before, self.team.timezone_info) where_exprs.append( parse_expr( "timestamp < {timestamp}", @@ -148,10 +144,7 @@ class EventsQueryRunner(QueryRunner): # limit to the last 24h by default after = self.query.after or "-24h" if after != "all": - try: - parsed_date = isoparse(after) - except ValueError: - parsed_date = relative_date_parse(after, self.team.timezone_info) + parsed_date = relative_date_parse(after, self.team.timezone_info) where_exprs.append( parse_expr( "timestamp > {timestamp}", diff --git a/posthog/hogql_queries/test/__snapshots__/test_events_query_runner.ambr b/posthog/hogql_queries/test/__snapshots__/test_events_query_runner.ambr new file mode 100644 index 00000000000..9e58910ec5d --- /dev/null +++ b/posthog/hogql_queries/test/__snapshots__/test_events_query_runner.ambr @@ -0,0 +1,49 @@ +# serializer version: 1 +# name: TestEventsQueryRunner.test_absolute_date_range + ''' + SELECT tuple(events.uuid, events.event, events.properties, toTimeZone(events.timestamp, 'UTC'), events.team_id, events.distinct_id, events.elements_chain, toTimeZone(events.created_at, 'UTC')) + FROM events + WHERE and(equals(events.team_id, 99999), equals(events.event, '$pageview'), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-12 23:59:59.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-12 00:00:00.000000', 6, 'UTC'))) + ORDER BY toTimeZone(events.timestamp, 'UTC') ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestEventsQueryRunner.test_absolute_date_range_minus_utc + ''' + SELECT tuple(events.uuid, events.event, events.properties, toTimeZone(events.timestamp, 'America/Phoenix'), events.team_id, events.distinct_id, events.elements_chain, toTimeZone(events.created_at, 'America/Phoenix')) + FROM events + WHERE and(equals(events.team_id, 99999), equals(events.event, '$pageview'), less(toTimeZone(events.timestamp, 'America/Phoenix'), toDateTime64('2020-01-12 23:59:59.000000', 6, 'America/Phoenix')), greater(toTimeZone(events.timestamp, 'America/Phoenix'), toDateTime64('2020-01-12 00:00:00.000000', 6, 'America/Phoenix'))) + ORDER BY toTimeZone(events.timestamp, 'America/Phoenix') ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestEventsQueryRunner.test_absolute_date_range_plus_utc + ''' + SELECT tuple(events.uuid, events.event, events.properties, toTimeZone(events.timestamp, 'Asia/Tokyo'), events.team_id, events.distinct_id, events.elements_chain, toTimeZone(events.created_at, 'Asia/Tokyo')) + FROM events + WHERE and(equals(events.team_id, 99999), equals(events.event, '$pageview'), less(toTimeZone(events.timestamp, 'Asia/Tokyo'), toDateTime64('2020-01-12 23:59:59.000000', 6, 'Asia/Tokyo')), greater(toTimeZone(events.timestamp, 'Asia/Tokyo'), toDateTime64('2020-01-12 00:00:00.000000', 6, 'Asia/Tokyo'))) + ORDER BY toTimeZone(events.timestamp, 'Asia/Tokyo') ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- diff --git a/posthog/hogql_queries/test/test_events_query_runner.py b/posthog/hogql_queries/test/test_events_query_runner.py index 073d082ca2f..b3407752861 100644 --- a/posthog/hogql_queries/test/test_events_query_runner.py +++ b/posthog/hogql_queries/test/test_events_query_runner.py @@ -1,7 +1,7 @@ from typing import Any, cast from freezegun import freeze_time - +from datetime import datetime from posthog.hogql import ast from posthog.hogql.ast import CompareOperationOp from posthog.hogql_queries.events_query_runner import EventsQueryRunner @@ -18,7 +18,9 @@ from posthog.test.base import ( ClickhouseTestMixin, _create_event, _create_person, + also_test_with_different_timezones, flush_persons_and_events, + snapshot_clickhouse_queries, ) @@ -27,17 +29,20 @@ class TestEventsQueryRunner(ClickhouseTestMixin, APIBaseTest): def _create_events(self, data: list[tuple[str, str, Any]], event="$pageview"): person_result = [] + distinct_ids_handled = set() for distinct_id, timestamp, event_properties in data: with freeze_time(timestamp): - person_result.append( - _create_person( - team_id=self.team.pk, - distinct_ids=[distinct_id], - properties={ - "name": distinct_id, - }, + if distinct_id not in distinct_ids_handled: + person_result.append( + _create_person( + team_id=self.team.pk, + distinct_ids=[distinct_id], + properties={ + "name": distinct_id, + }, + ) ) - ) + distinct_ids_handled.add(distinct_id) _create_event( team=self.team, event=event, @@ -241,3 +246,57 @@ class TestEventsQueryRunner(ClickhouseTestMixin, APIBaseTest): assert isinstance(response, CachedEventsQueryResponse) assert len(response.results) == 1 assert response.results[0][0]["properties"]["arr_field"] == [DOUBLE_QUOTE] + + @also_test_with_different_timezones + @snapshot_clickhouse_queries + def test_absolute_date_range(self): + self._create_events( + data=[ + ( # Event two hours BEFORE THE START of the day + "p17", + "2020-01-11T22:00:00", + {}, + ), + ( # Event one hour after the start of the day + "p2", + "2020-01-12T01:00:00", + {}, + ), + ( # Event right in the middle of the day + "p3", + "2020-01-12T12:00:00", + {}, + ), + ( # Event one hour before the end of the day + "p1", + "2020-01-12T23:00:00", + {}, + ), + ( # Event two hours AFTER THE END of the day + "p3", + "2020-01-13T02:00:00", + {}, + ), + ] + ) + + flush_persons_and_events() + + query = EventsQuery( + after="2020-01-12", + before="2020-01-12T23:59:59", + event="$pageview", + kind="EventsQuery", + orderBy=["timestamp ASC"], + select=["*"], + ) + + runner = EventsQueryRunner(query=query, team=self.team) + + response = runner.run() + assert isinstance(response, CachedEventsQueryResponse) + assert [row[0]["timestamp"] for row in response.results] == [ + datetime(2020, 1, 12, 1, 0, 0, tzinfo=self.team.timezone_info), + datetime(2020, 1, 12, 12, 0, 0, tzinfo=self.team.timezone_info), + datetime(2020, 1, 12, 23, 0, 0, tzinfo=self.team.timezone_info), + ]