0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-24 00:47:50 +01:00

fix(events): Account for project timezone in absolute range EventsQuery (#25832)

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Michael Matloka 2024-11-05 14:29:31 +01:00 committed by GitHub
parent d8d520d1fc
commit 674e8fe9e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 119 additions and 18 deletions

View File

@ -1,7 +1,6 @@
from datetime import timedelta
from typing import Optional
from dateutil.parser import isoparse
from django.db.models import Prefetch
from django.utils.timezone import now
import orjson
@ -133,10 +132,7 @@ class EventsQueryRunner(QueryRunner):
with self.timings.measure("timestamps"):
# prevent accidentally future events from being visible by default
before = self.query.before or (now() + timedelta(seconds=5)).isoformat()
try:
parsed_date = isoparse(before)
except ValueError:
parsed_date = relative_date_parse(before, self.team.timezone_info)
parsed_date = relative_date_parse(before, self.team.timezone_info)
where_exprs.append(
parse_expr(
"timestamp < {timestamp}",
@ -148,10 +144,7 @@ class EventsQueryRunner(QueryRunner):
# limit to the last 24h by default
after = self.query.after or "-24h"
if after != "all":
try:
parsed_date = isoparse(after)
except ValueError:
parsed_date = relative_date_parse(after, self.team.timezone_info)
parsed_date = relative_date_parse(after, self.team.timezone_info)
where_exprs.append(
parse_expr(
"timestamp > {timestamp}",

View File

@ -0,0 +1,49 @@
# serializer version: 1
# name: TestEventsQueryRunner.test_absolute_date_range
'''
SELECT tuple(events.uuid, events.event, events.properties, toTimeZone(events.timestamp, 'UTC'), events.team_id, events.distinct_id, events.elements_chain, toTimeZone(events.created_at, 'UTC'))
FROM events
WHERE and(equals(events.team_id, 99999), equals(events.event, '$pageview'), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-12 23:59:59.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-12 00:00:00.000000', 6, 'UTC')))
ORDER BY toTimeZone(events.timestamp, 'UTC') ASC
LIMIT 101
OFFSET 0 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=0
'''
# ---
# name: TestEventsQueryRunner.test_absolute_date_range_minus_utc
'''
SELECT tuple(events.uuid, events.event, events.properties, toTimeZone(events.timestamp, 'America/Phoenix'), events.team_id, events.distinct_id, events.elements_chain, toTimeZone(events.created_at, 'America/Phoenix'))
FROM events
WHERE and(equals(events.team_id, 99999), equals(events.event, '$pageview'), less(toTimeZone(events.timestamp, 'America/Phoenix'), toDateTime64('2020-01-12 23:59:59.000000', 6, 'America/Phoenix')), greater(toTimeZone(events.timestamp, 'America/Phoenix'), toDateTime64('2020-01-12 00:00:00.000000', 6, 'America/Phoenix')))
ORDER BY toTimeZone(events.timestamp, 'America/Phoenix') ASC
LIMIT 101
OFFSET 0 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=0
'''
# ---
# name: TestEventsQueryRunner.test_absolute_date_range_plus_utc
'''
SELECT tuple(events.uuid, events.event, events.properties, toTimeZone(events.timestamp, 'Asia/Tokyo'), events.team_id, events.distinct_id, events.elements_chain, toTimeZone(events.created_at, 'Asia/Tokyo'))
FROM events
WHERE and(equals(events.team_id, 99999), equals(events.event, '$pageview'), less(toTimeZone(events.timestamp, 'Asia/Tokyo'), toDateTime64('2020-01-12 23:59:59.000000', 6, 'Asia/Tokyo')), greater(toTimeZone(events.timestamp, 'Asia/Tokyo'), toDateTime64('2020-01-12 00:00:00.000000', 6, 'Asia/Tokyo')))
ORDER BY toTimeZone(events.timestamp, 'Asia/Tokyo') ASC
LIMIT 101
OFFSET 0 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=0
'''
# ---

View File

@ -1,7 +1,7 @@
from typing import Any, cast
from freezegun import freeze_time
from datetime import datetime
from posthog.hogql import ast
from posthog.hogql.ast import CompareOperationOp
from posthog.hogql_queries.events_query_runner import EventsQueryRunner
@ -18,7 +18,9 @@ from posthog.test.base import (
ClickhouseTestMixin,
_create_event,
_create_person,
also_test_with_different_timezones,
flush_persons_and_events,
snapshot_clickhouse_queries,
)
@ -27,17 +29,20 @@ class TestEventsQueryRunner(ClickhouseTestMixin, APIBaseTest):
def _create_events(self, data: list[tuple[str, str, Any]], event="$pageview"):
person_result = []
distinct_ids_handled = set()
for distinct_id, timestamp, event_properties in data:
with freeze_time(timestamp):
person_result.append(
_create_person(
team_id=self.team.pk,
distinct_ids=[distinct_id],
properties={
"name": distinct_id,
},
if distinct_id not in distinct_ids_handled:
person_result.append(
_create_person(
team_id=self.team.pk,
distinct_ids=[distinct_id],
properties={
"name": distinct_id,
},
)
)
)
distinct_ids_handled.add(distinct_id)
_create_event(
team=self.team,
event=event,
@ -241,3 +246,57 @@ class TestEventsQueryRunner(ClickhouseTestMixin, APIBaseTest):
assert isinstance(response, CachedEventsQueryResponse)
assert len(response.results) == 1
assert response.results[0][0]["properties"]["arr_field"] == [DOUBLE_QUOTE]
@also_test_with_different_timezones
@snapshot_clickhouse_queries
def test_absolute_date_range(self):
self._create_events(
data=[
( # Event two hours BEFORE THE START of the day
"p17",
"2020-01-11T22:00:00",
{},
),
( # Event one hour after the start of the day
"p2",
"2020-01-12T01:00:00",
{},
),
( # Event right in the middle of the day
"p3",
"2020-01-12T12:00:00",
{},
),
( # Event one hour before the end of the day
"p1",
"2020-01-12T23:00:00",
{},
),
( # Event two hours AFTER THE END of the day
"p3",
"2020-01-13T02:00:00",
{},
),
]
)
flush_persons_and_events()
query = EventsQuery(
after="2020-01-12",
before="2020-01-12T23:59:59",
event="$pageview",
kind="EventsQuery",
orderBy=["timestamp ASC"],
select=["*"],
)
runner = EventsQueryRunner(query=query, team=self.team)
response = runner.run()
assert isinstance(response, CachedEventsQueryResponse)
assert [row[0]["timestamp"] for row in response.results] == [
datetime(2020, 1, 12, 1, 0, 0, tzinfo=self.team.timezone_info),
datetime(2020, 1, 12, 12, 0, 0, tzinfo=self.team.timezone_info),
datetime(2020, 1, 12, 23, 0, 0, tzinfo=self.team.timezone_info),
]