0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-12-01 12:21:02 +01:00

Limit events correctly (#2867)

* Only grab recent events in Events list

* Limit events from clickhouse with fallback

* fix events

* Remove ipdb

* Fix
This commit is contained in:
Tim Glaser 2021-01-06 17:35:27 +00:00 committed by GitHub
parent 1ef890fbf2
commit 40f121deb1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 37 additions and 20 deletions

View File

@ -163,13 +163,15 @@ class ClickhouseEventSerializer(serializers.Serializer):
return event[6]
def determine_event_conditions(conditions: Dict[str, Union[str, List[str]]]) -> Tuple[str, Dict]:
def determine_event_conditions(
conditions: Dict[str, Union[str, List[str]]], long_date_from: bool = False
) -> Tuple[str, Dict]:
result = ""
params: Dict[str, Union[str, List[str]]] = {}
for idx, (k, v) in enumerate(conditions.items()):
if not isinstance(v, str):
continue
if k == "after":
if k == "after" and not long_date_from:
timestamp = isoparse(v).strftime("%Y-%m-%d %H:%M:%S.%f")
result += "AND timestamp > %(after)s"
params.update({"after": timestamp})

View File

@ -38,7 +38,7 @@ def format_ch_timestamp(timestamp: datetime, filter, default_hour_min: str = " 0
is_hour_or_min = (filter.interval and filter.interval.lower() == "hour") or (
filter.interval and filter.interval.lower() == "minute"
)
return timestamp.strftime("%Y-%m-%d{}".format(" %H:%M:%S" if is_hour_or_min else default_hour_min))
return timestamp.strftime("%Y-%m-%d{}".format(" %H:%M:%S.%f" if is_hour_or_min else default_hour_min))
def get_earliest_timestamp(team_id: int) -> datetime:

View File

@ -15,6 +15,7 @@ from ee.clickhouse.models.person import get_persons_by_distinct_ids
from ee.clickhouse.models.property import get_property_values_for_key, parse_prop_clauses
from ee.clickhouse.queries.clickhouse_session_recording import SessionRecording
from ee.clickhouse.queries.sessions.list import SESSIONS_LIST_DEFAULT_LIMIT, ClickhouseSessionsList
from ee.clickhouse.queries.util import parse_timestamps
from ee.clickhouse.sql.events import SELECT_EVENT_WITH_ARRAY_PROPS_SQL, SELECT_EVENT_WITH_PROP_SQL, SELECT_ONE_EVENT_SQL
from posthog.api.event import EventViewSet
from posthog.models import Filter, Person, Team
@ -34,39 +35,40 @@ class ClickhouseEventsViewSet(EventViewSet):
distinct_to_person[distinct_id] = person
return distinct_to_person
def list(self, request: Request, *args: Any, **kwargs: Any) -> Response:
team = self.team
data = {}
if request.GET.get("after"):
data.update({"date_from": request.GET["after"]})
else:
data.update({"date_from": now() - timedelta(days=1)})
if request.GET.get("before"):
data.update({"date_to": request.GET["before"]})
filter = Filter(data=data, request=request)
def _query_events_list(self, filter: Filter, team: Team, request: Request, long_date_from: bool = False) -> List:
limit = "LIMIT 101"
conditions, condition_params = determine_event_conditions(request.GET.dict())
conditions, condition_params = determine_event_conditions(request.GET.dict(), long_date_from)
prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)
if request.GET.get("action_id"):
action = Action.objects.get(pk=request.GET["action_id"])
if action.steps.count() == 0:
return Response({"next": False, "results": []})
return []
action_query, params = format_action_filter(action)
prop_filters += " AND {}".format(action_query)
prop_filter_params = {**prop_filter_params, **params}
if prop_filters != "":
query_result = sync_execute(
return sync_execute(
SELECT_EVENT_WITH_PROP_SQL.format(conditions=conditions, limit=limit, filters=prop_filters),
{"team_id": team.pk, **condition_params, **prop_filter_params},
)
else:
query_result = sync_execute(
return sync_execute(
SELECT_EVENT_WITH_ARRAY_PROPS_SQL.format(conditions=conditions, limit=limit),
{"team_id": team.pk, **condition_params},
)
def list(self, request: Request, *args: Any, **kwargs: Any) -> Response:
team = self.team
filter = Filter(request=request)
query_result = self._query_events_list(filter, team, request)
# Retry the query without the 1 day optimization
if len(query_result) < 100 and not request.GET.get("after"):
query_result = self._query_events_list(filter, team, request, long_date_from=True)
result = ClickhouseEventSerializer(
query_result[0:100], many=True, context={"people": self._get_people(query_result, team),},
).data

View File

@ -1,7 +1,10 @@
from uuid import uuid4
from freezegun import freeze_time
from ee.clickhouse.models.event import create_event
from ee.clickhouse.util import ClickhouseTestMixin
from posthog.api.test.base import TransactionBaseTest
from posthog.api.test.test_event import test_event_api_factory
from posthog.models import Action, ActionStep, Event, Person

View File

@ -1,4 +1,5 @@
import json
from datetime import timedelta
from dateutil.relativedelta import relativedelta
from django.utils import timezone
@ -245,8 +246,7 @@ def test_event_api_factory(event_factory, person_factory, action_factory):
team=self.team,
event="some event",
distinct_id="1",
timestamp=timezone.datetime(2019, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
+ relativedelta(days=idx, seconds=idx),
timestamp=timezone.now() - relativedelta(months=11) + relativedelta(days=idx, seconds=idx),
)
response = self.client.get("/api/event/?distinct_id=1").json()
self.assertEqual(len(response["results"]), 100)
@ -339,6 +339,16 @@ def test_event_api_factory(event_factory, person_factory, action_factory):
self.assertEqual(len(response_person_1["result"]), 1)
def test_optimize_query(self):
    """Older events are still listed when there are few recent ones.

    The ClickHouse events list normally queries only the last day; when
    a user does not have many events, older events should be returned
    as well.
    """
    old_timestamp = timezone.now() - timedelta(days=25)
    event_factory(event="pageview", timestamp=old_timestamp, team=self.team, distinct_id="user1")
    event_factory(event="pageview", timestamp=timezone.now(), team=self.team, distinct_id="user1")

    payload = self.client.get("/api/event/").json()
    results = payload["results"]
    # Both the 25-day-old event and the fresh one must be present.
    self.assertEqual(len(results), 2)
return TestEvents