mirror of
https://github.com/PostHog/posthog.git
synced 2024-12-01 12:21:02 +01:00
Limit events correctly (#2867)
* Only grab recent events in Events list
* Limit events from clickhouse with fallback
* fix events
* Remove ipdb
* Fix
This commit is contained in:
parent
1ef890fbf2
commit
40f121deb1
@ -163,13 +163,15 @@ class ClickhouseEventSerializer(serializers.Serializer):
|
||||
return event[6]
|
||||
|
||||
|
||||
def determine_event_conditions(conditions: Dict[str, Union[str, List[str]]]) -> Tuple[str, Dict]:
|
||||
def determine_event_conditions(
|
||||
conditions: Dict[str, Union[str, List[str]]], long_date_from: bool = False
|
||||
) -> Tuple[str, Dict]:
|
||||
result = ""
|
||||
params: Dict[str, Union[str, List[str]]] = {}
|
||||
for idx, (k, v) in enumerate(conditions.items()):
|
||||
if not isinstance(v, str):
|
||||
continue
|
||||
if k == "after":
|
||||
if k == "after" and not long_date_from:
|
||||
timestamp = isoparse(v).strftime("%Y-%m-%d %H:%M:%S.%f")
|
||||
result += "AND timestamp > %(after)s"
|
||||
params.update({"after": timestamp})
|
||||
|
@ -38,7 +38,7 @@ def format_ch_timestamp(timestamp: datetime, filter, default_hour_min: str = " 0
|
||||
is_hour_or_min = (filter.interval and filter.interval.lower() == "hour") or (
|
||||
filter.interval and filter.interval.lower() == "minute"
|
||||
)
|
||||
return timestamp.strftime("%Y-%m-%d{}".format(" %H:%M:%S" if is_hour_or_min else default_hour_min))
|
||||
return timestamp.strftime("%Y-%m-%d{}".format(" %H:%M:%S.%f" if is_hour_or_min else default_hour_min))
|
||||
|
||||
|
||||
def get_earliest_timestamp(team_id: int) -> datetime:
|
||||
|
@ -15,6 +15,7 @@ from ee.clickhouse.models.person import get_persons_by_distinct_ids
|
||||
from ee.clickhouse.models.property import get_property_values_for_key, parse_prop_clauses
|
||||
from ee.clickhouse.queries.clickhouse_session_recording import SessionRecording
|
||||
from ee.clickhouse.queries.sessions.list import SESSIONS_LIST_DEFAULT_LIMIT, ClickhouseSessionsList
|
||||
from ee.clickhouse.queries.util import parse_timestamps
|
||||
from ee.clickhouse.sql.events import SELECT_EVENT_WITH_ARRAY_PROPS_SQL, SELECT_EVENT_WITH_PROP_SQL, SELECT_ONE_EVENT_SQL
|
||||
from posthog.api.event import EventViewSet
|
||||
from posthog.models import Filter, Person, Team
|
||||
@ -34,39 +35,40 @@ class ClickhouseEventsViewSet(EventViewSet):
|
||||
distinct_to_person[distinct_id] = person
|
||||
return distinct_to_person
|
||||
|
||||
def list(self, request: Request, *args: Any, **kwargs: Any) -> Response:
|
||||
team = self.team
|
||||
data = {}
|
||||
if request.GET.get("after"):
|
||||
data.update({"date_from": request.GET["after"]})
|
||||
else:
|
||||
data.update({"date_from": now() - timedelta(days=1)})
|
||||
if request.GET.get("before"):
|
||||
data.update({"date_to": request.GET["before"]})
|
||||
filter = Filter(data=data, request=request)
|
||||
|
||||
def _query_events_list(self, filter: Filter, team: Team, request: Request, long_date_from: bool = False) -> List:
|
||||
limit = "LIMIT 101"
|
||||
conditions, condition_params = determine_event_conditions(request.GET.dict())
|
||||
conditions, condition_params = determine_event_conditions(request.GET.dict(), long_date_from)
|
||||
prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)
|
||||
|
||||
if request.GET.get("action_id"):
|
||||
action = Action.objects.get(pk=request.GET["action_id"])
|
||||
if action.steps.count() == 0:
|
||||
return Response({"next": False, "results": []})
|
||||
return []
|
||||
action_query, params = format_action_filter(action)
|
||||
prop_filters += " AND {}".format(action_query)
|
||||
prop_filter_params = {**prop_filter_params, **params}
|
||||
|
||||
if prop_filters != "":
|
||||
query_result = sync_execute(
|
||||
return sync_execute(
|
||||
SELECT_EVENT_WITH_PROP_SQL.format(conditions=conditions, limit=limit, filters=prop_filters),
|
||||
{"team_id": team.pk, **condition_params, **prop_filter_params},
|
||||
)
|
||||
else:
|
||||
query_result = sync_execute(
|
||||
return sync_execute(
|
||||
SELECT_EVENT_WITH_ARRAY_PROPS_SQL.format(conditions=conditions, limit=limit),
|
||||
{"team_id": team.pk, **condition_params},
|
||||
)
|
||||
|
||||
def list(self, request: Request, *args: Any, **kwargs: Any) -> Response:
|
||||
team = self.team
|
||||
filter = Filter(request=request)
|
||||
|
||||
query_result = self._query_events_list(filter, team, request)
|
||||
|
||||
# Retry the query without the 1 day optimization
|
||||
if len(query_result) < 100 and not request.GET.get("after"):
|
||||
query_result = self._query_events_list(filter, team, request, long_date_from=True)
|
||||
|
||||
result = ClickhouseEventSerializer(
|
||||
query_result[0:100], many=True, context={"people": self._get_people(query_result, team),},
|
||||
).data
|
||||
|
@ -1,7 +1,10 @@
|
||||
from uuid import uuid4
|
||||
|
||||
from freezegun import freeze_time
|
||||
|
||||
from ee.clickhouse.models.event import create_event
|
||||
from ee.clickhouse.util import ClickhouseTestMixin
|
||||
from posthog.api.test.base import TransactionBaseTest
|
||||
from posthog.api.test.test_event import test_event_api_factory
|
||||
from posthog.models import Action, ActionStep, Event, Person
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
import json
|
||||
from datetime import timedelta
|
||||
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from django.utils import timezone
|
||||
@ -245,8 +246,7 @@ def test_event_api_factory(event_factory, person_factory, action_factory):
|
||||
team=self.team,
|
||||
event="some event",
|
||||
distinct_id="1",
|
||||
timestamp=timezone.datetime(2019, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
|
||||
+ relativedelta(days=idx, seconds=idx),
|
||||
timestamp=timezone.now() - relativedelta(months=11) + relativedelta(days=idx, seconds=idx),
|
||||
)
|
||||
response = self.client.get("/api/event/?distinct_id=1").json()
|
||||
self.assertEqual(len(response["results"]), 100)
|
||||
@ -339,6 +339,16 @@ def test_event_api_factory(event_factory, person_factory, action_factory):
|
||||
|
||||
self.assertEqual(len(response_person_1["result"]), 1)
|
||||
|
||||
def test_optimize_query(self):
|
||||
# For ClickHouse we normally only query the last day,
|
||||
# but if a user doesn't have many events we still want to return events that are older
|
||||
event_factory(
|
||||
event="pageview", timestamp=timezone.now() - timedelta(days=25), team=self.team, distinct_id="user1"
|
||||
)
|
||||
event_factory(event="pageview", timestamp=timezone.now(), team=self.team, distinct_id="user1")
|
||||
response = self.client.get("/api/event/").json()
|
||||
self.assertEqual(len(response["results"]), 2)
|
||||
|
||||
return TestEvents
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user