0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-12-01 12:21:02 +01:00
posthog/ee/clickhouse/views/events.py

161 lines
6.9 KiB
Python
Raw Normal View History

import json
from datetime import timedelta
from typing import Any, Dict, List, Optional
from django.utils.timezone import now
from rest_framework.decorators import action
from rest_framework.request import Request
from rest_framework.response import Response
from ee.clickhouse.client import sync_execute
from ee.clickhouse.models.action import format_action_filter
from ee.clickhouse.models.event import ClickhouseEventSerializer, determine_event_conditions
from ee.clickhouse.models.person import get_persons_by_distinct_ids
from ee.clickhouse.models.property import get_property_values_for_key, parse_prop_clauses
from ee.clickhouse.queries.clickhouse_session_recording import SessionRecording
from ee.clickhouse.queries.sessions.list import ClickhouseSessionsList
from ee.clickhouse.sql.events import (
GET_CUSTOM_EVENTS,
SELECT_EVENT_WITH_ARRAY_PROPS_SQL,
SELECT_EVENT_WITH_PROP_SQL,
SELECT_ONE_EVENT_SQL,
)
"Clickhouse Features V2 (#1565)" (#1750) * initial * migration command * migrations working * add modelless views for clickhouse * initial testing structure * use test factory * scaffold for all tests * add insight and person api * add basic readme * add client * change how migrations are run * add base tables * ingesting events * restore delay * remove print * updated testing flow * changed sessions tests * update tests * reorganized sql * parametrize strings * element list query * change to seralizer * add values endpoint * retrieve with filter * pruned code to prepare for staged merge * working ingestion again * tests for ee * undo unneeded tests right now * fix linting * more typing errors * fix tests * add clickhouse image to workflow * move to right job * remove django_clickhouse * return database url * run super * remove keepdb * reordered calls * fix type * fractional seconds * fix type error * add checks * remove retention sql * fix tests * add property storage and tests * merge master * fix tests * fix tests * . * remove keepdb * format python files * update CI env vars * Override defaults and insecure tests * Update how ClickHouse database gets evaluated * remove bootstrapping clickhouse database routine * Don't initialize the clickhouse connection unless we say it's primary * . 
* fixed id generation * remove dump * black settings * empty client * add param * move docker-compose for ch to ee dir * Add _public_ key to repo for verifying self signed cert on server * update ee compose file for ee dir * fix a few issues with tls in migrations * update migrations to be flexible about storage profile and engine * black settings * add elements prop tables * add elements prop tables * working filter * refactored * better url handling * add mapping table * add processing to worker task * working cohort with actions * add cohort property filtering * add cohort property filtering * reformat and add cohort processing * prop clauses * add util * add more util * add clickhouse modifier * Clickhouse Sessions (#1623) * sessions sql * skeleton * add endpoint * better tests * sessions list * merge clickhouse-actions * added session endpoint * sessions sql working again * add clickhouse modifier * session avg with props working * add dist * tests working (no list) * list working * add formatting * more formatting * fix tests * dummy commit * fix types * remove unnecessary improt * ignore type when importing from ee in task * fix test running * Clickhouse Trends Base (#1609) * initial working * date param almost working * fix date range and labels * fixed monthly math * handle compare * change table * using new event ingestion * direct query actions working * remove interface * fix date range * properties initial working * handle operator * handle operator * move timestamp parse * move more to util * inital breaking down working * working cohort breakdown * some tests running * fix sessions * cohort tests * action and interval test * reorder cohort filtering * rename retention test * fix inits * change multitenancy tests * fix types * fix optional types * replace ch_client.execute with sync_execute * replace ch_client.execute with sync_execute, part 2 * Clickhouse Stickiness + Process Event (#1654) * generate clickhouse uuid script * set 
CLICKHOUSE_SECURE=False by default if running in TEST or DEBUG * convert person_id to UUID, make adding `person_id` optional, add distinct_ids already in the `create_person` function * Fix test_process_event_ee.py, remove all calls to Person.objects.* * add back util * fix broken imports * improve process_event test clickhouse queries * Basic stickiness query * Clickhouse Stickiness tests * stickiness test [WIP, actions fail] * generate clickhouse uuid script * change default test runner if PRIMARY_DB=clickhouse * fix stickiness test for actions * fix merge bug * remove _create_person stub; cohort person_id is UUID now * fix typing * Clickhouse trends process math (#1660) * most of process math works * all process math * fix ordering issue * unusued imports * update property comparison for process_event_ee * indentation wrong missing calls * demo users and events (#1661) * finish breakdown filtering tests and reformat label function * add increment to demo_data * update demo data populating * Add people endpoint for ch (#1670) * add people endpoint for ch * stickiness people * fix value padding * add process math to breakdown and * add limit * fix tests * condensed code * converted test to factory * add people tests * add month handling * add typing fix * change people test handling * fix tests * Clickhouse funnels 2 (#1668) * add elements to create_event * WIP closes #1663 Add funnels to clickhouse * Make funnels work * Clean up * Move filtering around * Add mypy tests and fix * Performance improvements * fix person tests again * add people for funnel endpoint * fix prop numbering Co-authored-by: Marius Andra <marius.andra@gmail.com> Co-authored-by: Eric <eeoneric@gmail.com> * merge master * add retention * update types * more typing errors * fix types * bug with kafka payload, elements insert, and demo data * Clickhouse Paths (#1657) * paths clickhouse test (fails) * add elements to create_event * make this fail for clickhouse * hardcoded query that returns good 
results for $pageviews, no filters yet * clean up queries * bound by time, fix 30min new session boundary * support screen and custom events * add properties filter * paths url * filter by path start * better path start test * even better path start test * start from the first "path start" in a group * test for person_id in paths * partition by person_id for POSTGRES paths * partition by person_id for Clickhouse paths * clean up order in paths test * clean up order in paths test * join elements * force element order on element group creation * remove "order" when creating elements in tests and demo * get list of elements for paths * add limit to paths query * use materialized view * rename "element_hash" to "elements_hash" (no change in db) * cull rows that are definitely unused * simplify query * New highly optimized paths clickhouse query * start_point for $autocapture paths * extract event property values from clickhouse * prevent crash * select one element sql * get elements for event * remove lodash * remove host from $pageview path elements if same domain as incoming path * show metadata based on loaded paths filter, not in flight filter * fix order (all soures and targets in order, not all sources first, then all targets after) - makes for a better looking graph * add test that makes the Postgres paths query fail * fix postgres paths --> no fuzzy matching, breaks "starts with" for urls and gives too many incorrect start points * create automatic /demo urls that match the real urls (no ending /) * fix elements queries * path element joins * create persons via postgres in paths test * change serializers back to id * fix tests with uuid * fix demo * more bugs * fix type * change now to timezone aware * [clickhouse] retention filters (#1725) * implemented target entity and prop filtering * add insight view override * fix endpoint and filters * include tests * fix tests * add period filtering * . 
* fix pg param name * add filtering params to both queries in retention sql * fix param again * change to todatetime * change tz to timezone * add back timezone in model/event * [clickhouse] feature flag endpoint requests (#1731) * add feature flags to endpoints * add flags to endpoints that check on request * remove magic strings and fill in missing flags * fix types * add missing flag * change from iso * fix more timestamps and comparator * change _people to get_people in actions view * remove action and cohort populating * change inheritance * "Clickhouse Features V2 (#1565)" This reverts commit 0b371d43eca149cd3632410ea5653b85b2173e39. * fix types * change to super * change to super x2 Co-authored-by: Eric <eeoneric@gmail.com> Co-authored-by: Marius Andra <marius.andra@gmail.com> Co-authored-by: Tim Glaser <tim.glaser@hiberly.com>
2020-09-29 16:17:26 +02:00
from posthog.api.event import EventViewSet
from posthog.models import Filter, Person, Team
from posthog.models.action import Action
from posthog.models.filters.sessions_filter import SessionsFilter
from posthog.models.session_recording_event import SessionRecordingViewed
from posthog.utils import convert_property_value, flatten
class ClickhouseEventsViewSet(EventViewSet):
    """Event API endpoints whose data is read from ClickHouse via ``sync_execute``.

    Overrides ``list`` and ``retrieve`` of ``EventViewSet`` and exposes the
    ``values``, ``sessions`` and ``session_recording`` actions.
    """

    def _get_people(self, query_result: List[Dict], team: Team) -> Dict[str, Any]:
        """Map distinct ids to the ``Person`` that owns them.

        Args:
            query_result: Event rows; index 5 of each row is read as the distinct id.
            team: Team whose persons are looked up.

        Returns:
            Dict keyed by distinct id. Every distinct id of every matched person is
            included, not only the ids that occur in ``query_result``.
        """
        # Many events usually share a distinct id; de-duplicate (keeping first-seen
        # order) so the lookup receives each id once.
        distinct_ids = list(dict.fromkeys(row[5] for row in query_result))
        persons = get_persons_by_distinct_ids(team.pk, distinct_ids)
        distinct_to_person: Dict[str, Person] = {}
        for person in persons:
            for distinct_id in person.distinct_ids:
                distinct_to_person[distinct_id] = person
        return distinct_to_person

    def _query_events_list(
        self, filter: Filter, team: Team, request: Request, long_date_from: bool = False, limit: int = 100
    ) -> List:
        """Run the event list query and return the raw result rows.

        One row more than ``limit`` is requested so that the caller can detect
        whether another page exists.

        Args:
            filter: Parsed filter; its ``properties`` are converted to SQL clauses.
            team: Team the query is scoped to.
            request: Incoming request. Its query parameters override the default
                ``after``/``before`` window and may carry an ``action_id``.
            long_date_from: Forwarded unchanged to ``determine_event_conditions``.
            limit: Page size.

        Returns:
            Rows returned by ``sync_execute``. An empty list when ``action_id`` is
            malformed, does not match an action of this team, or the action has no steps.
        """
        limit_sql = f"LIMIT {limit + 1}"
        current_time = now()
        # Defaults come first so that explicit query parameters win.
        query_params = {
            "after": (current_time - timedelta(days=1)).isoformat(),
            "before": (current_time + timedelta(seconds=5)).isoformat(),
            **request.GET.dict(),
        }
        conditions, condition_params = determine_event_conditions(team, query_params, long_date_from)
        prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)

        action_id = request.GET.get("action_id")
        if action_id:
            try:
                action = Action.objects.get(pk=action_id, team_id=team.pk)
            except (Action.DoesNotExist, ValueError):
                # ValueError: the id is not a valid primary key value; treat it like an unknown action.
                return []
            if not action.steps.exists():
                return []
            action_query, action_params = format_action_filter(action)
            prop_filters += " AND {}".format(action_query)
            prop_filter_params = {**prop_filter_params, **action_params}

        if prop_filters:
            return sync_execute(
                SELECT_EVENT_WITH_PROP_SQL.format(conditions=conditions, limit=limit_sql, filters=prop_filters),
                {"team_id": team.pk, **condition_params, **prop_filter_params},
            )
        return sync_execute(
            SELECT_EVENT_WITH_ARRAY_PROPS_SQL.format(conditions=conditions, limit=limit_sql),
            {"team_id": team.pk, **condition_params},
        )

    def list(self, request: Request, *args: Any, **kwargs: Any) -> Response:
        """Return one page of serialized events plus a ``next`` URL when more exist.

        CSV requests use ``self.CSV_EXPORT_LIMIT`` as the page size and never get a
        ``next`` URL; all other requests use a page size of 100.
        """
        is_csv_request = self.request.accepted_renderer.format == "csv"
        limit = self.CSV_EXPORT_LIMIT if is_csv_request else 100
        team = self.team
        event_filter = Filter(request=request)

        query_result = self._query_events_list(event_filter, team, request, limit=limit)
        # Retry the query without the 1 day optimization
        if len(query_result) < limit and not request.GET.get("after"):
            query_result = self._query_events_list(event_filter, team, request, long_date_from=True, limit=limit)

        # The query fetches limit + 1 rows; the extra row only signals that another page exists.
        page = query_result[:limit]
        result = ClickhouseEventSerializer(page, many=True, context={"people": self._get_people(page, team)}).data

        next_url: Optional[str] = None
        if not is_csv_request and len(query_result) > limit:
            path = request.get_full_path()
            reverse = request.GET.get("orderBy", "-timestamp") != "-timestamp"
            # Index 3 of the last row on this page is formatted as the cursor for the next page.
            next_url = request.build_absolute_uri(
                "{}{}{}={}".format(
                    path,
                    "&" if "?" in path else "?",
                    "after" if reverse else "before",
                    page[-1][3].strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
                )
            )
        return Response({"next": next_url, "results": result})

    def retrieve(self, request: Request, pk: Optional[int] = None, *args: Any, **kwargs: Any) -> Response:
        """Return the serialized event with id ``pk`` for the current team, or 404 when there is none."""
        query_result = sync_execute(SELECT_ONE_EVENT_SQL, {"team_id": self.team.pk, "event_id": pk})
        if not query_result:
            # Without this guard an unknown id surfaced as an IndexError (HTTP 500).
            return Response({"detail": "Not found."}, status=404)
        result = ClickhouseEventSerializer(query_result[0], many=False).data
        return Response(result)

    @action(methods=["GET"], detail=False)
    def values(self, request: Request, **kwargs) -> Response:
        """Return ``[{"name": ...}, ...]`` for the ``key`` query parameter.

        ``key=custom_event`` returns the first column of each ``GET_CUSTOM_EVENTS`` row.
        Any other non-empty key returns the values found for that property, optionally
        narrowed by the ``value`` query parameter. Without a key the list is empty.
        """
        key = request.GET.get("key")
        team = self.team
        if key == "custom_event":
            events = sync_execute(GET_CUSTOM_EVENTS, {"team_id": team.pk})
            return Response([{"name": event[0]} for event in events])

        flattened = []
        if key:
            rows = get_property_values_for_key(key, team, value=request.GET.get("value"))
            for row in rows:
                try:
                    # Try loading as json for dicts or arrays
                    flattened.append(json.loads(row[0]))
                except json.JSONDecodeError:
                    flattened.append(row[0])
        return Response([{"name": convert_property_value(value)} for value in flatten(flattened)])

    @action(methods=["GET"], detail=False)
    def sessions(self, request: Request, *args: Any, **kwargs: Any) -> Response:
        """Return the sessions and pagination produced by ``ClickhouseSessionsList.run`` for this request."""
        sessions_filter = SessionsFilter(request=request)
        sessions, pagination = ClickhouseSessionsList.run(team=self.team, filter=sessions_filter)
        return Response({"result": sessions, "pagination": pagination})

    # ******************************************
    # /event/session_recording
    # params:
    # - session_recording_id: (string) id of the session recording
    # - save_view: (boolean) save view of the recording
    # ******************************************
    @action(methods=["GET"], detail=False)
    def session_recording(self, request: Request, *args: Any, **kwargs: Any) -> Response:
        """Return the recording identified by the ``session_recording_id`` query parameter.

        Responds with HTTP 400 when ``session_recording_id`` is absent. When ``save_view``
        is present and non-empty, a ``SessionRecordingViewed`` row is created for the
        requesting user if one does not exist yet.
        """
        session_recording_id = request.GET.get("session_recording_id")
        if session_recording_id is None:
            # Indexing request.GET directly turned a missing parameter into an HTTP 500.
            return Response({"detail": "session_recording_id is required."}, status=400)

        recording = SessionRecording().run(
            team=self.team, filter=Filter(request=request), session_recording_id=session_recording_id
        )
        # NOTE(review): any non-empty value, including "false", counts as true here — confirm intended.
        if request.GET.get("save_view"):
            SessionRecordingViewed.objects.get_or_create(
                team=self.team, user=request.user, session_id=session_recording_id
            )
        return Response({"result": recording})