0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-12-01 04:12:23 +01:00
posthog/ee/clickhouse/models/event.py

132 lines
4.4 KiB
Python
Raw Normal View History

2020-09-09 03:55:01 +02:00
import json
import uuid
from datetime import datetime, timezone
from typing import Dict, List, Optional, Tuple, Union
2020-09-09 03:55:01 +02:00
import pytz
from dateutil.parser import isoparse
from django.utils.timezone import now
2020-09-09 03:55:01 +02:00
from rest_framework import serializers
from ee.clickhouse.client import sync_execute
from ee.clickhouse.models.element import create_elements
"Clickhouse Features V2 (#1565)" (#1750) * initial * migration command * migrations working * add modelless views for clickhouse * initial testing structure * use test factory * scaffold for all tests * add insight and person api * add basic readme * add client * change how migrations are run * add base tables * ingesting events * restore delay * remove print * updated testing flow * changed sessions tests * update tests * reorganized sql * parametrize strings * element list query * change to seralizer * add values endpoint * retrieve with filter * pruned code to prepare for staged merge * working ingestion again * tests for ee * undo unneeded tests right now * fix linting * more typing errors * fix tests * add clickhouse image to workflow * move to right job * remove django_clickhouse * return database url * run super * remove keepdb * reordered calls * fix type * fractional seconds * fix type error * add checks * remove retention sql * fix tests * add property storage and tests * merge master * fix tests * fix tests * . * remove keepdb * format python files * update CI env vars * Override defaults and insecure tests * Update how ClickHouse database gets evaluated * remove bootstrapping clickhouse database routine * Don't initialize the clickhouse connection unless we say it's primary * . * fixed id generation * remove dump * black settings * empty client * add param * move docker-compose for ch to ee dir * Add _public_ key to repo for verifying self signed cert on server * update ee compose file for ee dir * fix a few issues with tls in migrations * update migrations to be flexible about storage profile and engine * black settings * add elements prop tables * add elements prop tables * working filter * refactored * better url handling * add mapping table * add processing to worker task * working cohort with actions * add cohort property filtering * add cohort property filtering * reformat and add cohort processing * prop clauses * add util * add more util * add clickhouse modifier * Clickhouse Sessions (#1623) * sessions sql * skeleton * add endpoint * better tests * sessions list * merge clickhouse-actions * added session endpoint * sessions sql working again * add clickhouse modifier * session avg with props working * add dist * tests working (no list) * list working * add formatting * more formatting * fix tests * dummy commit * fix types * remove unnecessary improt * ignore type when importing from ee in task * fix test running * Clickhouse Trends Base (#1609) * initial working * date param almost working * fix date range and labels * fixed monthly math * handle compare * change table * using new event ingestion * direct query actions working * remove interface * fix date range * properties initial working * handle operator * handle operator * move timestamp parse * move more to util * inital breaking down working * working cohort breakdown * some tests running * fix sessions * cohort tests * action and interval test * reorder cohort filtering * rename retention test * fix inits * change multitenancy tests * fix types * fix optional types * replace ch_client.execute with sync_execute * replace ch_client.execute with sync_execute, part 2 * Clickhouse Stickiness + Process Event (#1654) * generate clickhouse uuid script * set CLICKHOUSE_SECURE=False by default if running in TEST or DEBUG * convert person_id to UUID, make adding `person_id` optional, add distinct_ids already in the `create_person` function * Fix test_process_event_ee.py, remove all calls to Person.objects.* * add back util * fix broken imports * improve process_event test clickhouse queries * Basic stickiness query * Clickhouse Stickiness tests * stickiness test [WIP, actions fail] * generate clickhouse uuid script * change default test runner if PRIMARY_DB=clickhouse * fix stickiness test for actions * fix merge bug * remove _create_person stub; cohort person_id is UUID now * fix typing * Clickhouse trends process math (#1660) * most of process math works * all process math * fix ordering issue * unusued imports * update property comparison for process_event_ee * indentation wrong missing calls * demo users and events (#1661) * finish breakdown filtering tests and reformat label function * add increment to demo_data * update demo data populating * Add people endpoint for ch (#1670) * add people endpoint for ch * stickiness people * fix value padding * add process math to breakdown and * add limit * fix tests * condensed code * converted test to factory * add people tests * add month handling * add typing fix * change people test handling * fix tests * Clickhouse funnels 2 (#1668) * add elements to create_event * WIP closes #1663 Add funnels to clickhouse * Make funnels work * Clean up * Move filtering around * Add mypy tests and fix * Performance improvements * fix person tests again * add people for funnel endpoint * fix prop numbering Co-authored-by: Marius Andra <marius.andra@gmail.com> Co-authored-by: Eric <eeoneric@gmail.com> * merge master * add retention * update types * more typing errors * fix types * bug with kafka payload, elements insert, and demo data * Clickhouse Paths (#1657) * paths clickhouse test (fails) * add elements to create_event * make this fail for clickhouse * hardcoded query that returns good results for $pageviews, no filters yet * clean up queries * bound by time, fix 30min new session boundary * support screen and custom events * add properties filter * paths url * filter by path start * better path start test * even better path start test * start from the first "path start" in a group * test for person_id in paths * partition by person_id for POSTGRES paths * partition by person_id for Clickhouse paths * clean up order in paths test * clean up order in paths test * join elements * force element order on element group creation * remove "order" when creating elements in tests and demo * get list of elements for paths * add limit to paths query * use materialized view * rename "element_hash" to "elements_hash" (no change in db) * cull rows that are definitely unused * simplify query * New highly optimized paths clickhouse query * start_point for $autocapture paths * extract event property values from clickhouse * prevent crash * select one element sql * get elements for event * remove lodash * remove host from $pageview path elements if same domain as incoming path * show metadata based on loaded paths filter, not in flight filter * fix order (all soures and targets in order, not all sources first, then all targets after) - makes for a better looking graph * add test that makes the Postgres paths query fail * fix postgres paths --> no fuzzy matching, breaks "starts with" for urls and gives too many incorrect start points * create automatic /demo urls that match the real urls (no ending /) * fix elements queries * path element joins * create persons via postgres in paths test * change serializers back to id * fix tests with uuid * fix demo * more bugs * fix type * change now to timezone aware * [clickhouse] retention filters (#1725) * implemented target entity and prop filtering * add insight view override * fix endpoint and filters * include tests * fix tests * add period filtering * . * fix pg param name * add filtering params to both queries in retention sql * fix param again * change to todatetime * change tz to timezone * add back timezone in model/event * [clickhouse] feature flag endpoint requests (#1731) * add feature flags to endpoints * add flags to endpoints that check on request * remove magic strings and fill in missing flags * fix types * add missing flag * change from iso * fix more timestamps and comparator * change _people to get_people in actions view * remove action and cohort populating * change inheritance * "Clickhouse Features V2 (#1565)" This reverts commit 0b371d43eca149cd3632410ea5653b85b2173e39. * fix types * change to super * change to super x2 Co-authored-by: Eric <eeoneric@gmail.com> Co-authored-by: Marius Andra <marius.andra@gmail.com> Co-authored-by: Tim Glaser <tim.glaser@hiberly.com>
2020-09-29 16:17:26 +02:00
from ee.clickhouse.sql.events import GET_EVENTS_BY_TEAM_SQL, GET_EVENTS_SQL, INSERT_EVENT_SQL
from ee.kafka.client import ClickhouseProducer
from ee.kafka.topics import KAFKA_EVENTS
from posthog.models.element import Element
2020-09-09 03:55:01 +02:00
from posthog.models.team import Team
def create_event(
event_uuid: uuid.UUID,
2020-09-09 03:55:01 +02:00
event: str,
team: Team,
distinct_id: str,
"Clickhouse Features V2 (#1565)" (#1750) * initial * migration command * migrations working * add modelless views for clickhouse * initial testing structure * use test factory * scaffold for all tests * add insight and person api * add basic readme * add client * change how migrations are run * add base tables * ingesting events * restore delay * remove print * updated testing flow * changed sessions tests * update tests * reorganized sql * parametrize strings * element list query * change to seralizer * add values endpoint * retrieve with filter * pruned code to prepare for staged merge * working ingestion again * tests for ee * undo unneeded tests right now * fix linting * more typing errors * fix tests * add clickhouse image to workflow * move to right job * remove django_clickhouse * return database url * run super * remove keepdb * reordered calls * fix type * fractional seconds * fix type error * add checks * remove retention sql * fix tests * add property storage and tests * merge master * fix tests * fix tests * . * remove keepdb * format python files * update CI env vars * Override defaults and insecure tests * Update how ClickHouse database gets evaluated * remove bootstrapping clickhouse database routine * Don't initialize the clickhouse connection unless we say it's primary * . * fixed id generation * remove dump * black settings * empty client * add param * move docker-compose for ch to ee dir * Add _public_ key to repo for verifying self signed cert on server * update ee compose file for ee dir * fix a few issues with tls in migrations * update migrations to be flexible about storage profile and engine * black settings * add elements prop tables * add elements prop tables * working filter * refactored * better url handling * add mapping table * add processing to worker task * working cohort with actions * add cohort property filtering * add cohort property filtering * reformat and add cohort processing * prop clauses * add util * add more util * add clickhouse modifier * Clickhouse Sessions (#1623) * sessions sql * skeleton * add endpoint * better tests * sessions list * merge clickhouse-actions * added session endpoint * sessions sql working again * add clickhouse modifier * session avg with props working * add dist * tests working (no list) * list working * add formatting * more formatting * fix tests * dummy commit * fix types * remove unnecessary improt * ignore type when importing from ee in task * fix test running * Clickhouse Trends Base (#1609) * initial working * date param almost working * fix date range and labels * fixed monthly math * handle compare * change table * using new event ingestion * direct query actions working * remove interface * fix date range * properties initial working * handle operator * handle operator * move timestamp parse * move more to util * inital breaking down working * working cohort breakdown * some tests running * fix sessions * cohort tests * action and interval test * reorder cohort filtering * rename retention test * fix inits * change multitenancy tests * fix types * fix optional types * replace ch_client.execute with sync_execute * replace ch_client.execute with sync_execute, part 2 * Clickhouse Stickiness + Process Event (#1654) * generate clickhouse uuid script * set CLICKHOUSE_SECURE=False by default if running in TEST or DEBUG * convert person_id to UUID, make adding `person_id` optional, add distinct_ids already in the `create_person` function * Fix test_process_event_ee.py, remove all calls to Person.objects.* * add back util * fix broken imports * improve process_event test clickhouse queries * Basic stickiness query * Clickhouse Stickiness tests * stickiness test [WIP, actions fail] * generate clickhouse uuid script * change default test runner if PRIMARY_DB=clickhouse * fix stickiness test for actions * fix merge bug * remove _create_person stub; cohort person_id is UUID now * fix typing * Clickhouse trends process math (#1660) * most of process math works * all process math * fix ordering issue * unusued imports * update property comparison for process_event_ee * indentation wrong missing calls * demo users and events (#1661) * finish breakdown filtering tests and reformat label function * add increment to demo_data * update demo data populating * Add people endpoint for ch (#1670) * add people endpoint for ch * stickiness people * fix value padding * add process math to breakdown and * add limit * fix tests * condensed code * converted test to factory * add people tests * add month handling * add typing fix * change people test handling * fix tests * Clickhouse funnels 2 (#1668) * add elements to create_event * WIP closes #1663 Add funnels to clickhouse * Make funnels work * Clean up * Move filtering around * Add mypy tests and fix * Performance improvements * fix person tests again * add people for funnel endpoint * fix prop numbering Co-authored-by: Marius Andra <marius.andra@gmail.com> Co-authored-by: Eric <eeoneric@gmail.com> * merge master * add retention * update types * more typing errors * fix types * bug with kafka payload, elements insert, and demo data * Clickhouse Paths (#1657) * paths clickhouse test (fails) * add elements to create_event * make this fail for clickhouse * hardcoded query that returns good results for $pageviews, no filters yet * clean up queries * bound by time, fix 30min new session boundary * support screen and custom events * add properties filter * paths url * filter by path start * better path start test * even better path start test * start from the first "path start" in a group * test for person_id in paths * partition by person_id for POSTGRES paths * partition by person_id for Clickhouse paths * clean up order in paths test * clean up order in paths test * join elements * force element order on element group creation * remove "order" when creating elements in tests and demo * get list of elements for paths * add limit to paths query * use materialized view * rename "element_hash" to "elements_hash" (no change in db) * cull rows that are definitely unused * simplify query * New highly optimized paths clickhouse query * start_point for $autocapture paths * extract event property values from clickhouse * prevent crash * select one element sql * get elements for event * remove lodash * remove host from $pageview path elements if same domain as incoming path * show metadata based on loaded paths filter, not in flight filter * fix order (all soures and targets in order, not all sources first, then all targets after) - makes for a better looking graph * add test that makes the Postgres paths query fail * fix postgres paths --> no fuzzy matching, breaks "starts with" for urls and gives too many incorrect start points * create automatic /demo urls that match the real urls (no ending /) * fix elements queries * path element joins * create persons via postgres in paths test * change serializers back to id * fix tests with uuid * fix demo * more bugs * fix type * change now to timezone aware * [clickhouse] retention filters (#1725) * implemented target entity and prop filtering * add insight view override * fix endpoint and filters * include tests * fix tests * add period filtering * . * fix pg param name * add filtering params to both queries in retention sql * fix param again * change to todatetime * change tz to timezone * add back timezone in model/event * [clickhouse] feature flag endpoint requests (#1731) * add feature flags to endpoints * add flags to endpoints that check on request * remove magic strings and fill in missing flags * fix types * add missing flag * change from iso * fix more timestamps and comparator * change _people to get_people in actions view * remove action and cohort populating * change inheritance * "Clickhouse Features V2 (#1565)" This reverts commit 0b371d43eca149cd3632410ea5653b85b2173e39. * fix types * change to super * change to super x2 Co-authored-by: Eric <eeoneric@gmail.com> Co-authored-by: Marius Andra <marius.andra@gmail.com> Co-authored-by: Tim Glaser <tim.glaser@hiberly.com>
2020-09-29 16:17:26 +02:00
timestamp: Optional[Union[datetime, str]] = None,
2020-09-09 03:55:01 +02:00
properties: Optional[Dict] = {},
elements_hash: Optional[str] = "",
elements: Optional[List[Element]] = None,
) -> str:
2020-09-09 03:55:01 +02:00
if not timestamp:
timestamp = now()
2020-09-09 03:55:01 +02:00
# clickhouse specific formatting
if isinstance(timestamp, str):
timestamp = isoparse(timestamp)
else:
timestamp = timestamp.astimezone(pytz.utc)
if elements and not elements_hash:
elements_hash = create_elements(event_uuid=event_uuid, elements=elements, team=team)
data = {
"uuid": str(event_uuid),
"event": event,
"properties": json.dumps(properties),
"timestamp": timestamp.strftime("%Y-%m-%d %H:%M:%S.%f"),
"team_id": team.pk,
"distinct_id": distinct_id,
"elements_hash": elements_hash,
"created_at": timestamp.strftime("%Y-%m-%d %H:%M:%S.%f"),
}
p = ClickhouseProducer()
p.produce(sql=INSERT_EVENT_SQL, topic=KAFKA_EVENTS, data=data)
return str(event_uuid)
2020-09-09 03:55:01 +02:00
def get_events():
events = sync_execute(GET_EVENTS_SQL)
return ClickhouseEventSerializer(events, many=True, context={"elements": None, "people": None}).data
"Clickhouse Features V2 (#1565)" (#1750) * initial * migration command * migrations working * add modelless views for clickhouse * initial testing structure * use test factory * scaffold for all tests * add insight and person api * add basic readme * add client * change how migrations are run * add base tables * ingesting events * restore delay * remove print * updated testing flow * changed sessions tests * update tests * reorganized sql * parametrize strings * element list query * change to seralizer * add values endpoint * retrieve with filter * pruned code to prepare for staged merge * working ingestion again * tests for ee * undo unneeded tests right now * fix linting * more typing errors * fix tests * add clickhouse image to workflow * move to right job * remove django_clickhouse * return database url * run super * remove keepdb * reordered calls * fix type * fractional seconds * fix type error * add checks * remove retention sql * fix tests * add property storage and tests * merge master * fix tests * fix tests * . * remove keepdb * format python files * update CI env vars * Override defaults and insecure tests * Update how ClickHouse database gets evaluated * remove bootstrapping clickhouse database routine * Don't initialize the clickhouse connection unless we say it's primary * . * fixed id generation * remove dump * black settings * empty client * add param * move docker-compose for ch to ee dir * Add _public_ key to repo for verifying self signed cert on server * update ee compose file for ee dir * fix a few issues with tls in migrations * update migrations to be flexible about storage profile and engine * black settings * add elements prop tables * add elements prop tables * working filter * refactored * better url handling * add mapping table * add processing to worker task * working cohort with actions * add cohort property filtering * add cohort property filtering * reformat and add cohort processing * prop clauses * add util * add more util * add clickhouse modifier * Clickhouse Sessions (#1623) * sessions sql * skeleton * add endpoint * better tests * sessions list * merge clickhouse-actions * added session endpoint * sessions sql working again * add clickhouse modifier * session avg with props working * add dist * tests working (no list) * list working * add formatting * more formatting * fix tests * dummy commit * fix types * remove unnecessary improt * ignore type when importing from ee in task * fix test running * Clickhouse Trends Base (#1609) * initial working * date param almost working * fix date range and labels * fixed monthly math * handle compare * change table * using new event ingestion * direct query actions working * remove interface * fix date range * properties initial working * handle operator * handle operator * move timestamp parse * move more to util * inital breaking down working * working cohort breakdown * some tests running * fix sessions * cohort tests * action and interval test * reorder cohort filtering * rename retention test * fix inits * change multitenancy tests * fix types * fix optional types * replace ch_client.execute with sync_execute * replace ch_client.execute with sync_execute, part 2 * Clickhouse Stickiness + Process Event (#1654) * generate clickhouse uuid script * set CLICKHOUSE_SECURE=False by default if running in TEST or DEBUG * convert person_id to UUID, make adding `person_id` optional, add distinct_ids already in the `create_person` function * Fix test_process_event_ee.py, remove all calls to Person.objects.* * add back util * fix broken imports * improve process_event test clickhouse queries * Basic stickiness query * Clickhouse Stickiness tests * stickiness test [WIP, actions fail] * generate clickhouse uuid script * change default test runner if PRIMARY_DB=clickhouse * fix stickiness test for actions * fix merge bug * remove _create_person stub; cohort person_id is UUID now * fix typing * Clickhouse trends process math (#1660) * most of process math works * all process math * fix ordering issue * unusued imports * update property comparison for process_event_ee * indentation wrong missing calls * demo users and events (#1661) * finish breakdown filtering tests and reformat label function * add increment to demo_data * update demo data populating * Add people endpoint for ch (#1670) * add people endpoint for ch * stickiness people * fix value padding * add process math to breakdown and * add limit * fix tests * condensed code * converted test to factory * add people tests * add month handling * add typing fix * change people test handling * fix tests * Clickhouse funnels 2 (#1668) * add elements to create_event * WIP closes #1663 Add funnels to clickhouse * Make funnels work * Clean up * Move filtering around * Add mypy tests and fix * Performance improvements * fix person tests again * add people for funnel endpoint * fix prop numbering Co-authored-by: Marius Andra <marius.andra@gmail.com> Co-authored-by: Eric <eeoneric@gmail.com> * merge master * add retention * update types * more typing errors * fix types * bug with kafka payload, elements insert, and demo data * Clickhouse Paths (#1657) * paths clickhouse test (fails) * add elements to create_event * make this fail for clickhouse * hardcoded query that returns good results for $pageviews, no filters yet * clean up queries * bound by time, fix 30min new session boundary * support screen and custom events * add properties filter * paths url * filter by path start * better path start test * even better path start test * start from the first "path start" in a group * test for person_id in paths * partition by person_id for POSTGRES paths * partition by person_id for Clickhouse paths * clean up order in paths test * clean up order in paths test * join elements * force element order on element group creation * remove "order" when creating elements in tests and demo * get list of elements for paths * add limit to paths query * use materialized view * rename "element_hash" to "elements_hash" (no change in db) * cull rows that are definitely unused * simplify query * New highly optimized paths clickhouse query * start_point for $autocapture paths * extract event property values from clickhouse * prevent crash * select one element sql * get elements for event * remove lodash * remove host from $pageview path elements if same domain as incoming path * show metadata based on loaded paths filter, not in flight filter * fix order (all soures and targets in order, not all sources first, then all targets after) - makes for a better looking graph * add test that makes the Postgres paths query fail * fix postgres paths --> no fuzzy matching, breaks "starts with" for urls and gives too many incorrect start points * create automatic /demo urls that match the real urls (no ending /) * fix elements queries * path element joins * create persons via postgres in paths test * change serializers back to id * fix tests with uuid * fix demo * more bugs * fix type * change now to timezone aware * [clickhouse] retention filters (#1725) * implemented target entity and prop filtering * add insight view override * fix endpoint and filters * include tests * fix tests * add period filtering * . * fix pg param name * add filtering params to both queries in retention sql * fix param again * change to todatetime * change tz to timezone * add back timezone in model/event * [clickhouse] feature flag endpoint requests (#1731) * add feature flags to endpoints * add flags to endpoints that check on request * remove magic strings and fill in missing flags * fix types * add missing flag * change from iso * fix more timestamps and comparator * change _people to get_people in actions view * remove action and cohort populating * change inheritance * "Clickhouse Features V2 (#1565)" This reverts commit 0b371d43eca149cd3632410ea5653b85b2173e39. * fix types * change to super * change to super x2 Co-authored-by: Eric <eeoneric@gmail.com> Co-authored-by: Marius Andra <marius.andra@gmail.com> Co-authored-by: Tim Glaser <tim.glaser@hiberly.com>
2020-09-29 16:17:26 +02:00
def get_events_by_team(team_id: Union[str, int]):
events = sync_execute(GET_EVENTS_BY_TEAM_SQL, {"team_id": str(team_id)})
return ClickhouseEventSerializer(events, many=True, context={"elements": None, "people": None}).data
2020-09-09 03:55:01 +02:00
# reference raw sql for
class ClickhouseEventSerializer(serializers.Serializer):
"Clickhouse Features V2 (#1565)" (#1750) * initial * migration command * migrations working * add modelless views for clickhouse * initial testing structure * use test factory * scaffold for all tests * add insight and person api * add basic readme * add client * change how migrations are run * add base tables * ingesting events * restore delay * remove print * updated testing flow * changed sessions tests * update tests * reorganized sql * parametrize strings * element list query * change to seralizer * add values endpoint * retrieve with filter * pruned code to prepare for staged merge * working ingestion again * tests for ee * undo unneeded tests right now * fix linting * more typing errors * fix tests * add clickhouse image to workflow * move to right job * remove django_clickhouse * return database url * run super * remove keepdb * reordered calls * fix type * fractional seconds * fix type error * add checks * remove retention sql * fix tests * add property storage and tests * merge master * fix tests * fix tests * . * remove keepdb * format python files * update CI env vars * Override defaults and insecure tests * Update how ClickHouse database gets evaluated * remove bootstrapping clickhouse database routine * Don't initialize the clickhouse connection unless we say it's primary * . * fixed id generation * remove dump * black settings * empty client * add param * move docker-compose for ch to ee dir * Add _public_ key to repo for verifying self signed cert on server * update ee compose file for ee dir * fix a few issues with tls in migrations * update migrations to be flexible about storage profile and engine * black settings * add elements prop tables * add elements prop tables * working filter * refactored * better url handling * add mapping table * add processing to worker task * working cohort with actions * add cohort property filtering * add cohort property filtering * reformat and add cohort processing * prop clauses * add util * add more util * add clickhouse modifier * Clickhouse Sessions (#1623) * sessions sql * skeleton * add endpoint * better tests * sessions list * merge clickhouse-actions * added session endpoint * sessions sql working again * add clickhouse modifier * session avg with props working * add dist * tests working (no list) * list working * add formatting * more formatting * fix tests * dummy commit * fix types * remove unnecessary improt * ignore type when importing from ee in task * fix test running * Clickhouse Trends Base (#1609) * initial working * date param almost working * fix date range and labels * fixed monthly math * handle compare * change table * using new event ingestion * direct query actions working * remove interface * fix date range * properties initial working * handle operator * handle operator * move timestamp parse * move more to util * inital breaking down working * working cohort breakdown * some tests running * fix sessions * cohort tests * action and interval test * reorder cohort filtering * rename retention test * fix inits * change multitenancy tests * fix types * fix optional types * replace ch_client.execute with sync_execute * replace ch_client.execute with sync_execute, part 2 * Clickhouse Stickiness + Process Event (#1654) * generate clickhouse uuid script * set CLICKHOUSE_SECURE=False by default if running in TEST or DEBUG * convert person_id to UUID, make adding `person_id` optional, add distinct_ids already in the `create_person` function * Fix test_process_event_ee.py, remove all calls to Person.objects.* * add back util * fix broken imports * improve process_event test clickhouse queries * Basic stickiness query * Clickhouse Stickiness tests * stickiness test [WIP, actions fail] * generate clickhouse uuid script * change default test runner if PRIMARY_DB=clickhouse * fix stickiness test for actions * fix merge bug * remove _create_person stub; cohort person_id is UUID now * fix typing * Clickhouse trends process math (#1660) * most of process math works * all process math * fix ordering issue * unusued imports * update property comparison for process_event_ee * indentation wrong missing calls * demo users and events (#1661) * finish breakdown filtering tests and reformat label function * add increment to demo_data * update demo data populating * Add people endpoint for ch (#1670) * add people endpoint for ch * stickiness people * fix value padding * add process math to breakdown and * add limit * fix tests * condensed code * converted test to factory * add people tests * add month handling * add typing fix * change people test handling * fix tests * Clickhouse funnels 2 (#1668) * add elements to create_event * WIP closes #1663 Add funnels to clickhouse * Make funnels work * Clean up * Move filtering around * Add mypy tests and fix * Performance improvements * fix person tests again * add people for funnel endpoint * fix prop numbering Co-authored-by: Marius Andra <marius.andra@gmail.com> Co-authored-by: Eric <eeoneric@gmail.com> * merge master * add retention * update types * more typing errors * fix types * bug with kafka payload, elements insert, and demo data * Clickhouse Paths (#1657) * paths clickhouse test (fails) * add elements to create_event * make this fail for clickhouse * hardcoded query that returns good results for $pageviews, no filters yet * clean up queries * bound by time, fix 30min new session boundary * support screen and custom events * add properties filter * paths url * filter by path start * better path start test * even better path start test * start from the first "path start" in a group * test for person_id in paths * partition by person_id for POSTGRES paths * partition by person_id for Clickhouse paths * clean up order in paths test * clean up order in paths test * join elements * force element order on element group creation * remove "order" when creating elements in tests and demo * get list of elements for paths * add limit to paths query * use materialized view * rename "element_hash" to "elements_hash" (no change in db) * cull rows that are definitely unused * simplify query * New highly optimized paths clickhouse query * start_point for $autocapture paths * extract event property values from clickhouse * prevent crash * select one element sql * get elements for event * remove lodash * remove host from $pageview path elements if same domain as incoming path * show metadata based on loaded paths filter, not in flight filter * fix order (all soures and targets in order, not all sources first, then all targets after) - makes for a better looking graph * add test that makes the Postgres paths query fail * fix postgres paths --> no fuzzy matching, breaks "starts with" for urls and gives too many incorrect start points * create automatic /demo urls that match the real urls (no ending /) * fix elements queries * path element joins * create persons via postgres in paths test * change serializers back to id * fix tests with uuid * fix demo * more bugs * fix type * change now to timezone aware * [clickhouse] retention filters (#1725) * implemented target entity and prop filtering * add insight view override * fix endpoint and filters * include tests * fix tests * add period filtering * . * fix pg param name * add filtering params to both queries in retention sql * fix param again * change to todatetime * change tz to timezone * add back timezone in model/event * [clickhouse] feature flag endpoint requests (#1731) * add feature flags to endpoints * add flags to endpoints that check on request * remove magic strings and fill in missing flags * fix types * add missing flag * change from iso * fix more timestamps and comparator * change _people to get_people in actions view * remove action and cohort populating * change inheritance * "Clickhouse Features V2 (#1565)" This reverts commit 0b371d43eca149cd3632410ea5653b85b2173e39. * fix types * change to super * change to super x2 Co-authored-by: Eric <eeoneric@gmail.com> Co-authored-by: Marius Andra <marius.andra@gmail.com> Co-authored-by: Tim Glaser <tim.glaser@hiberly.com>
2020-09-29 16:17:26 +02:00
id = serializers.SerializerMethodField()
2020-09-09 03:55:01 +02:00
properties = serializers.SerializerMethodField()
event = serializers.SerializerMethodField()
timestamp = serializers.SerializerMethodField()
person = serializers.SerializerMethodField()
elements = serializers.SerializerMethodField()
elements_hash = serializers.SerializerMethodField()
"Clickhouse Features V2 (#1565)" (#1750) * initial * migration command * migrations working * add modelless views for clickhouse * initial testing structure * use test factory * scaffold for all tests * add insight and person api * add basic readme * add client * change how migrations are run * add base tables * ingesting events * restore delay * remove print * updated testing flow * changed sessions tests * update tests * reorganized sql * parametrize strings * element list query * change to seralizer * add values endpoint * retrieve with filter * pruned code to prepare for staged merge * working ingestion again * tests for ee * undo unneeded tests right now * fix linting * more typing errors * fix tests * add clickhouse image to workflow * move to right job * remove django_clickhouse * return database url * run super * remove keepdb * reordered calls * fix type * fractional seconds * fix type error * add checks * remove retention sql * fix tests * add property storage and tests * merge master * fix tests * fix tests * . * remove keepdb * format python files * update CI env vars * Override defaults and insecure tests * Update how ClickHouse database gets evaluated * remove bootstrapping clickhouse database routine * Don't initialize the clickhouse connection unless we say it's primary * . * fixed id generation * remove dump * black settings * empty client * add param * move docker-compose for ch to ee dir * Add _public_ key to repo for verifying self signed cert on server * update ee compose file for ee dir * fix a few issues with tls in migrations * update migrations to be flexible about storage profile and engine * black settings * add elements prop tables * add elements prop tables * working filter * refactored * better url handling * add mapping table * add processing to worker task * working cohort with actions * add cohort property filtering * add cohort property filtering * reformat and add cohort processing * prop clauses * add util * add more util * add clickhouse modifier * Clickhouse Sessions (#1623) * sessions sql * skeleton * add endpoint * better tests * sessions list * merge clickhouse-actions * added session endpoint * sessions sql working again * add clickhouse modifier * session avg with props working * add dist * tests working (no list) * list working * add formatting * more formatting * fix tests * dummy commit * fix types * remove unnecessary improt * ignore type when importing from ee in task * fix test running * Clickhouse Trends Base (#1609) * initial working * date param almost working * fix date range and labels * fixed monthly math * handle compare * change table * using new event ingestion * direct query actions working * remove interface * fix date range * properties initial working * handle operator * handle operator * move timestamp parse * move more to util * inital breaking down working * working cohort breakdown * some tests running * fix sessions * cohort tests * action and interval test * reorder cohort filtering * rename retention test * fix inits * change multitenancy tests * fix types * fix optional types * replace ch_client.execute with sync_execute * replace ch_client.execute with sync_execute, part 2 * Clickhouse Stickiness + Process Event (#1654) * generate clickhouse uuid script * set CLICKHOUSE_SECURE=False by default if running in TEST or DEBUG * convert person_id to UUID, make adding `person_id` optional, add distinct_ids already in the `create_person` function * Fix test_process_event_ee.py, remove all calls to Person.objects.* * add back util * fix broken imports * improve process_event test clickhouse queries * Basic stickiness query * Clickhouse Stickiness tests * stickiness test [WIP, actions fail] * generate clickhouse uuid script * change default test runner if PRIMARY_DB=clickhouse * fix stickiness test for actions * fix merge bug * remove _create_person stub; cohort person_id is UUID now * fix typing * Clickhouse trends process math (#1660) * most of process math works * all process math * fix ordering issue * unusued imports * update property comparison for process_event_ee * indentation wrong missing calls * demo users and events (#1661) * finish breakdown filtering tests and reformat label function * add increment to demo_data * update demo data populating * Add people endpoint for ch (#1670) * add people endpoint for ch * stickiness people * fix value padding * add process math to breakdown and * add limit * fix tests * condensed code * converted test to factory * add people tests * add month handling * add typing fix * change people test handling * fix tests * Clickhouse funnels 2 (#1668) * add elements to create_event * WIP closes #1663 Add funnels to clickhouse * Make funnels work * Clean up * Move filtering around * Add mypy tests and fix * Performance improvements * fix person tests again * add people for funnel endpoint * fix prop numbering Co-authored-by: Marius Andra <marius.andra@gmail.com> Co-authored-by: Eric <eeoneric@gmail.com> * merge master * add retention * update types * more typing errors * fix types * bug with kafka payload, elements insert, and demo data * Clickhouse Paths (#1657) * paths clickhouse test (fails) * add elements to create_event * make this fail for clickhouse * hardcoded query that returns good results for $pageviews, no filters yet * clean up queries * bound by time, fix 30min new session boundary * support screen and custom events * add properties filter * paths url * filter by path start * better path start test * even better path start test * start from the first "path start" in a group * test for person_id in paths * partition by person_id for POSTGRES paths * partition by person_id for Clickhouse paths * clean up order in paths test * clean up order in paths test * join elements * force element order on element group creation * remove "order" when creating elements in tests and demo * get list of elements for paths * add limit to paths query * use materialized view * rename "element_hash" to "elements_hash" (no change in db) * cull rows that are definitely unused * simplify query * New highly optimized paths clickhouse query * start_point for $autocapture paths * extract event property values from clickhouse * prevent crash * select one element sql * get elements for event * remove lodash * remove host from $pageview path elements if same domain as incoming path * show metadata based on loaded paths filter, not in flight filter * fix order (all soures and targets in order, not all sources first, then all targets after) - makes for a better looking graph * add test that makes the Postgres paths query fail * fix postgres paths --> no fuzzy matching, breaks "starts with" for urls and gives too many incorrect start points * create automatic /demo urls that match the real urls (no ending /) * fix elements queries * path element joins * create persons via postgres in paths test * change serializers back to id * fix tests with uuid * fix demo * more bugs * fix type * change now to timezone aware * [clickhouse] retention filters (#1725) * implemented target entity and prop filtering * add insight view override * fix endpoint and filters * include tests * fix tests * add period filtering * . * fix pg param name * add filtering params to both queries in retention sql * fix param again * change to todatetime * change tz to timezone * add back timezone in model/event * [clickhouse] feature flag endpoint requests (#1731) * add feature flags to endpoints * add flags to endpoints that check on request * remove magic strings and fill in missing flags * fix types * add missing flag * change from iso * fix more timestamps and comparator * change _people to get_people in actions view * remove action and cohort populating * change inheritance * "Clickhouse Features V2 (#1565)" This reverts commit 0b371d43eca149cd3632410ea5653b85b2173e39. * fix types * change to super * change to super x2 Co-authored-by: Eric <eeoneric@gmail.com> Co-authored-by: Marius Andra <marius.andra@gmail.com> Co-authored-by: Tim Glaser <tim.glaser@hiberly.com>
2020-09-29 16:17:26 +02:00
def get_id(self, event):
2020-09-09 03:55:01 +02:00
return str(event[0])
def get_properties(self, event):
if len(event) >= 10 and event[8] and event[9]:
return dict(zip(event[8], event[9]))
else:
return json.loads(event[2])
2020-09-09 03:55:01 +02:00
def get_event(self, event):
return event[1]
def get_timestamp(self, event):
dt = event[3].replace(tzinfo=timezone.utc)
return dt.astimezone().isoformat()
def get_person(self, event):
if len(event) < 13:
return event[5]
props = json.loads(event[12])
return props.get("email", event[5])
2020-09-09 03:55:01 +02:00
def get_elements(self, event):
if not event[6] or not self.context["elements"] or event[6] not in self.context["elements"]:
return []
return self.context["elements"][event[6]]
2020-09-09 03:55:01 +02:00
def get_elements_hash(self, event):
return event[6]
def determine_event_conditions(conditions: Dict[str, Union[str, List[str]]]) -> Tuple[str, Dict]:
2020-09-09 03:55:01 +02:00
result = ""
params = {}
for idx, (k, v) in enumerate(conditions.items()):
if not isinstance(v, str):
continue
2020-09-09 03:55:01 +02:00
if k == "after":
timestamp = isoparse(v).strftime("%Y-%m-%d %H:%M:%S.%f")
2020-09-09 03:55:01 +02:00
result += "AND timestamp > %(after)s"
params.update({"after": timestamp})
2020-09-09 03:55:01 +02:00
elif k == "before":
timestamp = isoparse(v).strftime("%Y-%m-%d %H:%M:%S.%f")
2020-09-09 03:55:01 +02:00
result += "AND timestamp < %(before)s"
params.update({"before": timestamp})
elif k == "person_id":
result += """AND distinct_id IN (
SELECT distinct_id FROM person_distinct_id WHERE person_id = %(person_id)s AND team_id = %(team_id)s
)"""
params.update({"person_id": v})
elif k == "distinct_id":
result += "AND distinct_id = %(distinct_id)s"
params.update({"distinct_id": v})
2020-09-09 03:55:01 +02:00
return result, params