mirror of
https://github.com/PostHog/posthog.git
synced 2024-12-01 04:12:23 +01:00
522 lines
19 KiB
Python
522 lines
19 KiB
Python
# isort: skip_file
|
|
# Needs to be first to set up django environment
|
|
from .helpers import *
|
|
|
|
from datetime import timedelta
|
|
from typing import List, Tuple
|
|
|
|
from ee.clickhouse.queries.trends.lifecycle import ClickhouseLifecycle
|
|
from ee.clickhouse.materialized_columns import backfill_materialized_columns, get_materialized_columns, materialize
|
|
from ee.clickhouse.queries.stickiness.clickhouse_stickiness import ClickhouseStickiness
|
|
from ee.clickhouse.queries.funnels.funnel_correlation import FunnelCorrelation
|
|
from ee.clickhouse.queries.funnels import ClickhouseFunnel
|
|
from ee.clickhouse.queries.trends.clickhouse_trends import ClickhouseTrends
|
|
from ee.clickhouse.queries.session_recordings.clickhouse_session_recording_list import ClickhouseSessionRecordingList
|
|
from ee.clickhouse.queries.retention.clickhouse_retention import ClickhouseRetention
|
|
from ee.clickhouse.queries.util import get_earliest_timestamp
|
|
from posthog.models import Action, ActionStep, Cohort, Team, Organization
|
|
from posthog.models.filters.retention_filter import RetentionFilter
|
|
from posthog.models.filters.session_recordings_filter import SessionRecordingsFilter
|
|
from posthog.models.filters.stickiness_filter import StickinessFilter
|
|
from posthog.models.filters.filter import Filter
|
|
from posthog.models.property import PropertyName, TableWithProperties
|
|
from posthog.constants import FunnelCorrelationType
|
|
|
|
# (table, property) pairs that QuerySuite.setup materializes and backfills
# before any benchmark runs.
MATERIALIZED_PROPERTIES: List[Tuple[TableWithProperties, PropertyName]] = [
    ("events", "$host"),
    ("events", "$current_url"),
    ("events", "$event_type"),
    ("person", "email"),
    ("person", "$browser"),
]

# Fixed query windows shared by the benchmarks below. The dates are pinned so
# every run queries the same span of data.
DATE_RANGE = dict(date_from="2021-01-01", date_to="2021-10-01", interval="week")
SHORT_DATE_RANGE = dict(date_from="2021-07-01", date_to="2021-10-01", interval="week")
# Window used by the session recording benchmarks; note it carries no "interval" key.
SESSIONS_DATE_RANGE = dict(date_from="2021-11-17", date_to="2021-11-22")
|
|
|
|
|
|
class QuerySuite:
    """Benchmarks for the ClickHouse-backed insight queries (trends, funnels, retention, ...).

    Each ``track_*`` method builds one filter and runs one query class against
    ``self.team``. Most benchmarks come in pairs: a plain variant that runs the
    query inside ``no_materialized_columns()`` and a ``*_materialized`` variant
    that runs it directly, after ``setup`` has materialized the columns listed in
    ``MATERIALIZED_PROPERTIES``.

    NOTE(review): the ``track_*`` / ``setup`` / ``timeout`` / ``version`` names look
    like airspeed velocity (asv) conventions - confirm against the benchmark runner.
    Helper methods are prefixed with ``_`` so they are not mistaken for benchmarks.
    """

    timeout = 3000.0  # Timeout for the whole suite
    version = "v001"  # Version. Incrementing this will invalidate previous results

    team: Team
    cohort: Cohort

    # ------------------------------------------------------------------
    # Shared filter fragments. Each helper returns a *fresh* object so no
    # mutable state is shared between the filters built from it.
    # ------------------------------------------------------------------

    @staticmethod
    def _person_email_filter() -> List[dict]:
        """Return the person-property filter `email icontains ".com"`."""
        return [{"key": "email", "operator": "icontains", "value": ".com", "type": "person"}]

    @staticmethod
    def _non_local_host_filter() -> List[dict]:
        """Return the event-property filter excluding a fixed list of local `$host` values."""
        return [
            {
                "key": "$host",
                "operator": "is_not",
                "value": [
                    "localhost:8000",
                    "localhost:5000",
                    "127.0.0.1:8000",
                    "127.0.0.1:3000",
                    "localhost:3000",
                ],
            }
        ]

    @staticmethod
    def _correlation_funnel_steps() -> List[dict]:
        """Return the two funnel steps used by every correlation benchmark."""
        return [{"id": "user signed up"}, {"id": "insight analyzed"}]

    def _cohort_filter(self) -> List[dict]:
        """Return a property filter selecting members of ``self.cohort``."""
        return [{"key": "id", "value": self.cohort.pk, "type": "cohort"}]

    def _create_docs_view_action(self) -> Action:
        """Create a "docs view" action with one step: event "$pageview", url "docs", url_matching "contains"."""
        action = Action.objects.create(team=self.team, name="docs view")
        ActionStep.objects.create(action=action, event="$pageview", url="docs", url_matching="contains")
        return action

    def _create_dotcom_users_action(self) -> Action:
        """Create a ".com-users page views" action: "$pageview" events carrying the person email filter."""
        action = Action.objects.create(team=self.team, name=".com-users page views")
        ActionStep.objects.create(action=action, event="$pageview", properties=self._person_email_filter())
        return action

    # ------------------------------------------------------------------
    # Trends
    # ------------------------------------------------------------------

    @benchmark_clickhouse
    def track_trends_no_filter(self):
        """Trends for $pageview over DATE_RANGE with no property filters."""
        trends_filter = Filter(data={"events": [{"id": "$pageview"}], **DATE_RANGE})
        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_event_property_filter(self):
        """Trends for $pageview filtered on `$host`, run inside no_materialized_columns().

        Uses SHORT_DATE_RANGE, unlike the materialized variant which uses DATE_RANGE.
        """
        trends_filter = Filter(
            data={
                "events": [{"id": "$pageview"}],
                "properties": self._non_local_host_filter(),
                **SHORT_DATE_RANGE,
            }
        )

        with no_materialized_columns():
            ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_event_property_filter_materialized(self):
        """Trends for $pageview filtered on `$host` over DATE_RANGE."""
        trends_filter = Filter(
            data={
                "events": [{"id": "$pageview"}],
                "properties": self._non_local_host_filter(),
                **DATE_RANGE,
            }
        )
        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_person_property_filter(self):
        """Trends for $pageview with the person email filter, inside no_materialized_columns()."""
        trends_filter = Filter(
            data={
                "events": [{"id": "$pageview"}],
                "properties": self._person_email_filter(),
                **DATE_RANGE,
            }
        )

        with no_materialized_columns():
            ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_person_property_filter_materialized(self):
        """Trends for $pageview with the person email filter."""
        trends_filter = Filter(
            data={
                "events": [{"id": "$pageview"}],
                "properties": self._person_email_filter(),
                **DATE_RANGE,
            }
        )

        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_dau(self):
        """Trends for $pageview with math "dau" over DATE_RANGE."""
        trends_filter = Filter(data={"events": [{"id": "$pageview", "math": "dau"}], **DATE_RANGE})
        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_dau_person_property_filter(self):
        """DAU trends with the person email filter, inside no_materialized_columns()."""
        trends_filter = Filter(
            data={
                "events": [{"id": "$pageview", "math": "dau"}],
                "properties": self._person_email_filter(),
                **DATE_RANGE,
            }
        )

        with no_materialized_columns():
            ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_dau_person_property_filter_materialized(self):
        """DAU trends with the person email filter."""
        trends_filter = Filter(
            data={
                "events": [{"id": "$pageview", "math": "dau"}],
                "properties": self._person_email_filter(),
                **DATE_RANGE,
            }
        )

        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_cohort_precalculated(self):
        """Trends filtered by the benchmark cohort after stamping it as calculated just now."""
        # NOTE(review): `now` is not imported explicitly in this file; presumably it is
        # provided by `from .helpers import *` - confirm.
        self.cohort.last_calculation = now()
        self.cohort.save()

        trends_filter = Filter(
            data={
                "events": [{"id": "$pageview"}],
                "properties": self._cohort_filter(),
                **DATE_RANGE,
            },
            team=self.team,
        )
        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_cohort(self):
        """Trends filtered by the cohort with `last_calculation` cleared, inside no_materialized_columns()."""
        self.cohort.last_calculation = None
        self.cohort.save()

        trends_filter = Filter(
            data={
                "events": [{"id": "$pageview"}],
                "properties": self._cohort_filter(),
                **DATE_RANGE,
            },
            team=self.team,
        )

        with no_materialized_columns():
            ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_cohort_materialized(self):
        """Trends filtered by the cohort with `last_calculation` cleared."""
        self.cohort.last_calculation = None
        self.cohort.save()

        trends_filter = Filter(
            data={
                "events": [{"id": "$pageview"}],
                "properties": self._cohort_filter(),
                **DATE_RANGE,
            },
            team=self.team,
        )

        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_action_current_url_materialized(self):
        """Trends over a freshly created "docs view" action."""
        action = self._create_docs_view_action()

        trends_filter = Filter(data={"actions": [{"id": action.id}], **DATE_RANGE}, team=self.team)
        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_action_current_url(self):
        """Trends over a freshly created "docs view" action, inside no_materialized_columns()."""
        action = self._create_docs_view_action()

        trends_filter = Filter(data={"actions": [{"id": action.id}], **DATE_RANGE}, team=self.team)
        with no_materialized_columns():
            ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_action_with_person_filters_materialized(self):
        """Trends over a freshly created action whose step carries the person email filter."""
        action = self._create_dotcom_users_action()

        trends_filter = Filter(data={"actions": [{"id": action.id}], **DATE_RANGE}, team=self.team)
        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_action_with_person_filters(self):
        """Same as the materialized variant, but run inside no_materialized_columns()."""
        action = self._create_dotcom_users_action()

        trends_filter = Filter(data={"actions": [{"id": action.id}], **DATE_RANGE}, team=self.team)
        with no_materialized_columns():
            ClickhouseTrends().run(trends_filter, self.team)

    # ------------------------------------------------------------------
    # Funnels and correlations
    # ------------------------------------------------------------------

    @benchmark_clickhouse
    def track_funnel_normal(self):
        """Two-step funnel ("user signed up" -> "insight analyzed") over DATE_RANGE."""
        funnel_filter = Filter(
            data={
                "insight": "FUNNELS",
                "events": [{"id": "user signed up", "order": 0}, {"id": "insight analyzed", "order": 1}],
                **DATE_RANGE,
            },
            team=self.team,
        )
        ClickhouseFunnel(funnel_filter, self.team).run()

    @benchmark_clickhouse
    def track_correlations_by_events(self):
        """Funnel correlation with no explicit correlation type, over SHORT_DATE_RANGE."""
        correlation_filter = Filter(
            data={"events": self._correlation_funnel_steps(), **SHORT_DATE_RANGE},
            team=self.team,
        )

        FunnelCorrelation(correlation_filter, self.team).run()

    @benchmark_clickhouse
    def track_correlations_by_properties_materialized(self):
        """Funnel correlation by properties, restricted to `$browser`."""
        correlation_filter = Filter(
            data={
                "events": self._correlation_funnel_steps(),
                **SHORT_DATE_RANGE,
                "funnel_correlation_type": FunnelCorrelationType.PROPERTIES,
                "funnel_correlation_names": ["$browser"],
            },
            team=self.team,
        )
        FunnelCorrelation(correlation_filter, self.team).run()

    @benchmark_clickhouse
    def track_correlations_by_properties(self):
        """Funnel correlation by properties (`$browser`), inside no_materialized_columns()."""
        correlation_filter = Filter(
            data={
                "events": self._correlation_funnel_steps(),
                **SHORT_DATE_RANGE,
                "funnel_correlation_type": FunnelCorrelationType.PROPERTIES,
                "funnel_correlation_names": ["$browser"],
            },
            team=self.team,
        )
        with no_materialized_columns():
            FunnelCorrelation(correlation_filter, self.team).run()

    @benchmark_clickhouse
    def track_correlations_by_event_properties(self):
        """Funnel correlation by event-with-properties for `$autocapture`, inside no_materialized_columns()."""
        correlation_filter = Filter(
            data={
                "events": self._correlation_funnel_steps(),
                **SHORT_DATE_RANGE,
                "funnel_correlation_type": FunnelCorrelationType.EVENT_WITH_PROPERTIES,
                "funnel_correlation_event_names": ["$autocapture"],
            },
            team=self.team,
        )
        with no_materialized_columns():
            FunnelCorrelation(correlation_filter, self.team).run()

    @benchmark_clickhouse
    def track_correlations_by_event_properties_materialized(self):
        """Funnel correlation by event-with-properties for `$autocapture`."""
        correlation_filter = Filter(
            data={
                "events": self._correlation_funnel_steps(),
                **SHORT_DATE_RANGE,
                "funnel_correlation_type": FunnelCorrelationType.EVENT_WITH_PROPERTIES,
                "funnel_correlation_event_names": ["$autocapture"],
            },
            team=self.team,
        )
        FunnelCorrelation(correlation_filter, self.team).run()

    # ------------------------------------------------------------------
    # Stickiness
    # ------------------------------------------------------------------

    @benchmark_clickhouse
    def track_stickiness(self):
        """Stickiness of $pageview over DATE_RANGE."""
        stickiness_filter = StickinessFilter(
            data={
                "insight": "STICKINESS",
                "events": [{"id": "$pageview"}],
                "shown_as": "Stickiness",
                "display": "ActionsLineGraph",
                **DATE_RANGE,
            },
            team=self.team,
        )

        ClickhouseStickiness().run(stickiness_filter, self.team)

    @benchmark_clickhouse
    def track_stickiness_filter_by_person_property(self):
        """Stickiness of $pageview with the person email filter, inside no_materialized_columns()."""
        stickiness_filter = StickinessFilter(
            data={
                "insight": "STICKINESS",
                "events": [{"id": "$pageview"}],
                "shown_as": "Stickiness",
                "display": "ActionsLineGraph",
                "properties": self._person_email_filter(),
                **DATE_RANGE,
            },
            team=self.team,
        )

        with no_materialized_columns():
            ClickhouseStickiness().run(stickiness_filter, self.team)

    @benchmark_clickhouse
    def track_stickiness_filter_by_person_property_materialized(self):
        """Stickiness of $pageview with the person email filter."""
        stickiness_filter = StickinessFilter(
            data={
                "insight": "STICKINESS",
                "events": [{"id": "$pageview"}],
                "shown_as": "Stickiness",
                "display": "ActionsLineGraph",
                "properties": self._person_email_filter(),
                **DATE_RANGE,
            },
            team=self.team,
        )

        ClickhouseStickiness().run(stickiness_filter, self.team)

    # ------------------------------------------------------------------
    # Session recordings
    # ------------------------------------------------------------------

    @benchmark_clickhouse
    def track_session_recordings_list(self):
        """Session recording list over SESSIONS_DATE_RANGE with no further filters."""
        recordings_filter = SessionRecordingsFilter(data=SESSIONS_DATE_RANGE, team=self.team)

        ClickhouseSessionRecordingList(recordings_filter, self.team.pk).run()

    @benchmark_clickhouse
    def track_session_recordings_list_event_filter(self):
        """Session recording list restricted by a $pageview event filter."""
        recordings_filter = SessionRecordingsFilter(
            data={"events": [{"id": "$pageview"}], **SESSIONS_DATE_RANGE},
            team=self.team,
        )

        ClickhouseSessionRecordingList(recordings_filter, self.team.pk).run()

    @benchmark_clickhouse
    def track_session_recordings_list_person_property_filter(self):
        """Session recording list with a $pageview event filter carrying the person email filter."""
        recordings_filter = SessionRecordingsFilter(
            data={
                "events": [
                    {
                        "id": "$pageview",
                        "properties": self._person_email_filter(),
                    }
                ],
                **SESSIONS_DATE_RANGE,
            },
            team=self.team,
        )

        ClickhouseSessionRecordingList(recordings_filter, self.team.pk).run()

    # ------------------------------------------------------------------
    # Retention
    # ------------------------------------------------------------------

    @benchmark_clickhouse
    def track_retention(self):
        """First-time weekly retention ($pageview -> $pageview) over 14 intervals."""
        retention_filter = RetentionFilter(
            data={
                "insight": "RETENTION",
                "target_event": {"id": "$pageview"},
                "returning_event": {"id": "$pageview"},
                "total_intervals": 14,
                "retention_type": "retention_first_time",
                "period": "Week",
                **DATE_RANGE,
            },
            team=self.team,
        )

        ClickhouseRetention().run(retention_filter, self.team)

    @benchmark_clickhouse
    def track_retention_with_person_breakdown(self):
        """Retention broken down by person `$browser` and `$browser_version`, inside no_materialized_columns()."""
        retention_filter = RetentionFilter(
            data={
                "insight": "RETENTION",
                "target_event": {"id": "$pageview"},
                "returning_event": {"id": "$pageview"},
                "total_intervals": 14,
                "retention_type": "retention_first_time",
                "breakdown_type": "person",
                "breakdowns": [
                    {"type": "person", "property": "$browser"},
                    {"type": "person", "property": "$browser_version"},
                ],
                "period": "Week",
                **DATE_RANGE,
            },
            team=self.team,
        )

        with no_materialized_columns():
            ClickhouseRetention().run(retention_filter, self.team)

    @benchmark_clickhouse
    def track_retention_filter_by_person_property(self):
        """Retention with the person email filter, inside no_materialized_columns()."""
        retention_filter = RetentionFilter(
            data={
                "insight": "RETENTION",
                "target_event": {"id": "$pageview"},
                "returning_event": {"id": "$pageview"},
                "total_intervals": 14,
                "retention_type": "retention_first_time",
                "period": "Week",
                "properties": self._person_email_filter(),
                **DATE_RANGE,
            },
            team=self.team,
        )

        with no_materialized_columns():
            ClickhouseRetention().run(retention_filter, self.team)

    @benchmark_clickhouse
    def track_retention_filter_by_person_property_materialized(self):
        """Retention with the person email filter."""
        retention_filter = RetentionFilter(
            data={
                "insight": "RETENTION",
                "target_event": {"id": "$pageview"},
                "returning_event": {"id": "$pageview"},
                "total_intervals": 14,
                "retention_type": "retention_first_time",
                "period": "Week",
                "properties": self._person_email_filter(),
                **DATE_RANGE,
            },
            team=self.team,
        )

        ClickhouseRetention().run(retention_filter, self.team)

    # ------------------------------------------------------------------
    # Lifecycle and misc
    # ------------------------------------------------------------------

    @benchmark_clickhouse
    def track_lifecycle(self):
        """Lifecycle insight for $pageview, executed through ClickhouseTrends."""
        lifecycle_filter = Filter(
            data={
                "insight": "LIFECYCLE",
                "events": [{"id": "$pageview", "name": "$pageview", "type": "events", "order": 0, "math": "total"}],
                "display": "ActionsLineGraph",
                # "interval" and "date_from" below are overridden by **DATE_RANGE at the end
                # of this literal (later keys win), so the effective window is DATE_RANGE.
                "interval": "week",
                "shown_as": "Lifecycle",
                "date_from": "-14d",
                "properties": [],
                "compare": False,
                "filter_test_accounts": True,
                **DATE_RANGE,
            },
            team=self.team,
        )

        ClickhouseTrends().run(lifecycle_filter, self.team)

    @benchmark_clickhouse
    def track_earliest_timestamp(self):
        """Look up the earliest timestamp for the benchmark team."""
        # `setup` always resolves the team with id 2, so this matches the former literal `2`.
        get_earliest_timestamp(self.team.pk)

    def setup(self):
        """Prepare materialized columns, the benchmark team and the benchmark cohort.

        Sets ``self.team`` and ``self.cohort``, creating either one if it does not exist.
        """
        # NOTE(review): the nesting of the backfill call (inside the `if`) is reconstructed
        # from a source whose indentation was lost - confirm against the repository.
        for table, property_name in MATERIALIZED_PROPERTIES:
            if property_name not in get_materialized_columns(table):
                materialize(table, property_name)
                backfill_materialized_columns(table, [property_name], backfill_period=timedelta(days=1_000))

        # :TRICKY: Data in benchmark servers has ID=2
        team = Team.objects.filter(id=2).first()
        if team is None:
            organization = Organization.objects.create()
            team = Team.objects.create(id=2, organization=organization, name="The Bakery")
        self.team = team

        cohort = Cohort.objects.filter(name="benchmarking cohort").first()
        if cohort is None:
            cohort = Cohort.objects.create(
                team_id=team.pk,
                name="benchmarking cohort",
                groups=[{"properties": self._person_email_filter()}],
            )
            # NOTE(review): assumed to run only for a newly created cohort - confirm nesting.
            cohort.calculate_people_ch()
        self.cohort = cohort
|