0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-12-01 12:21:02 +01:00
posthog/ee/benchmarks/benchmarks.py
Karl-Aksel Puulmann bf28d14aed
Add some benchmarks for stickiness (#7116)
* Add benchmarks for stickiness

* Don't require get_earliest_timestamp always

* Improve benchmark
2021-11-15 13:38:11 +02:00

341 lines
12 KiB
Python

# isort: skip_file
# Needs to be first to set up django environment
from .helpers import *
from datetime import timedelta
from typing import List, Tuple
from ee.clickhouse.materialized_columns import backfill_materialized_columns, get_materialized_columns, materialize
from ee.clickhouse.queries.clickhouse_stickiness import ClickhouseStickiness
from ee.clickhouse.queries.trends.clickhouse_trends import ClickhouseTrends
from ee.clickhouse.queries.funnels.funnel_correlation import FunnelCorrelation
from posthog.models import Action, ActionStep, Cohort, Team, Organization
from posthog.models.filters.stickiness_filter import StickinessFilter
from posthog.models.filters.filter import Filter
from posthog.models.property import PropertyName, TableWithProperties
from posthog.constants import FunnelCorrelationType
# (table, property) pairs that QuerySuite.setup() materializes and backfills
# whenever get_materialized_columns(table) does not already list the property.
MATERIALIZED_PROPERTIES: List[Tuple[TableWithProperties, PropertyName]] = [
    ("events", "$host"),
    ("events", "$current_url"),
    ("events", "$event_type"),
    ("person", "email"),
    ("person", "$browser"),
]
# Default query window shared by most benchmarks (weekly interval).
DATE_RANGE = dict(date_from="2021-01-01", date_to="2021-10-01", interval="week")
# Same end date and interval as DATE_RANGE, but starting on 2021-07-01.
SHORT_DATE_RANGE = {**DATE_RANGE, "date_from": "2021-07-01"}
class QuerySuite:
    """Benchmark suite running trends, funnel-correlation and stickiness queries for one team.

    Every ``track_*`` method builds a single filter and executes the matching
    query class against ``self.team``. Many benchmarks come in pairs: the
    ``*_materialized`` variant runs the query directly, while its sibling runs
    the same query inside ``no_materialized_columns()`` (helper defined outside
    this module; presumably it disables materialized columns for the duration).

    ``setup`` populates ``self.team`` and ``self.cohort`` before the benchmarks run.
    """

    timeout = 3000.0  # Timeout for the whole suite
    version = "v001"  # Version. Incrementing this will invalidate previous results

    team: Team
    cohort: Cohort

    @benchmark_clickhouse
    def track_trends_no_filter(self):
        """Trends for ``$pageview`` over ``DATE_RANGE`` without any property filter."""
        query = {"events": [{"id": "$pageview"}], **DATE_RANGE}
        ClickhouseTrends().run(Filter(data=query), self.team)

    @benchmark_clickhouse
    def track_trends_event_property_filter(self):
        """Trends filtered on the ``$host`` event property, run under ``no_materialized_columns()``.

        Uses ``SHORT_DATE_RANGE``, unlike the ``_materialized`` counterpart.
        """
        excluded_hosts = [
            "localhost:8000",
            "localhost:5000",
            "127.0.0.1:8000",
            "127.0.0.1:3000",
            "localhost:3000",
        ]
        host_property = {"key": "$host", "operator": "is_not", "value": excluded_hosts}
        trends_filter = Filter(
            data={"events": [{"id": "$pageview"}], "properties": [host_property], **SHORT_DATE_RANGE}
        )

        with no_materialized_columns():
            ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_event_property_filter_materialized(self):
        """Trends filtered on the ``$host`` event property over ``DATE_RANGE``."""
        excluded_hosts = [
            "localhost:8000",
            "localhost:5000",
            "127.0.0.1:8000",
            "127.0.0.1:3000",
            "localhost:3000",
        ]
        host_property = {"key": "$host", "operator": "is_not", "value": excluded_hosts}
        trends_filter = Filter(data={"events": [{"id": "$pageview"}], "properties": [host_property], **DATE_RANGE})

        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_person_property_filter(self):
        """Trends filtered on the ``email`` person property, run under ``no_materialized_columns()``."""
        email_property = {"key": "email", "operator": "icontains", "value": ".com", "type": "person"}
        trends_filter = Filter(data={"events": [{"id": "$pageview"}], "properties": [email_property], **DATE_RANGE})

        with no_materialized_columns():
            ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_person_property_filter_materialized(self):
        """Trends filtered on the ``email`` person property."""
        email_property = {"key": "email", "operator": "icontains", "value": ".com", "type": "person"}
        trends_filter = Filter(data={"events": [{"id": "$pageview"}], "properties": [email_property], **DATE_RANGE})

        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_cohort_precalculated(self):
        """Trends filtered by the benchmark cohort after stamping ``last_calculation`` with ``now()``."""
        self.cohort.last_calculation = now()
        self.cohort.save()

        cohort_property = {"key": "id", "value": self.cohort.pk, "type": "cohort"}
        trends_filter = Filter(
            data={"events": [{"id": "$pageview"}], "properties": [cohort_property], **DATE_RANGE},
            team=self.team,
        )

        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_cohort(self):
        """Trends filtered by the benchmark cohort with ``last_calculation`` cleared.

        The query runs under ``no_materialized_columns()``.
        """
        self.cohort.last_calculation = None
        self.cohort.save()

        cohort_property = {"key": "id", "value": self.cohort.pk, "type": "cohort"}
        trends_filter = Filter(
            data={"events": [{"id": "$pageview"}], "properties": [cohort_property], **DATE_RANGE},
            team=self.team,
        )

        with no_materialized_columns():
            ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_cohort_materialized(self):
        """Trends filtered by the benchmark cohort with ``last_calculation`` cleared."""
        self.cohort.last_calculation = None
        self.cohort.save()

        cohort_property = {"key": "id", "value": self.cohort.pk, "type": "cohort"}
        trends_filter = Filter(
            data={"events": [{"id": "$pageview"}], "properties": [cohort_property], **DATE_RANGE},
            team=self.team,
        )

        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_action_current_url_materialized(self):
        """Trends for a freshly created action matching ``$pageview`` events whose URL contains ``docs``."""
        docs_action = Action.objects.create(team=self.team, name="docs view")
        ActionStep.objects.create(action=docs_action, event="$pageview", url="docs", url_matching="contains")

        query = {"actions": [{"id": docs_action.id}], **DATE_RANGE}
        trends_filter = Filter(data=query, team=self.team)

        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_action_current_url(self):
        """Same action-by-URL trends query, run under ``no_materialized_columns()``."""
        docs_action = Action.objects.create(team=self.team, name="docs view")
        ActionStep.objects.create(action=docs_action, event="$pageview", url="docs", url_matching="contains")

        query = {"actions": [{"id": docs_action.id}], **DATE_RANGE}
        trends_filter = Filter(data=query, team=self.team)

        with no_materialized_columns():
            ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_action_with_person_filters_materialized(self):
        """Trends for a freshly created action whose step filters on the ``email`` person property."""
        email_property = {"key": "email", "operator": "icontains", "value": ".com", "type": "person"}
        com_users_action = Action.objects.create(team=self.team, name=".com-users page views")
        ActionStep.objects.create(action=com_users_action, event="$pageview", properties=[email_property])

        query = {"actions": [{"id": com_users_action.id}], **DATE_RANGE}
        trends_filter = Filter(data=query, team=self.team)

        ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_trends_filter_by_action_with_person_filters(self):
        """Same action-with-person-filter trends query, run under ``no_materialized_columns()``."""
        email_property = {"key": "email", "operator": "icontains", "value": ".com", "type": "person"}
        com_users_action = Action.objects.create(team=self.team, name=".com-users page views")
        ActionStep.objects.create(action=com_users_action, event="$pageview", properties=[email_property])

        query = {"actions": [{"id": com_users_action.id}], **DATE_RANGE}
        trends_filter = Filter(data=query, team=self.team)

        with no_materialized_columns():
            ClickhouseTrends().run(trends_filter, self.team)

    @benchmark_clickhouse
    def track_correlations_by_events(self):
        """Funnel correlation over the two-step signup funnel using the default correlation type."""
        funnel_steps = [{"id": "user signed up"}, {"id": "insight analyzed"}]
        correlation_filter = Filter(data={"events": funnel_steps, **SHORT_DATE_RANGE}, team=self.team)

        FunnelCorrelation(correlation_filter, self.team).run()

    @benchmark_clickhouse
    def track_correlations_by_properties_materialized(self):
        """Funnel correlation by the ``$browser`` property (``FunnelCorrelationType.PROPERTIES``)."""
        funnel_steps = [{"id": "user signed up"}, {"id": "insight analyzed"}]
        query = {
            "events": funnel_steps,
            **SHORT_DATE_RANGE,
            "funnel_correlation_type": FunnelCorrelationType.PROPERTIES,
            "funnel_correlation_names": ["$browser"],
        }
        correlation_filter = Filter(data=query, team=self.team)

        FunnelCorrelation(correlation_filter, self.team).run()

    @benchmark_clickhouse
    def track_correlations_by_properties(self):
        """Funnel correlation by the ``$browser`` property, run under ``no_materialized_columns()``."""
        funnel_steps = [{"id": "user signed up"}, {"id": "insight analyzed"}]
        query = {
            "events": funnel_steps,
            **SHORT_DATE_RANGE,
            "funnel_correlation_type": FunnelCorrelationType.PROPERTIES,
            "funnel_correlation_names": ["$browser"],
        }
        correlation_filter = Filter(data=query, team=self.team)

        with no_materialized_columns():
            FunnelCorrelation(correlation_filter, self.team).run()

    @benchmark_clickhouse
    def track_correlations_by_event_properties(self):
        """Funnel correlation by ``$autocapture`` event properties, run under ``no_materialized_columns()``."""
        funnel_steps = [{"id": "user signed up"}, {"id": "insight analyzed"}]
        query = {
            "events": funnel_steps,
            **SHORT_DATE_RANGE,
            "funnel_correlation_type": FunnelCorrelationType.EVENT_WITH_PROPERTIES,
            "funnel_correlation_event_names": ["$autocapture"],
        }
        correlation_filter = Filter(data=query, team=self.team)

        with no_materialized_columns():
            FunnelCorrelation(correlation_filter, self.team).run()

    @benchmark_clickhouse
    def track_correlations_by_event_properties_materialized(self):
        """Funnel correlation by ``$autocapture`` event properties (``EVENT_WITH_PROPERTIES``)."""
        funnel_steps = [{"id": "user signed up"}, {"id": "insight analyzed"}]
        query = {
            "events": funnel_steps,
            **SHORT_DATE_RANGE,
            "funnel_correlation_type": FunnelCorrelationType.EVENT_WITH_PROPERTIES,
            "funnel_correlation_event_names": ["$autocapture"],
        }
        correlation_filter = Filter(data=query, team=self.team)

        FunnelCorrelation(correlation_filter, self.team).run()

    @benchmark_clickhouse
    def track_stickiness(self):
        """Stickiness query for ``$pageview`` over ``DATE_RANGE`` without property filters."""
        query = {
            "insight": "STICKINESS",
            "events": [{"id": "$pageview"}],
            "shown_as": "Stickiness",
            "display": "ActionsLineGraph",
            **DATE_RANGE,
        }
        stickiness_filter = StickinessFilter(data=query, team=self.team)

        ClickhouseStickiness().run(stickiness_filter, self.team)

    @benchmark_clickhouse
    def track_stickiness_filter_by_person_property(self):
        """Stickiness filtered on the ``email`` person property, run under ``no_materialized_columns()``."""
        email_property = {"key": "email", "operator": "icontains", "value": ".com", "type": "person"}
        query = {
            "insight": "STICKINESS",
            "events": [{"id": "$pageview"}],
            "shown_as": "Stickiness",
            "display": "ActionsLineGraph",
            "properties": [email_property],
            **DATE_RANGE,
        }
        stickiness_filter = StickinessFilter(data=query, team=self.team)

        with no_materialized_columns():
            ClickhouseStickiness().run(stickiness_filter, self.team)

    @benchmark_clickhouse
    def track_stickiness_filter_by_person_property_materialized(self):
        """Stickiness filtered on the ``email`` person property."""
        email_property = {"key": "email", "operator": "icontains", "value": ".com", "type": "person"}
        query = {
            "insight": "STICKINESS",
            "events": [{"id": "$pageview"}],
            "shown_as": "Stickiness",
            "display": "ActionsLineGraph",
            "properties": [email_property],
            **DATE_RANGE,
        }
        stickiness_filter = StickinessFilter(data=query, team=self.team)

        ClickhouseStickiness().run(stickiness_filter, self.team)

    def setup(self):
        """Ensure materialized columns, the benchmark team and the benchmark cohort exist.

        Sets ``self.team`` and ``self.cohort`` for the ``track_*`` methods.
        """
        # NOTE(review): block nesting below was reconstructed from a copy that had lost its
        # indentation; confirm that the backfill and the cohort calculation are meant to run
        # only when the column / cohort has just been created.
        for table, property_name in MATERIALIZED_PROPERTIES:
            if property_name in get_materialized_columns(table):
                continue
            materialize(table, property_name)
            backfill_materialized_columns(table, [property_name], backfill_period=timedelta(days=1_000))

        # :TRICKY: Data in benchmark servers has ID=2
        benchmark_team = Team.objects.filter(id=2).first()
        if benchmark_team is None:
            new_organization = Organization.objects.create()
            benchmark_team = Team.objects.create(id=2, organization=new_organization, name="The Bakery")
        self.team = benchmark_team

        benchmark_cohort = Cohort.objects.filter(name="benchmarking cohort").first()
        if benchmark_cohort is None:
            email_property = {"key": "email", "operator": "icontains", "value": ".com", "type": "person"}
            benchmark_cohort = Cohort.objects.create(
                team_id=2,
                name="benchmarking cohort",
                groups=[{"properties": [email_property]}],
            )
            benchmark_cohort.calculate_people_ch()
        self.cohort = benchmark_cohort