mirror of
https://github.com/PostHog/posthog.git
synced 2024-11-25 11:17:50 +01:00
555 lines
19 KiB
Python
555 lines
19 KiB
Python
import pytest
|
|
from freezegun import freeze_time
|
|
|
|
from posthog.models.cohort import Cohort
|
|
from posthog.models.entity import Entity
|
|
from posthog.models.filters import Filter
|
|
from posthog.models.group.util import create_group
|
|
from posthog.models.group_type_mapping import GroupTypeMapping
|
|
from posthog.queries.breakdown_props import (
|
|
_to_bucketing_expression,
|
|
get_breakdown_prop_values,
|
|
)
|
|
from posthog.queries.trends.util import process_math
|
|
from posthog.test.base import (
|
|
APIBaseTest,
|
|
ClickhouseTestMixin,
|
|
_create_event,
|
|
_create_person,
|
|
also_test_with_materialized_columns,
|
|
snapshot_clickhouse_queries,
|
|
)
|
|
|
|
|
|
class TestBreakdownProps(ClickhouseTestMixin, APIBaseTest):
    """Tests for ``get_breakdown_prop_values`` with person, group and session breakdowns."""

    def _create_pageview(self, distinct_id, timestamp, properties):
        """Create a single ``$pageview`` event for this test's team."""
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id=distinct_id,
            timestamp=timestamp,
            properties=properties,
        )

    @staticmethod
    def _funnel_filter_data(events, properties, filter_test_accounts):
        """Build the funnel filter payload shared by the person-breakdown tests.

        Only ``events``, ``properties`` and ``filter_test_accounts`` vary between
        those tests; every other key is the same fixed value.
        """
        return {
            "insight": "FUNNELS",
            "properties": properties,
            "filter_test_accounts": filter_test_accounts,
            "events": events,
            "actions": [],
            "funnel_viz_type": "steps",
            "display": "FunnelViz",
            "interval": "day",
            "breakdown": "$browser",
            "breakdown_type": "person",
            "breakdown_limit": 5,
            "date_from": "-14d",
            "funnel_window_days": 14,
        }

    @also_test_with_materialized_columns(
        event_properties=["$host", "distinct_id"],
        person_properties=["$browser", "email"],
    )
    @snapshot_clickhouse_queries
    def test_breakdown_person_props(self):
        """Person breakdown with test-account filtering enabled yields the single browser value."""
        _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"$browser": "test"})
        self._create_pageview("p1", "2020-01-02T12:00:00Z", {"key": "val"})

        excluded_hosts = [
            "127.0.0.1:3000",
            "127.0.0.1:5000",
            "localhost:5000",
            "localhost:8000",
        ]
        self.team.test_account_filters = [
            {"key": "email", "type": "person", "value": "posthog.com", "operator": "not_icontains"},
            {"key": "$host", "type": "event", "value": excluded_hosts, "operator": "is_not"},
            {"key": "distinct_id", "type": "event", "value": "posthog.com", "operator": "not_icontains"},
        ]
        self.team.save()

        pageview_step = {
            "id": "$pageview",
            "name": "$pageview",
            "type": "events",
            "order": 0,
        }
        filter_data = self._funnel_filter_data(
            events=[pageview_step],
            properties=[],
            filter_test_accounts=True,
        )

        # The relative "-14d" date range is resolved against the frozen clock.
        with freeze_time("2020-01-04T13:01:01Z"):
            breakdown_filter = Filter(data=filter_data)
            values = get_breakdown_prop_values(
                breakdown_filter,
                Entity({"id": "$pageview", "type": "events"}),
                "count(*)",
                self.team,
            )
            self.assertEqual(values[0], ["test"])

    def test_breakdown_person_props_with_entity_filter(self):
        """A cohort property on the entity restricts the breakdown values to cohort members."""
        for distinct_id, browser in (("p1", "test"), ("p2", "test2")):
            _create_person(team_id=self.team.pk, distinct_ids=[distinct_id], properties={"$browser": browser})
            self._create_pageview(distinct_id, "2020-01-02T12:00:00Z", {"key": "val"})

        cohort = Cohort.objects.create(
            team=self.team,
            name="a",
            groups=[{"properties": [{"key": "$browser", "value": "test", "type": "person"}]}],
        )
        cohort.calculate_people_ch(pending_version=0)

        entity_params = [
            {
                "id": "$pageview",
                "name": "$pageview",
                "type": "events",
                "order": 0,
                "properties": [{"key": "id", "value": cohort.pk, "type": "cohort"}],
            }
        ]
        filter_data = self._funnel_filter_data(
            events=entity_params,
            properties=[],
            filter_test_accounts=False,
        )

        with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True), freeze_time("2020-01-04T13:01:01Z"):
            breakdown_filter = Filter(data=filter_data)
            values = get_breakdown_prop_values(breakdown_filter, Entity(entity_params[0]), "count(*)", self.team)
            self.assertEqual(values[0], ["test"])

    @snapshot_clickhouse_queries
    def test_breakdown_person_props_with_entity_filter_and_or_props_with_partial_pushdown(self):
        """An entity-level person filter combined with an OR group mixing person and event properties."""
        fixtures = (
            # (distinct_id, value used for both $browser and $os, event "key" value)
            ("p1", "test", "val"),
            ("p2", "test2", "val2"),
            ("p3", "test3", "val3"),
        )
        for distinct_id, person_value, event_value in fixtures:
            _create_person(
                team_id=self.team.pk,
                distinct_ids=[distinct_id],
                properties={"$browser": person_value, "$os": person_value},
            )
            self._create_pageview(distinct_id, "2020-01-02T12:00:00Z", {"key": event_value})

        browser_contains_test = {
            "key": "$browser",
            "type": "person",
            "value": "test",
            "operator": "icontains",
        }
        entity_params = [
            {
                "id": "$pageview",
                "name": "$pageview",
                "type": "events",
                "order": 0,
                "properties": [browser_contains_test],
            }
        ]
        os_or_event_key = {
            "type": "OR",
            "values": [
                {"key": "$os", "type": "person", "value": "test2", "operator": "exact"},
                {"key": "key", "type": "event", "value": "val", "operator": "exact"},
            ],
        }
        filter_data = self._funnel_filter_data(
            events=entity_params,
            properties=os_or_event_key,
            filter_test_accounts=False,
        )

        with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True), freeze_time("2020-01-04T13:01:01Z"):
            breakdown_filter = Filter(data=filter_data)
            values = get_breakdown_prop_values(breakdown_filter, Entity(entity_params[0]), "count(*)", self.team)
            # Only the set of values is asserted, so compare in sorted order.
            self.assertEqual(sorted(values[0]), ["test", "test2"])

    @snapshot_clickhouse_queries
    def test_breakdown_group_props(self):
        """Group breakdown on ``industry`` with an ``out is_not_set`` group filter, in both property syntaxes."""
        for type_index, group_type in enumerate(("organization", "company")):
            GroupTypeMapping.objects.create(
                team=self.team,
                project_id=self.team.project_id,
                group_type=group_type,
                group_type_index=type_index,
            )

        group_fixtures = (
            (0, "org:5", {"industry": "finance"}),
            (0, "org:6", {"industry": "technology"}),
            (0, "org:7", {"industry": "finance"}),
            (0, "org:8", {"industry": "another", "out": 1}),
            (1, "company:10", {"industry": "foobar"}),
            # :TRICKY: Test group type overlapping
            (1, "org:8", {"industry": "foobar"}),
        )
        for type_index, group_key, group_properties in group_fixtures:
            create_group(
                team_id=self.team.pk,
                group_type_index=type_index,
                group_key=group_key,
                properties=group_properties,
            )

        for org_index in range(5, 9):
            self._create_pageview("person1", "2020-01-02T12:00:00Z", {"$group_0": f"org:{org_index}"})

        def out_is_not_set():
            # Fresh dict per call so the two filters never share mutable state.
            return {
                "key": "out",
                "value": "",
                "type": "group",
                "group_type_index": 0,
                "operator": "is_not_set",
            }

        def industry_breakdown_data(properties):
            return {
                "date_from": "2020-01-01T00:00:00Z",
                "date_to": "2020-01-12T00:00:00Z",
                "breakdown": "industry",
                "breakdown_type": "group",
                "breakdown_group_type_index": 0,
                "breakdown_limit": 5,
                "events": [{"id": "$pageview", "type": "events", "order": 0}],
                "properties": properties,
            }

        # Flat property list, filter built with an explicit team.
        list_filter = Filter(data=industry_breakdown_data([out_is_not_set()]), team=self.team)
        list_result = get_breakdown_prop_values(list_filter, list_filter.entities[0], "count(*)", self.team)
        self.assertEqual(list_result[0], ["finance", "technology"])

        # Same condition expressed as an AND property group, filter built without a team.
        grouped_filter = Filter(data=industry_breakdown_data({"type": "AND", "values": [out_is_not_set()]}))
        grouped_result = get_breakdown_prop_values(grouped_filter, grouped_filter.entities[0], "count(*)", self.team)
        self.assertEqual(grouped_result[0], ["finance", "technology"])

    @snapshot_clickhouse_queries
    def test_breakdown_session_props(self):
        """Breaking down by ``$session_duration`` returns the duration of each session."""
        _create_person(
            team_id=self.team.pk,
            distinct_ids=["p1"],
            properties={"$browser": "test", "$os": "test"},
        )

        session_events = (
            # 20 second session that starts before the time range
            ("2020-01-01T23:59:50Z", "1"),
            ("2020-01-02T00:00:10Z", "1"),
            # 70 second session
            ("2020-01-02T12:00:00Z", "2"),
            ("2020-01-02T12:01:10Z", "2"),
        )
        for timestamp, session_id in session_events:
            self._create_pageview("p1", timestamp, {"$session_id": session_id})

        session_filter = Filter(
            data={
                "date_from": "2020-01-02T00:00:00Z",
                "date_to": "2020-01-12T00:00:00Z",
                "breakdown": "$session_duration",
                "breakdown_type": "session",
                "events": [{"id": "$pageview", "type": "events", "order": 0}],
            }
        )
        durations = get_breakdown_prop_values(session_filter, session_filter.entities[0], "count(*)", self.team)
        self.assertEqual(durations[0], [70, 20])

    @snapshot_clickhouse_queries
    def test_breakdown_with_math_property_session(self):
        """Breakdown values are ordered by the aggregate operation that is passed in."""
        for distinct_id, browser in (("p1", "test"), ("p2", "mac")):
            _create_person(
                team_id=self.team.pk,
                distinct_ids=[distinct_id],
                properties={"$browser": browser, "$os": "test"},
            )

        session_events = (
            # 20 second session that starts before the time range
            ("p1", "2020-01-01T23:59:50Z", "1"),
            ("p1", "2020-01-02T00:00:10Z", "1"),
            # 70 second session
            ("p1", "2020-01-02T12:00:00Z", "2"),
            ("p1", "2020-01-02T12:01:10Z", "2"),
            # 10 second session for second person with different browser, but more absolute
            # events than first person
            ("p2", "2020-01-02T12:00:00Z", "3"),
            ("p2", "2020-01-02T12:00:01Z", "3"),
            ("p2", "2020-01-02T12:00:02Z", "3"),
            ("p2", "2020-01-02T12:00:03Z", "3"),
            ("p2", "2020-01-02T12:00:04Z", "3"),
            ("p2", "2020-01-02T12:00:10Z", "3"),
        )
        for distinct_id, timestamp, session_id in session_events:
            self._create_pageview(distinct_id, timestamp, {"$session_id": session_id})

        math_filter = Filter(
            data={
                "date_from": "2020-01-02T00:00:00Z",
                "date_to": "2020-01-12T00:00:00Z",
                "breakdown": "$browser",
                "breakdown_type": "person",
                "events": [
                    {
                        "id": "$pageview",
                        "type": "events",
                        "order": 0,
                        "math": "sum",
                        "math_property": "$session_duration",
                    }
                ],
            }
        )
        aggregate_operation, _, _ = process_math(math_filter.entities[0], self.team, filter=math_filter)

        by_aggregate = get_breakdown_prop_values(math_filter, math_filter.entities[0], aggregate_operation, self.team)
        # test should come first, based on aggregate operation, even if absolute count of events for
        # mac is higher
        self.assertEqual(by_aggregate[0], ["test", "mac"])

        by_count = get_breakdown_prop_values(math_filter, math_filter.entities[0], "count(*)", self.team)
        self.assertEqual(by_count[0], ["mac", "test"])
|
|
|
|
|
|
# (input passed to _to_bucketing_expression, exact expression it must return)
_BUCKETING_CASES = [
    (0, "arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0,1)(value)))"),
    (1, "arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0,1)(value)))"),
    (2, "arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0.00,0.50,1.00)(value)))"),
    (3, "arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0.00,0.33,0.67,1.00)(value)))"),
    (5, "arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0.00,0.20,0.40,0.60,0.80,1.00)(value)))"),
    (
        7,
        "arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0.00,0.14,0.29,0.43,0.57,0.71,0.86,1.00)(value)))",
    ),
    (
        10,
        "arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0.00,0.10,0.20,0.30,0.40,0.50,0.60,0.70,0.80,0.90,1.00)(value)))",
    ),
]


@pytest.mark.parametrize(("test_input", "expected"), _BUCKETING_CASES)
def test_bucketing_expression(test_input, expected):
    """``_to_bucketing_expression`` returns the exact quantiles expression for each integer input."""
    assert _to_bucketing_expression(test_input) == expected
|