0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-12-01 12:21:02 +01:00

Add Support for Excluding Events from Funnels (#5104)

* finish for ordered

* clean up, more tests, docs

* ensure correct filter values get saved to cache
This commit is contained in:
Neil Kakkar 2021-07-15 13:42:40 +01:00 committed by GitHub
parent 861059b38c
commit 8a1bd1c80e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 515 additions and 24 deletions

View File

@ -13,12 +13,9 @@ from ee.clickhouse.queries.breakdown_props import (
get_breakdown_person_prop_values,
)
from ee.clickhouse.queries.funnels.funnel_event_query import FunnelEventQuery
from ee.clickhouse.queries.util import parse_timestamps
from ee.clickhouse.sql.funnels.funnel import FUNNEL_INNER_EVENT_STEPS_QUERY
from ee.clickhouse.sql.person import GET_LATEST_PERSON_DISTINCT_ID_SQL
from posthog.constants import FUNNEL_WINDOW_DAYS, TREND_FILTER_TYPE_ACTIONS
from posthog.models import Action, Entity, Filter, Team
from posthog.models.filters.mixins.funnel import FunnelWindowDaysMixin
from posthog.models import Entity, Filter, Team
from posthog.queries.funnel import Funnel
from posthog.utils import relative_date_parse
@ -94,6 +91,21 @@ class ClickhouseFunnelBase(ABC, Funnel):
if self._filter.breakdown and not self._filter.breakdown_type:
data.update({"breakdown_type": "event"})
for exclusion in self._filter.exclusions:
if exclusion.funnel_from_step is None or exclusion.funnel_to_step is None:
raise ValidationError("Exclusion event needs to define funnel steps")
if exclusion.funnel_from_step >= exclusion.funnel_to_step:
raise ValidationError("Exclusion event range is invalid. End of range should be greater than start.")
if exclusion.funnel_from_step >= len(self._filter.entities) - 1:
raise ValidationError(
"Exclusion event range is invalid. Start of range is greater than number of steps."
)
if exclusion.funnel_to_step > len(self._filter.entities) - 1:
raise ValidationError("Exclusion event range is invalid. End of range is greater than number of steps.")
self._filter = self._filter.with_data(data)
query = self.get_query()
@ -116,6 +128,9 @@ class ClickhouseFunnelBase(ABC, Funnel):
cols.append(f"step_{i}")
if i < level_index:
cols.append(f"latest_{i}")
for exclusion in self._filter.exclusions:
if exclusion.funnel_from_step + 1 == i:
cols.append(f"exclusion_latest_{exclusion.funnel_from_step}")
else:
duplicate_event = 0
if i > 0 and self._filter.entities[i].equals(self._filter.entities[i - 1]):
@ -123,15 +138,33 @@ class ClickhouseFunnelBase(ABC, Funnel):
cols.append(
f"min(latest_{i}) over (PARTITION by person_id {self._get_breakdown_prop()} ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND {duplicate_event} PRECEDING) latest_{i}"
)
for exclusion in self._filter.exclusions:
# exclusion starting at step i follows semantics of step i+1 in the query (since we're looking for exclusions after step i)
if exclusion.funnel_from_step + 1 == i:
cols.append(
f"min(exclusion_latest_{exclusion.funnel_from_step}) over (PARTITION by person_id {self._get_breakdown_prop()} ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) exclusion_latest_{exclusion.funnel_from_step}"
)
return ", ".join(cols)
def _get_comparison_at_step(self, index: int, level_index: int):
or_statements: List[str] = []
def _get_exclusion_condition(self):
if not self._filter.exclusions:
return ""
for i in range(level_index, index + 1):
or_statements.append(f"latest_{i} < latest_{level_index - 1}")
conditions = []
for exclusion in self._filter.exclusions:
from_time = f"latest_{exclusion.funnel_from_step}"
to_time = f"latest_{exclusion.funnel_to_step}"
exclusion_time = f"exclusion_latest_{exclusion.funnel_from_step}"
condition = (
f"if( {exclusion_time} > {from_time} AND {exclusion_time} < "
f"if(isNull({to_time}), {from_time} + INTERVAL {self._filter.funnel_window_days} DAY, {to_time}), 1, 0)"
)
conditions.append(condition)
return " OR ".join(or_statements)
if conditions:
return f", arraySum([{','.join(conditions)}]) as exclusion"
else:
return ""
def _get_sorting_condition(self, curr_index: int, max_steps: int):
@ -159,13 +192,19 @@ class ClickhouseFunnelBase(ABC, Funnel):
if skip_step_filter:
steps_conditions = "1=1"
else:
steps_conditions = self._get_steps_conditions(length=len(self._filter.entities))
steps_conditions = self._get_steps_conditions(length=len(entities_to_use))
all_step_cols: List[str] = []
for index, entity in enumerate(entities_to_use):
step_cols = self._get_step_col(entity, index, entity_name)
all_step_cols.extend(step_cols)
for entity in self._filter.exclusions:
step_cols = self._get_step_col(entity, entity.funnel_from_step, entity_name, "exclusion_")
# every exclusion entity has the form: exclusion_step_i & timestamp exclusion_latest_i
# where i is the starting step for exclusion on that entity
all_step_cols.extend(step_cols)
steps = ", ".join(all_step_cols)
select_prop = self._get_breakdown_select_prop()
@ -196,23 +235,28 @@ class ClickhouseFunnelBase(ABC, Funnel):
for index in range(length):
step_conditions.append(f"step_{index} = 1")
for entity in self._filter.exclusions:
step_conditions.append(f"exclusion_step_{entity.funnel_from_step} = 1")
return " OR ".join(step_conditions)
def _get_step_col(self, entity: Entity, index: int, entity_name: str) -> List[str]:
def _get_step_col(self, entity: Entity, index: int, entity_name: str, step_prefix: str = "") -> List[str]:
# step prefix is used to distinguish actual steps, and exclusion steps
# without the prefix, we get the same parameter binding for both, which borks things up
step_cols: List[str] = []
condition = self._build_step_query(entity, index, entity_name)
step_cols.append(f"if({condition}, 1, 0) as step_{index}")
step_cols.append(f"if(step_{index} = 1, timestamp, null) as latest_{index}")
condition = self._build_step_query(entity, index, entity_name, step_prefix)
step_cols.append(f"if({condition}, 1, 0) as {step_prefix}step_{index}")
step_cols.append(f"if({step_prefix}step_{index} = 1, timestamp, null) as {step_prefix}latest_{index}")
return step_cols
def _build_step_query(self, entity: Entity, index: int, entity_name: str) -> str:
def _build_step_query(self, entity: Entity, index: int, entity_name: str, step_prefix: str) -> str:
filters = self._build_filters(entity, index)
if entity.type == TREND_FILTER_TYPE_ACTIONS:
action = entity.get_action()
for action_step in action.steps.all():
self.params[entity_name].append(action_step.event)
action_query, action_params = format_action_filter(action, "{}_step_{}".format(entity_name, index))
action_query, action_params = format_action_filter(action, f"{entity_name}_{step_prefix}step_{index}")
if action_query == "":
return ""
@ -220,7 +264,7 @@ class ClickhouseFunnelBase(ABC, Funnel):
content_sql = "{actions_query} {filters}".format(actions_query=action_query, filters=filters,)
else:
self.params[entity_name].append(entity.id)
event_param_key = f"{entity_name}_event_{index}"
event_param_key = f"{entity_name}_{step_prefix}event_{index}"
self.params[event_param_key] = entity.id
content_sql = f"event = %({event_param_key})s {filters}"
return content_sql

View File

@ -5,6 +5,28 @@ from posthog.models.cohort import Cohort
class ClickhouseFunnel(ClickhouseFunnelBase):
"""
A basic ordered funnel.
## Query Intuition
We start with all events of interest (coming from the `FunnelEventQuery`). The query runs in different levels: at each
level, we first get the minimum timestamp of every event following the previous event. Then, we trickle up the levels, till we get to the top level,
which implies all events are sorted in increasing order.
Each level is a subquery.
## Exclusion Intuition
Event exclusion between steps means that if this specific event happened between two funnel steps, we disqualify the user, not showing them in the results.
To include event exclusions inside the funnel, the critical insight is that the exclusion is just like a parallel step to the funnel step that happens after
the exclusion start step.
For example, if we have a funnel with steps [1, 2, 3, 4] and we want to exclude events between step 2 and step 4, then the exclusion step semantics are just
like step 3 semantics. We want to find this event after step 2.
Since it's a parallel step, we don't need to add an extra level, we can reuse the existing levels.
See `get_comparison_cols` and `_get_partition_cols` for how this works.
Exclusion doesn't support duplicates like: steps [event 1, event 2], and excluding event 1 between steps 1 and 2.
"""
def get_query(self):
max_steps = len(self._filter.entities)
@ -82,21 +104,47 @@ class ClickhouseFunnel(ClickhouseFunnelBase):
else:
formatted_query = self._get_inner_event_query()
exclusion_clause = self._get_exclusion_condition()
return f"""
SELECT *, {self._get_sorting_condition(max_steps, max_steps)} AS steps {self._get_step_times(max_steps)} {self._get_breakdown_prop()} FROM (
SELECT *, {self._get_sorting_condition(max_steps, max_steps)} AS steps {exclusion_clause} {self._get_step_times(max_steps)} {self._get_breakdown_prop()} FROM (
{formatted_query}
) WHERE step_0 = 1
{'AND exclusion = 0' if exclusion_clause else ''}
SETTINGS allow_experimental_window_functions = 1
"""
def _get_comparison_at_step(self, index: int, level_index: int):
or_statements: List[str] = []
for i in range(level_index, index + 1):
or_statements.append(f"latest_{i} < latest_{level_index - 1}")
return " OR ".join(or_statements)
def get_comparison_cols(self, level_index: int, max_steps: int):
    """
    Build the comma-separated column list selected at one level of the funnel query.

    level_index: The current smallest comparison step. Everything before
    level index is already at the minimum ordered timestamps, so those
    timestamp columns are passed through unchanged. From level_index onwards
    each timestamp is wrapped in an ``if(...)`` that nulls it out when the
    comparison from ``_get_comparison_at_step`` holds.
    max_steps: Number of funnel steps to emit columns for.
    """
    columns: List[str] = []
    for step in range(max_steps):
        columns.append(f"step_{step}")

        already_ordered = step < level_index
        if already_ordered:
            columns.append(f"latest_{step}")
        else:
            comparison = self._get_comparison_at_step(step, level_index)
            columns.append(f"if({comparison}, NULL, latest_{step}) as latest_{step}")

        # An exclusion starting at step k is carried alongside step k + 1.
        for exclusion in self._filter.exclusions:
            start = exclusion.funnel_from_step
            if start + 1 != step:
                continue
            if already_ordered:
                columns.append(f"exclusion_latest_{start}")
            else:
                # Drop exclusion timestamps that precede the exclusion's start step.
                columns.append(
                    f"if(exclusion_latest_{start} < latest_{start}, NULL, exclusion_latest_{start}) as exclusion_latest_{start}"
                )

    return ", ".join(columns)
def build_step_subquery(self, level_index: int, max_steps: int):

View File

@ -1,6 +1,8 @@
import operator
from uuid import uuid4
from rest_framework.exceptions import ValidationError
from ee.clickhouse.client import sync_execute
from ee.clickhouse.models.event import create_event
from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnel
from ee.clickhouse.queries.funnels.funnel_persons import ClickhouseFunnelPersons
@ -125,6 +127,192 @@ class TestFunnel(ClickhouseTestMixin, funnel_test_factory(ClickhouseFunnel, _cre
self._get_people_at_step(filter, 2), [person1_stopped_after_two_signups.uuid],
)
def test_funnel_exclusions_full_window(self):
# Two-step funnel ("user signed up" -> "paid") with event "x" excluded
# between step 0 and step 1.
filters = {
"events": [
{"id": "user signed up", "type": "events", "order": 0},
{"id": "paid", "type": "events", "order": 1},
],
"insight": INSIGHT_FUNNELS,
"funnel_window_days": 14,
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-14 00:00:00",
"exclusions": [{"id": "x", "type": "events", "funnel_from_step": 0, "funnel_to_step": 1},],
}
filter = Filter(data=filters)
funnel = ClickhouseFunnel(filter, self.team)
# person 1: completes both steps without the excluded event
person1 = _create_person(distinct_ids=["person1"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person1", timestamp="2021-05-01 01:00:00")
_create_event(team=self.team, event="paid", distinct_id="person1", timestamp="2021-05-01 02:00:00")
# person 2: emits "x" between the two steps, so is expected to be dropped
person2 = _create_person(distinct_ids=["person2"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person2", timestamp="2021-05-01 03:00:00")
_create_event(team=self.team, event="x", distinct_id="person2", timestamp="2021-05-01 03:30:00")
_create_event(team=self.team, event="paid", distinct_id="person2", timestamp="2021-05-01 04:00:00")
# person 3: completes both steps without the excluded event
person3 = _create_person(distinct_ids=["person3"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person3", timestamp="2021-05-01 05:00:00")
_create_event(team=self.team, event="paid", distinct_id="person3", timestamp="2021-05-01 06:00:00")
result = funnel.run()
# Only persons 1 and 3 should be counted, at both steps.
self.assertEqual(len(result), 2)
self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 2)
self.assertEqual(len(result[0]["people"]), 2)
self.assertEqual(result[1]["name"], "paid")
self.assertEqual(result[1]["count"], 2)
self.assertEqual(len(result[1]["people"]), 2)
self.assertCountEqual(
self._get_people_at_step(filter, 1), [person1.uuid, person3.uuid],
)
self.assertCountEqual(
self._get_people_at_step(filter, 2), [person1.uuid, person3.uuid],
)
def test_advanced_funnel_exclusions_between_steps(self):
# Five-step funnel; the same events are re-queried with the exclusion of "x"
# moved to a different step range each time.
filters = {
"events": [
{"id": "user signed up", "type": "events", "order": 0},
{"id": "$pageview", "type": "events", "order": 1},
{"id": "insight viewed", "type": "events", "order": 2},
{"id": "invite teammate", "type": "events", "order": 3},
{"id": "pageview2", "type": "events", "order": 4},
],
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-14 00:00:00",
"insight": INSIGHT_FUNNELS,
"exclusions": [{"id": "x", "type": "events", "funnel_from_step": 0, "funnel_to_step": 1},],
}
person1 = _create_person(distinct_ids=["person1"], team_id=self.team.pk)
# person1 has "x" after steps 1, 2 and 3, so is discarded when
# funnel_from_step is 1, 2 or 3
_create_event(team=self.team, event="user signed up", distinct_id="person1", timestamp="2021-05-01 01:00:00")
_create_event(team=self.team, event="$pageview", distinct_id="person1", timestamp="2021-05-01 02:00:00")
_create_event(team=self.team, event="x", distinct_id="person1", timestamp="2021-05-01 03:00:00")
_create_event(team=self.team, event="insight viewed", distinct_id="person1", timestamp="2021-05-01 04:00:00")
_create_event(team=self.team, event="x", distinct_id="person1", timestamp="2021-05-01 04:30:00")
_create_event(team=self.team, event="invite teammate", distinct_id="person1", timestamp="2021-05-01 05:00:00")
_create_event(team=self.team, event="x", distinct_id="person1", timestamp="2021-05-01 05:30:00")
_create_event(team=self.team, event="pageview2", distinct_id="person1", timestamp="2021-05-01 06:00:00")
person2 = _create_person(distinct_ids=["person2"], team_id=self.team.pk)
# person2 has "x" after steps 2 and 3, so is discarded when
# funnel_from_step is 2 or 3
_create_event(team=self.team, event="user signed up", distinct_id="person2", timestamp="2021-05-01 01:00:00")
_create_event(team=self.team, event="$pageview", distinct_id="person2", timestamp="2021-05-01 02:00:00")
_create_event(team=self.team, event="insight viewed", distinct_id="person2", timestamp="2021-05-01 04:00:00")
_create_event(team=self.team, event="x", distinct_id="person2", timestamp="2021-05-01 04:30:00")
_create_event(team=self.team, event="invite teammate", distinct_id="person2", timestamp="2021-05-01 05:00:00")
_create_event(team=self.team, event="x", distinct_id="person2", timestamp="2021-05-01 05:30:00")
_create_event(team=self.team, event="pageview2", distinct_id="person2", timestamp="2021-05-01 06:00:00")
person3 = _create_person(distinct_ids=["person3"], team_id=self.team.pk)
# person3 has "x" after steps 0 and 3, so is discarded when
# funnel_from_step is 0 or 3
_create_event(team=self.team, event="user signed up", distinct_id="person3", timestamp="2021-05-01 01:00:00")
_create_event(team=self.team, event="x", distinct_id="person3", timestamp="2021-05-01 01:30:00")
_create_event(team=self.team, event="$pageview", distinct_id="person3", timestamp="2021-05-01 02:00:00")
_create_event(team=self.team, event="insight viewed", distinct_id="person3", timestamp="2021-05-01 04:00:00")
_create_event(team=self.team, event="invite teammate", distinct_id="person3", timestamp="2021-05-01 05:00:00")
_create_event(team=self.team, event="x", distinct_id="person3", timestamp="2021-05-01 05:30:00")
_create_event(team=self.team, event="pageview2", distinct_id="person3", timestamp="2021-05-01 06:00:00")
# exclusion between steps 0 and 1: only person3 is dropped
filter = Filter(data=filters)
funnel = ClickhouseFunnel(filter, self.team)
result = funnel.run()
self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 2)
self.assertEqual(len(result[0]["people"]), 2)
self.assertEqual(result[4]["count"], 2)
self.assertEqual(len(result[4]["people"]), 2)
self.assertCountEqual(
self._get_people_at_step(filter, 1), [person1.uuid, person2.uuid,],
)
# exclusion between steps 1 and 2: only person1 is dropped
filter = filter.with_data(
{"exclusions": [{"id": "x", "type": "events", "funnel_from_step": 1, "funnel_to_step": 2}]}
)
funnel = ClickhouseFunnel(filter, self.team)
result = funnel.run()
self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 2)
self.assertEqual(len(result[0]["people"]), 2)
self.assertEqual(result[4]["count"], 2)
self.assertEqual(len(result[4]["people"]), 2)
self.assertCountEqual(
self._get_people_at_step(filter, 1), [person2.uuid, person3.uuid,],
)
# exclusion between steps 2 and 3: person1 and person2 are dropped
filter = filter.with_data(
{"exclusions": [{"id": "x", "type": "events", "funnel_from_step": 2, "funnel_to_step": 3}]}
)
funnel = ClickhouseFunnel(filter, self.team)
result = funnel.run()
self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 1)
self.assertEqual(len(result[0]["people"]), 1)
self.assertEqual(result[4]["count"], 1)
self.assertEqual(len(result[4]["people"]), 1)
self.assertCountEqual(
self._get_people_at_step(filter, 1), [person3.uuid,],
)
# exclusion between steps 3 and 4: everyone is dropped
filter = filter.with_data(
{"exclusions": [{"id": "x", "type": "events", "funnel_from_step": 3, "funnel_to_step": 4}]}
)
funnel = ClickhouseFunnel(filter, self.team)
result = funnel.run()
self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 0)
self.assertEqual(len(result[0]["people"]), 0)
self.assertEqual(result[4]["count"], 0)
self.assertEqual(len(result[4]["people"]), 0)
self.assertCountEqual(
self._get_people_at_step(filter, 1), [],
)
# bigger step window: exclusion spans steps 1 through 3, so person1 and
# person2 are dropped
filter = filter.with_data(
{"exclusions": [{"id": "x", "type": "events", "funnel_from_step": 1, "funnel_to_step": 3}]}
)
funnel = ClickhouseFunnel(filter, self.team)
result = funnel.run()
self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 1)
self.assertEqual(len(result[0]["people"]), 1)
self.assertEqual(result[4]["count"], 1)
self.assertEqual(len(result[4]["people"]), 1)
self.assertCountEqual(
self._get_people_at_step(filter, 1), [person3.uuid],
)
def test_advanced_funnel_with_repeat_steps(self):
filters = {
"events": [
@ -746,6 +934,153 @@ class TestFunnel(ClickhouseTestMixin, funnel_test_factory(ClickhouseFunnel, _cre
self.assertEqual(result[1]["average_conversion_time"], 6000)
self.assertEqual(result[2]["average_conversion_time"], 5400)
def test_funnel_exclusions_invalid_params(self):
# Each exclusion range below is invalid for a two-step funnel (steps 0 and 1),
# so running the funnel must raise ValidationError.
filters = {
"events": [
{"id": "user signed up", "type": "events", "order": 0},
{"id": "paid", "type": "events", "order": 1},
],
"insight": INSIGHT_FUNNELS,
"funnel_window_days": 14,
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-14 00:00:00",
"exclusions": [{"id": "x", "type": "events", "funnel_from_step": 1, "funnel_to_step": 1},],
}
# start equals end (1 -> 1)
filter = Filter(data=filters)
funnel = ClickhouseFunnel(filter, self.team)
self.assertRaises(ValidationError, funnel.run)
# start is the last step and end is past the last step (1 -> 2)
filter = filter.with_data(
{"exclusions": [{"id": "x", "type": "events", "funnel_from_step": 1, "funnel_to_step": 2}]}
)
funnel = ClickhouseFunnel(filter, self.team)
self.assertRaises(ValidationError, funnel.run)
# start is after end (2 -> 1)
filter = filter.with_data(
{"exclusions": [{"id": "x", "type": "events", "funnel_from_step": 2, "funnel_to_step": 1}]}
)
funnel = ClickhouseFunnel(filter, self.team)
self.assertRaises(ValidationError, funnel.run)
# end is past the last step (0 -> 2)
filter = filter.with_data(
{"exclusions": [{"id": "x", "type": "events", "funnel_from_step": 0, "funnel_to_step": 2}]}
)
funnel = ClickhouseFunnel(filter, self.team)
self.assertRaises(ValidationError, funnel.run)
def test_funnel_exclusion_no_end_event(self):
# Two-step funnel with a 1-day conversion window and "x" excluded between
# step 0 and step 1; covers users who never reach the end step.
filters = {
"events": [
{"id": "user signed up", "type": "events", "order": 0},
{"id": "paid", "type": "events", "order": 1},
],
"insight": INSIGHT_FUNNELS,
"funnel_window_days": 1,
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-14 00:00:00",
"exclusions": [{"id": "x", "type": "events", "funnel_from_step": 0, "funnel_to_step": 1},],
}
filter = Filter(data=filters)
funnel = ClickhouseFunnel(filter, self.team)
# person 1: completes both steps without the excluded event
person1 = _create_person(distinct_ids=["person1"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person1", timestamp="2021-05-01 01:00:00")
_create_event(team=self.team, event="paid", distinct_id="person1", timestamp="2021-05-01 02:00:00")
# person 2: emits "x" between the two steps
person2 = _create_person(distinct_ids=["person2"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person2", timestamp="2021-05-01 03:00:00")
_create_event(team=self.team, event="x", distinct_id="person2", timestamp="2021-05-01 03:30:00")
_create_event(team=self.team, event="paid", distinct_id="person2", timestamp="2021-05-01 04:00:00")
# person 3: never reaches "paid"
person3 = _create_person(distinct_ids=["person3"], team_id=self.team.pk)
# should be discarded, even if nothing happened after x, since within conversion window
_create_event(team=self.team, event="user signed up", distinct_id="person3", timestamp="2021-05-01 05:00:00")
_create_event(team=self.team, event="x", distinct_id="person3", timestamp="2021-05-01 06:00:00")
# person 4: "x" happens 25 hours after sign-up, outside the conversion window
person4 = _create_person(distinct_ids=["person4"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person4", timestamp="2021-05-01 07:00:00")
_create_event(team=self.team, event="x", distinct_id="person4", timestamp="2021-05-02 08:00:00")
result = funnel.run()
# Persons 1 and 4 count at step 1; only person 1 converts to step 2.
self.assertEqual(len(result), 2)
self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 2)
self.assertEqual(len(result[0]["people"]), 2)
self.assertEqual(result[1]["name"], "paid")
self.assertEqual(result[1]["count"], 1)
self.assertEqual(len(result[1]["people"]), 1)
self.assertCountEqual(
self._get_people_at_step(filter, 1), [person1.uuid, person4.uuid],
)
self.assertCountEqual(
self._get_people_at_step(filter, 2), [person1.uuid],
)
def test_funnel_exclusions_with_actions(self):
# Same shape as the event-based exclusion test, but the exclusion is an
# action (type "actions") rather than a plain event.
# NOTE(review): presumably _create_action builds an action matching the
# "sign up" event with property key=val -- confirm against the helper.
sign_up_action = _create_action(
name="sign up",
team=self.team,
properties=[{"key": "key", "type": "event", "value": ["val"], "operator": "exact"}],
)
filters = {
"events": [
{"id": "user signed up", "type": "events", "order": 0},
{"id": "paid", "type": "events", "order": 1},
],
"insight": INSIGHT_FUNNELS,
"funnel_window_days": 14,
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-14 00:00:00",
"exclusions": [{"id": sign_up_action.id, "type": "actions", "funnel_from_step": 0, "funnel_to_step": 1},],
}
filter = Filter(data=filters)
funnel = ClickhouseFunnel(filter, self.team)
# person 1: completes both steps without the excluded action
person1 = _create_person(distinct_ids=["person1"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person1", timestamp="2021-05-01 01:00:00")
_create_event(team=self.team, event="paid", distinct_id="person1", timestamp="2021-05-01 02:00:00")
# person 2: emits a "sign up" event with key=val between the two steps
person2 = _create_person(distinct_ids=["person2"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person2", timestamp="2021-05-01 03:00:00")
_create_event(
team=self.team,
event="sign up",
distinct_id="person2",
properties={"key": "val"},
timestamp="2021-05-01 03:30:00",
)
_create_event(team=self.team, event="paid", distinct_id="person2", timestamp="2021-05-01 04:00:00")
# person 3: completes both steps without the excluded action
person3 = _create_person(distinct_ids=["person3"], team_id=self.team.pk)
_create_event(team=self.team, event="user signed up", distinct_id="person3", timestamp="2021-05-01 05:00:00")
_create_event(team=self.team, event="paid", distinct_id="person3", timestamp="2021-05-01 06:00:00")
result = funnel.run()
# Only persons 1 and 3 should be counted, at both steps.
self.assertEqual(len(result), 2)
self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 2)
self.assertEqual(len(result[0]["people"]), 2)
self.assertEqual(result[1]["name"], "paid")
self.assertEqual(result[1]["count"], 2)
self.assertEqual(len(result[1]["people"]), 2)
self.assertCountEqual(
self._get_people_at_step(filter, 1), [person1.uuid, person3.uuid],
)
self.assertCountEqual(
self._get_people_at_step(filter, 2), [person1.uuid, person3.uuid],
)
def test_funnel_with_denormalised_properties(self):
filters = {
"events": [

View File

@ -352,3 +352,32 @@ class ClickhouseTestFunnelTypes(ClickhouseTestMixin, APIBaseTest):
self.assertEqual(response.status_code, 400)
self.assertEqual(response.json(), self.validation_error_response("Action ID 666 does not exist!"))
def test_funnel_basic_exclusions(self):
# API-level check: posts a two-step funnel with "step x" excluded between
# step 0 and step 1 and inspects the JSON response.
# person "1" also emits the excluded "step x" event
_create_person(distinct_ids=["1"], team=self.team)
_create_event(team=self.team, event="step one", distinct_id="1")
_create_event(team=self.team, event="step x", distinct_id="1")
_create_event(team=self.team, event="step two", distinct_id="1")
# person "2" only emits the two funnel steps
_create_person(distinct_ids=["2"], team=self.team)
_create_event(team=self.team, event="step one", distinct_id="2")
_create_event(team=self.team, event="step two", distinct_id="2")
response = self.client.post(
"/api/insight/funnel/",
{
"events": [
{"id": "step one", "type": "events", "order": 0},
{"id": "step two", "type": "events", "order": 1},
],
"exclusions": [{"id": "step x", "type": "events", "funnel_from_step": 0, "funnel_to_step": 1},],
"funnel_window_days": 14,
"insight": "funnels",
},
).json()
# Exactly one person is expected at each step.
self.assertEqual(len(response["result"]), 2)
self.assertEqual(response["result"][0]["name"], "step one")
self.assertEqual(response["result"][1]["name"], "step two")
self.assertEqual(response["result"][0]["count"], 1)
self.assertEqual(response["result"][1]["count"], 1)

View File

@ -58,6 +58,7 @@ DATE_TO = "date_to"
ENTITIES = "entities"
ACTIONS = "actions"
EVENTS = "events"
EXCLUSIONS = "exclusions"
PROPERTIES = "properties"
SELECTOR = "selector"
INTERVAL = "interval"

View File

@ -1,9 +1,11 @@
import inspect
from typing import Any, Dict, Optional, Union
from rest_framework.exceptions import ValidationError
from posthog.constants import TREND_FILTER_TYPE_ACTIONS, TREND_FILTER_TYPE_EVENTS
from posthog.models.action import Action
from posthog.models.filters.mixins.funnel import FunnelFromToStepsMixin
from posthog.models.filters.mixins.property import PropertyMixin
@ -76,3 +78,23 @@ class Entity(PropertyMixin):
return Action.objects.get(id=self.id)
except:
raise ValidationError(f"Action ID {self.id} does not exist!")
class ExclusionEntity(Entity, FunnelFromToStepsMixin):
    """
    An Entity used as a funnel exclusion inside a Filter.

    On top of the regular Entity data it carries the extra exclusion
    parameters contributed by FunnelFromToStepsMixin.
    """

    def __init__(self, data: Dict[str, Any]) -> None:
        super().__init__(data)

    def to_dict(self) -> Dict[str, Any]:
        """Return the base Entity dict merged with the output of every include_dict method."""
        serialized = super().to_dict()
        bound_methods = inspect.getmembers(self, inspect.ismethod)
        for _name, method in bound_methods:
            # the include_dict attribute is provided by the @include_dict decorator
            if not hasattr(method, "include_dict"):
                continue
            serialized.update(method())
        return serialized

View File

@ -17,6 +17,7 @@ from posthog.constants import (
DATE_TO,
DISPLAY,
EVENTS,
EXCLUSIONS,
FILTER_TEST_ACCOUNTS,
FORMULA,
INSIGHT,
@ -30,7 +31,7 @@ from posthog.constants import (
TREND_FILTER_TYPE_ACTIONS,
TREND_FILTER_TYPE_EVENTS,
)
from posthog.models.entity import Entity
from posthog.models.entity import Entity, ExclusionEntity
from posthog.models.filters.mixins.base import BaseParamMixin
from posthog.models.filters.mixins.utils import cached_property, include_dict
from posthog.utils import relative_date_parse, str_to_bool
@ -294,11 +295,22 @@ class EntitiesMixin(BaseParamMixin):
def events(self) -> List[Entity]:
return [entity for entity in self.entities if entity.type == TREND_FILTER_TYPE_EVENTS]
@cached_property
def exclusions(self) -> List[ExclusionEntity]:
    """
    Return the filter's exclusions as ExclusionEntity objects.

    The raw value stored under EXCLUSIONS may be a list of dicts or a
    JSON-encoded string of that list. A missing or empty value yields an
    empty list.
    """
    raw_exclusions = self._data.get(EXCLUSIONS)
    if not raw_exclusions:
        return []
    if isinstance(raw_exclusions, str):
        raw_exclusions = json.loads(raw_exclusions)
    # each entry is shallow-copied before being handed to ExclusionEntity
    return [ExclusionEntity({**entry}) for entry in raw_exclusions]
@include_dict
def entities_to_dict(self):
    """
    Serialize events, actions and exclusions into a dict.

    A key is only present when the corresponding collection is non-empty;
    each value is the list of ``to_dict()`` results for that collection.
    """
    serialized = {}
    for key in ("events", "actions", "exclusions"):
        entities = getattr(self, key)
        if len(entities) > 0:
            serialized[key] = [entity.to_dict() for entity in entities]
    return serialized

View File

@ -27,22 +27,22 @@ from posthog.utils import relative_date_parse, str_to_bool
class FunnelFromToStepsMixin(BaseParamMixin):
@cached_property
def funnel_from_step(self) -> Optional[int]:
if self._data.get(FUNNEL_FROM_STEP):
if self._data.get(FUNNEL_FROM_STEP) is not None:
return int(self._data[FUNNEL_FROM_STEP])
return None
@cached_property
def funnel_to_step(self) -> Optional[int]:
if self._data.get(FUNNEL_TO_STEP):
if self._data.get(FUNNEL_TO_STEP) is not None:
return int(self._data[FUNNEL_TO_STEP])
return None
@include_dict
def funnel_from_to_steps_to_dict(self):
dict_part = {}
if self.funnel_from_step:
if self.funnel_from_step is not None:
dict_part[FUNNEL_FROM_STEP] = self.funnel_from_step
if self.funnel_to_step:
if self.funnel_to_step is not None:
dict_part[FUNNEL_TO_STEP] = self.funnel_to_step
return dict_part