Mirror of https://github.com/PostHog/posthog.git (synced 2024-11-21 21:49:51 +01:00)

fix: allow creating static cohorts for lifecycle (#16248)

Co-authored-by: psykomal <bkomalsai@gmail.com>
Neil Kakkar, 2023-06-28 14:57:47 +01:00, committed by GitHub
parent 9a8ca3310a
commit f922dae994
13 changed files with 2297 additions and 1734 deletions
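
What this enables, in short: POST /api/cohort/ with is_static=true can now materialize the people behind a lifecycle insight into a static cohort. A minimal sketch of the call, assuming a locally running PostHog instance and a personal API key (host, key, and cohort name below are placeholders; the endpoint and query parameters mirror the test in this commit):

import json
import urllib.parse

import requests

POSTHOG_HOST = "http://localhost:8000"  # placeholder
API_KEY = "phx_..."  # placeholder personal API key

# The lifecycle insight filter travels as URL query parameters.
insight_params = {
    "insight": "LIFECYCLE",
    "date_from": "2020-01-12T00:00:00Z",
    "date_to": "2020-01-19T00:00:00Z",
    "events": json.dumps([{"id": "$pageview", "type": "events", "order": 0}]),
    "interval": "day",
    "shown_as": "Lifecycle",
    "target_date": "2020-01-13",
    "lifecycle_type": "returning",  # or "new", "resurrecting", "dormant"
    "entity_id": "$pageview",
    "entity_type": "events",
    "entity_math": "total",
    "entity_order": 0,
}

response = requests.post(
    f"{POSTHOG_HOST}/api/cohort/?{urllib.parse.urlencode(insight_params)}",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={"is_static": True, "name": "Returning users on 2020-01-13"},
)
print(response.json()["id"])  # the cohort is filled asynchronously by a Celery task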


@@ -23,6 +23,7 @@
- Keep control over your data by deploying PostHog on your infrastructure
- Use apps to connect to external services and manage data flows
## Table of Contents
- [Get started for free](#get-started-for-free)


@@ -354,3 +354,167 @@ class TestClickhouseCalculateCohort(ClickhouseTestMixin, calculate_cohort_test_f
        self.assertEqual(cohort.errors_calculating, 0)
        self.assertEqual(people.count(), 1)
        self.assertEqual(cohort.count, 1)

    @patch("posthog.tasks.calculate_cohort.insert_cohort_from_insight_filter.delay")
    def test_create_lifecycle_cohort(self, _insert_cohort_from_insight_filter):
        def _create_events(data, event="$pageview"):
            person_result = []
            for id, timestamps in data:
                with freeze_time(timestamps[0]):
                    person_result.append(
                        _create_person(
                            team_id=self.team.pk,
                            distinct_ids=[id],
                            properties={"name": id, **({"email": "test@posthog.com"} if id == "p1" else {})},
                        )
                    )
                for timestamp in timestamps:
                    _create_event(team=self.team, event=event, distinct_id=id, timestamp=timestamp)
            return person_result

        people = _create_events(
            data=[
                (
                    "p1",
                    [
                        "2020-01-11T12:00:00Z",
                        "2020-01-12T12:00:00Z",
                        "2020-01-13T12:00:00Z",
                        "2020-01-15T12:00:00Z",
                        "2020-01-17T12:00:00Z",
                        "2020-01-19T12:00:00Z",
                    ],
                ),
                ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]),
                ("p3", ["2020-01-12T12:00:00Z"]),
                ("p4", ["2020-01-15T12:00:00Z"]),
            ]
        )

        query_params = {
            "date_from": "2020-01-12T00:00:00Z",
            "date_to": "2020-01-19T00:00:00Z",
            "events": json.dumps([{"id": "$pageview", "type": "events", "order": 0}]),
            "insight": "LIFECYCLE",
            "interval": "day",
            "shown_as": "Lifecycle",
            "smoothing_intervals": 1,
            "entity_id": "$pageview",
            "entity_type": "events",
            "entity_math": "total",
            "target_date": "2020-01-13",
            "entity_order": 0,
            "lifecycle_type": "returning",
        }

        response = self.client.post(
            f"/api/cohort/?{urllib.parse.urlencode(query_params)}",
            data={"is_static": True, "name": "lifecycle_static_cohort_returning"},
        ).json()
        cohort_id = response["id"]

        _insert_cohort_from_insight_filter.assert_called_once_with(
            cohort_id,
            {
                "date_from": "2020-01-12T00:00:00Z",
                "date_to": "2020-01-19T00:00:00Z",
                "events": '[{"id": "$pageview", "type": "events", "order": 0}]',
                "insight": "LIFECYCLE",
                "interval": "day",
                "shown_as": "Lifecycle",
                "smoothing_intervals": "1",
                "entity_id": "$pageview",
                "entity_type": "events",
                "entity_math": "total",
                "target_date": "2020-01-13",
                "entity_order": "0",
                "lifecycle_type": "returning",
            },
        )

        insert_cohort_from_insight_filter(
            cohort_id,
            {
                "date_from": "2020-01-12T00:00:00Z",
                "date_to": "2020-01-19T00:00:00Z",
                "events": [{"id": "$pageview", "type": "events", "order": 0}],
                "insight": "LIFECYCLE",
                "interval": "day",
                "shown_as": "Lifecycle",
                "smoothing_intervals": "1",
                "entity_id": "$pageview",
                "entity_type": "events",
                "entity_math": "total",
                "target_date": "2020-01-13",
                "entity_order": "0",
                "lifecycle_type": "returning",
            },
        )

        cohort = Cohort.objects.get(pk=response["id"])
        people_result = Person.objects.filter(cohort__id=cohort.pk).values_list("id", flat=True)
        self.assertIn(people[0].id, people_result)

        query_params = {
            "date_from": "2020-01-12T00:00:00Z",
            "date_to": "2020-01-19T00:00:00Z",
            "events": json.dumps([{"id": "$pageview", "type": "events", "order": 0}]),
            "insight": "LIFECYCLE",
            "interval": "day",
            "shown_as": "Lifecycle",
            "smoothing_intervals": 1,
            "entity_id": "$pageview",
            "entity_type": "events",
            "entity_math": "total",
            "target_date": "2020-01-13",
            "entity_order": 0,
            "lifecycle_type": "dormant",
        }

        response = self.client.post(
            f"/api/cohort/?{urllib.parse.urlencode(query_params)}",
            data={"is_static": True, "name": "lifecycle_static_cohort_dormant"},
        ).json()
        cohort_id = response["id"]

        _insert_cohort_from_insight_filter.assert_called_with(
            cohort_id,
            {
                "date_from": "2020-01-12T00:00:00Z",
                "date_to": "2020-01-19T00:00:00Z",
                "events": '[{"id": "$pageview", "type": "events", "order": 0}]',
                "insight": "LIFECYCLE",
                "interval": "day",
                "shown_as": "Lifecycle",
                "smoothing_intervals": "1",
                "entity_id": "$pageview",
                "entity_type": "events",
                "entity_math": "total",
                "target_date": "2020-01-13",
                "entity_order": "0",
                "lifecycle_type": "dormant",
            },
        )
        self.assertEqual(_insert_cohort_from_insight_filter.call_count, 2)

        insert_cohort_from_insight_filter(
            cohort_id,
            {
                "date_from": "2020-01-12T00:00:00Z",
                "date_to": "2020-01-19T00:00:00Z",
                "events": [{"id": "$pageview", "type": "events", "order": 0}],
                "insight": "LIFECYCLE",
                "interval": "day",
                "shown_as": "Lifecycle",
                "smoothing_intervals": "1",
                "entity_id": "$pageview",
                "entity_type": "events",
                "entity_math": "total",
                "target_date": "2020-01-13",
                "entity_order": "0",
                "lifecycle_type": "dormant",
            },
        )

        cohort = Cohort.objects.get(pk=response["id"])
        self.assertEqual(cohort.count, 2)
        people_result = Person.objects.filter(cohort__id=cohort.pk).values_list("id", flat=True)
        self.assertCountEqual([people[1].id, people[2].id], people_result)
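
One subtlety worth calling out in the test above: the request sends smoothing_intervals=1 and entity_order=0 as integers, but the payload asserted against the Celery task has them as strings. That is simply URL encoding at work — the filter is passed as query parameters, so every scalar arrives server-side as a string:

import urllib.parse

print(urllib.parse.urlencode({"smoothing_intervals": 1, "entity_order": 0}))
# -> "smoothing_intervals=1&entity_order=0"; request.GET hands these back as strings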


@@ -25,6 +25,7 @@ from posthog.client import sync_execute
from posthog.constants import (
    CSV_EXPORT_LIMIT,
    INSIGHT_FUNNELS,
    INSIGHT_LIFECYCLE,
    INSIGHT_PATHS,
    INSIGHT_STICKINESS,
    INSIGHT_TRENDS,

@@ -38,6 +39,7 @@ from posthog.models.cohort import get_and_update_pending_version
from posthog.models.filters.filter import Filter
from posthog.models.filters.path_filter import PathFilter
from posthog.models.filters.stickiness_filter import StickinessFilter
from posthog.models.filters.lifecycle_filter import LifecycleFilter
from posthog.models.person.sql import INSERT_COHORT_ALL_PEOPLE_THROUGH_PERSON_ID, PERSON_STATIC_COHORT_TABLE
from posthog.permissions import ProjectMembershipNecessaryPermissions, TeamMemberAccessPermission
from posthog.queries.actor_base_query import ActorBaseQuery, get_people

@@ -45,6 +47,7 @@ from posthog.queries.paths import PathsActors
from posthog.queries.person_query import PersonQuery
from posthog.queries.stickiness import StickinessActors
from posthog.queries.trends.trends_actors import TrendsActors
from posthog.queries.trends.lifecycle_actors import LifecycleActors
from posthog.queries.util import get_earliest_timestamp
from posthog.tasks.calculate_cohort import (
    calculate_cohort_ch,

@@ -118,7 +121,6 @@ class CohortSerializer(serializers.ModelSerializer):
        calculate_cohort_from_list.delay(cohort.pk, distinct_ids_and_emails)

    def validate_filters(self, request_filters: Dict):
        if isinstance(request_filters, dict) and "properties" in request_filters:
            if self.context["request"].method == "PATCH":
                parsed_filter = Filter(data=request_filters)

@@ -314,6 +316,9 @@ def insert_cohort_actors_into_ch(cohort: Cohort, filter_data: Dict):
    elif insight_type == INSIGHT_PATHS:
        path_filter = PathFilter(data=filter_data, team=cohort.team)
        query_builder = PathsActors(path_filter, cohort.team, funnel_filter=None)
    elif insight_type == INSIGHT_LIFECYCLE:
        lifecycle_filter = LifecycleFilter(data=filter_data, team=cohort.team)
        query_builder = LifecycleActors(team=cohort.team, filter=lifecycle_filter)
    else:
        if settings.DEBUG:
            raise ValueError(f"Insight type: {insight_type} not supported for cohort creation")
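
With the hunks above, insert_cohort_actors_into_ch gains a lifecycle branch alongside funnels, paths, and stickiness. The shape of the dispatch, reduced to just that branch (a sketch; build_query_builder is a hypothetical stand-in for the surrounding function):

from posthog.constants import INSIGHT_LIFECYCLE
from posthog.models.filters.lifecycle_filter import LifecycleFilter
from posthog.queries.trends.lifecycle_actors import LifecycleActors

def build_query_builder(insight_type: str, filter_data: dict, team):
    # LifecycleFilter validates target_date and lifecycle_type at construction time,
    # so a malformed filter fails here rather than inside the ClickHouse query.
    if insight_type == INSIGHT_LIFECYCLE:
        lifecycle_filter = LifecycleFilter(data=filter_data, team=team)
        return LifecycleActors(team=team, filter=lifecycle_filter)
    raise ValueError(f"Insight type: {insight_type} not supported for cohort creation")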


@@ -38,6 +38,7 @@ from posthog.models.activity_logging.activity_log import Change, Detail, load_ac
from posthog.models.activity_logging.activity_page import activity_page_response
from posthog.models.async_deletion import AsyncDeletion, DeletionType
from posthog.models.cohort.util import get_all_cohort_ids_by_person_uuid
from posthog.models.filters.lifecycle_filter import LifecycleFilter
from posthog.models.filters.path_filter import PathFilter
from posthog.models.filters.properties_timeline_filter import PropertiesTimelineFilter
from posthog.models.filters.retention_filter import RetentionFilter
@@ -61,12 +62,7 @@ from posthog.queries.util import get_earliest_timestamp
 from posthog.rate_limit import ClickHouseBurstRateThrottle, ClickHouseSustainedRateThrottle
 from posthog.settings import EE_AVAILABLE
 from posthog.tasks.split_person import split_person
-from posthog.utils import (
-    convert_property_value,
-    format_query_params_absolute_url,
-    is_anonymous_id,
-    relative_date_parse,
-)
+from posthog.utils import convert_property_value, format_query_params_absolute_url, is_anonymous_id
 
 DEFAULT_PAGE_LIMIT = 100
 
 # Sync with .../lib/constants.tsx and .../ingestion/hooks.ts
@@ -156,7 +152,6 @@ def get_funnel_actor_class(filter: Filter) -> Callable:
     funnel_actor_class: Type[ActorBaseQuery]
-
     if filter.correlation_person_entity and EE_AVAILABLE:
         if EE_AVAILABLE:
             from ee.clickhouse.queries.funnels.funnel_correlation_persons import FunnelCorrelationActors
@@ -339,7 +334,7 @@ class PersonViewSet(StructuredViewSetMixin, viewsets.ModelViewSet):
         if key:
             result = self._get_person_property_values_for_key(key, value)
 
-            for (value, count) in result:
+            for value, count in result:
                 try:
                     # Try loading as json for dicts or arrays
                     flattened.append({"name": convert_property_value(json.loads(value)), "count": count})  # type: ignore
@@ -647,26 +642,23 @@ class PersonViewSet(StructuredViewSetMixin, viewsets.ModelViewSet):
                 status=400,
             )
 
-        filter = Filter(request=request, team=self.team)
-        filter = prepare_actor_query_filter(filter)
-
         target_date = request.GET.get("target_date", None)
         if target_date is None:
             return response.Response(
                 {"message": "Missing parameter", "detail": "Must include specified date"}, status=400
             )
-        target_date_parsed = relative_date_parse(target_date)
         lifecycle_type = request.GET.get("lifecycle_type", None)
         if lifecycle_type is None:
             return response.Response(
                 {"message": "Missing parameter", "detail": "Must include lifecycle type"}, status=400
             )
 
+        filter = LifecycleFilter(request=request, data=request.GET.dict(), team=self.team)
+        filter = prepare_actor_query_filter(filter)
 
         people = self.lifecycle_class().get_people(
-            target_date=target_date_parsed,
             filter=filter,
             team=team,
-            lifecycle_type=lifecycle_type,
         )
 
         next_url = paginated_result(request, len(people), filter.offset, filter.limit)
         return response.Response({"results": [{"people": people, "count": len(people)}], "next": next_url})
@@ -716,7 +708,7 @@ def paginated_result(
     return format_paginated_url(request, offset, limit) if count >= limit else None
 
-T = TypeVar("T", Filter, PathFilter, RetentionFilter, StickinessFilter)
+T = TypeVar("T", Filter, PathFilter, RetentionFilter, LifecycleFilter, StickinessFilter)
 
 def prepare_actor_query_filter(filter: T) -> T:
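
The TypeVar change is easy to miss but load-bearing: T is a constrained TypeVar, so prepare_actor_query_filter returns the same concrete filter class it receives, and LifecycleFilter had to join the constraint list for the lifecycle endpoint to use it. A self-contained illustration of the pattern (toy classes, not PostHog's):

from typing import TypeVar

class Filter:
    limit: int = 100

class LifecycleFilter(Filter):
    lifecycle_type: str = "returning"

F = TypeVar("F", Filter, LifecycleFilter)

def prepare_actor_query_filter(filter: F) -> F:
    return filter  # the real function tweaks search/limit params before returning

f = prepare_actor_query_filter(LifecycleFilter())
print(f.lifecycle_type)  # type checkers still see a LifecycleFilter, not a plain Filter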

(File diff suppressed because it is too large.)


@@ -588,6 +588,12 @@
             "key": "version",
             "type": "integer"
         }
     ],
     "whatever": [
         {
             "key": "id",
             "type": "string"
         }
     ]
 }
 '

@@ -1177,6 +1183,12 @@
             "key": "version",
             "type": "integer"
         }
     ],
     "whatever": [
         {
             "key": "id",
             "type": "string"
         }
     ]
 }
 '


@@ -0,0 +1,33 @@
import datetime
from typing import Any, Dict, Optional

from posthog.models import Filter
from posthog.utils import relative_date_parse
from rest_framework.request import Request
from rest_framework.exceptions import ValidationError


class LifecycleFilter(Filter):
    target_date: Optional[datetime.datetime] = None
    lifecycle_type: Optional[str] = None

    def __init__(
        self,
        data: Optional[Dict[str, Any]] = None,
        request: Optional[Request] = None,
        **kwargs,
    ) -> None:
        super().__init__(data, request, **kwargs)

        if not data and not request:
            raise ValidationError("Must include lifecycle type and target date")

        if data:
            target_date = data.get("target_date", None)
            if target_date:
                self.target_date = relative_date_parse(target_date)
            if self.target_date is None:
                raise ValidationError("Must include specified target date")

            self.lifecycle_type = data.get("lifecycle_type", None)
            if self.lifecycle_type is None:
                raise ValidationError("Must include lifecycle type")


@@ -14,6 +14,7 @@
"posthog_team"."ingested_event",
"posthog_team"."autocapture_opt_out",
"posthog_team"."autocapture_exceptions_opt_in",
"posthog_team"."autocapture_exceptions_errors_to_ignore",
"posthog_team"."session_recording_opt_in",
"posthog_team"."capture_console_log_opt_in",
"posthog_team"."capture_performance_opt_in",
@@ -62,6 +63,7 @@
"posthog_team"."ingested_event",
"posthog_team"."autocapture_opt_out",
"posthog_team"."autocapture_exceptions_opt_in",
"posthog_team"."autocapture_exceptions_errors_to_ignore",
"posthog_team"."session_recording_opt_in",
"posthog_team"."capture_console_log_opt_in",
"posthog_team"."capture_performance_opt_in",
@@ -110,6 +112,7 @@
"posthog_team"."ingested_event",
"posthog_team"."autocapture_opt_out",
"posthog_team"."autocapture_exceptions_opt_in",
"posthog_team"."autocapture_exceptions_errors_to_ignore",
"posthog_team"."session_recording_opt_in",
"posthog_team"."capture_console_log_opt_in",
"posthog_team"."capture_performance_opt_in",
@@ -158,6 +161,7 @@
"posthog_team"."ingested_event",
"posthog_team"."autocapture_opt_out",
"posthog_team"."autocapture_exceptions_opt_in",
"posthog_team"."autocapture_exceptions_errors_to_ignore",
"posthog_team"."session_recording_opt_in",
"posthog_team"."capture_console_log_opt_in",
"posthog_team"."capture_performance_opt_in",
@@ -206,6 +210,7 @@
"posthog_team"."ingested_event",
"posthog_team"."autocapture_opt_out",
"posthog_team"."autocapture_exceptions_opt_in",
"posthog_team"."autocapture_exceptions_errors_to_ignore",
"posthog_team"."session_recording_opt_in",
"posthog_team"."capture_console_log_opt_in",
"posthog_team"."capture_performance_opt_in",


@@ -0,0 +1,67 @@
from posthog.models.filters.lifecycle_filter import LifecycleFilter
from posthog.test.base import BaseTest
from posthog.utils import relative_date_parse


class TestLifecycleFilter(BaseTest):
    def test_filter_properties(self):
        target_date = "2023-05-15"
        lifecycle_type = "new"
        filter = LifecycleFilter(
            data={
                "breakdown_attribution_type": "first_touch",
                "breakdown_normalize_url": "False",
                "date_from": "-14d",
                "display": "ActionsLineGraph",
                "events": '[{"id": "$pageview", "type": "events", "order": 0, "name": "$pageview", "custom_name": null, "math": "total", "math_property": null, "math_group_type_index": null, "properties": {}}]',
                "insight": "LIFECYCLE",
                "interval": "week",
                "sampling_factor": "",
                "shown_as": "Lifecycle",
                "smoothing_intervals": "1",
                "entity_id": "$pageview",
                "entity_type": "events",
                "entity_math": "total",
                "target_date": target_date,
                "entity_order": "0",
                "lifecycle_type": lifecycle_type,
                "cache_invalidation_key": "ZY7tZ2Ak",
                "is_simplified": True,
            },
            team=self.team,
        )

        self.assertEqual(
            filter.to_dict(),
            {
                "breakdown_attribution_type": "first_touch",
                "breakdown_normalize_url": False,
                "date_from": "-14d",
                "display": "ActionsLineGraph",
                "events": [
                    {
                        "id": "$pageview",
                        "type": "events",
                        "order": 0,
                        "name": "$pageview",
                        "custom_name": None,
                        "math": "total",
                        "math_property": None,
                        "math_hogql": None,
                        "math_group_type_index": None,
                        "properties": {},
                    }
                ],
                "entity_id": "$pageview",
                "entity_math": "total",
                "entity_order": "0",
                "entity_type": "events",
                "insight": "LIFECYCLE",
                "interval": "week",
                "sampling_factor": "",
                "shown_as": "Lifecycle",
                "smoothing_intervals": 1,
            },
        )
        self.assertEqual(filter.lifecycle_type, lifecycle_type)
        self.assertEqual(filter.target_date, relative_date_parse(target_date))


@@ -4,6 +4,7 @@ from freezegun import freeze_time
 
 from posthog.constants import FILTER_TEST_ACCOUNTS, TRENDS_LIFECYCLE
 from posthog.models import Action, ActionStep, Filter
+from posthog.models.filters.lifecycle_filter import LifecycleFilter
 from posthog.models.instance_setting import get_instance_setting
 from posthog.queries.trends.trends import Trends
 from posthog.test.base import (

@@ -13,7 +14,6 @@ from posthog.test.base import (
     _create_person,
     snapshot_clickhouse_queries,
 )
-from posthog.utils import relative_date_parse
 
 
 def create_action(**kwargs):

@@ -193,7 +193,6 @@ class TestLifecycle(ClickhouseTestMixin, APIBaseTest):
         )
 
     def test_lifecycle_trend_prop_filtering(self):
-
         _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "p1"})
 
         _create_event(
             team=self.team,

@@ -305,7 +304,6 @@ class TestLifecycle(ClickhouseTestMixin, APIBaseTest):
         )
 
     def test_lifecycle_trend_person_prop_filtering(self):
-
         _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "p1"})
 
         _create_event(
             team=self.team,

@@ -427,7 +425,6 @@ class TestLifecycle(ClickhouseTestMixin, APIBaseTest):
         )
 
     def test_lifecycle_trend_people(self):
-
         people = self._create_events(
             data=[
                 (

@@ -518,7 +515,6 @@ class TestLifecycle(ClickhouseTestMixin, APIBaseTest):
         self.assertEqual(len(second_result["results"][0]["people"]), 50)
 
     def test_lifecycle_trend_action(self):
-
         self._create_events(
             data=[
                 (

@@ -740,19 +736,19 @@ class TestLifecycle(ClickhouseTestMixin, APIBaseTest):
         )
 
         Trends().get_people(
-            Filter(
+            LifecycleFilter(
                 data={
+                    "target_date": "2020-01-13T00:00:00Z",
                     "date_from": "2020-01-12T00:00:00Z",
                     "date_to": "2020-01-19T00:00:00Z",
                     "events": [{"id": "$pageview", "type": "events", "order": 0}],
                     "shown_as": TRENDS_LIFECYCLE,
                     FILTER_TEST_ACCOUNTS: True,
+                    "lifecycle_type": "dormant",
                 },
                 team=self.team,
             ),
             self.team,
-            relative_date_parse("2020-01-13T00:00:00Z"),
-            "dormant",
         )
@snapshot_clickhouse_queries


@@ -1,20 +1,16 @@
 import urllib
-from datetime import datetime
 from typing import Any, Callable, Dict, List, Tuple
 
-from django.db.models.query import Prefetch
-
 from posthog.models.entity import Entity
 from posthog.models.entity.util import get_entity_filtering_params
 from posthog.models.filters import Filter
+from posthog.models.filters.lifecycle_filter import LifecycleFilter
 from posthog.models.filters.mixins.utils import cached_property
-from posthog.models.person.util import get_persons_by_uuids
 from posthog.models.team import Team
 from posthog.queries.event_query import EventQuery
 from posthog.queries.insight import insight_sync_execute
 from posthog.queries.person_query import PersonQuery
 from posthog.queries.query_date_range import QueryDateRange
-from posthog.queries.trends.sql import LIFECYCLE_EVENTS_QUERY, LIFECYCLE_PEOPLE_SQL, LIFECYCLE_SQL
+from posthog.queries.trends.sql import LIFECYCLE_EVENTS_QUERY, LIFECYCLE_SQL
 from posthog.queries.trends.util import parse_response
 from posthog.queries.util import get_person_properties_mode
 from posthog.utils import PersonOnEventsMode, encode_get_request_params, generate_short_id

@@ -58,32 +54,11 @@ class Lifecycle:
         return _parse
 
-    def get_people(self, filter: Filter, team: Team, target_date: datetime, lifecycle_type: str):
-        event_query, event_params = LifecycleEventQuery(
-            team=team, filter=filter, person_on_events_mode=team.person_on_events_mode
-        ).get_query()
+    def get_people(self, filter: LifecycleFilter, team: Team):
+        from posthog.queries.trends.lifecycle_actors import LifecycleActors
 
-        result = insight_sync_execute(
-            LIFECYCLE_PEOPLE_SQL.format(events_query=event_query, interval_expr=filter.interval),
-            {
-                **event_params,
-                **filter.hogql_context.values,
-                "status": lifecycle_type,
-                "target_date": target_date,
-                "offset": filter.offset,
-                "limit": filter.limit or 100,
-            },
-            query_type="lifecycle_people",
-            filter=filter,
-            team_id=team.pk,
-        )
-        people = get_persons_by_uuids(team=team, uuids=[p[0] for p in result])
-        people = people.prefetch_related(Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))
-
-        from posthog.api.person import PersonSerializer
-
-        serializer_context = {"get_team": lambda: team}
-
-        return PersonSerializer(people, context=serializer_context, many=True).data
+        _, serialized_actors, _ = LifecycleActors(filter=filter, team=team, limit_actors=True).get_actors()
+        return serialized_actors
 
     def _get_persons_urls(self, filter: Filter, entity: Entity, times: List[str], status) -> List[Dict[str, Any]]:
         persons_url = []


@@ -0,0 +1,40 @@
from typing import Dict, Optional, Tuple

from posthog.queries.actor_base_query import ActorBaseQuery
from posthog.queries.trends.lifecycle import LifecycleEventQuery
from posthog.models.filters.lifecycle_filter import LifecycleFilter
from posthog.queries.trends.sql import LIFECYCLE_PEOPLE_SQL


class LifecycleActors(ActorBaseQuery):
    event_query_class = LifecycleEventQuery
    _filter: LifecycleFilter

    QUERY_TYPE = "lifecycle"

    def actor_query(self, limit_actors: Optional[bool] = True) -> Tuple[str, Dict]:
        events_query, event_params = self.event_query_class(
            filter=self._filter,
            team=self._team,
            person_on_events_mode=self._team.person_on_events_mode,
        ).get_query()

        lifecycle_type = self._filter.lifecycle_type
        target_date = self._filter.target_date

        return (
            LIFECYCLE_PEOPLE_SQL.format(
                events_query=events_query,
                limit=f'{"LIMIT %(limit)s" if limit_actors else ""}',
                offset=f'{"OFFSET %(offset)s" if limit_actors else ""}',
            ),
            {
                **event_params,
                **self._filter.hogql_context.values,
                "offset": self._filter.offset,
                "limit": self._filter.limit,
                "status": lifecycle_type,
                "target_date": target_date,
            },
        )
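
How the class is driven, mirroring the call sites added elsewhere in this commit (filter and team are assumed to exist; see the LifecycleFilter sketch above):

from posthog.queries.trends.lifecycle_actors import LifecycleActors

actors = LifecycleActors(filter=filter, team=team, limit_actors=True)

# actor_query() just builds (sql, params) for ClickHouse...
sql, params = actors.actor_query()

# ...while get_actors(), inherited from ActorBaseQuery, executes it and serializes
# the people — this is what Lifecycle.get_people now delegates to.
_, serialized_actors, _ = actors.get_actors()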


@@ -498,10 +498,12 @@ FROM (
     GROUP BY status
 """
 
 LIFECYCLE_PEOPLE_SQL = """
-SELECT person_id
+SELECT DISTINCT person_id as actor_id
 FROM ({events_query}) e
 WHERE status = %(status)s
 AND dateTrunc(%(interval)s, toDateTime(%(target_date)s, %(timezone)s)) = start_of_period
-LIMIT %(limit)s OFFSET %(offset)s
+{limit}
+{offset}
 """