0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-12-01 12:21:02 +01:00
posthog/ee/clickhouse/queries/funnels/funnel_trends.py
Eric Duong 41aa793ce3
Funnel step query new (#4851)
* wip: pagination for persons on clickhouse funnels

* wip: added offset support for getting a list of persons; added support for conversion window;

* fixed mypy exception

* helper function to insert data for local testing

* moved generate code into separate class for more functionality later

* corrected person_distinct_id to use the person id from postgres

* minor corrections to generate local class along with addition of data cleanup via destroy() method

* reduce the number of persons who make it to each step

* moved funnel queries to a new folder for better organization; separated funnel_persons and funnel_trends_persons into individual classes;

* funnel persons and tests

* initial implementation

* invoke the funnel or funnel trends class respectively

* add a test

* add breakdown handling and first test

* add test stubs

* remove repeats

* mypy corrections and PR feedback

* run funnel test suite on new query implementation

* remove imports

* corrected tests

* minor test updates

* correct func name

* fix types

* func name change

* move builder functions to funnel base

* add test class for new funnel

* Handle multiple same events in the funnel (#4863)

* dedup + tests

* deep equality. Tests to come

* write test for entity equality

* finish testing funnels

* clean up comments

Co-authored-by: Buddy Williams <buddy@posthog.com>
Co-authored-by: Neil Kakkar <neilkakkar@gmail.com>
2021-06-28 10:20:08 -04:00

133 lines
4.9 KiB
Python

from datetime import datetime, timedelta
from ee.clickhouse.client import sync_execute
from ee.clickhouse.models.property import parse_prop_clauses
from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase
from ee.clickhouse.queries.util import get_time_diff, get_trunc_func_ch, parse_timestamps
from ee.clickhouse.sql.events import NULL_SQL_FUNNEL_TRENDS
from ee.clickhouse.sql.funnels.funnel_trend import FUNNEL_TREND_SQL
from ee.clickhouse.sql.person import GET_LATEST_PERSON_DISTINCT_ID_SQL
# Column positions inside each row of the funnel-trends query result
# (rows are indexed with these names in ClickhouseFunnelTrends._summarize_data).
DAY_START, TOTAL_COMPLETED_FUNNELS, ALL_FUNNELS_ENTRIES, PERSON_IDS = range(4)

# strftime pattern for a full "YYYY-MM-DD HH:MM:SS" timestamp.
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"
# strftime pattern for chart labels, e.g. "Mon. 28 Jun".
# NOTE: "%-d" (day of month without zero padding) is a platform-specific
# strftime extension; it is not available on every platform (e.g. Windows).
HUMAN_READABLE_TIMESTAMP_FORMAT = "%a. %-d %b"
class ClickhouseFunnelTrends(ClickhouseFunnelBase):
    """Funnel conversion trend per time interval, queried from ClickHouse.

    The query yields one row per interval. Each row is indexed with the
    module-level column constants (``DAY_START``, ``TOTAL_COMPLETED_FUNNELS``,
    ``ALL_FUNNELS_ENTRIES``, ``PERSON_IDS``) and is turned first into a list of
    summary dicts and then into the series structure returned by ``run``.

    Relies on ``self._filter``, ``self._team``, ``self.params`` and
    ``self._build_step_query`` — presumably provided by ``ClickhouseFunnelBase``;
    they are not defined in this class.
    """

    def run(self):
        """Execute the trend query and return the UI-ready series.

        Returns:
            ``[]`` when the filter has no entities; otherwise a one-element
            list as produced by ``_get_ui_response``.
        """
        if not self._filter.entities:
            return []
        return self._get_ui_response(self.perform_query())

    def perform_query(self):
        """Build the SQL, run it with ``self.params`` and return summarized rows."""
        # Build the SQL first: doing so registers the property-filter
        # parameters on ``self.params`` (see ``_get_filters``).
        sql = self._configure_sql()
        results = sync_execute(sql, self.params)
        return self._summarize_data(results)

    def _configure_sql(self):
        """Render ``FUNNEL_TREND_SQL`` for the current team and filter.

        Side effect: ``_get_filters`` merges the property-filter parameters
        into ``self.params``.
        """
        funnel_trend_null_sql = self._get_funnel_trend_null_sql()
        parsed_date_from, parsed_date_to, _ = self._get_dates()
        prop_filters, _ = self._get_filters()
        steps = self._get_steps()
        interval_method = get_trunc_func_ch(self._filter.interval)

        return FUNNEL_TREND_SQL.format(
            team_id=self._team.pk,
            steps=", ".join(steps),
            step_count=len(steps),
            # Only the first "uuid IN" is qualified with the events table —
            # presumably to disambiguate the column inside the joined query;
            # confirm against FUNNEL_TREND_SQL.
            filters=prop_filters.replace("uuid IN", "events.uuid IN", 1),
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            # NOTE(review): judging by the helper's name this is the conversion
            # window expressed in milliseconds — confirm on the filter class.
            within_time=self._filter.milliseconds_from_days(self._filter.funnel_window_days),
            latest_distinct_id_sql=GET_LATEST_PERSON_DISTINCT_ID_SQL,
            funnel_trend_null_sql=funnel_trend_null_sql,
            interval_method=interval_method,
        )

    def _summarize_data(self, results):
        """Convert raw result rows into a list of summary dicts.

        Each dict has the keys ``timestamp``, ``completed_funnels``, ``total``,
        ``percent_complete``, ``is_complete`` and ``cohort``.

        ``total`` is the sum of ``ALL_FUNNELS_ENTRIES`` over *all* rows, and
        ``percent_complete`` is each row's completed count relative to that
        grand total (rounded to 2 decimals).
        NOTE(review): a per-interval denominator may have been intended; the
        existing behaviour is preserved here.

        When the grand total is 0 (rows exist but nobody entered the funnel)
        ``percent_complete`` is reported as ``0.0`` instead of raising
        ``ZeroDivisionError``.
        """
        total = sum(row[ALL_FUNNELS_ENTRIES] for row in results)

        summary = []
        for row in results:
            completed = row[TOTAL_COMPLETED_FUNNELS]
            # Guard the division: an all-zero result set must not crash.
            percent_complete = round(completed / total * 100, 2) if total else 0.0
            summary.append(
                {
                    "timestamp": row[DAY_START],
                    "completed_funnels": completed,
                    "total": total,
                    "percent_complete": percent_complete,
                    "is_complete": self._determine_complete(row[DAY_START]),
                    "cohort": row[PERSON_IDS],
                }
            )
        return summary

    @staticmethod
    def _get_ui_response(summary):
        """Reshape summary dicts into a single chart series.

        Returns:
            A one-element list holding a dict with ``count`` (number of
            intervals), ``data`` (``percent_complete`` values) and ``days`` /
            ``labels`` (both the human-readable interval timestamps).
        """
        data = [row["percent_complete"] for row in summary]
        # Format each timestamp once; "days" and "labels" carry the same text
        # but stay separate list objects, as before.
        days = [row["timestamp"].strftime(HUMAN_READABLE_TIMESTAMP_FORMAT) for row in summary]
        labels = list(days)
        return [{"count": len(summary), "data": data, "days": days, "labels": labels}]

    def _get_funnel_trend_null_sql(self):
        """Render ``NULL_SQL_FUNNEL_TRENDS`` for the filter's interval and date range.

        The interval defaults to ``"day"`` for the time-diff computation when
        the filter does not specify one.
        """
        interval_annotation = get_trunc_func_ch(self._filter.interval)
        # The third value returned by get_time_diff is not needed here.
        num_intervals, seconds_in_interval, _ = get_time_diff(
            self._filter.interval or "day", self._filter.date_from, self._filter.date_to, team_id=self._team.id
        )
        return NULL_SQL_FUNNEL_TRENDS.format(
            interval=interval_annotation,
            seconds_in_interval=seconds_in_interval,
            num_intervals=num_intervals,
            date_to=self._filter.date_to.strftime(TIMESTAMP_FORMAT),
        )

    def _get_dates(self):
        """Return the result of ``parse_timestamps`` for this filter on the ``events.`` table."""
        return parse_timestamps(filter=self._filter, table="events.", team_id=self._team.pk)

    def _get_filters(self):
        """Build the property-filter SQL and register its parameters.

        Side effect: merges the generated parameters into ``self.params``.

        Returns:
            Tuple ``(prop_filters, prop_filter_params)`` as returned by
            ``parse_prop_clauses``.
        """
        prop_filters, prop_filter_params = parse_prop_clauses(
            self._filter.properties,
            self._team.pk,
            prepend="global",
            allow_denormalized_props=True,
            filter_test_accounts=self._filter.filter_test_accounts,
        )
        self.params.update(prop_filter_params)
        return prop_filters, prop_filter_params

    def _get_steps(self):
        """Return one step query fragment per filter entity, in entity order."""
        return [self._build_step_query(entity, index) for index, entity in enumerate(self._filter.entities)]

    def _determine_complete(self, timestamp):
        """Tell whether the interval starting at ``timestamp`` can no longer change.

        An interval counts as complete when its start date is at least
        ``funnel_window_days`` before today's date (naive UTC).

        Args:
            timestamp: a ``datetime`` (reduced to its date) or a ``date``.

        Returns:
            ``True`` if the interval is complete, ``False`` otherwise.
        """
        completed_end = datetime.utcnow().date() - timedelta(days=self._filter.funnel_window_days)
        # isinstance (not an exact type check) so datetime subclasses are also
        # reduced to a date before being compared with a date.
        compare_timestamp = timestamp.date() if isinstance(timestamp, datetime) else timestamp
        return compare_timestamp <= completed_end

    def get_query(self, format_properties):
        """No-op for funnel trends; always returns ``None``.

        Presumably present only to satisfy the base-class interface — the SQL
        for this class is built by ``_configure_sql`` instead.
        """