diff --git a/ee/clickhouse/models/test/__snapshots__/test_property.ambr b/ee/clickhouse/models/test/__snapshots__/test_property.ambr index 97160d71faf..54971a81813 100644 --- a/ee/clickhouse/models/test/__snapshots__/test_property.ambr +++ b/ee/clickhouse/models/test/__snapshots__/test_property.ambr @@ -57,7 +57,7 @@ --- # name: test_parse_groups_persons_edge_case_with_single_filter ( - 'AND ( has(%(vglobalperson_0)s, replaceRegexpAll(JSONExtractRaw(person_props, %(kglobalperson_0)s), \'^"|"$\', \'\')))', + 'AND ( has(%(vglobalperson_0)s, "pmat_email"))', { 'kglobalperson_0': 'email', 'vglobalperson_0': [ @@ -119,7 +119,7 @@ --- # name: test_parse_prop_clauses_defaults.2 ( - 'AND ( has(%(vglobal_0)s, replaceRegexpAll(JSONExtractRaw(properties, %(kglobal_0)s), \'^"|"$\', \'\')) AND replaceRegexpAll(JSONExtractRaw(argMax(person.properties, _timestamp), %(kpersonquery_global_1)s), \'^"|"$\', \'\') ILIKE %(vpersonquery_global_1)s)', + 'AND ( has(%(vglobal_0)s, replaceRegexpAll(JSONExtractRaw(properties, %(kglobal_0)s), \'^"|"$\', \'\')) AND argMax(person."pmat_email", _timestamp) ILIKE %(vpersonquery_global_1)s)', { 'kglobal_0': 'event_prop', 'kpersonquery_global_1': 'email', diff --git a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation.ambr b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation.ambr index 9325b35581c..be689f8216e 100644 --- a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation.ambr +++ b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation.ambr @@ -2992,7 +2992,8 @@ timestamp, steps, final_timestamp, - first_timestamp + first_timestamp, + group0_properties FROM (SELECT aggregation_target, steps, @@ -3000,7 +3001,8 @@ median(step_1_conversion_time) step_1_median_conversion_time_inner , argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp + argMax(latest_0, steps) as first_timestamp, + any(group0_properties) as group0_properties FROM (SELECT aggregation_target, steps, @@ -3008,13 +3010,15 @@ step_1_conversion_time , latest_0, latest_1, - latest_0 + latest_0, + group0_properties FROM (SELECT *, if(latest_0 <= latest_1 AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time , + group0_properties FROM (SELECT aggregation_target, timestamp, @@ -3022,25 +3026,30 @@ latest_0, step_1, min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , + group0_properties FROM (SELECT aggregation_target, timestamp, if(event = 'user signed up', 1, 0) as step_0, if(step_0 = 1, timestamp, null) as latest_0, if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 + if(step_1 = 1, timestamp, null) as latest_1 , + group0_properties FROM (SELECT e.event as event, e.team_id as team_id, e.distinct_id as distinct_id, e.timestamp as timestamp, - e."$group_0" as aggregation_target + e."$group_0" as aggregation_target, + e.group0_properties AS group0_properties, + e.person_id as person_id FROM events e WHERE team_id = 2 AND event IN ['paid', 'user signed up'] AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) events + AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') + AND e.person_id != toUUIDOrZero('') ) events WHERE (step_0 = 1 OR step_1 = 1) )) WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) @@ -3056,15 +3065,8 @@ FROM (SELECT actor_id, funnel_actors.steps as steps, - arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop - FROM funnel_actors - INNER JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props + arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(group0_properties, 'industry'), '^"|"$', '')])) as prop + FROM funnel_actors) aggregation_target_with_props GROUP BY prop.1, prop.2 HAVING prop.1 NOT IN [] @@ -3082,7 +3084,8 @@ timestamp, steps, final_timestamp, - first_timestamp + first_timestamp, + group0_properties FROM (SELECT aggregation_target, steps, @@ -3090,7 +3093,8 @@ median(step_1_conversion_time) step_1_median_conversion_time_inner , argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp + argMax(latest_0, steps) as first_timestamp, + any(group0_properties) as group0_properties FROM (SELECT aggregation_target, steps, @@ -3098,13 +3102,15 @@ step_1_conversion_time , latest_0, latest_1, - latest_0 + latest_0, + group0_properties FROM (SELECT *, if(latest_0 <= latest_1 AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time , + group0_properties FROM (SELECT aggregation_target, timestamp, @@ -3112,34 +3118,32 @@ latest_0, step_1, min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , + group0_properties FROM (SELECT aggregation_target, timestamp, if(event = 'user signed up', 1, 0) as step_0, if(step_0 = 1, timestamp, null) as latest_0, if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 + if(step_1 = 1, timestamp, null) as latest_1 , + group0_properties FROM (SELECT e.event as event, e.team_id as team_id, e.distinct_id as distinct_id, e.timestamp as timestamp, e."$group_0" as aggregation_target, - groups_0.group_properties_0 as group_properties_0 + e.group0_properties AS group0_properties, + e.person_id as person_id, + e."group0_properties" as "group0_properties" FROM events e - INNER JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key WHERE team_id = 2 AND event IN ['paid', 'user signed up'] AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) ) events + AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group0_properties, 'industry'), '^"|"$', ''))) + AND e.person_id != toUUIDOrZero('') ) events WHERE (step_0 = 1 OR step_1 = 1) )) WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) @@ -3165,7 +3169,8 @@ timestamp, steps, final_timestamp, - first_timestamp + first_timestamp, + group0_properties FROM (SELECT aggregation_target, steps, @@ -3173,7 +3178,8 @@ median(step_1_conversion_time) step_1_median_conversion_time_inner , argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp + argMax(latest_0, steps) as first_timestamp, + any(group0_properties) as group0_properties FROM (SELECT aggregation_target, steps, @@ -3181,13 +3187,15 @@ step_1_conversion_time , latest_0, latest_1, - latest_0 + latest_0, + group0_properties FROM (SELECT *, if(latest_0 <= latest_1 AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time , + group0_properties FROM (SELECT aggregation_target, timestamp, @@ -3195,34 +3203,32 @@ latest_0, step_1, min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , + group0_properties FROM (SELECT aggregation_target, timestamp, if(event = 'user signed up', 1, 0) as step_0, if(step_0 = 1, timestamp, null) as latest_0, if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 + if(step_1 = 1, timestamp, null) as latest_1 , + group0_properties FROM (SELECT e.event as event, e.team_id as team_id, e.distinct_id as distinct_id, e.timestamp as timestamp, e."$group_0" as aggregation_target, - groups_0.group_properties_0 as group_properties_0 + e.group0_properties AS group0_properties, + e.person_id as person_id, + e."group0_properties" as "group0_properties" FROM events e - INNER JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key WHERE team_id = 2 AND event IN ['paid', 'user signed up'] AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) ) events + AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group0_properties, 'industry'), '^"|"$', ''))) + AND e.person_id != toUUIDOrZero('') ) events WHERE (step_0 = 1 OR step_1 = 1) )) WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) @@ -3248,7 +3254,8 @@ timestamp, steps, final_timestamp, - first_timestamp + first_timestamp, + group0_properties FROM (SELECT aggregation_target, steps, @@ -3256,7 +3263,8 @@ median(step_1_conversion_time) step_1_median_conversion_time_inner , argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp + argMax(latest_0, steps) as first_timestamp, + any(group0_properties) as group0_properties FROM (SELECT aggregation_target, steps, @@ -3264,13 +3272,15 @@ step_1_conversion_time , latest_0, latest_1, - latest_0 + latest_0, + group0_properties FROM (SELECT *, if(latest_0 <= latest_1 AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time , + group0_properties FROM (SELECT aggregation_target, timestamp, @@ -3278,34 +3288,32 @@ latest_0, step_1, min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , + group0_properties FROM (SELECT aggregation_target, timestamp, if(event = 'user signed up', 1, 0) as step_0, if(step_0 = 1, timestamp, null) as latest_0, if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 + if(step_1 = 1, timestamp, null) as latest_1 , + group0_properties FROM (SELECT e.event as event, e.team_id as team_id, e.distinct_id as distinct_id, e.timestamp as timestamp, e."$group_0" as aggregation_target, - groups_0.group_properties_0 as group_properties_0 + e.group0_properties AS group0_properties, + e.person_id as person_id, + e."group0_properties" as "group0_properties" FROM events e - INNER JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key WHERE team_id = 2 AND event IN ['paid', 'user signed up'] AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) ) events + AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group0_properties, 'industry'), '^"|"$', ''))) + AND e.person_id != toUUIDOrZero('') ) events WHERE (step_0 = 1 OR step_1 = 1) )) WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) @@ -3331,7 +3339,8 @@ timestamp, steps, final_timestamp, - first_timestamp + first_timestamp, + group0_properties FROM (SELECT aggregation_target, steps, @@ -3339,7 +3348,8 @@ median(step_1_conversion_time) step_1_median_conversion_time_inner , argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp + argMax(latest_0, steps) as first_timestamp, + any(group0_properties) as group0_properties FROM (SELECT aggregation_target, steps, @@ -3347,13 +3357,15 @@ step_1_conversion_time , latest_0, latest_1, - latest_0 + latest_0, + group0_properties FROM (SELECT *, if(latest_0 <= latest_1 AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time , + group0_properties FROM (SELECT aggregation_target, timestamp, @@ -3361,34 +3373,32 @@ latest_0, step_1, min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , + group0_properties FROM (SELECT aggregation_target, timestamp, if(event = 'user signed up', 1, 0) as step_0, if(step_0 = 1, timestamp, null) as latest_0, if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 + if(step_1 = 1, timestamp, null) as latest_1 , + group0_properties FROM (SELECT e.event as event, e.team_id as team_id, e.distinct_id as distinct_id, e.timestamp as timestamp, e."$group_0" as aggregation_target, - groups_0.group_properties_0 as group_properties_0 + e.group0_properties AS group0_properties, + e.person_id as person_id, + e."group0_properties" as "group0_properties" FROM events e - INNER JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key WHERE team_id = 2 AND event IN ['paid', 'user signed up'] AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) ) events + AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group0_properties, 'industry'), '^"|"$', ''))) + AND e.person_id != toUUIDOrZero('') ) events WHERE (step_0 = 1 OR step_1 = 1) )) WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) @@ -3408,529 +3418,6 @@ ' --- # name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.5 - ' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id, - timestamp, - steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, - timestamp, - step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT aggregation_target, - timestamp, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM - (SELECT e.event as event, - e.team_id as team_id, - e.distinct_id as distinct_id, - e.timestamp as timestamp, - e."$group_0" as aggregation_target - FROM events e - WHERE team_id = 2 - AND event IN ['paid', 'user signed up'] - AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) events - WHERE (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max_steps SETTINGS allow_experimental_window_functions = 1) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS allow_experimental_window_functions = 1), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT actor_id, - funnel_actors.steps as steps, - arrayMap(x -> x.1, JSONExtractKeysAndValuesRaw(groups_0.group_properties_0)) as person_prop_keys, - arrayJoin(arrayZip(person_prop_keys, arrayMap(x -> replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, x), '^"|"$', ''), person_prop_keys))) as prop - FROM funnel_actors - INNER JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ' ---- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized - ' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id, - timestamp, - steps, - final_timestamp, - first_timestamp, - mat_gp0_industry - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp, - any(mat_gp0_industry) as mat_gp0_industry - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0, - mat_gp0_industry - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time , - mat_gp0_industry - FROM - (SELECT aggregation_target, - timestamp, - step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - mat_gp0_industry - FROM - (SELECT aggregation_target, - timestamp, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 , - mat_gp0_industry - FROM - (SELECT e.event as event, - e.team_id as team_id, - e.distinct_id as distinct_id, - e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.mat_gp0_industry AS mat_gp0_industry, - e.person_id as person_id - FROM events e - WHERE team_id = 2 - AND event IN ['paid', 'user signed up'] - AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND e.person_id != toUUIDOrZero('') ) events - WHERE (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max_steps SETTINGS allow_experimental_window_functions = 1) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS allow_experimental_window_functions = 1), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['industry'], ["mat_gp0_industry"])) as prop - FROM funnel_actors) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ' ---- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.1 - ' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id, - timestamp, - steps, - final_timestamp, - first_timestamp, - mat_gp0_industry - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp, - any(mat_gp0_industry) as mat_gp0_industry - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0, - mat_gp0_industry - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time , - mat_gp0_industry - FROM - (SELECT aggregation_target, - timestamp, - step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - mat_gp0_industry - FROM - (SELECT aggregation_target, - timestamp, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 , - mat_gp0_industry - FROM - (SELECT e.event as event, - e.team_id as team_id, - e.distinct_id as distinct_id, - e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.mat_gp0_industry AS mat_gp0_industry, - e.person_id as person_id, - e."mat_gp0_industry" as "mat_gp0_industry" - FROM events e - WHERE team_id = 2 - AND event IN ['paid', 'user signed up'] - AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['positive'], "mat_gp0_industry")) - AND e.person_id != toUUIDOrZero('') ) events - WHERE (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max_steps SETTINGS allow_experimental_window_functions = 1) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS allow_experimental_window_functions = 1), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ' ---- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.2 - ' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id, - timestamp, - steps, - final_timestamp, - first_timestamp, - mat_gp0_industry - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp, - any(mat_gp0_industry) as mat_gp0_industry - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0, - mat_gp0_industry - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time , - mat_gp0_industry - FROM - (SELECT aggregation_target, - timestamp, - step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - mat_gp0_industry - FROM - (SELECT aggregation_target, - timestamp, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 , - mat_gp0_industry - FROM - (SELECT e.event as event, - e.team_id as team_id, - e.distinct_id as distinct_id, - e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.mat_gp0_industry AS mat_gp0_industry, - e.person_id as person_id, - e."mat_gp0_industry" as "mat_gp0_industry" - FROM events e - WHERE team_id = 2 - AND event IN ['paid', 'user signed up'] - AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['positive'], "mat_gp0_industry")) - AND e.person_id != toUUIDOrZero('') ) events - WHERE (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max_steps SETTINGS allow_experimental_window_functions = 1) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS allow_experimental_window_functions = 1), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ' ---- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.3 - ' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id, - timestamp, - steps, - final_timestamp, - first_timestamp, - mat_gp0_industry - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp, - any(mat_gp0_industry) as mat_gp0_industry - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0, - mat_gp0_industry - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time , - mat_gp0_industry - FROM - (SELECT aggregation_target, - timestamp, - step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - mat_gp0_industry - FROM - (SELECT aggregation_target, - timestamp, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 , - mat_gp0_industry - FROM - (SELECT e.event as event, - e.team_id as team_id, - e.distinct_id as distinct_id, - e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.mat_gp0_industry AS mat_gp0_industry, - e.person_id as person_id, - e."mat_gp0_industry" as "mat_gp0_industry" - FROM events e - WHERE team_id = 2 - AND event IN ['paid', 'user signed up'] - AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], "mat_gp0_industry")) - AND e.person_id != toUUIDOrZero('') ) events - WHERE (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max_steps SETTINGS allow_experimental_window_functions = 1) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS allow_experimental_window_functions = 1), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ' ---- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.4 - ' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id, - timestamp, - steps, - final_timestamp, - first_timestamp, - mat_gp0_industry - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp, - any(mat_gp0_industry) as mat_gp0_industry - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0, - mat_gp0_industry - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time , - mat_gp0_industry - FROM - (SELECT aggregation_target, - timestamp, - step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - mat_gp0_industry - FROM - (SELECT aggregation_target, - timestamp, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 , - mat_gp0_industry - FROM - (SELECT e.event as event, - e.team_id as team_id, - e.distinct_id as distinct_id, - e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.mat_gp0_industry AS mat_gp0_industry, - e.person_id as person_id, - e."mat_gp0_industry" as "mat_gp0_industry" - FROM events e - WHERE team_id = 2 - AND event IN ['paid', 'user signed up'] - AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], "mat_gp0_industry")) - AND e.person_id != toUUIDOrZero('') ) events - WHERE (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max_steps SETTINGS allow_experimental_window_functions = 1) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS allow_experimental_window_functions = 1), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ' ---- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.5 ' WITH funnel_actors as (SELECT aggregation_target AS actor_id, @@ -4023,3 +3510,516 @@ FROM funnel_actors ' --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized + ' + WITH funnel_actors as + (SELECT aggregation_target AS actor_id, + timestamp, + steps, + final_timestamp, + first_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_1, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time , + latest_0, + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time + FROM + (SELECT aggregation_target, + timestamp, + step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 + FROM + (SELECT aggregation_target, + timestamp, + if(event = 'user signed up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'paid', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1 + FROM + (SELECT e.event as event, + e.team_id as team_id, + e.distinct_id as distinct_id, + e.timestamp as timestamp, + e."$group_0" as aggregation_target + FROM events e + WHERE team_id = 2 + AND event IN ['paid', 'user signed up'] + AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) events + WHERE (step_0 = 1 + OR step_1 = 1) )) + WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max_steps SETTINGS allow_experimental_window_functions = 1) + WHERE steps IN [1, 2] + ORDER BY aggregation_target SETTINGS allow_experimental_window_functions = 1), + 2 AS target_step + SELECT concat(prop.1, '::', prop.2) as name, + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count + FROM + (SELECT actor_id, + funnel_actors.steps as steps, + arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop + FROM funnel_actors + INNER JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] + UNION ALL + SELECT 'Total_Values_In_Query' as name, + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count + FROM funnel_actors + ' +--- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.1 + ' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id, + timestamp, + steps, + final_timestamp, + first_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_1, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time , + latest_0, + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time + FROM + (SELECT aggregation_target, + timestamp, + step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 + FROM + (SELECT aggregation_target, + timestamp, + if(event = 'user signed up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'paid', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1 + FROM + (SELECT e.event as event, + e.team_id as team_id, + e.distinct_id as distinct_id, + e.timestamp as timestamp, + e."$group_0" as aggregation_target, + groups_0.group_properties_0 as group_properties_0 + FROM events e + INNER JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['paid', 'user signed up'] + AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') + AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) ) events + WHERE (step_0 = 1 + OR step_1 = 1) )) + WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max_steps SETTINGS allow_experimental_window_functions = 1) + WHERE steps IN [1, 2] + ORDER BY aggregation_target SETTINGS allow_experimental_window_functions = 1), + 2 AS target_step + SELECT funnel_actors.actor_id AS actor_id + FROM funnel_actors + WHERE funnel_actors.steps = target_step + GROUP BY funnel_actors.actor_id + ORDER BY actor_id + LIMIT 100 + OFFSET 0 + ' +--- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.2 + ' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id, + timestamp, + steps, + final_timestamp, + first_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_1, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time , + latest_0, + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time + FROM + (SELECT aggregation_target, + timestamp, + step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 + FROM + (SELECT aggregation_target, + timestamp, + if(event = 'user signed up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'paid', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1 + FROM + (SELECT e.event as event, + e.team_id as team_id, + e.distinct_id as distinct_id, + e.timestamp as timestamp, + e."$group_0" as aggregation_target, + groups_0.group_properties_0 as group_properties_0 + FROM events e + INNER JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['paid', 'user signed up'] + AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') + AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) ) events + WHERE (step_0 = 1 + OR step_1 = 1) )) + WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max_steps SETTINGS allow_experimental_window_functions = 1) + WHERE steps IN [1, 2] + ORDER BY aggregation_target SETTINGS allow_experimental_window_functions = 1), + 2 AS target_step + SELECT funnel_actors.actor_id AS actor_id + FROM funnel_actors + WHERE funnel_actors.steps <> target_step + GROUP BY funnel_actors.actor_id + ORDER BY actor_id + LIMIT 100 + OFFSET 0 + ' +--- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.3 + ' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id, + timestamp, + steps, + final_timestamp, + first_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_1, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time , + latest_0, + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time + FROM + (SELECT aggregation_target, + timestamp, + step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 + FROM + (SELECT aggregation_target, + timestamp, + if(event = 'user signed up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'paid', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1 + FROM + (SELECT e.event as event, + e.team_id as team_id, + e.distinct_id as distinct_id, + e.timestamp as timestamp, + e."$group_0" as aggregation_target, + groups_0.group_properties_0 as group_properties_0 + FROM events e + INNER JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['paid', 'user signed up'] + AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') + AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) ) events + WHERE (step_0 = 1 + OR step_1 = 1) )) + WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max_steps SETTINGS allow_experimental_window_functions = 1) + WHERE steps IN [1, 2] + ORDER BY aggregation_target SETTINGS allow_experimental_window_functions = 1), + 2 AS target_step + SELECT funnel_actors.actor_id AS actor_id + FROM funnel_actors + WHERE funnel_actors.steps = target_step + GROUP BY funnel_actors.actor_id + ORDER BY actor_id + LIMIT 100 + OFFSET 0 + ' +--- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.4 + ' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id, + timestamp, + steps, + final_timestamp, + first_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_1, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time , + latest_0, + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time + FROM + (SELECT aggregation_target, + timestamp, + step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 + FROM + (SELECT aggregation_target, + timestamp, + if(event = 'user signed up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'paid', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1 + FROM + (SELECT e.event as event, + e.team_id as team_id, + e.distinct_id as distinct_id, + e.timestamp as timestamp, + e."$group_0" as aggregation_target, + groups_0.group_properties_0 as group_properties_0 + FROM events e + INNER JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['paid', 'user signed up'] + AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') + AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) ) events + WHERE (step_0 = 1 + OR step_1 = 1) )) + WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max_steps SETTINGS allow_experimental_window_functions = 1) + WHERE steps IN [1, 2] + ORDER BY aggregation_target SETTINGS allow_experimental_window_functions = 1), + 2 AS target_step + SELECT funnel_actors.actor_id AS actor_id + FROM funnel_actors + WHERE funnel_actors.steps <> target_step + GROUP BY funnel_actors.actor_id + ORDER BY actor_id + LIMIT 100 + OFFSET 0 + ' +--- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.5 + ' + WITH funnel_actors as + (SELECT aggregation_target AS actor_id, + timestamp, + steps, + final_timestamp, + first_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_1, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time , + latest_0, + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time + FROM + (SELECT aggregation_target, + timestamp, + step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 + FROM + (SELECT aggregation_target, + timestamp, + if(event = 'user signed up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'paid', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1 + FROM + (SELECT e.event as event, + e.team_id as team_id, + e.distinct_id as distinct_id, + e.timestamp as timestamp, + e."$group_0" as aggregation_target + FROM events e + WHERE team_id = 2 + AND event IN ['paid', 'user signed up'] + AND toDateTime(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toDateTime(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) events + WHERE (step_0 = 1 + OR step_1 = 1) )) + WHERE step_0 = 1 SETTINGS allow_experimental_window_functions = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max_steps SETTINGS allow_experimental_window_functions = 1) + WHERE steps IN [1, 2] + ORDER BY aggregation_target SETTINGS allow_experimental_window_functions = 1), + 2 AS target_step + SELECT concat(prop.1, '::', prop.2) as name, + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count + FROM + (SELECT actor_id, + funnel_actors.steps as steps, + arrayMap(x -> x.1, JSONExtractKeysAndValuesRaw(groups_0.group_properties_0)) as person_prop_keys, + arrayJoin(arrayZip(person_prop_keys, arrayMap(x -> replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, x), '^"|"$', ''), person_prop_keys))) as prop + FROM funnel_actors + INNER JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] + UNION ALL + SELECT 'Total_Values_In_Query' as name, + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count + FROM funnel_actors + ' +--- diff --git a/pytest.ini b/pytest.ini index 6442d739e3a..25f9b01f5d9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -9,4 +9,4 @@ markers = ee clickhouse_only skip_on_multitenancy - async_migrations + async_migrations \ No newline at end of file diff --git a/requirements-dev.in b/requirements-dev.in index 2aec7ecdd49..49749040db4 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -41,7 +41,6 @@ pytest-env==0.6.2 pytest-icdiff==0.5 pytest-mock==3.5.1 pytest-cov==2.12.1 -pytest-randomly==3.12.0 pytest-split==0.6.0 pytest-watch==4.2.0 syrupy==1.4.6 diff --git a/requirements-dev.txt b/requirements-dev.txt index daf1bc9bc13..e1263586aa5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -103,10 +103,6 @@ idna==2.8 # -c requirements.txt # requests # urllib3 -importlib-metadata==5.0.0 - # via - # -c requirements.txt - # pytest-randomly iniconfig==1.1.1 # via pytest isort==5.2.2 @@ -181,7 +177,6 @@ pytest==6.2.2 # pytest-env # pytest-icdiff # pytest-mock - # pytest-randomly # pytest-split # pytest-watch # syrupy @@ -195,8 +190,6 @@ pytest-icdiff==0.5 # via -r requirements-dev.in pytest-mock==3.5.1 # via -r requirements-dev.in -pytest-randomly==3.12.0 - # via -r requirements-dev.in pytest-split==0.6.0 # via -r requirements-dev.in pytest-watch==4.2.0 @@ -279,10 +272,6 @@ wrapt==1.14.1 # via # -c requirements.txt # deprecated -zipp==3.1.0 - # via - # -c requirements.txt - # importlib-metadata # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements.in b/requirements.in index 3e64e968499..40a9ef2bf21 100644 --- a/requirements.in +++ b/requirements.in @@ -37,7 +37,7 @@ geoip2==4.6.0 google-cloud-sqlcommenter==2.0.0 gunicorn==20.1.0 idna==2.8 -importlib-metadata==5.0.0 +importlib-metadata==1.6.0 infi-clickhouse-orm@ git+https://github.com/PostHog/infi.clickhouse_orm@37722f350f3b449bbcd6564917c436b0d93e796f kafka-python==2.0.2 kafka-helper==0.2 diff --git a/requirements.txt b/requirements.txt index 4fd9f7e8360..f08097a7c90 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with python 3.8 +# This file is autogenerated by pip-compile with python 3.9 # To update, run: # # pip-compile requirements.in @@ -166,10 +166,8 @@ idna==2.8 # trio # urllib3 # yarl -importlib-metadata==5.0.0 +importlib-metadata==1.6.0 # via -r requirements.in -importlib-resources==5.10.0 - # via jsonschema infi-clickhouse-orm @ git+https://github.com/PostHog/infi.clickhouse_orm@37722f350f3b449bbcd6564917c436b0d93e796f # via -r requirements.in inflection==0.5.1 @@ -379,9 +377,7 @@ xmlsec==1.3.12 yarl==1.7.2 # via aiohttp zipp==3.1.0 - # via - # importlib-metadata - # importlib-resources + # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools