0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-29 03:04:16 +01:00
posthog/ee/clickhouse/sql/cohort.py
2021-12-16 10:38:41 -06:00

80 lines
2.8 KiB
Python

from posthog.settings import CLICKHOUSE_CLUSTER
from .clickhouse import COLLAPSING_MERGE_TREE, table_engine
# Template that selects the ``{id_column}`` column out of the
# ``{GET_TEAM_PERSON_DISTINCT_IDS}`` subquery, keeping only the rows for which
# the ``{query}`` condition holds. All three brace placeholders are left for
# the caller to substitute.
CALCULATE_COHORT_PEOPLE_SQL = (
    "\n"
    "SELECT {id_column} FROM ({GET_TEAM_PERSON_DISTINCT_IDS}) WHERE {query}\n"
)
def CREATE_COHORTPEOPLE_TABLE_SQL() -> str:
    """Return the ``CREATE TABLE IF NOT EXISTS`` statement for ``cohortpeople``.

    The table has the columns ``person_id`` (UUID), ``cohort_id`` (Int64),
    ``team_id`` (Int64) and ``sign`` (Int8) and is ordered by
    ``(team_id, cohort_id, person_id)``.

    The statement is rendered on every call, not at import time:
    ``CLICKHOUSE_CLUSTER`` supplies the cluster name, the engine clause comes
    from ``table_engine("cohortpeople", "sign", COLLAPSING_MERGE_TREE)``, and
    the storage-policy slot is filled with an empty string.

    Returns:
        The fully formatted DDL statement.
    """
    # Kept as a named function (not a lambda bound to a name) so tracebacks
    # and introspection show a meaningful name and the docstring is available.
    ddl_template = (
        "\n"
        "CREATE TABLE IF NOT EXISTS cohortpeople ON CLUSTER {cluster}\n"
        "(\n"
        "person_id UUID,\n"
        "cohort_id Int64,\n"
        "team_id Int64,\n"
        "sign Int8\n"
        ") ENGINE = {engine}\n"
        "Order By (team_id, cohort_id, person_id)\n"
        "{storage_policy}\n"
    )
    return ddl_template.format(
        cluster=CLICKHOUSE_CLUSTER,
        engine=table_engine("cohortpeople", "sign", COLLAPSING_MERGE_TREE),
        storage_policy="",
    )
# Statement that truncates ``cohortpeople`` (if the table exists) on the
# cluster named by ``CLICKHOUSE_CLUSTER``; rendered once, when the module is
# imported.
TRUNCATE_COHORTPEOPLE_TABLE_SQL = "TRUNCATE TABLE IF EXISTS cohortpeople ON CLUSTER {cluster}".format(
    cluster=CLICKHOUSE_CLUSTER
)
# Statement that drops ``cohortpeople`` (if the table exists) on the cluster
# named by ``CLICKHOUSE_CLUSTER``; rendered once, when the module is imported.
DROP_COHORTPEOPLE_TABLE_SQL = "DROP TABLE IF EXISTS cohortpeople ON CLUSTER {cluster}".format(
    cluster=CLICKHOUSE_CLUSTER
)
# Writes a cancelling row (``_sign`` = -1) into ``cohortpeople`` for every
# existing row of the given team and cohort whose person is either flagged
# deleted (``person.is_deleted = 1``) or is not returned by the
# ``{cohort_filter}`` subquery.
#
# Bound parameters: ``%(team_id)s`` and ``%(cohort_id)s``.
# Brace placeholder: ``{cohort_filter}``, to be substituted by the caller.
#
# The outer read of ``cohortpeople`` is restricted by ``team_id`` as well as
# ``cohort_id``, like the other queries in this module that read the table.
# This keeps the source rows on the same team as the -1 rows being written,
# and filters on the leading column of the table's
# ``Order By (team_id, cohort_id, person_id)`` key.
# NOTE(review): the column is written as ``cohortpeople.team_id`` because the
# SELECT list also defines a ``team_id`` alias, and ClickHouse may resolve an
# unqualified name to the alias rather than the column -- confirm against the
# deployed ClickHouse settings.
REMOVE_PEOPLE_NOT_MATCHING_COHORT_ID_SQL = """
INSERT INTO cohortpeople
SELECT person_id, cohort_id, %(team_id)s as team_id, -1 as _sign
FROM cohortpeople
JOIN (
SELECT id, argMax(properties, person._timestamp) as properties, sum(is_deleted) as is_deleted FROM person WHERE team_id = %(team_id)s GROUP BY id
) as person ON (person.id = cohortpeople.person_id)
WHERE cohortpeople.team_id = %(team_id)s
AND cohort_id = %(cohort_id)s
AND
(
person.is_deleted = 1 OR NOT person_id IN ({cohort_filter})
)
"""
# Counts the members of one cohort: the number of
# (person_id, cohort_id, team_id) groups in ``cohortpeople`` whose summed
# ``sign`` is positive. Bound parameters: ``%(team_id)s`` and ``%(cohort_id)s``.
GET_COHORT_SIZE_SQL = (
    "\n"
    "SELECT count(*)\n"
    "FROM (\n"
    # One row per person that is still a net member of the cohort.
    "SELECT 1\n"
    "FROM cohortpeople\n"
    "WHERE team_id = %(team_id)s AND cohort_id = %(cohort_id)s\n"
    "GROUP BY person_id, cohort_id, team_id\n"
    "HAVING sum(sign) > 0\n"
    ")\n"
)
# Writes a membership row (``_sign`` = 1) into ``cohortpeople`` for every
# person of the team who is not flagged deleted, is returned by the
# ``{cohort_filter}`` subquery, and is not currently counted in the cohort.
#
# Bound parameters: ``%(team_id)s`` and ``%(cohort_id)s``.
# Brace placeholder: ``{cohort_filter}``, to be substituted by the caller.
INSERT_PEOPLE_MATCHING_COHORT_ID_SQL = (
    "\n"
    "INSERT INTO cohortpeople\n"
    "SELECT id, %(cohort_id)s as cohort_id, %(team_id)s as team_id, 1 as _sign\n"
    "FROM (\n"
    # Per person of this team: properties at the greatest _timestamp, plus the
    # summed is_deleted flag.
    "SELECT id, argMax(properties, person._timestamp) as properties,"
    " sum(is_deleted) as is_deleted FROM person WHERE team_id = %(team_id)s GROUP BY id\n"
    ") as person\n"
    "LEFT JOIN (\n"
    # Net sign already recorded per person for this cohort and team.
    "SELECT person_id, sum(sign) AS sign FROM cohortpeople"
    " WHERE cohort_id = %(cohort_id)s AND team_id = %(team_id)s GROUP BY person_id\n"
    ") as cohortpeople ON (person.id = cohortpeople.person_id)\n"
    # NOTE(review): the all-zero UUID is presumably what the LEFT JOIN yields
    # for a person with no cohortpeople row -- confirm against join settings.
    "WHERE (cohortpeople.person_id = '00000000-0000-0000-0000-000000000000' OR sign = 0)\n"
    "AND person.is_deleted = 0\n"
    "AND id IN ({cohort_filter})\n"
)
# Template that selects ``distinct_id`` from ``events`` for one team
# (``%(team_id)s`` bound parameter). ``{date_query}`` is spliced in directly
# after the team condition -- presumably it carries its own leading ``AND``;
# verify against the caller -- and ``{entity_query}`` is AND-ed on afterwards.
GET_DISTINCT_ID_BY_ENTITY_SQL = (
    "\n"
    "SELECT distinct_id FROM events"
    " WHERE team_id = %(team_id)s {date_query} AND {entity_query}\n"
)
# Template that selects ``person_id`` from ``events`` joined to the
# ``{GET_TEAM_PERSON_DISTINCT_IDS}`` subquery (aliased ``pdi``) on
# ``distinct_id``, for one team (``%(team_id)s`` bound parameter).
# ``{date_query}`` and ``{entity_query}`` narrow the events; the result is
# grouped by ``person_id`` and ``{count_condition}`` is appended after the
# GROUP BY clause.
GET_PERSON_ID_BY_ENTITY_COUNT_SQL = (
    "\n"
    "SELECT person_id FROM events\n"
    "INNER JOIN ({GET_TEAM_PERSON_DISTINCT_IDS}) as pdi\n"
    "ON events.distinct_id = pdi.distinct_id\n"
    "WHERE team_id = %(team_id)s {date_query} AND {entity_query}\n"
    "GROUP BY person_id {count_condition}\n"
)
# Template that selects the ``person_id`` values in ``cohortpeople`` for one
# team and cohort whose summed ``sign`` is positive. The cohort id is bound
# through a parameter whose name is assembled from the ``{prepend}`` and
# ``{index}`` placeholders: ``%(<prepend>_cohort_id_<index>)s``.
GET_PERSON_ID_BY_PRECALCULATED_COHORT_ID = (
    "\n"
    "SELECT person_id FROM cohortpeople"
    " WHERE team_id = %(team_id)s AND cohort_id = %({prepend}_cohort_id_{index})s"
    " GROUP BY person_id, cohort_id, team_id"
    " HAVING sum(sign) > 0\n"
)