mirror of
https://github.com/PostHog/posthog.git
synced 2024-11-24 00:47:50 +01:00
perf: Speed up persons batch export (#26183)
This commit is contained in:
parent
c4df3e0ee2
commit
ee8d0f94f1
@ -2,62 +2,133 @@ from django.conf import settings
|
|||||||
|
|
||||||
# Parameterized ClickHouse view that backs the persons batch export.
#
# Only the cluster name is interpolated by the f-string. The doubled braces
# are emitted as single braces, so the final statement keeps the ClickHouse
# query parameters `{team_id:Int64}`, `{interval_start:DateTime64}` and
# `{interval_end:DateTime64}`; the interval is half-open: [start, end).
#
# Shape of the query:
#   new_persons      - (id, latest version) of persons that have a row inside
#                      the interval AND whose latest version's `_timestamp`
#                      is itself inside the interval.
#   new_distinct_ids - latest `person_id` of every distinct_id whose latest
#                      mapping's `_timestamp` is inside the interval.
#   all_new_persons  - `new_persons` plus the latest version of every person
#                      referenced by `new_distinct_ids`. Duplicates are fine
#                      because the CTE is only used on the right side of `IN`.
#   final SELECT     - `person` rows at exactly those (id, version) pairs,
#                      joined to the latest mapping of each of their
#                      distinct_ids.
#
# Aliases such as `_timestamp2` and `person_id2` are deliberately different
# from the underlying column names, so that the raw columns can still be
# referenced in the same sub-query's WHERE clause.
#
# `_inserted_at` is the timestamp of whichever side (person or distinct_id
# mapping) changed inside the interval; when both or neither did, it is the
# earlier of the two timestamps.
#
# The outer WHERE qualifies its columns with `p.` because both `p` and `pd`
# expose a `version` column; the (id, version) tuples in `all_new_persons`
# are person ids and person versions.
CREATE_PERSONS_BATCH_EXPORT_VIEW = f"""
CREATE OR REPLACE VIEW persons_batch_export ON CLUSTER {settings.CLICKHOUSE_CLUSTER} AS (
    WITH new_persons AS (
        SELECT
            id,
            max(version) AS version,
            argMax(_timestamp, person.version) AS _timestamp2
        FROM
            person
        WHERE
            team_id = {{team_id:Int64}}
            AND id IN (
                SELECT
                    id
                FROM
                    person
                WHERE
                    team_id = {{team_id:Int64}}
                    AND _timestamp >= {{interval_start:DateTime64}}
                    AND _timestamp < {{interval_end:DateTime64}}
            )
        GROUP BY
            id
        HAVING
            (
                _timestamp2 >= {{interval_start:DateTime64}}
                AND _timestamp2 < {{interval_end:DateTime64}}
            )
    ),
    new_distinct_ids AS (
        SELECT
            argMax(person_id, person_distinct_id2.version) AS person_id
        FROM
            person_distinct_id2
        WHERE
            team_id = {{team_id:Int64}}
            AND distinct_id IN (
                SELECT
                    distinct_id
                FROM
                    person_distinct_id2
                WHERE
                    team_id = {{team_id:Int64}}
                    AND _timestamp >= {{interval_start:DateTime64}}
                    AND _timestamp < {{interval_end:DateTime64}}
            )
        GROUP BY
            distinct_id
        HAVING
            (
                argMax(_timestamp, person_distinct_id2.version) >= {{interval_start:DateTime64}}
                AND argMax(_timestamp, person_distinct_id2.version) < {{interval_end:DateTime64}}
            )
    ),
    all_new_persons AS (
        SELECT
            id,
            version
        FROM
            new_persons
        UNION ALL
        SELECT
            id,
            max(version)
        FROM
            person
        WHERE
            team_id = {{team_id:Int64}}
            AND id IN new_distinct_ids
        GROUP BY
            id
    )
    SELECT
        p.team_id AS team_id,
        pd.distinct_id AS distinct_id,
        toString(p.id) AS person_id,
        p.properties AS properties,
        pd.version AS person_distinct_id_version,
        p.version AS person_version,
        multiIf(
            (
                pd._timestamp >= {{interval_start:DateTime64}}
                AND pd._timestamp < {{interval_end:DateTime64}}
            )
            AND NOT (
                p._timestamp >= {{interval_start:DateTime64}}
                AND p._timestamp < {{interval_end:DateTime64}}
            ),
            pd._timestamp,
            (
                p._timestamp >= {{interval_start:DateTime64}}
                AND p._timestamp < {{interval_end:DateTime64}}
            )
            AND NOT (
                pd._timestamp >= {{interval_start:DateTime64}}
                AND pd._timestamp < {{interval_end:DateTime64}}
            ),
            p._timestamp,
            least(p._timestamp, pd._timestamp)
        ) AS _inserted_at
    FROM
        person p
        INNER JOIN (
            SELECT
                distinct_id,
                max(version) AS version,
                argMax(person_id, person_distinct_id2.version) AS person_id2,
                argMax(_timestamp, person_distinct_id2.version) AS _timestamp
            FROM
                person_distinct_id2
            WHERE
                team_id = {{team_id:Int64}}
                AND person_id IN (
                    SELECT
                        id
                    FROM
                        all_new_persons
                )
            GROUP BY
                distinct_id
        ) AS pd ON p.id = pd.person_id2
    WHERE
        p.team_id = {{team_id:Int64}}
        AND (p.id, p.version) IN all_new_persons
    ORDER BY
        _inserted_at
)
"""
|
||||||
|
|
||||||
CREATE_PERSONS_BATCH_EXPORT_VIEW_BACKFILL = f"""
|
CREATE_PERSONS_BATCH_EXPORT_VIEW_BACKFILL = f"""
|
||||||
|
@ -0,0 +1,11 @@
|
|||||||
|
from posthog.batch_exports.sql import (
|
||||||
|
CREATE_PERSONS_BATCH_EXPORT_VIEW,
|
||||||
|
)
|
||||||
|
from posthog.clickhouse.client.migration_tools import run_sql_with_exceptions
|
||||||
|
|
||||||
|
# Migration steps: each SQL statement in the list is passed through
# `run_sql_with_exceptions`. `map` is lazy, so the wrapping happens when the
# migration runner iterates over `operations`.
operations = map(run_sql_with_exceptions, [CREATE_PERSONS_BATCH_EXPORT_VIEW])
|
Loading…
Reference in New Issue
Block a user