mirror of
https://github.com/PostHog/posthog.git
synced 2024-11-28 18:26:15 +01:00
a819069128
* chore(pdi): add data migration for pdi to pdi2 This adds an async migration to copy the latest non-deleted `(team_id, person_id, distinct_id)` tuples from `pdi` to `pdi2`. Note that this has already been performed for team_id = 2 on PostHog Cloud, so we ensure we're maintaining parity with however this migration was performed. I've done this by running: ``` SELECT * FROM <old_query> FULL JOIN <new_query> new ON old.distinct_id = new.distinct_id WHERE old.person_id <> new.person_id ``` specifically for team_id = 2. * Rename migration * Skip 0003_fill_person_distinct_id2 on fresh installs * Clarify version requirements * Run async migrations using a while-loop instead of tail recursion Python has a stack limit of 1000, which we might easily run into for 0003 migration * Use built-in progress tracking * Make description fit into database 400 char limit * Add correctness test for new async migration * Migrate person_distinct_id2 team-by-team * Remove dead code * Update migration notes * Fix foss tests Co-authored-by: Karl-Aksel Puulmann <oxymaccy@gmail.com>
19 lines
729 B
Python
19 lines
729 B
Python
from infi.clickhouse_orm import migrations
|
|
|
|
from ee.clickhouse.sql.person import PERSONS_DISTINCT_ID_TABLE_SQL, PERSONS_TABLE_SQL
|
|
from posthog.settings import CLICKHOUSE_CLUSTER
|
|
|
|
# Schema-migration steps executed in order by the infi.clickhouse_orm
# migration runner: create the `person` and `person_distinct_id` tables,
# then tag the fresh `person_distinct_id` table with a marker comment.
operations = [
    # Create the persons table (SQL generated by the imported helper).
    migrations.RunSQL(PERSONS_TABLE_SQL()),
    # Create the person_distinct_id table.
    migrations.RunSQL(PERSONS_DISTINCT_ID_TABLE_SQL()),
    # :TRICKY: This is only run on new installations, we use this to know to skip
    # posthog/async_migrations/migrations/0003_fill_person_distinct_id2.py
    # We would use table comments but can't due to clickhouse version limitations
    migrations.RunSQL(
        f"""
        ALTER TABLE person_distinct_id ON CLUSTER {CLICKHOUSE_CLUSTER}
        COMMENT COLUMN distinct_id 'skip_0003_fill_person_distinct_id2'
        """
    ),
]
|