Repository: mirror of https://github.com/PostHog/posthog.git (synced 2024-11-21 21:49:51 +01:00)

fix(data-warehouse): add naming convention (#25429)

Commit: 22d2aff23c (parent: fb92840378)
Author: Eric Duong, 2024-10-07 11:59:48 -04:00, committed by GitHub
3 changed files with 7 additions and 6 deletions

File 1 of 3 (the DataImportPipeline module):

@@ -11,7 +11,6 @@ import asyncio
 from posthog.settings.base_variables import TEST
 from structlog.typing import FilteringBoundLogger
 from dlt.common.libs.deltalake import get_delta_tables
-from dlt.common.normalizers.naming.snake_case import NamingConvention
 from dlt.load.exceptions import LoadClientJobRetry
 from dlt.sources import DltSource
 from deltalake.exceptions import DeltaError
@@ -104,9 +103,7 @@ class DataImportPipeline:
         job: ExternalDataJob = await get_external_data_job(job_id=self.inputs.run_id)
         schema: ExternalDataSchema = await aget_schema_by_id(self.inputs.schema_id, self.inputs.team_id)
-        normalized_schema_name = NamingConvention().normalize_identifier(schema.name)
-        prepare_s3_files_for_querying(job.folder_path(), normalized_schema_name, file_uris)
+        prepare_s3_files_for_querying(job.folder_path(), schema.name, file_uris)

     def _run(self) -> dict[str, int]:
         if self.refresh_dlt:
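With normalization moved into the helper, the pipeline drops its own NamingConvention import and the call site shrinks to a single line passing the raw schema name. A hedged before/after sketch, using the names from the diff above:

    # Before: the caller normalized the schema name itself.
    normalized_schema_name = NamingConvention().normalize_identifier(schema.name)
    prepare_s3_files_for_querying(job.folder_path(), normalized_schema_name, file_uris)

    # After: pass the raw name; the helper normalizes internally (see the next file).
    prepare_s3_files_for_querying(job.folder_path(), schema.name, file_uris)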

File 2 of 3 (the prepare_s3_files_for_querying helper):

@@ -1,13 +1,16 @@
 from posthog.warehouse.s3 import get_s3_client
 from django.conf import settings
+from dlt.common.normalizers.naming.snake_case import NamingConvention


 def prepare_s3_files_for_querying(folder_path: str, table_name: str, file_uris: list[str]):
     s3 = get_s3_client()

+    normalized_table_name = NamingConvention().normalize_identifier(table_name)
+
     s3_folder_for_job = f"{settings.BUCKET_URL}/{folder_path}"
-    s3_folder_for_schema = f"{s3_folder_for_job}/{table_name}"
-    s3_folder_for_querying = f"{s3_folder_for_job}/{table_name}__query"
+    s3_folder_for_schema = f"{s3_folder_for_job}/{normalized_table_name}"
+    s3_folder_for_querying = f"{s3_folder_for_job}/{normalized_table_name}__query"

     if s3.exists(s3_folder_for_querying):
         s3.delete(s3_folder_for_querying, recursive=True)
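For context, the NamingConvention imported here is dlt's snake_case convention, presumably the same normalization dlt applies to table identifiers when it writes data, so the S3 folder name now always matches the name dlt produced no matter how the caller spells it. A minimal sketch of the normalization (sample names are illustrative; exact outputs follow dlt's snake_case rules):

    from dlt.common.normalizers.naming.snake_case import NamingConvention

    convention = NamingConvention()
    # Sample inputs are illustrative; real values come from schema and table names.
    for name in ["CustomerOrders", "Blog Posts", "events-2024"]:
        print(name, "->", convention.normalize_identifier(name))
    # Expected to yield snake_case identifiers along the lines of
    # customer_orders, blog_posts and events_2024.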

File 3 of 3 (the materialize_model workflow):

@@ -330,6 +330,7 @@ async def materialize_model(model_label: str, team: Team) -> tuple[str, DeltaTable]:
     table.vacuum(retention_hours=24, enforce_retention_duration=False, dry_run=False)

     file_uris = table.file_uris()
+    prepare_s3_files_for_querying(saved_query.folder_path, saved_query.name, file_uris)

     key, delta_table = tables.popitem()
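This last hunk adds the prepare step after a model is materialized and vacuumed; because the helper now normalizes internally, saved_query.name can be passed as-is. A hypothetical look at the folder layout this produces (the bucket URL and names below are illustrative stand-ins, not values from the codebase):

    from dlt.common.normalizers.naming.snake_case import NamingConvention

    # Hypothetical stand-ins for settings.BUCKET_URL, saved_query.folder_path
    # and saved_query.name.
    bucket_url = "s3://warehouse-bucket"
    folder_path = "team_1/models/blog_posts"
    table_name = "Blog Posts"

    normalized = NamingConvention().normalize_identifier(table_name)  # expected: "blog_posts"
    print(f"{bucket_url}/{folder_path}/{normalized}")           # schema files
    print(f"{bucket_url}/{folder_path}/{normalized}__query")    # folder used for querying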