From d66bfa037426cb94ef74948b5c1fbdf5e5b88cb4 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Mon, 18 Nov 2024 14:48:00 +0000 Subject: [PATCH] fix(data-warehouse): Remove special characters from error messages (#26222) --- .../data_imports/external_data_job.py | 5 +- .../tests/data_imports/test_end_to_end.py | 53 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/posthog/temporal/data_imports/external_data_job.py b/posthog/temporal/data_imports/external_data_job.py index 84c798961e6..c5ebd4dddf1 100644 --- a/posthog/temporal/data_imports/external_data_job.py +++ b/posthog/temporal/data_imports/external_data_job.py @@ -1,6 +1,7 @@ import dataclasses import datetime as dt import json +import re from django.db import close_old_connections import posthoganalytics @@ -91,11 +92,13 @@ def update_external_data_job_model(inputs: UpdateExternalDataJobStatusInputs) -> f"External data job failed for external data schema {inputs.schema_id} with error: {inputs.internal_error}" ) + internal_error_normalized = re.sub("[\n\r\t]", " ", inputs.internal_error) + source: ExternalDataSource = ExternalDataSource.objects.get(pk=inputs.source_id) non_retryable_errors = Non_Retryable_Schema_Errors.get(ExternalDataSource.Type(source.source_type)) if non_retryable_errors is not None: - has_non_retryable_error = any(error in inputs.internal_error for error in non_retryable_errors) + has_non_retryable_error = any(error in internal_error_normalized for error in non_retryable_errors) if has_non_retryable_error: logger.info("Schema has a non-retryable error - turning off syncing") posthoganalytics.capture( diff --git a/posthog/temporal/tests/data_imports/test_end_to_end.py b/posthog/temporal/tests/data_imports/test_end_to_end.py index 1c213ec6684..fce2047cd1c 100644 --- a/posthog/temporal/tests/data_imports/test_end_to_end.py +++ b/posthog/temporal/tests/data_imports/test_end_to_end.py @@ -941,6 +941,59 @@ async def test_non_retryable_error(team, stripe_customer): await sync_to_async(execute_hogql_query)("SELECT * FROM stripe_customer", team) +@pytest.mark.django_db(transaction=True) +@pytest.mark.asyncio +async def test_non_retryable_error_with_special_characters(team, stripe_customer): + source = await sync_to_async(ExternalDataSource.objects.create)( + source_id=uuid.uuid4(), + connection_id=uuid.uuid4(), + destination_id=uuid.uuid4(), + team=team, + status="running", + source_type="Stripe", + job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"}, + ) + + schema = await sync_to_async(ExternalDataSchema.objects.create)( + name="Customer", + team_id=team.pk, + source_id=source.pk, + sync_type=ExternalDataSchema.SyncType.FULL_REFRESH, + sync_type_config={}, + ) + + workflow_id = str(uuid.uuid4()) + inputs = ExternalDataWorkflowInputs( + team_id=team.id, + external_data_source_id=source.pk, + external_data_schema_id=schema.id, + ) + + with ( + mock.patch( + "posthog.temporal.data_imports.workflow_activities.check_billing_limits.list_limited_team_attributes", + ) as mock_list_limited_team_attributes, + mock.patch.object(posthoganalytics, "capture") as capture_mock, + ): + mock_list_limited_team_attributes.side_effect = Exception( + "401 Client Error:\nUnauthorized for url: https://api.stripe.com" + ) + + with pytest.raises(Exception): + await _execute_run(workflow_id, inputs, stripe_customer["data"]) + + capture_mock.assert_called_once() + + job: ExternalDataJob = await sync_to_async(ExternalDataJob.objects.get)(team_id=team.id, schema_id=schema.pk) + await sync_to_async(schema.refresh_from_db)() + + assert job.status == ExternalDataJob.Status.FAILED + assert schema.should_sync is False + + with pytest.raises(Exception): + await sync_to_async(execute_hogql_query)("SELECT * FROM stripe_customer", team) + + @pytest.mark.django_db(transaction=True) @pytest.mark.asyncio async def test_delta_table_deleted(team, stripe_balance_transaction):