0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-28 09:16:49 +01:00

Add dead letter queue metrics to system status (#8294)

* add dead letter queue metrics to system status

* today -> last 24h

* fix test
This commit is contained in:
Yakko Majuri 2022-01-31 18:10:10 +00:00 committed by GitHub
parent 6ec22ce452
commit 09e37663f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 42 additions and 5 deletions

View File

@ -82,12 +82,46 @@ def system_status() -> Generator[SystemStatusRow, None, None]:
"subrows": {"columns": ["Metric", "Value", "Description"], "rows": list(sorted(system_metrics))},
}
last_event_ingested_timestamp = sync_execute("SELECT max(_timestamp) FROM events")
last_event_ingested_timestamp = sync_execute("SELECT max(_timestamp) FROM events")[0][0]
yield {
"key": "last_event_ingested_timestamp",
"metric": "Last event ingested",
"value": last_event_ingested_timestamp[0],
"value": last_event_ingested_timestamp,
}
dead_letter_queue_size = sync_execute("SELECT count(*) FROM events_dead_letter_queue")[0][0]
yield {
"key": "dead_letter_queue_size",
"metric": "Dead letter queue size",
"value": dead_letter_queue_size,
}
dead_letter_queue_events_last_day = sync_execute(
"SELECT count(*) FROM events_dead_letter_queue WHERE _timestamp >= (NOW() - INTERVAL 1 DAY)"
)[0][0]
yield {
"key": "dead_letter_queue_events_last_day",
"metric": "Events sent to dead letter queue in the last 24h",
"value": dead_letter_queue_events_last_day,
}
total_events_ingested_last_day = sync_execute(
"SELECT count(*) as b from events WHERE _timestamp >= (NOW() - INTERVAL 1 DAY)"
)[0][0]
dead_letter_queue_ingestion_ratio = dead_letter_queue_events_last_day / total_events_ingested_last_day
# if the dead letter queue received at least half as many events as were ingested in the last 24h, and more than 10000 events in total, issue an alert
dead_letter_queue_events_high = (
dead_letter_queue_ingestion_ratio >= 0.5 and dead_letter_queue_events_last_day > 10000
)
yield {
"key": "dead_letter_queue_ratio_ok",
"metric": "Dead letter queue ratio healthy",
"value": not dead_letter_queue_events_high,
}

View File

@ -13,6 +13,9 @@ def test_system_status(db):
"clickhouse_table_sizes",
"clickhouse_system_metrics",
"last_event_ingested_timestamp",
"dead_letter_queue_size",
"dead_letter_queue_events_last_day",
"dead_letter_queue_ratio_ok",
]
assert len(results[-3]["subrows"]["rows"]) > 0
assert len(results[-2]["subrows"]["rows"]) > 0
assert len(results[6]["subrows"]["rows"]) > 0
assert len(results[7]["subrows"]["rows"]) > 0

View File

@ -130,7 +130,7 @@ export const navigationLogic = kea<navigationLogicType<WarningType>>({
}
// if you have status metrics, every metric listed in `aliveMetrics` must have `value: true`
const aliveMetrics = ['redis_alive', 'db_alive', 'plugin_sever_alive']
const aliveMetrics = ['redis_alive', 'db_alive', 'plugin_sever_alive', 'dead_letter_queue_ratio_ok']
const aliveSignals = statusMetrics
.filter((sm) => sm.key && aliveMetrics.includes(sm.key))
.filter((sm) => sm.value).length