mirror of
https://github.com/PostHog/posthog.git
synced 2024-11-28 09:16:49 +01:00
Add dead letter queue metrics to system status (#8294)
* add dead letter queue metrics to system status * today -> last 24h * fix test
This commit is contained in:
parent
6ec22ce452
commit
09e37663f4
@ -82,12 +82,46 @@ def system_status() -> Generator[SystemStatusRow, None, None]:
|
||||
"subrows": {"columns": ["Metric", "Value", "Description"], "rows": list(sorted(system_metrics))},
|
||||
}
|
||||
|
||||
last_event_ingested_timestamp = sync_execute("SELECT max(_timestamp) FROM events")
|
||||
last_event_ingested_timestamp = sync_execute("SELECT max(_timestamp) FROM events")[0][0]
|
||||
|
||||
yield {
|
||||
"key": "last_event_ingested_timestamp",
|
||||
"metric": "Last event ingested",
|
||||
"value": last_event_ingested_timestamp[0],
|
||||
"value": last_event_ingested_timestamp,
|
||||
}
|
||||
|
||||
dead_letter_queue_size = sync_execute("SELECT count(*) FROM events_dead_letter_queue")[0][0]
|
||||
|
||||
yield {
|
||||
"key": "dead_letter_queue_size",
|
||||
"metric": "Dead letter queue size",
|
||||
"value": dead_letter_queue_size,
|
||||
}
|
||||
|
||||
dead_letter_queue_events_last_day = sync_execute(
|
||||
"SELECT count(*) FROM events_dead_letter_queue WHERE _timestamp >= (NOW() - INTERVAL 1 DAY)"
|
||||
)[0][0]
|
||||
|
||||
yield {
|
||||
"key": "dead_letter_queue_events_last_day",
|
||||
"metric": "Events sent to dead letter queue in the last 24h",
|
||||
"value": dead_letter_queue_events_last_day,
|
||||
}
|
||||
|
||||
total_events_ingested_last_day = sync_execute(
|
||||
"SELECT count(*) as b from events WHERE _timestamp >= (NOW() - INTERVAL 1 DAY)"
|
||||
)[0][0]
|
||||
dead_letter_queue_ingestion_ratio = dead_letter_queue_events_last_day / total_events_ingested_last_day
|
||||
|
||||
# flag as unhealthy if, over the last 24h, the dead letter queue received at least half as many events as were ingested and more than 10000 events in total
|
||||
dead_letter_queue_events_high = (
|
||||
dead_letter_queue_ingestion_ratio >= 0.5 and dead_letter_queue_events_last_day > 10000
|
||||
)
|
||||
|
||||
yield {
|
||||
"key": "dead_letter_queue_ratio_ok",
|
||||
"metric": "Dead letter queue ratio healthy",
|
||||
"value": not dead_letter_queue_events_high,
|
||||
}
|
||||
|
||||
|
||||
|
@ -13,6 +13,9 @@ def test_system_status(db):
|
||||
"clickhouse_table_sizes",
|
||||
"clickhouse_system_metrics",
|
||||
"last_event_ingested_timestamp",
|
||||
"dead_letter_queue_size",
|
||||
"dead_letter_queue_events_last_day",
|
||||
"dead_letter_queue_ratio_ok",
|
||||
]
|
||||
assert len(results[-3]["subrows"]["rows"]) > 0
|
||||
assert len(results[-2]["subrows"]["rows"]) > 0
|
||||
assert len(results[6]["subrows"]["rows"]) > 0
|
||||
assert len(results[7]["subrows"]["rows"]) > 0
|
||||
|
@ -130,7 +130,7 @@ export const navigationLogic = kea<navigationLogicType<WarningType>>({
|
||||
}
|
||||
|
||||
// if you have status metrics these three must have `value: true`
|
||||
const aliveMetrics = ['redis_alive', 'db_alive', 'plugin_sever_alive']
|
||||
const aliveMetrics = ['redis_alive', 'db_alive', 'plugin_sever_alive', 'dead_letter_queue_ratio_ok']
|
||||
const aliveSignals = statusMetrics
|
||||
.filter((sm) => sm.key && aliveMetrics.includes(sm.key))
|
||||
.filter((sm) => sm.value).length
|
||||
|
Loading…
Reference in New Issue
Block a user