2021-05-21 08:26:01 +02:00
|
|
|
from typing import Dict, Generator, List
|
|
|
|
|
|
|
|
from dateutil.relativedelta import relativedelta
|
|
|
|
from django.utils import timezone
|
2021-04-22 07:46:29 +02:00
|
|
|
|
|
|
|
from ee.clickhouse.client import sync_execute
|
|
|
|
|
2021-05-21 08:26:01 +02:00
|
|
|
# Queries whose query_duration_ms exceeds this value are reported by get_clickhouse_slow_log().
SLOW_THRESHOLD_MS = 10000
|
|
|
|
# Look-back window for get_clickhouse_slow_log(): only entries with event_time newer than (now - SLOW_AFTER) are returned.
SLOW_AFTER = relativedelta(hours=6)
|
|
|
|
|
2021-04-22 07:46:29 +02:00
|
|
|
# One entry yielded by system_status(): a plain dict with "key", "metric" and "value",
# plus an optional "subrows" table of the form {"columns": [...], "rows": [...]}.
SystemStatusRow = Dict
|
|
|
|
|
|
|
|
|
|
|
|
def system_status() -> Generator[SystemStatusRow, None, None]:
    """Yield status rows describing the ClickHouse server.

    The first row reports whether the server responded to the liveness probe.
    When it did not, the generator stops there. Otherwise it goes on to yield:

    * a free-space and a total-space row for every entry in ``system.disks``,
    * one row carrying a table of per-table sizes and row counts,
    * one row carrying a table of all asynchronous and regular system metrics.
    """
    clickhouse_up = is_alive()
    yield {"key": "clickhouse_alive", "metric": "Clickhouse database alive", "value": clickhouse_up}

    if not clickhouse_up:
        return

    disks = sync_execute(
        "SELECT formatReadableSize(total_space), formatReadableSize(free_space) FROM system.disks"
    )
    # With exactly one disk the label carries no index.
    only_one_disk = len(disks) == 1
    for position, (total, free) in enumerate(disks):
        if only_one_disk:
            label = "Clickhouse disk"
        else:
            label = f"Clickhouse disk {position}"
        yield {"key": f"clickhouse_disk_{position}_free_space", "metric": f"{label} free space", "value": free}
        yield {"key": f"clickhouse_disk_{position}_total_space", "metric": f"{label} total space", "value": total}

    per_table = sync_execute(
        """
        SELECT
            table,
            formatReadableSize(sum(bytes)) AS size,
            sum(rows) AS rows
        FROM system.parts
        WHERE active
        GROUP BY table
        ORDER BY rows DESC
    """
    )
    table_sizes_subrows = {"columns": ["Table", "Size", "Rows"], "rows": per_table}
    yield {
        "key": "clickhouse_table_sizes",
        "metric": "Clickhouse table sizes",
        "value": "",
        "subrows": table_sizes_subrows,
    }

    # Both metric tables are merged and presented as a single sorted listing.
    all_metrics = sync_execute("SELECT * FROM system.asynchronous_metrics")
    all_metrics += sync_execute("SELECT * FROM system.metrics")
    metrics_subrows = {"columns": ["Metric", "Value", "Description"], "rows": sorted(all_metrics)}
    yield {
        "key": "clickhouse_system_metrics",
        "metric": "Clickhouse system metrics",
        "value": "",
        "subrows": metrics_subrows,
    }
|
|
|
|
|
|
|
|
|
|
|
|
def is_alive() -> bool:
    """Report whether ClickHouse can execute a trivial query.

    Returns:
        True if ``SELECT 1`` ran without raising, False if it raised any
        ``Exception``.

    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not treated as
    "server down" and are allowed to propagate.
    """
    try:
        sync_execute("SELECT 1")
    except Exception:
        # Any ordinary failure of the probe counts as "not alive".
        return False
    return True
|
2021-05-21 08:26:01 +02:00
|
|
|
|
|
|
|
|
|
|
|
def get_clickhouse_running_queries() -> List[Dict]:
    """Return the rows of ``system.processes`` as dicts, longest elapsed time first.

    The elapsed time is exposed under the ``duration`` key; the raw
    ``elapsed`` column and the address columns are dropped from the result.
    """
    processes_sql = "SELECT elapsed as duration, query, * FROM system.processes ORDER BY duration DESC"
    dropped_columns = ["address", "initial_address", "elapsed"]
    return query_with_columns(processes_sql, columns_to_remove=dropped_columns)
|
|
|
|
|
|
|
|
|
|
|
|
def get_clickhouse_slow_log() -> List[Dict]:
    """Return recent slow queries from ``system.query_log`` as dicts.

    A query qualifies when its ``query_duration_ms`` exceeds
    ``SLOW_THRESHOLD_MS`` and its ``event_time`` is later than the current
    time minus ``SLOW_AFTER``. At most 200 rows are returned, slowest first.
    The duration is exposed under the ``duration`` key and a fixed set of
    columns is removed from every row.
    """
    slow_log_sql = f"""
        SELECT query_duration_ms as duration, query, *
        FROM system.query_log
        WHERE query_duration_ms > {SLOW_THRESHOLD_MS}
          AND event_time > %(after)s
        ORDER BY duration DESC
        LIMIT 200
    """
    query_args = {"after": timezone.now() - SLOW_AFTER}
    dropped_columns = [
        "address",
        "initial_address",
        "query_duration_ms",
        "event_time",
        "event_date",
        "query_start_time_microseconds",
        "thread_ids",
        "ProfileEvents.Names",
        "ProfileEvents.Values",
        "Settings.Names",
        "Settings.Values",
    ]
    return query_with_columns(slow_log_sql, query_args, columns_to_remove=dropped_columns)
|
|
|
|
|
|
|
|
|
|
|
|
def query_with_columns(query, args=None, columns_to_remove=None) -> List[Dict]:
    """Run a query and return each result row as a dict keyed by column name.

    Args:
        query: SQL text passed to ``sync_execute``.
        args: Optional query parameters, forwarded to ``sync_execute`` unchanged.
        columns_to_remove: Optional iterable of column names to leave out of
            every returned row. ``None`` (the default) removes nothing.

    Returns:
        One dict per result row, in result order. Values that are lists are
        flattened into a single ``", "``-joined string of their items.
    """
    # Built once so the per-value membership test below stays cheap; also
    # avoids sharing a mutable default list between calls.
    excluded = frozenset(columns_to_remove or ())

    metrics, types = sync_execute(query, args, with_column_types=True)
    column_names = [name for name, _type in types]

    rows = []
    for row in metrics:
        result = {}
        for column, value in zip(column_names, row):
            if column in excluded:
                continue
            if isinstance(value, list):
                value = ", ".join(map(str, value))
            result[column] = value
        rows.append(result)

    return rows
|