mirror of
https://github.com/PostHog/posthog.git
synced 2024-11-29 03:04:16 +01:00
437b2982cd
* Add asv code * Get a benchmark running * better output folder * Move benchmark file * Cleanup of config * Run a query and benchmark * Skip benchmarks for isort * Set up materialized columns before test * Better skipping logic * Add first proper benchmark, add some documentation * Person property filtering * Add new workflow * Show stderr in test run * Continue debugging actions * Try to save benchmark results to separate repo * Output to main directory * Scheduling * Remove if temporarily * Int for ms * Comment on the PR * Collapsible section * Update README * Clarification * Remove h3 * Remove pyproject.toml, .gitignore changes that are unneeded
82 lines
2.1 KiB
Python
82 lines
2.1 KiB
Python
import os
|
|
import sys
|
|
from contextlib import contextmanager
|
|
from functools import wraps
|
|
from os.path import dirname
|
|
|
|
from django.utils.timezone import now
|
|
|
|
# These environment variables are set before django.setup() is called below,
# so the statements here must stay ahead of the Django-dependent imports.
os.environ["POSTHOG_DB_NAME"] = "posthog_test"
os.environ["DJANGO_SETTINGS_MODULE"] = "posthog.settings"
# Put the directory three levels above this file on the import path
# (presumably the repository root, so `posthog` and `ee` resolve -- confirm).
sys.path.append(dirname(dirname(dirname(__file__))))

import django

# Initialise Django before importing the project modules that follow.
django.setup()
|
|
|
|
from ee.clickhouse import client
|
|
from ee.clickhouse.materialized_columns.columns import get_materialized_columns
|
|
from posthog.models.utils import UUIDT
|
|
|
|
def get_column(rows, index):
    """Return the ``index``-th element of every row in ``rows`` as a list.

    Args:
        rows: An iterable of indexable rows (e.g. tuples of query results).
        index: Position (or key) to extract from each row.

    Returns:
        A list with one extracted value per row; empty when ``rows`` is empty.
    """
    return [row[index] for row in rows]
|
|
|
|
|
|
def run_query(fn, *args):
    """Run ``fn(*args)`` tagged as a benchmark and return its ClickHouse stats.

    A fresh UUID is stored in ``client._request_information`` for the duration
    of the call; the same UUID is then handed to ``get_clickhouse_query_stats``
    to collect the statistics of the queries issued by ``fn``.
    NOTE(review): how the client turns ``_request_information`` into something
    visible in the query log is not shown here -- confirm in ee.clickhouse.client.

    Args:
        fn: Callable to benchmark; its ``__name__`` becomes part of the tag id.
        *args: Positional arguments forwarded to ``fn``.

    Returns:
        The dictionary produced by ``get_clickhouse_query_stats`` for this run.
        The return value of ``fn`` itself is discarded.
    """
    uuid = str(UUIDT())
    # Remember whatever tag was active so it can be put back afterwards
    # instead of being unconditionally cleared.
    previous_information = getattr(client, "_request_information", None)
    # The id used to read "<uuid>::$<name>" because of a stray "$" left in the
    # f-string; it is now "<uuid>::<name>".
    client._request_information = {"kind": "benchmark", "id": f"{uuid}::{fn.__name__}"}
    try:
        fn(*args)
        return get_clickhouse_query_stats(uuid)
    finally:
        client._request_information = previous_information
|
|
|
|
|
|
def get_clickhouse_query_stats(uuid):
    """Aggregate ``system.query_log`` statistics for one benchmark run.

    Args:
        uuid: Identifier of the run; finished queries whose text contains
            ``benchmark:<uuid>`` are selected.

    Returns:
        A dict with ``query_count`` (number of matching log rows) and the sums
        of ``query_duration_ms`` (as ``ch_query_time``, cast to int),
        ``read_rows``, ``read_bytes`` and ``memory_usage`` over those rows.
    """
    # Flush ClickHouse's buffered logs so the queries just executed are
    # present in system.query_log before it is read.
    client.sync_execute("SYSTEM FLUSH LOGS")

    # '%%' is an escaped literal '%' because the statement goes through
    # %-style parameter substitution; the NOT LIKE clause keeps this
    # statistics query itself out of the result.
    stats_sql = """
        SELECT
            query_duration_ms,
            read_rows,
            read_bytes,
            memory_usage
        FROM system.query_log
        WHERE
            query NOT LIKE '%%query_log%%'
            AND query LIKE %(matcher)s
            AND type = 'QueryFinish'
        """
    rows = client.sync_execute(stats_sql, {"matcher": f"%benchmark:{uuid}%"})

    def column_total(position):
        # Sum a single selected column across all matching log rows.
        return sum(row[position] for row in rows)

    return {
        "query_count": len(rows),
        "ch_query_time": int(column_total(0)),
        "read_rows": column_total(1),
        "read_bytes": column_total(2),
        "memory_usage": column_total(3),
    }
|
|
|
|
|
|
def benchmark_clickhouse(fn):
    """Decorate ``fn`` so that calling it reports ClickHouse query times.

    The wrapper runs ``fn`` four times through ``run_query`` and collects the
    ``ch_query_time`` of each run.

    Args:
        fn: The function to benchmark.

    Returns:
        A wrapper accepting the same positional arguments as ``fn`` and
        returning ``{"samples": [...], "number": <sample count>}``.
    """

    @wraps(fn)
    def inner(*args):
        samples = []
        for _ in range(4):
            stats = run_query(fn, *args)
            samples.append(stats["ch_query_time"])
        return {"samples": samples, "number": len(samples)}

    return inner
|
|
|
|
|
|
@contextmanager
def no_materialized_columns():
    """Allows running a function without any materialized columns being used in query.

    While the context is active, ``get_materialized_columns._cache`` is
    replaced with entries for the ``events`` and ``person`` tables that each
    hold an empty column mapping. On exit the cache is reset to an empty dict,
    including when the managed block raises.
    NOTE(review): assumes ``_cache`` maps argument tuples to
    ``(timestamp, value)`` pairs -- confirm against the caching helper that
    wraps ``get_materialized_columns``.
    """
    get_materialized_columns._cache = {
        ("events",): (now(), {}),
        ("person",): (now(), {}),
    }
    try:
        yield
    finally:
        # Without try/finally an exception in the managed block would leave
        # the "no materialized columns" override in place for later code.
        get_materialized_columns._cache = {}
|