0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-28 18:26:15 +01:00
posthog/ee/clickhouse/sql/plugin_log_entries.py
Harry Waye dc8148ede6
dev(clickhouse): raise clickhouse setup errors quickly (#6069)
I was having issues with running the clickhouse/ee tests: they were just
hanging. ClickHouse appeared to be up and I could perform queries with
`clickhouse-client`. For some reason it was hanging on querying, and on closer
inspection it looks like each of the setup queries was hanging for 6
seconds, failing to find ZooKeeper, and then continuing to run setup.

It's pretty useless to continue in this case, so it seems more sensible to raise
the error immediately.
2021-09-22 10:22:52 +00:00

72 lines
2.0 KiB
Python

from ee.kafka_client.topics import KAFKA_PLUGIN_LOG_ENTRIES
from posthog.settings import CLICKHOUSE_CLUSTER, CLICKHOUSE_DATABASE
from posthog.tasks.delete_old_plugin_logs import TTL_WEEKS
from .clickhouse import KAFKA_COLUMNS, REPLACING_MERGE_TREE, kafka_engine, table_engine, ttl_period
# Name of the table that holds plugin log entries.
PLUGIN_LOG_ENTRIES_TABLE = "plugin_log_entries"

# Column definitions emitted, in this order, by the CREATE TABLE template below.
_PLUGIN_LOG_ENTRIES_COLUMNS = (
    "id UUID",
    "team_id Int64",
    "plugin_id Int64",
    "plugin_config_id Int64",
    "timestamp DateTime64(6, 'UTC')",
    "source VARCHAR",
    "type VARCHAR",
    "message VARCHAR",
    "instance_id UUID",
)

# CREATE TABLE template. It is meant to be completed with ``str.format`` and
# expects four placeholders: ``table_name``, ``cluster``, ``extra_fields``
# (inserted verbatim after the last column, so any additional column text must
# carry its own leading comma) and ``engine``.
PLUGIN_LOG_ENTRIES_TABLE_BASE_SQL = (
    "\nCREATE TABLE {table_name} ON CLUSTER {cluster}\n"
    "(\n"
    + ",\n".join(_PLUGIN_LOG_ENTRIES_COLUMNS)
    + "\n{extra_fields}\n"
    ") ENGINE = {engine}\n"
)
# DDL for the main plugin log entries table: the base CREATE TABLE template
# followed by partitioning, ordering, a TTL clause and an index setting.
# The engine text comes from ``table_engine`` and the TTL clause from
# ``ttl_period``; ``KAFKA_COLUMNS`` is appended after the base columns
# (presumably the ``_timestamp``/``_offset`` columns -- confirm in .clickhouse).
PLUGIN_LOG_ENTRIES_TABLE_SQL = "".join(
    (
        PLUGIN_LOG_ENTRIES_TABLE_BASE_SQL,
        "PARTITION BY plugin_id ORDER BY (team_id, id)\n",
        "{ttl_period}\n",
        "SETTINGS index_granularity=512\n",
    )
).format(
    table_name=PLUGIN_LOG_ENTRIES_TABLE,
    cluster=CLICKHOUSE_CLUSTER,
    extra_fields=KAFKA_COLUMNS,
    engine=table_engine(PLUGIN_LOG_ENTRIES_TABLE, "_timestamp", REPLACING_MERGE_TREE),
    ttl_period=ttl_period("timestamp", TTL_WEEKS),
)
# DDL for the companion table named "kafka_<table>": same base columns, no
# extra fields, and an engine produced by ``kafka_engine`` for the
# KAFKA_PLUGIN_LOG_ENTRIES topic.
KAFKA_PLUGIN_LOG_ENTRIES_TABLE_SQL = PLUGIN_LOG_ENTRIES_TABLE_BASE_SQL.format(
    table_name=f"kafka_{PLUGIN_LOG_ENTRIES_TABLE}",
    cluster=CLICKHOUSE_CLUSTER,
    extra_fields="",  # nothing appended after the base columns
    engine=kafka_engine(topic=KAFKA_PLUGIN_LOG_ENTRIES),
)
# DDL for the materialized view "<table>_mv", which selects every column
# (including _timestamp and _offset) from "<database>.kafka_<table>" and
# writes them TO "<database>.<table>".
PLUGIN_LOG_ENTRIES_TABLE_MV_SQL = f"""
CREATE MATERIALIZED VIEW {PLUGIN_LOG_ENTRIES_TABLE}_mv ON CLUSTER {CLICKHOUSE_CLUSTER}
TO {CLICKHOUSE_DATABASE}.{PLUGIN_LOG_ENTRIES_TABLE}
AS SELECT
id,
team_id,
plugin_id,
plugin_config_id,
timestamp,
source,
type,
message,
instance_id,
_timestamp,
_offset
FROM {CLICKHOUSE_DATABASE}.kafka_{PLUGIN_LOG_ENTRIES_TABLE}
"""
# Parameterized INSERT for a single log entry. The ``%(name)s`` markers are
# named placeholders to be bound by whoever executes the statement; the final
# two selected values are ``now()`` and ``0`` (presumably filling the
# _timestamp and _offset columns -- confirm against the table definition).
# NOTE: the table name is spelled out literally here rather than taken from
# PLUGIN_LOG_ENTRIES_TABLE, so the two must be kept in sync by hand.
INSERT_PLUGIN_LOG_ENTRY_SQL = (
    "\n"
    "INSERT INTO plugin_log_entries SELECT "
    "%(id)s, %(team_id)s, %(plugin_id)s, %(plugin_config_id)s, "
    "%(timestamp)s, %(source)s, %(type)s, %(message)s, %(instance_id)s, "
    "now(), 0"
    "\n"
)
# Statement that drops the plugin log entries table on the configured cluster,
# tolerating the table being absent (IF EXISTS).
DROP_PLUGIN_LOG_ENTRIES_TABLE_SQL = "DROP TABLE IF EXISTS {table} ON CLUSTER {cluster}".format(
    table=PLUGIN_LOG_ENTRIES_TABLE,
    cluster=CLICKHOUSE_CLUSTER,
)