From 7d80f7631e88bcee17352d447e1c3dfd0cb7b017 Mon Sep 17 00:00:00 2001 From: James Greenhill Date: Tue, 8 Sep 2020 20:13:54 -0700 Subject: [PATCH] clickhouse views and management commands (#1616) --- ee/clickhouse/clickhouse_test_runner.py | 32 +++++++++ ee/clickhouse/views/actions.py | 21 ++++++ ee/clickhouse/views/events.py | 49 ++++++++++++++ ee/clickhouse/views/insights.py | 65 +++++++++++++++++++ ee/clickhouse/views/person.py | 21 ++++++ ee/management/commands/create_ch_migration.py | 49 ++++++++++++++ ee/management/commands/migrate_clickhouse.py | 23 +++++++ 7 files changed, 260 insertions(+) create mode 100644 ee/clickhouse/clickhouse_test_runner.py create mode 100644 ee/clickhouse/views/actions.py create mode 100644 ee/clickhouse/views/events.py create mode 100644 ee/clickhouse/views/insights.py create mode 100644 ee/clickhouse/views/person.py create mode 100644 ee/management/commands/create_ch_migration.py create mode 100644 ee/management/commands/migrate_clickhouse.py diff --git a/ee/clickhouse/clickhouse_test_runner.py b/ee/clickhouse/clickhouse_test_runner.py new file mode 100644 index 00000000000..dc42b6d69e9 --- /dev/null +++ b/ee/clickhouse/clickhouse_test_runner.py @@ -0,0 +1,32 @@ +from django.test.runner import DiscoverRunner +from infi.clickhouse_orm import Database # type: ignore + +from posthog.settings import ( + CLICKHOUSE_DATABASE, + CLICKHOUSE_HTTP_URL, + CLICKHOUSE_PASSWORD, + CLICKHOUSE_USERNAME, + CLICKHOUSE_VERIFY, +) + + +class ClickhouseTestRunner(DiscoverRunner): + def setup_databases(self, **kwargs): + Database( + CLICKHOUSE_DATABASE, + db_url=CLICKHOUSE_HTTP_URL, + username=CLICKHOUSE_USERNAME, + password=CLICKHOUSE_PASSWORD, + verify_ssl_cert=CLICKHOUSE_VERIFY, + ).migrate("ee.clickhouse.migrations") + return super().setup_databases(**kwargs) + + def teardown_databases(self, old_config, **kwargs): + Database( + CLICKHOUSE_DATABASE, + db_url=CLICKHOUSE_HTTP_URL, + username=CLICKHOUSE_USERNAME, + password=CLICKHOUSE_PASSWORD, + verify_ssl_cert=CLICKHOUSE_VERIFY, + ).drop_database() + super().teardown_databases(old_config, **kwargs) diff --git a/ee/clickhouse/views/actions.py b/ee/clickhouse/views/actions.py new file mode 100644 index 00000000000..f8f370824eb --- /dev/null +++ b/ee/clickhouse/views/actions.py @@ -0,0 +1,21 @@ +from rest_framework import status, viewsets +from rest_framework.response import Response + +# NOTE: bad django practice but /ee specifically depends on /posthog so it should be fine +from posthog.api.action import ActionSerializer + + +class ClickhouseActions(viewsets.ViewSet): + serializer_class = ActionSerializer + + def list(self, request): + # TODO: implement get list of events + return Response([]) + + def create(self, request): + # TODO: implement create event + return Response([]) + + def retrieve(self, request, pk=None): + # TODO: implement retrieve event by id + return Response([]) diff --git a/ee/clickhouse/views/events.py b/ee/clickhouse/views/events.py new file mode 100644 index 00000000000..4be757e4f24 --- /dev/null +++ b/ee/clickhouse/views/events.py @@ -0,0 +1,49 @@ +from datetime import datetime, timezone + +from rest_framework import serializers, viewsets +from rest_framework.decorators import action +from rest_framework.request import Request +from rest_framework.response import Response + +from ee.clickhouse.client import sync_execute +from ee.clickhouse.models.event import ClickhouseEventSerializer, determine_event_conditions +from ee.clickhouse.models.property import get_property_values_for_key, parse_filter +from ee.clickhouse.sql.events import SELECT_EVENT_WITH_ARRAY_PROPS_SQL, SELECT_EVENT_WITH_PROP_SQL +from posthog.models.filter import Filter +from posthog.utils import convert_property_value + + +class ClickhouseEvents(viewsets.ViewSet): + serializer_class = ClickhouseEventSerializer + + def list(self, request): + + team = request.user.team_set.get() + filter = Filter(request=request) + limit = "LIMIT 100" if not filter._date_from and not filter._date_to else "" + conditions, condition_params = determine_event_conditions(request.GET) + prop_filters, prop_filter_params = parse_filter(filter.properties) + + if prop_filters: + query_result = sync_execute( + SELECT_EVENT_WITH_PROP_SQL.format(conditions=conditions, limit=limit, filters=prop_filters), + {"team_id": team.pk, **condition_params, **prop_filter_params}, + ) + else: + query_result = sync_execute( + SELECT_EVENT_WITH_ARRAY_PROPS_SQL.format(conditions=conditions, limit=limit), + {"team_id": team.pk, **condition_params}, + ) + + result = ClickhouseEventSerializer(query_result, many=True, context={"elements": None, "people": None}).data + + return Response({"next": None, "results": result}) + + @action(methods=["GET"], detail=False) + def values(self, request: Request) -> Response: + key = request.GET.get("key") + team = request.user.team_set.get() + result = [] + if key: + result = get_property_values_for_key(key, team) + return Response([{"name": convert_property_value(value[0])} for value in result]) diff --git a/ee/clickhouse/views/insights.py b/ee/clickhouse/views/insights.py new file mode 100644 index 00000000000..46c2be2284b --- /dev/null +++ b/ee/clickhouse/views/insights.py @@ -0,0 +1,65 @@ +from abc import ABC, abstractmethod +from typing import Any + +from rest_framework import status, viewsets +from rest_framework.decorators import action +from rest_framework.request import Request +from rest_framework.response import Response + + +class InsightInterface(ABC): + @abstractmethod + def trend(self, request: Request, *args: Any, **kwargs: Any): + pass + + @abstractmethod + def session(self, request: Request, *args: Any, **kwargs: Any): + pass + + @abstractmethod + def funnel(self, request: Request, *args: Any, **kwargs: Any): + pass + + @abstractmethod + def retention(self, request: Request, *args: Any, **kwargs: Any): + pass + + @abstractmethod + def path(self, request: Request, *args: Any, **kwargs: Any): + pass + + +class ClickhouseInsights(viewsets.ViewSet, InsightInterface): + # TODO: add insight serializer + + def list(self, request): + # TODO: implement get list of insights + return Response([]) + + def create(self, request): + # TODO: implement create insights + return Response([]) + + def retrieve(self, request, pk=None): + # TODO: implement retrieve insights by id + return Response([]) + + @action(methods=["GET"], detail=False) + def trend(self, request: Request, *args: Any, **kwargs: Any) -> Response: + return Response([]) + + @action(methods=["GET"], detail=False) + def session(self, request: Request, *args: Any, **kwargs: Any) -> Response: + return Response([]) + + @action(methods=["GET"], detail=False) + def funnel(self, request: Request, *args: Any, **kwargs: Any) -> Response: + return Response([]) + + @action(methods=["GET"], detail=False) + def retention(self, request: Request, *args: Any, **kwargs: Any) -> Response: + return Response([]) + + @action(methods=["GET"], detail=False) + def path(self, request: Request, *args: Any, **kwargs: Any) -> Response: + return Response([]) diff --git a/ee/clickhouse/views/person.py b/ee/clickhouse/views/person.py new file mode 100644 index 00000000000..94f27340a95 --- /dev/null +++ b/ee/clickhouse/views/person.py @@ -0,0 +1,21 @@ +from rest_framework import status, viewsets +from rest_framework.response import Response + +# NOTE: bad django practice but /ee specifically depends on /posthog so it should be fine +from posthog.api.person import PersonSerializer + + +class ClickhousePerson(viewsets.ViewSet): + serializer_class = PersonSerializer + + def list(self, request): + # TODO: implement get list of people + return Response([]) + + def create(self, request): + # TODO: implement create person + return Response([]) + + def retrieve(self, request, pk=None): + # TODO: implement retrieve person by id + return Response([]) diff --git a/ee/management/commands/create_ch_migration.py b/ee/management/commands/create_ch_migration.py new file mode 100644 index 00000000000..331373545a2 --- /dev/null +++ b/ee/management/commands/create_ch_migration.py @@ -0,0 +1,49 @@ +import os + +from django.core.management.base import BaseCommand +from django.utils.timezone import now + +MIGRATION_PATH = "ee/clickhouse/migrations" + +FILE_DEFAULT = """ +from infi.clickhouse_orm import migrations +class Migration(migrations.Migration): + operations = [ + ] +""" + +# ex: python manage.py create_ch_migration +class Command(BaseCommand): + help = "Create blank clickhouse migration" + + def add_arguments(self, parser): + parser.add_argument("--name", type=str) + + def handle(self, *args, **options): + name = options["name"] + + # default to auto syntax + if not name: + name = now().strftime("auto_%Y%m%d_%H%M.py") + else: + name += ".py" + + entries = os.listdir(MIGRATION_PATH) + + idx = len(entries) + index_label = _format_number(idx) + file_name = "{}/{}_{}".format(MIGRATION_PATH, index_label, name) + f = open(file_name, "w") + f.write(FILE_DEFAULT) + return + + +def _format_number(num: int) -> str: + if num < 10: + return "000" + str(num) + elif num < 100: + return "00" + str(num) + elif num < 1000: + return "0" + str(num) + else: + return str(num) diff --git a/ee/management/commands/migrate_clickhouse.py b/ee/management/commands/migrate_clickhouse.py new file mode 100644 index 00000000000..90c2dc4c432 --- /dev/null +++ b/ee/management/commands/migrate_clickhouse.py @@ -0,0 +1,23 @@ +from django.core.management.base import BaseCommand +from infi.clickhouse_orm import Database + +from posthog.settings import ( + CLICKHOUSE_DATABASE, + CLICKHOUSE_HTTP_URL, + CLICKHOUSE_PASSWORD, + CLICKHOUSE_USERNAME, + CLICKHOUSE_VERIFY, +) + + +class Command(BaseCommand): + help = "Migrate clickhouse" + + def handle(self, *args, **options): + Database( + CLICKHOUSE_DATABASE, + db_url=CLICKHOUSE_HTTP_URL, + username=CLICKHOUSE_USERNAME, + password=CLICKHOUSE_PASSWORD, + verify_ssl_cert=False, + ).migrate("ee.clickhouse.migrations")