From 49e3ceef5c62b847e6c949ddfdf9b8fbb151f456 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Fri, 20 May 2022 09:56:50 +0100 Subject: [PATCH] feat(object storage): add unused object storage (#9846) * feat(object_storage): add unused object storage with health checks * only prompt debug users if object storage not available at preflight * safe plugin server health check for unused object storage * explicit object storage settings * explicit object storage settings * explicit object storage settings * downgrade pip tools * without spaces? * like this? * without updating pip? * remove object_storage from dev volumes * named volume on hobby * lazily init object storage * simplify conditional check * reproduced error locally * reproduced error locally * object_storage_endpoint not host and port * log more when checking kafka and clickhouse * don't filter docker output * add kafka to hosts before starting stack? * silly cloud tests (not my brain) --- .github/actions/run-backend-tests/action.yml | 2 +- .github/workflows/ci-backend.yml | 16 +++- .github/workflows/ci-plugin-server.yml | 24 ++++-- .github/workflows/e2e.yml | 6 +- .run/Plugin Server.run.xml | 7 +- bin/check_kafka_clickhouse_up | 4 +- docker-compose.arm64.yml | 15 ++++ docker-compose.dev.yml | 13 ++++ docker-compose.hobby.yml | 17 +++++ docker-compose.test.yml | 1 + docker-compose.yml | 15 ++++ frontend/src/mocks/fixtures/_preflight.json | 1 + .../__mocks__/preflight.initial.json | 1 + .../PreflightCheck/preflightLogic.test.ts | 14 +++- .../scenes/PreflightCheck/preflightLogic.tsx | 18 ++++- frontend/src/types.ts | 1 + package.json | 4 +- plugin-server/package.json | 4 +- plugin-server/src/config/config.ts | 14 ++++ .../src/main/services/object_storage.ts | 59 ++++++++++++++ plugin-server/src/types.ts | 8 ++ plugin-server/src/utils/db/hub.ts | 13 ++++ posthog/api/instance_status.py | 10 +++ posthog/api/test/test_instance_status.py | 44 +++++++++++ posthog/api/test/test_preflight.py | 63 +++++++++++++-- posthog/settings/__init__.py | 1 + posthog/settings/object_storage.py | 18 +++++ posthog/storage/object_storage.py | 40 ++++++++++ posthog/utils.py | 12 +++ posthog/views.py | 2 + requirements.in | 1 + requirements.txt | 76 ++++++++++++------- 32 files changed, 464 insertions(+), 60 deletions(-) create mode 100644 plugin-server/src/main/services/object_storage.ts create mode 100644 posthog/settings/object_storage.py create mode 100644 posthog/storage/object_storage.py diff --git a/.github/actions/run-backend-tests/action.yml b/.github/actions/run-backend-tests/action.yml index a7282e855e9..81782f792da 100644 --- a/.github/actions/run-backend-tests/action.yml +++ b/.github/actions/run-backend-tests/action.yml @@ -37,7 +37,7 @@ runs: run: | export CLICKHOUSE_SERVER_IMAGE=${{ inputs.clickhouse-server-image }} docker-compose -f docker-compose.dev.yml down - docker-compose -f docker-compose.dev.yml up -d db clickhouse zookeeper kafka redis & + docker-compose -f docker-compose.dev.yml up -d db clickhouse zookeeper kafka redis object_storage & - name: Set up Python uses: actions/setup-python@v2 diff --git a/.github/workflows/ci-backend.yml b/.github/workflows/ci-backend.yml index eae9799757f..13e80b6c54f 100644 --- a/.github/workflows/ci-backend.yml +++ b/.github/workflows/ci-backend.yml @@ -24,6 +24,10 @@ env: CLICKHOUSE_VERIFY: 'False' TEST: 1 CLICKHOUSE_SERVER_IMAGE_VERSION: ${{ github.event.inputs.clickhouseServerVersion || '' }} + OBJECT_STORAGE_ENABLED: 'True' + OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000' + OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user' + OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password' jobs: # Job to decide if we should run backend ci @@ -293,6 +297,10 @@ jobs: cp -r messaging deploy/ cat multi_tenancy_settings.py > deploy/posthog/settings/cloud.py cat requirements.txt >> deploy/requirements.txt + + - name: Add kafka host to /etc/hosts for kafka connectivity + run: sudo echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts + - name: Start stack with Docker Compose run: | docker-compose -f deploy/docker-compose.dev.yml down @@ -335,6 +343,11 @@ jobs: with: path: 'deploy/' + # just while object_storage isn't in master + - name: Start stack with object_storage + run: | + docker-compose -f deploy/docker-compose.dev.yml up -d object_storage + - name: Install requirements.txt dependencies with pip at current branch run: | cd deploy @@ -357,9 +370,6 @@ jobs: python manage.py makemigrations --check --dry-run python manage.py migrate - - name: Add kafka host to /etc/hosts for kafka connectivity - run: sudo echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts - - name: Set up needed files run: | cd deploy diff --git a/.github/workflows/ci-plugin-server.yml b/.github/workflows/ci-plugin-server.yml index 04a5bbad143..71d940f2576 100644 --- a/.github/workflows/ci-plugin-server.yml +++ b/.github/workflows/ci-plugin-server.yml @@ -11,6 +11,14 @@ on: - 'docker*.yml' - '*Dockerfile' +env: + OBJECT_STORAGE_ENABLED: true + OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000' + OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user' + OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password' + OBJECT_STORAGE_SESSION_RECORDING_FOLDER: 'session_recordings' + OBJECT_STORAGE_BUCKET: 'posthog' + jobs: code-quality: name: Code quality @@ -73,8 +81,8 @@ jobs: sudo bash -c 'echo "127.0.0.1 kafka zookeeper" >> /etc/hosts' ping -c 1 kafka ping -c 1 zookeeper - - name: Start Kafka, ClickHouse, Zookeeper - run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse + - name: Start Kafka, ClickHouse, Zookeeper, Object Storage + run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse object_storage - name: Set up Python 3.8.12 uses: actions/setup-python@v2 @@ -154,8 +162,8 @@ jobs: sudo bash -c 'echo "127.0.0.1 kafka zookeeper" >> /etc/hosts' ping -c 1 kafka ping -c 1 zookeeper - - name: Start Kafka, ClickHouse, Zookeeper - run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse + - name: Start Kafka, ClickHouse, Zookeeper, Object Storage + run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse object_storage - name: Set up Python 3.8.12 uses: actions/setup-python@v2 @@ -236,8 +244,8 @@ jobs: sudo bash -c 'echo "127.0.0.1 kafka zookeeper" >> /etc/hosts' ping -c 1 kafka ping -c 1 zookeeper - - name: Start Kafka, ClickHouse, Zookeeper - run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse + - name: Start Kafka, ClickHouse, Zookeeper, Object Storage + run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse object_storage - name: Set up Python 3.8.12 uses: actions/setup-python@v2 @@ -318,8 +326,8 @@ jobs: sudo bash -c 'echo "127.0.0.1 kafka zookeeper" >> /etc/hosts' ping -c 1 kafka ping -c 1 zookeeper - - name: Start Kafka, ClickHouse, Zookeeper - run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse + - name: Start Kafka, ClickHouse, Zookeeper, Object Storage + run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse object_storage - name: Set up Python 3.8.12 uses: actions/setup-python@v2 diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 7577037d776..39cf06a7891 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -18,6 +18,10 @@ env: SITE_URL: 'test.posthog.net' # used to test password resets NO_RESTART_LOOP: 1 CLICKHOUSE_SECURE: 0 + OBJECT_STORAGE_ENABLED: 1 + OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000' + OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user' + OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password' jobs: # Job that lists and chunks spec file names and caches node modules @@ -69,7 +73,7 @@ jobs: - name: Start stack with Docker Compose run: | docker-compose -f docker-compose.dev.yml down - docker-compose -f docker-compose.dev.yml up -d db clickhouse zookeeper kafka redis & + docker-compose -f docker-compose.dev.yml up -d db clickhouse zookeeper kafka redis object_storage & - name: Add kafka host to /etc/hosts for kafka connectivity run: sudo echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts diff --git a/.run/Plugin Server.run.xml b/.run/Plugin Server.run.xml index d44fce97d7c..22c24ca814b 100644 --- a/.run/Plugin Server.run.xml +++ b/.run/Plugin Server.run.xml @@ -7,12 +7,13 @@ - + - + + - + \ No newline at end of file diff --git a/bin/check_kafka_clickhouse_up b/bin/check_kafka_clickhouse_up index f2c52262203..4217f70e693 100755 --- a/bin/check_kafka_clickhouse_up +++ b/bin/check_kafka_clickhouse_up @@ -4,10 +4,10 @@ set -e # Check Kafka while true; do -nc -z localhost 9092 && break || echo "Checking Kafka status..." && sleep 1 +nc -z localhost 9092 && break || echo 'Checking Kafka status...' && sleep 1 done # Check ClickHouse while true; do -curl -s -o /dev/null -I 'http://localhost:8123/' && break || echo "Checking ClickHouse status..." && sleep 1 +curl -s -o /dev/null -I 'http://localhost:8123/' && break || echo 'Checking ClickHouse status...' && sleep 1 done diff --git a/docker-compose.arm64.yml b/docker-compose.arm64.yml index 79dbf712f64..837f32eb0e2 100644 --- a/docker-compose.arm64.yml +++ b/docker-compose.arm64.yml @@ -78,6 +78,7 @@ services: - redis - clickhouse - kafka + - object_storage web: <<: *worker command: '${CH_WEB_SCRIPT:-./ee/bin/docker-ch-dev-web}' @@ -103,3 +104,17 @@ services: - redis - clickhouse - kafka + - object_storage + + object_storage: + image: minio/minio + ports: + - '19000:19000' + - '19001:19001' + volumes: + - ./object_storage:/data + environment: + MINIO_ROOT_USER: object_storage_root_user + MINIO_ROOT_PASSWORD: object_storage_root_password + entrypoint: sh + command: -c 'mkdir -p /data/posthog && minio server --address ":19000" --console-address ":19001" /data' # create the 'posthog' bucket before starting the service diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 7967d2a5449..9417607db2e 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -75,6 +75,7 @@ services: - redis - clickhouse - kafka + - object_storage web: <<: *worker command: '${CH_WEB_SCRIPT:-./ee/bin/docker-ch-dev-web}' @@ -100,3 +101,15 @@ services: - redis - clickhouse - kafka + - object_storage + + object_storage: + image: minio/minio + ports: + - '19000:19000' + - '19001:19001' + environment: + MINIO_ROOT_USER: object_storage_root_user + MINIO_ROOT_PASSWORD: object_storage_root_password + entrypoint: sh + command: -c 'mkdir -p /data/posthog && chmod u+rxw /data/posthog && minio server --address ":19000" --console-address ":19001" /data' # create the 'posthog' bucket before starting the service diff --git a/docker-compose.hobby.yml b/docker-compose.hobby.yml index 8b24ac31fab..6e2164bdddb 100644 --- a/docker-compose.hobby.yml +++ b/docker-compose.hobby.yml @@ -84,6 +84,7 @@ services: - redis - clickhouse - kafka + - object_storage web: <<: *worker command: /compose/start @@ -117,6 +118,21 @@ services: - redis - clickhouse - kafka + - object_storage + + object_storage: + image: minio/minio + ports: + - '19000:19000' + - '19001:19001' + volumes: + - object_storage:/data + environment: + MINIO_ROOT_USER: object_storage_root_user + MINIO_ROOT_PASSWORD: object_storage_root_password + entrypoint: sh + command: -c 'mkdir -p /data/posthog && minio server --address ":19000" --console-address ":19001" /data' # create the 'posthog' bucket before starting the service + asyncmigrationscheck: <<: *worker command: python manage.py run_async_migrations --check @@ -127,3 +143,4 @@ volumes: zookeeper-data: zookeeper-datalog: zookeeper-logs: + object_storage: diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 3499aacd524..ddd70621ba7 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -23,3 +23,4 @@ services: - redis - clickhouse - kafka + - object_storage diff --git a/docker-compose.yml b/docker-compose.yml index 3a9dbed8a9b..04c4417323e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -54,6 +54,7 @@ services: - redis - clickhouse - kafka + - object_storage environment: DATABASE_URL: postgres://posthog:posthog@db:5432/posthog REDIS_URL: redis://redis:6379/ @@ -70,6 +71,20 @@ services: ports: - 8000:8000 - 80:8000 + + object_storage: + image: minio/minio + ports: + - '19000:19000' + - '19001:19001' + volumes: + - ./object_storage:/data + environment: + MINIO_ROOT_USER: object_storage_root_user + MINIO_ROOT_PASSWORD: object_storage_root_password + entrypoint: sh + command: -c 'mkdir -p /data/posthog && minio server --address ":19000" --console-address ":19001" /data' # create the 'posthog' bucket before starting the service + volumes: postgres-data: version: '3' diff --git a/frontend/src/mocks/fixtures/_preflight.json b/frontend/src/mocks/fixtures/_preflight.json index 3358e875f8a..1a8bf61b9f8 100644 --- a/frontend/src/mocks/fixtures/_preflight.json +++ b/frontend/src/mocks/fixtures/_preflight.json @@ -9,6 +9,7 @@ "initiated": true, "cloud": false, "demo": false, + "object_storage": true, "realm": "hosted-clickhouse", "available_social_auth_providers": { "github": false, diff --git a/frontend/src/scenes/PreflightCheck/__mocks__/preflight.initial.json b/frontend/src/scenes/PreflightCheck/__mocks__/preflight.initial.json index 2162cdb0e67..d3bc8cd6f98 100644 --- a/frontend/src/scenes/PreflightCheck/__mocks__/preflight.initial.json +++ b/frontend/src/scenes/PreflightCheck/__mocks__/preflight.initial.json @@ -28,6 +28,7 @@ "clickhouse": false, "kafka": false, "realm": "hosted", + "object_storage": true, "available_social_auth_providers": { "github": false, "gitlab": false, diff --git a/frontend/src/scenes/PreflightCheck/preflightLogic.test.ts b/frontend/src/scenes/PreflightCheck/preflightLogic.test.ts index 7e5d81edde1..c5bbbabb680 100644 --- a/frontend/src/scenes/PreflightCheck/preflightLogic.test.ts +++ b/frontend/src/scenes/PreflightCheck/preflightLogic.test.ts @@ -86,6 +86,11 @@ describe('preflightLogic', () => { status: 'warning', caption: 'Set up before ingesting real user data', }, + { + id: 'object_storage', + name: 'Object Storage', + status: 'validated', + }, ], }) }) @@ -144,6 +149,11 @@ describe('preflightLogic', () => { status: 'optional', caption: 'Not required for experimentation mode', }, + { + id: 'object_storage', + name: 'Object Storage', + status: 'validated', + }, ], }) }) @@ -156,7 +166,7 @@ describe('preflightLogic', () => { .toDispatchActions(['loadPreflightSuccess']) .toMatchValues({ checksSummary: { - summaryString: '6 successful, 1 warning, 2 errors', + summaryString: '7 successful, 1 warning, 2 errors', summaryStatus: 'error', }, }) @@ -169,7 +179,7 @@ describe('preflightLogic', () => { .toDispatchActions(['loadPreflightSuccess']) .toMatchValues({ checksSummary: { - summaryString: '6 successful, 1 warning, 1 error, 1 optional', + summaryString: '7 successful, 1 warning, 1 error, 1 optional', summaryStatus: 'error', }, }) diff --git a/frontend/src/scenes/PreflightCheck/preflightLogic.tsx b/frontend/src/scenes/PreflightCheck/preflightLogic.tsx index 23d67cbabc7..dccc3c20723 100644 --- a/frontend/src/scenes/PreflightCheck/preflightLogic.tsx +++ b/frontend/src/scenes/PreflightCheck/preflightLogic.tsx @@ -69,7 +69,7 @@ export const preflightLogic = kea< checks: [ (s) => [s.preflight, s.preflightMode], (preflight, preflightMode) => { - return [ + const preflightItems = [ { id: 'database', name: 'Application database ยท Postgres', @@ -132,7 +132,21 @@ export const preflightLogic = kea< ? 'Not required for experimentation mode' : 'Set up before ingesting real user data', }, - ] as PreflightItemInterface[] + ] + + if (preflight?.object_storage || preflight?.is_debug) { + /** __for now__, only prompt debug users if object storage is unhealthy **/ + preflightItems.push({ + id: 'object_storage', + name: 'Object Storage', + status: preflight?.object_storage ? 'validated' : 'warning', + caption: preflight?.object_storage + ? undefined + : 'Some features will not work without object storage', + }) + } + + return preflightItems as PreflightItemInterface[] }, ], checksSummary: [ diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 72ff5a6d9ad..de5d2bdb84d 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -1391,6 +1391,7 @@ export interface PreflightStatus { licensed_users_available?: number | null site_url?: string instance_preferences?: InstancePreferencesInterface + object_storage: boolean } export enum ItemMode { // todo: consolidate this and dashboardmode diff --git a/package.json b/package.json index a499b646e80..4733c7273c3 100644 --- a/package.json +++ b/package.json @@ -52,9 +52,9 @@ "arm64:ch-dev:start": "concurrently -n DOCKER,ESBUILD,TYPEGEN -c red,blue,green \"docker-compose -f docker-compose.arm64.yml pull && CH_WEB_SCRIPT=./ee/bin/docker-ch-dev-backend docker-compose -f docker-compose.arm64.yml up\" \"yarn run start-http --host 0.0.0.0\" \"yarn run typegen:watch\"", "arm64:ch-dev:clear": "docker compose -f docker-compose.arm64.yml stop && docker compose -f docker-compose.arm64.yml rm -v && docker compose -f docker-compose.arm64.yml down", "arm64:services": "yarn arm64:services:stop && yarn arm64:services:clean && yarn arm64:services:start", - "arm64:services:start": "docker-compose -f docker-compose.arm64.yml up zookeeper kafka clickhouse", + "arm64:services:start": "docker-compose -f docker-compose.arm64.yml up zookeeper kafka clickhouse object_storage", "arm64:services:stop": "docker-compose -f docker-compose.arm64.yml down", - "arm64:services:clean": "docker-compose -f docker-compose.arm64.yml rm -v zookeeper kafka clickhouse", + "arm64:services:clean": "docker-compose -f docker-compose.arm64.yml rm -v zookeeper kafka clickhouse object_storage", "dev:migrate:postgres": "export DEBUG=1 && source env/bin/activate && python manage.py migrate", "dev:migrate:clickhouse": "export DEBUG=1 && source env/bin/activate && python manage.py migrate_clickhouse", "prepare": "husky install" diff --git a/plugin-server/package.json b/plugin-server/package.json index 85381e1d07d..2883c16f6cd 100644 --- a/plugin-server/package.json +++ b/plugin-server/package.json @@ -32,9 +32,9 @@ "prepublishOnly": "yarn build", "setup:dev:clickhouse": "cd .. && DEBUG=1 python manage.py migrate_clickhouse", "setup:test": "cd .. && TEST=1 python manage.py setup_test_environment", - "services:start": "cd .. && docker-compose -f docker-compose.dev.yml up zookeeper kafka clickhouse", + "services:start": "cd .. && docker-compose -f docker-compose.dev.yml up zookeeper kafka clickhouse object_storage", "services:stop": "cd .. && docker-compose -f docker-compose.dev.yml down", - "services:clean": "cd .. && docker-compose -f docker-compose.dev.yml rm -v zookeeper kafka clickhouse", + "services:clean": "cd .. && docker-compose -f docker-compose.dev.yml rm -v zookeeper kafka clickhouse object_storage", "services": "yarn services:stop && yarn services:clean && yarn services:start" }, "bin": { diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index c6d64da37de..e311e84edb2 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -95,6 +95,12 @@ export function getDefaultConfig(): PluginsServerConfig { PERSON_INFO_CACHE_TTL: 5 * 60, // 5 min KAFKA_HEALTHCHECK_SECONDS: 20, HISTORICAL_EXPORTS_ENABLED: true, + OBJECT_STORAGE_ENABLED: false, + OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000', + OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user', + OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password', + OBJECT_STORAGE_SESSION_RECORDING_FOLDER: 'session_recordings', + OBJECT_STORAGE_BUCKET: 'posthog', } } @@ -169,6 +175,14 @@ export function getConfigHelp(): Record { CLICKHOUSE_DISABLE_EXTERNAL_SCHEMAS_TEAMS: '(advanced) a comma separated list of teams to disable clickhouse external schemas for', CLICKHOUSE_JSON_EVENTS_KAFKA_TOPIC: '(advanced) topic to send events to for clickhouse ingestion', + OBJECT_STORAGE_ENABLED: + 'Disables or enables the use of object storage. It will become mandatory to use object storage', + OBJECT_STORAGE_ENDPOINT: 'minio endpoint', + OBJECT_STORAGE_ACCESS_KEY_ID: 'access key for minio', + OBJECT_STORAGE_SECRET_ACCESS_KEY: 'secret key for minio', + OBJECT_STORAGE_SESSION_RECORDING_FOLDER: + 'the top level folder for storing session recordings inside the storage bucket', + OBJECT_STORAGE_BUCKET: 'the object storage bucket name', } } diff --git a/plugin-server/src/main/services/object_storage.ts b/plugin-server/src/main/services/object_storage.ts new file mode 100644 index 00000000000..40515b06f9b --- /dev/null +++ b/plugin-server/src/main/services/object_storage.ts @@ -0,0 +1,59 @@ +import { PluginsServerConfig } from '../../types' +import { status } from '../../utils/status' + +const aws = require('aws-sdk') + +let S3: typeof aws.S3 | null = null + +export interface ObjectStorage { + healthCheck: () => Promise +} + +export const connectObjectStorage = (serverConfig: Partial): ObjectStorage => { + let storage = { + healthCheck: async () => { + return Promise.resolve(false) + }, + } + try { + const { + OBJECT_STORAGE_ENDPOINT, + OBJECT_STORAGE_ACCESS_KEY_ID, + OBJECT_STORAGE_SECRET_ACCESS_KEY, + OBJECT_STORAGE_ENABLED, + OBJECT_STORAGE_BUCKET, + } = serverConfig + + if (OBJECT_STORAGE_ENABLED && !S3) { + S3 = new aws.S3({ + endpoint: OBJECT_STORAGE_ENDPOINT, + accessKeyId: OBJECT_STORAGE_ACCESS_KEY_ID, + secretAccessKey: OBJECT_STORAGE_SECRET_ACCESS_KEY, + s3ForcePathStyle: true, // needed with minio? + signatureVersion: 'v4', + }) + } + + storage = { + healthCheck: async () => { + if (!OBJECT_STORAGE_BUCKET) { + status.error('๐Ÿ˜ข', 'No object storage bucket configured') + return false + } + + try { + await S3.headBucket({ + Bucket: OBJECT_STORAGE_BUCKET, + }).promise() + return true + } catch (error) { + return false + } + }, + } + } catch (e) { + status.warn('๐Ÿ˜ข', `could not initialise storage: ${e}`) + } + + return storage +} diff --git a/plugin-server/src/types.ts b/plugin-server/src/types.ts index 5099254cece..eed2d26b4eb 100644 --- a/plugin-server/src/types.ts +++ b/plugin-server/src/types.ts @@ -17,6 +17,7 @@ import { Job } from 'node-schedule' import { Pool } from 'pg' import { VM } from 'vm2' +import { ObjectStorage } from './main/services/object_storage' import { DB } from './utils/db/db' import { KafkaProducerWrapper } from './utils/db/kafka-producer-wrapper' import { InternalMetrics } from './utils/internal-metrics' @@ -142,6 +143,12 @@ export interface PluginsServerConfig extends Record { PERSON_INFO_CACHE_TTL: number KAFKA_HEALTHCHECK_SECONDS: number HISTORICAL_EXPORTS_ENABLED: boolean + OBJECT_STORAGE_ENABLED: boolean + OBJECT_STORAGE_ENDPOINT: string + OBJECT_STORAGE_ACCESS_KEY_ID: string + OBJECT_STORAGE_SECRET_ACCESS_KEY: string + OBJECT_STORAGE_SESSION_RECORDING_FOLDER: string + OBJECT_STORAGE_BUCKET: string } export interface Hub extends PluginsServerConfig { @@ -155,6 +162,7 @@ export interface Hub extends PluginsServerConfig { clickhouse?: ClickHouse kafka?: Kafka kafkaProducer?: KafkaProducerWrapper + objectStorage: ObjectStorage // metrics statsd?: StatsD internalMetrics?: InternalMetrics diff --git a/plugin-server/src/utils/db/hub.ts b/plugin-server/src/utils/db/hub.ts index 319cbeefb17..95518414348 100644 --- a/plugin-server/src/utils/db/hub.ts +++ b/plugin-server/src/utils/db/hub.ts @@ -12,6 +12,7 @@ import { ConnectionOptions } from 'tls' import { defaultConfig } from '../../config/config' import { JobQueueManager } from '../../main/job-queues/job-queue-manager' +import { connectObjectStorage, ObjectStorage } from '../../main/services/object_storage' import { Hub, KafkaSecurityProtocol, PluginId, PluginServerCapabilities, PluginsServerConfig } from '../../types' import { ActionManager } from '../../worker/ingestion/action-manager' import { ActionMatcher } from '../../worker/ingestion/action-matcher' @@ -205,6 +206,18 @@ export async function createHub( ) status.info('๐Ÿ‘', `Redis`) + status.info('๐Ÿค”', `Storage`) + const objectStorage: ObjectStorage = connectObjectStorage(serverConfig) + try { + if (serverConfig.OBJECT_STORAGE_ENABLED && (await objectStorage.healthCheck())) { + status.info('๐Ÿ‘', `storage ๐Ÿชฃ`) + } else { + status.info('๐Ÿชฃ', `storage not in use`) + } + } catch (e) { + status.warn('๐Ÿชฃ', `storage failed healthcheck: ${e}`) + } + const db = new DB( postgres, redisPool, diff --git a/posthog/api/instance_status.py b/posthog/api/instance_status.py index 197b4ad3179..02e5b2e5d34 100644 --- a/posthog/api/instance_status.py +++ b/posthog/api/instance_status.py @@ -1,5 +1,6 @@ from typing import Any, Dict, List, Union +from django.conf import settings from django.db import connection from rest_framework import viewsets from rest_framework.decorators import action @@ -11,6 +12,7 @@ from posthog.async_migrations.status import async_migrations_ok from posthog.gitsha import GIT_SHA from posthog.internal_metrics.team import get_internal_metrics_dashboards from posthog.permissions import OrganizationAdminAnyPermissions, SingleTenancyOrAdmin +from posthog.storage import object_storage from posthog.utils import ( dict_from_cursor_fetchall, get_helm_info_env, @@ -129,6 +131,14 @@ class InstanceStatusViewSet(viewsets.ViewSet): {"metric": "Redis metrics", "value": f"Redis connected but then failed to return metrics: {e}"} ) + metrics.append( + {"key": "object_storage", "metric": "Object Storage enabled", "value": settings.OBJECT_STORAGE_ENABLED} + ) + if settings.OBJECT_STORAGE_ENABLED: + metrics.append( + {"key": "object_storage", "metric": "Object Storage healthy", "value": object_storage.health_check()} + ) + return Response({"results": {"overview": metrics, "internal_metrics": get_internal_metrics_dashboards()}}) # Used to capture internal metrics shown on dashboards diff --git a/posthog/api/test/test_instance_status.py b/posthog/api/test/test_instance_status.py index a2b93935976..8f4bb1f5439 100644 --- a/posthog/api/test/test_instance_status.py +++ b/posthog/api/test/test_instance_status.py @@ -22,3 +22,47 @@ class TestInstanceStatus(APIBaseTest): "/api/instance_status/capture", {"method": "timing", "metric": "bar", "value": 15.2, "tags": {"team_id": 1}} ) timing_mock.assert_called_with("bar", 15.2, {"team_id": 1}) + + def test_object_storage_when_disabled(self): + with self.settings(OBJECT_STORAGE_ENABLED=False,): + response = self.client.get("/api/instance_status") + json = response.json() + + object_storage_metrics = [o for o in json["results"]["overview"] if o.get("key", None) == "object_storage"] + self.assertEqual( + object_storage_metrics, [{"key": "object_storage", "metric": "Object Storage enabled", "value": False}] + ) + + @patch("posthog.storage.object_storage.s3_client") + def test_object_storage_when_enabled_but_unhealthy(self, patched_s3_client): + patched_s3_client.head_bucket.return_value = False + + with self.settings(OBJECT_STORAGE_ENABLED=True,): + response = self.client.get("/api/instance_status") + json = response.json() + + object_storage_metrics = [o for o in json["results"]["overview"] if o.get("key", None) == "object_storage"] + self.assertEqual( + object_storage_metrics, + [ + {"key": "object_storage", "metric": "Object Storage enabled", "value": True}, + {"key": "object_storage", "metric": "Object Storage healthy", "value": False}, + ], + ) + + @patch("posthog.storage.object_storage.s3_client") + def test_object_storage_when_enabled_and_healthy(self, patched_s3_client): + patched_s3_client.head_bucket.return_value = True + + with self.settings(OBJECT_STORAGE_ENABLED=True,): + response = self.client.get("/api/instance_status") + json = response.json() + + object_storage_metrics = [o for o in json["results"]["overview"] if o.get("key", None) == "object_storage"] + self.assertEqual( + object_storage_metrics, + [ + {"key": "object_storage", "metric": "Object Storage enabled", "value": True}, + {"key": "object_storage", "metric": "Object Storage healthy", "value": True}, + ], + ) diff --git a/posthog/api/test/test_preflight.py b/posthog/api/test/test_preflight.py index 85ce0e979f5..606fb6a7e3c 100644 --- a/posthog/api/test/test_preflight.py +++ b/posthog/api/test/test_preflight.py @@ -1,4 +1,5 @@ from typing import cast +from unittest.mock import patch import pytest from constance.test import override_config @@ -23,7 +24,7 @@ class TestPreflight(APIBaseTest): For security purposes, the information contained in an unauthenticated preflight request is minimal. """ self.client.logout() - with self.settings(MULTI_TENANCY=False): + with self.settings(MULTI_TENANCY=False, OBJECT_STORAGE_ENABLED=False): response = self.client.get("/_preflight/") self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -44,12 +45,15 @@ class TestPreflight(APIBaseTest): "available_social_auth_providers": {"google-oauth2": False, "github": False, "gitlab": False,}, "can_create_org": False, "email_service_available": False, + "object_storage": False, }, ) def test_preflight_request(self): with self.settings( - MULTI_TENANCY=False, INSTANCE_PREFERENCES=self.instance_preferences(debug_queries=True), + MULTI_TENANCY=False, + INSTANCE_PREFERENCES=self.instance_preferences(debug_queries=True), + OBJECT_STORAGE_ENABLED=False, ): response = self.client.get("/_preflight/") self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -80,6 +84,50 @@ class TestPreflight(APIBaseTest): "site_url": "http://localhost:8000", "can_create_org": False, "instance_preferences": {"debug_queries": True, "disable_paid_fs": False,}, + "object_storage": False, + }, + ) + self.assertDictContainsSubset({"Europe/Moscow": 3, "UTC": 0}, available_timezones) + + @patch("posthog.storage.object_storage.s3_client") + def test_preflight_request_with_object_storage_available(self, patched_s3_client): + patched_s3_client.head_bucket.return_value = True + + with self.settings( + MULTI_TENANCY=False, + INSTANCE_PREFERENCES=self.instance_preferences(debug_queries=True), + OBJECT_STORAGE_ENABLED=True, + ): + response = self.client.get("/_preflight/") + self.assertEqual(response.status_code, status.HTTP_200_OK) + response = response.json() + available_timezones = cast(dict, response).pop("available_timezones") + + self.assertEqual( + response, + { + "django": True, + "redis": True, + "plugins": True, + "celery": True, + "db": True, + "initiated": True, + "cloud": False, + "demo": False, + "clickhouse": True, + "kafka": True, + "realm": "hosted-clickhouse", + "available_social_auth_providers": {"google-oauth2": False, "github": False, "gitlab": False,}, + "opt_out_capture": False, + "posthog_version": VERSION, + "email_service_available": False, + "is_debug": False, + "is_event_property_usage_enabled": True, + "licensed_users_available": None, + "site_url": "http://localhost:8000", + "can_create_org": False, + "instance_preferences": {"debug_queries": True, "disable_paid_fs": False,}, + "object_storage": True, }, ) self.assertDictContainsSubset({"Europe/Moscow": 3, "UTC": 0}, available_timezones) @@ -90,7 +138,7 @@ class TestPreflight(APIBaseTest): self.client.logout() # make sure it works anonymously - with self.settings(MULTI_TENANCY=True): + with self.settings(MULTI_TENANCY=True, OBJECT_STORAGE_ENABLED=False): response = self.client.get("/_preflight/") self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -111,12 +159,13 @@ class TestPreflight(APIBaseTest): "available_social_auth_providers": {"google-oauth2": False, "github": False, "gitlab": False,}, "can_create_org": True, "email_service_available": True, + "object_storage": False, }, ) @pytest.mark.ee def test_cloud_preflight_request(self): - with self.settings(MULTI_TENANCY=True, SITE_URL="https://app.posthog.com"): + with self.settings(MULTI_TENANCY=True, SITE_URL="https://app.posthog.com", OBJECT_STORAGE_ENABLED=False): response = self.client.get("/_preflight/") self.assertEqual(response.status_code, status.HTTP_200_OK) response = response.json() @@ -146,6 +195,7 @@ class TestPreflight(APIBaseTest): "site_url": "https://app.posthog.com", "can_create_org": True, "instance_preferences": {"debug_queries": False, "disable_paid_fs": False,}, + "object_storage": False, }, ) self.assertDictContainsSubset({"Europe/Moscow": 3, "UTC": 0}, available_timezones) @@ -158,6 +208,7 @@ class TestPreflight(APIBaseTest): SOCIAL_AUTH_GOOGLE_OAUTH2_SECRET="test_secret", MULTI_TENANCY=True, INSTANCE_PREFERENCES=self.instance_preferences(disable_paid_fs=True), + OBJECT_STORAGE_ENABLED=False, ): response = self.client.get("/_preflight/") self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -188,6 +239,7 @@ class TestPreflight(APIBaseTest): "site_url": "http://localhost:8000", "can_create_org": True, "instance_preferences": {"debug_queries": False, "disable_paid_fs": True,}, + "object_storage": False, }, ) self.assertDictContainsSubset({"Europe/Moscow": 3, "UTC": 0}, available_timezones) @@ -196,7 +248,7 @@ class TestPreflight(APIBaseTest): def test_demo(self): self.client.logout() # make sure it works anonymously - with self.settings(DEMO=True): + with self.settings(DEMO=True, OBJECT_STORAGE_ENABLED=False): response = self.client.get("/_preflight/") self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -217,6 +269,7 @@ class TestPreflight(APIBaseTest): "available_social_auth_providers": {"google-oauth2": False, "github": False, "gitlab": False,}, "can_create_org": True, "email_service_available": False, + "object_storage": False, }, ) diff --git a/posthog/settings/__init__.py b/posthog/settings/__init__.py index bfbd20ba87d..f4b2d00cc08 100644 --- a/posthog/settings/__init__.py +++ b/posthog/settings/__init__.py @@ -31,6 +31,7 @@ from posthog.settings.sentry import * from posthog.settings.shell_plus import * from posthog.settings.service_requirements import * from posthog.settings.statsd import * +from posthog.settings.object_storage import * from posthog.settings.web import * diff --git a/posthog/settings/object_storage.py b/posthog/settings/object_storage.py new file mode 100644 index 00000000000..4ae6e60eb51 --- /dev/null +++ b/posthog/settings/object_storage.py @@ -0,0 +1,18 @@ +import os + +from posthog.settings import get_from_env +from posthog.settings.base_variables import DEBUG, TEST +from posthog.utils import str_to_bool + +if TEST or DEBUG: + OBJECT_STORAGE_ENDPOINT = os.getenv("OBJECT_STORAGE_ENDPOINT", "http://localhost:19000") + OBJECT_STORAGE_ACCESS_KEY_ID = os.getenv("OBJECT_STORAGE_ACCESS_KEY_ID", "object_storage_root_user") + OBJECT_STORAGE_SECRET_ACCESS_KEY = os.getenv("OBJECT_STORAGE_SECRET_ACCESS_KEY", "object_storage_root_password") +else: + OBJECT_STORAGE_ENDPOINT = os.getenv("OBJECT_STORAGE_ENDPOINT", "") + OBJECT_STORAGE_ACCESS_KEY_ID = os.getenv("OBJECT_STORAGE_ACCESS_KEY_ID", "") + OBJECT_STORAGE_SECRET_ACCESS_KEY = os.getenv("OBJECT_STORAGE_SECRET_ACCESS_KEY", "") + +OBJECT_STORAGE_ENABLED = get_from_env("OBJECT_STORAGE_ENABLED", True if DEBUG else False, type_cast=str_to_bool) +OBJECT_STORAGE_BUCKET = os.getenv("OBJECT_STORAGE_BUCKET", "posthog") +OBJECT_STORAGE_SESSION_RECORDING_FOLDER = os.getenv("OBJECT_STORAGE_SESSION_RECORDING_FOLDER", "session_recordings") diff --git a/posthog/storage/object_storage.py b/posthog/storage/object_storage.py new file mode 100644 index 00000000000..d08de2269b0 --- /dev/null +++ b/posthog/storage/object_storage.py @@ -0,0 +1,40 @@ +import structlog +from boto3 import client +from botocore.client import Config + +logger = structlog.get_logger(__name__) + +from posthog.settings import ( + OBJECT_STORAGE_ACCESS_KEY_ID, + OBJECT_STORAGE_BUCKET, + OBJECT_STORAGE_ENDPOINT, + OBJECT_STORAGE_SECRET_ACCESS_KEY, +) + +s3_client = None + + +# boto doing some magic and gets confused if this is hinted as BaseClient +# noinspection PyMissingTypeHints +def storage_client(): + global s3_client + if not s3_client: + s3_client = client( + "s3", + endpoint_url=OBJECT_STORAGE_ENDPOINT, + aws_access_key_id=OBJECT_STORAGE_ACCESS_KEY_ID, + aws_secret_access_key=OBJECT_STORAGE_SECRET_ACCESS_KEY, + config=Config(signature_version="s3v4"), + region_name="us-east-1", + ) + return s3_client + + +def health_check() -> bool: + # noinspection PyBroadException + try: + response = storage_client().head_bucket(Bucket=OBJECT_STORAGE_BUCKET) + return bool(response) + except Exception as e: + logger.warn("object_storage.health_check_failed", error=e) + return False diff --git a/posthog/utils.py b/posthog/utils.py index 90acf0df84a..13d102b473b 100644 --- a/posthog/utils.py +++ b/posthog/utils.py @@ -581,6 +581,18 @@ def get_plugin_server_job_queues() -> Optional[List[str]]: return None +def is_object_storage_available() -> bool: + from posthog.storage import object_storage + + try: + if settings.OBJECT_STORAGE_ENABLED: + return object_storage.health_check() + else: + return False + except BaseException: + return False + + def get_redis_info() -> Mapping[str, Any]: return get_client().info() diff --git a/posthog/views.py b/posthog/views.py index 9fab160b1cb..c103d4a11d2 100644 --- a/posthog/views.py +++ b/posthog/views.py @@ -22,6 +22,7 @@ from posthog.utils import ( get_instance_available_sso_providers, get_instance_realm, is_celery_alive, + is_object_storage_available, is_plugin_server_alive, is_postgres_alive, is_redis_alive, @@ -105,6 +106,7 @@ def preflight_check(request: HttpRequest) -> JsonResponse: "available_social_auth_providers": get_instance_available_sso_providers(), "can_create_org": get_can_create_org(request.user), "email_service_available": is_email_available(with_absolute_urls=True), + "object_storage": is_object_storage_available(), } if request.user.is_authenticated: diff --git a/requirements.in b/requirements.in index d9d16daf6a7..7f99a794e85 100644 --- a/requirements.in +++ b/requirements.in @@ -8,6 +8,7 @@ django-rest-hooks@ git+https://github.com/zapier/django-rest-hooks.git@v1.6.0 amqp==2.5.2 asgiref==3.3.2 aioch==0.0.2 +boto3==1.21.29 celery==4.4.2 celery-redbeat==2.0.0 clickhouse-driver==0.2.1 diff --git a/requirements.txt b/requirements.txt index b9707fc49a8..75b042e9626 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile +# This file is autogenerated by pip-compile with python 3.8 # To update, run: # # pip-compile requirements.in @@ -20,12 +20,18 @@ backoff==1.6.0 # via posthoganalytics billiard==3.6.3.0 # via celery -celery-redbeat==2.0.0 +boto3==1.21.29 # via -r requirements.in +botocore==1.24.46 + # via + # boto3 + # s3transfer celery==4.4.2 # via # -r requirements.in # celery-redbeat +celery-redbeat==2.0.0 + # via -r requirements.in certifi==2019.11.28 # via # requests @@ -49,7 +55,7 @@ cssselect==1.1.0 # via toronado cssutils==1.0.2 # via toronado -dataclasses_json==0.5.7 +dataclasses-json==0.5.7 # via -r requirements.in defusedxml==0.6.0 # via @@ -58,6 +64,20 @@ defusedxml==0.6.0 # social-auth-core dj-database-url==0.5.0 # via -r requirements.in +django==3.2.12 + # via + # -r requirements.in + # django-axes + # django-cors-headers + # django-deprecate-fields + # django-extensions + # django-filter + # django-picklefield + # django-redis + # django-rest-hooks + # django-structlog + # djangorestframework + # drf-spectacular django-axes==5.9.0 # via -r requirements.in django-constance==2.8.0 @@ -80,28 +100,12 @@ django-picklefield==3.0.1 # via -r requirements.in django-redis==4.12.1 # via -r requirements.in -git+https://github.com/zapier/django-rest-hooks.git@v1.6.0 +django-rest-hooks @ git+https://github.com/zapier/django-rest-hooks.git@v1.6.0 # via -r requirements.in django-statsd==2.5.2 # via -r requirements.in django-structlog==2.1.3 # via -r requirements.in -django==3.2.12 - # via - # -r requirements.in - # django-axes - # django-cors-headers - # django-deprecate-fields - # django-extensions - # django-filter - # django-picklefield - # django-redis - # django-rest-hooks - # django-structlog - # djangorestframework - # drf-spectacular -djangorestframework-csv==2.1.0 - # via -r requirements.in djangorestframework==3.12.2 # via # -r requirements.in @@ -109,6 +113,8 @@ djangorestframework==3.12.2 # drf-exceptions-hog # drf-extensions # drf-spectacular +djangorestframework-csv==2.1.0 + # via -r requirements.in dnspython==2.2.1 # via -r requirements.in drf-exceptions-hog==0.2.0 @@ -129,7 +135,9 @@ idna==2.8 # requests importlib-metadata==1.6.0 # via -r requirements.in -git+https://github.com/PostHog/infi.clickhouse_orm@de88ffaecda8c36c693cc51909a3598cb9725fde +importlib-resources==5.7.1 + # via jsonschema +infi-clickhouse-orm @ git+https://github.com/PostHog/infi.clickhouse_orm@de88ffaecda8c36c693cc51909a3598cb9725fde # via -r requirements.in inflection==0.5.1 # via drf-spectacular @@ -137,6 +145,10 @@ iso8601==0.1.12 # via infi-clickhouse-orm isodate==0.6.1 # via python3-saml +jmespath==1.0.0 + # via + # boto3 + # botocore jsonschema==4.4.0 # via drf-spectacular kafka-helper==0.2 @@ -154,12 +166,12 @@ lxml==4.7.1 # xmlsec lzstring==1.0.4 # via -r requirements.in -marshmallow-enum==1.5.1 - # via dataclasses-json marshmallow==3.15.0 # via # dataclasses-json # marshmallow-enum +marshmallow-enum==1.5.1 + # via dataclasses-json mimesis==5.2.1 # via -r requirements.in monotonic==1.5 @@ -199,6 +211,7 @@ pyrsistent==0.18.1 python-dateutil==2.8.1 # via # -r requirements.in + # botocore # celery-redbeat # posthoganalytics python-statsd==2.1.0 @@ -222,10 +235,6 @@ redis==3.4.1 # -r requirements.in # celery-redbeat # django-redis -requests-oauthlib==1.3.0 - # via - # -r requirements.in - # social-auth-core requests==2.25.1 # via # -r requirements.in @@ -234,7 +243,13 @@ requests==2.25.1 # posthoganalytics # requests-oauthlib # social-auth-core -semantic_version==2.8.5 +requests-oauthlib==1.3.0 + # via + # -r requirements.in + # social-auth-core +s3transfer==0.5.2 + # via boto3 +semantic-version==2.8.5 # via -r requirements.in sentry-sdk==1.3.1 # via -r requirements.in @@ -275,6 +290,7 @@ uritemplate==4.1.1 # via drf-spectacular urllib3==1.26.5 # via + # botocore # requests # sentry-sdk vine==1.3.0 @@ -286,7 +302,9 @@ whitenoise==5.2.0 xmlsec==1.3.12 # via python3-saml zipp==3.1.0 - # via importlib-metadata + # via + # importlib-metadata + # importlib-resources # The following packages are considered to be unsafe in a requirements file: # setuptools