diff --git a/.github/actions/run-backend-tests/action.yml b/.github/actions/run-backend-tests/action.yml
index a7282e855e9..81782f792da 100644
--- a/.github/actions/run-backend-tests/action.yml
+++ b/.github/actions/run-backend-tests/action.yml
@@ -37,7 +37,7 @@ runs:
run: |
export CLICKHOUSE_SERVER_IMAGE=${{ inputs.clickhouse-server-image }}
docker-compose -f docker-compose.dev.yml down
- docker-compose -f docker-compose.dev.yml up -d db clickhouse zookeeper kafka redis &
+ docker-compose -f docker-compose.dev.yml up -d db clickhouse zookeeper kafka redis object_storage &
- name: Set up Python
uses: actions/setup-python@v2
diff --git a/.github/workflows/ci-backend.yml b/.github/workflows/ci-backend.yml
index eae9799757f..13e80b6c54f 100644
--- a/.github/workflows/ci-backend.yml
+++ b/.github/workflows/ci-backend.yml
@@ -24,6 +24,10 @@ env:
CLICKHOUSE_VERIFY: 'False'
TEST: 1
CLICKHOUSE_SERVER_IMAGE_VERSION: ${{ github.event.inputs.clickhouseServerVersion || '' }}
+ OBJECT_STORAGE_ENABLED: 'True'
+ OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000'
+ OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user'
+ OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password'
jobs:
# Job to decide if we should run backend ci
@@ -293,6 +297,10 @@ jobs:
cp -r messaging deploy/
cat multi_tenancy_settings.py > deploy/posthog/settings/cloud.py
cat requirements.txt >> deploy/requirements.txt
+
+ - name: Add kafka host to /etc/hosts for kafka connectivity
+ run: sudo echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts
+
- name: Start stack with Docker Compose
run: |
docker-compose -f deploy/docker-compose.dev.yml down
@@ -335,6 +343,11 @@ jobs:
with:
path: 'deploy/'
+ # just while object_storage isn't in master
+ - name: Start stack with object_storage
+ run: |
+ docker-compose -f deploy/docker-compose.dev.yml up -d object_storage
+
- name: Install requirements.txt dependencies with pip at current branch
run: |
cd deploy
@@ -357,9 +370,6 @@ jobs:
python manage.py makemigrations --check --dry-run
python manage.py migrate
- - name: Add kafka host to /etc/hosts for kafka connectivity
- run: sudo echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts
-
- name: Set up needed files
run: |
cd deploy
diff --git a/.github/workflows/ci-plugin-server.yml b/.github/workflows/ci-plugin-server.yml
index 04a5bbad143..71d940f2576 100644
--- a/.github/workflows/ci-plugin-server.yml
+++ b/.github/workflows/ci-plugin-server.yml
@@ -11,6 +11,14 @@ on:
- 'docker*.yml'
- '*Dockerfile'
+env:
+ OBJECT_STORAGE_ENABLED: true
+ OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000'
+ OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user'
+ OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password'
+ OBJECT_STORAGE_SESSION_RECORDING_FOLDER: 'session_recordings'
+ OBJECT_STORAGE_BUCKET: 'posthog'
+
jobs:
code-quality:
name: Code quality
@@ -73,8 +81,8 @@ jobs:
sudo bash -c 'echo "127.0.0.1 kafka zookeeper" >> /etc/hosts'
ping -c 1 kafka
ping -c 1 zookeeper
- - name: Start Kafka, ClickHouse, Zookeeper
- run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse
+ - name: Start Kafka, ClickHouse, Zookeeper, Object Storage
+ run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse object_storage
- name: Set up Python 3.8.12
uses: actions/setup-python@v2
@@ -154,8 +162,8 @@ jobs:
sudo bash -c 'echo "127.0.0.1 kafka zookeeper" >> /etc/hosts'
ping -c 1 kafka
ping -c 1 zookeeper
- - name: Start Kafka, ClickHouse, Zookeeper
- run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse
+ - name: Start Kafka, ClickHouse, Zookeeper, Object Storage
+ run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse object_storage
- name: Set up Python 3.8.12
uses: actions/setup-python@v2
@@ -236,8 +244,8 @@ jobs:
sudo bash -c 'echo "127.0.0.1 kafka zookeeper" >> /etc/hosts'
ping -c 1 kafka
ping -c 1 zookeeper
- - name: Start Kafka, ClickHouse, Zookeeper
- run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse
+ - name: Start Kafka, ClickHouse, Zookeeper, Object Storage
+ run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse object_storage
- name: Set up Python 3.8.12
uses: actions/setup-python@v2
@@ -318,8 +326,8 @@ jobs:
sudo bash -c 'echo "127.0.0.1 kafka zookeeper" >> /etc/hosts'
ping -c 1 kafka
ping -c 1 zookeeper
- - name: Start Kafka, ClickHouse, Zookeeper
- run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse
+ - name: Start Kafka, ClickHouse, Zookeeper, Object Storage
+ run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse object_storage
- name: Set up Python 3.8.12
uses: actions/setup-python@v2
diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 7577037d776..39cf06a7891 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -18,6 +18,10 @@ env:
SITE_URL: 'test.posthog.net' # used to test password resets
NO_RESTART_LOOP: 1
CLICKHOUSE_SECURE: 0
+ OBJECT_STORAGE_ENABLED: 1
+ OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000'
+ OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user'
+ OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password'
jobs:
# Job that lists and chunks spec file names and caches node modules
@@ -69,7 +73,7 @@ jobs:
- name: Start stack with Docker Compose
run: |
docker-compose -f docker-compose.dev.yml down
- docker-compose -f docker-compose.dev.yml up -d db clickhouse zookeeper kafka redis &
+ docker-compose -f docker-compose.dev.yml up -d db clickhouse zookeeper kafka redis object_storage &
- name: Add kafka host to /etc/hosts for kafka connectivity
run: sudo echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts
diff --git a/.run/Plugin Server.run.xml b/.run/Plugin Server.run.xml
index d44fce97d7c..22c24ca814b 100644
--- a/.run/Plugin Server.run.xml
+++ b/.run/Plugin Server.run.xml
@@ -7,12 +7,13 @@
[<env name="…" value="…"/> entries in this run-configuration hunk were not preserved in this excerpt]
\ No newline at end of file
diff --git a/bin/check_kafka_clickhouse_up b/bin/check_kafka_clickhouse_up
index f2c52262203..4217f70e693 100755
--- a/bin/check_kafka_clickhouse_up
+++ b/bin/check_kafka_clickhouse_up
@@ -4,10 +4,10 @@ set -e
# Check Kafka
while true; do
-nc -z localhost 9092 && break || echo "Checking Kafka status..." && sleep 1
+nc -z localhost 9092 && break || echo 'Checking Kafka status...' && sleep 1
done
# Check ClickHouse
while true; do
-curl -s -o /dev/null -I 'http://localhost:8123/' && break || echo "Checking ClickHouse status..." && sleep 1
+curl -s -o /dev/null -I 'http://localhost:8123/' && break || echo 'Checking ClickHouse status...' && sleep 1
done
diff --git a/docker-compose.arm64.yml b/docker-compose.arm64.yml
index 79dbf712f64..837f32eb0e2 100644
--- a/docker-compose.arm64.yml
+++ b/docker-compose.arm64.yml
@@ -78,6 +78,7 @@ services:
- redis
- clickhouse
- kafka
+ - object_storage
web:
<<: *worker
command: '${CH_WEB_SCRIPT:-./ee/bin/docker-ch-dev-web}'
@@ -103,3 +104,17 @@ services:
- redis
- clickhouse
- kafka
+ - object_storage
+
+ object_storage:
+ image: minio/minio
+ ports:
+ - '19000:19000'
+ - '19001:19001'
+ volumes:
+ - ./object_storage:/data
+ environment:
+ MINIO_ROOT_USER: object_storage_root_user
+ MINIO_ROOT_PASSWORD: object_storage_root_password
+ entrypoint: sh
+ command: -c 'mkdir -p /data/posthog && minio server --address ":19000" --console-address ":19001" /data' # create the 'posthog' bucket before starting the service
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index 7967d2a5449..9417607db2e 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -75,6 +75,7 @@ services:
- redis
- clickhouse
- kafka
+ - object_storage
web:
<<: *worker
command: '${CH_WEB_SCRIPT:-./ee/bin/docker-ch-dev-web}'
@@ -100,3 +101,15 @@ services:
- redis
- clickhouse
- kafka
+ - object_storage
+
+ object_storage:
+ image: minio/minio
+ ports:
+ - '19000:19000'
+ - '19001:19001'
+ environment:
+ MINIO_ROOT_USER: object_storage_root_user
+ MINIO_ROOT_PASSWORD: object_storage_root_password
+ entrypoint: sh
+ command: -c 'mkdir -p /data/posthog && chmod u+rxw /data/posthog && minio server --address ":19000" --console-address ":19001" /data' # create the 'posthog' bucket before starting the service
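The object_storage service above exposes MinIO's S3 API on port 19000 and its console on 19001, matching the OBJECT_STORAGE_ENDPOINT defaults used throughout the CI workflows. A quick way to confirm the container is actually reachable from the host is a liveness probe; this is a minimal sketch that assumes MinIO's standard /minio/health/live endpoint:

    import urllib.request

    # Minimal liveness probe for the object_storage (MinIO) service defined above.
    # Assumes MinIO's standard health endpoint and the port mapping from this compose file.
    def object_storage_is_live(endpoint: str = "http://localhost:19000") -> bool:
        try:
            with urllib.request.urlopen(f"{endpoint}/minio/health/live", timeout=2) as response:
                return response.status == 200
        except OSError:
            return False

    if __name__ == "__main__":
        print("object storage live:", object_storage_is_live())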
diff --git a/docker-compose.hobby.yml b/docker-compose.hobby.yml
index 8b24ac31fab..6e2164bdddb 100644
--- a/docker-compose.hobby.yml
+++ b/docker-compose.hobby.yml
@@ -84,6 +84,7 @@ services:
- redis
- clickhouse
- kafka
+ - object_storage
web:
<<: *worker
command: /compose/start
@@ -117,6 +118,21 @@ services:
- redis
- clickhouse
- kafka
+ - object_storage
+
+ object_storage:
+ image: minio/minio
+ ports:
+ - '19000:19000'
+ - '19001:19001'
+ volumes:
+ - object_storage:/data
+ environment:
+ MINIO_ROOT_USER: object_storage_root_user
+ MINIO_ROOT_PASSWORD: object_storage_root_password
+ entrypoint: sh
+ command: -c 'mkdir -p /data/posthog && minio server --address ":19000" --console-address ":19001" /data' # create the 'posthog' bucket before starting the service
+
asyncmigrationscheck:
<<: *worker
command: python manage.py run_async_migrations --check
@@ -127,3 +143,4 @@ volumes:
zookeeper-data:
zookeeper-datalog:
zookeeper-logs:
+ object_storage:
diff --git a/docker-compose.test.yml b/docker-compose.test.yml
index 3499aacd524..ddd70621ba7 100644
--- a/docker-compose.test.yml
+++ b/docker-compose.test.yml
@@ -23,3 +23,4 @@ services:
- redis
- clickhouse
- kafka
+ - object_storage
diff --git a/docker-compose.yml b/docker-compose.yml
index 3a9dbed8a9b..04c4417323e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -54,6 +54,7 @@ services:
- redis
- clickhouse
- kafka
+ - object_storage
environment:
DATABASE_URL: postgres://posthog:posthog@db:5432/posthog
REDIS_URL: redis://redis:6379/
@@ -70,6 +71,20 @@ services:
ports:
- 8000:8000
- 80:8000
+
+ object_storage:
+ image: minio/minio
+ ports:
+ - '19000:19000'
+ - '19001:19001'
+ volumes:
+ - ./object_storage:/data
+ environment:
+ MINIO_ROOT_USER: object_storage_root_user
+ MINIO_ROOT_PASSWORD: object_storage_root_password
+ entrypoint: sh
+ command: -c 'mkdir -p /data/posthog && minio server --address ":19000" --console-address ":19001" /data' # create the 'posthog' bucket before starting the service
+
volumes:
postgres-data:
version: '3'
diff --git a/frontend/src/mocks/fixtures/_preflight.json b/frontend/src/mocks/fixtures/_preflight.json
index 3358e875f8a..1a8bf61b9f8 100644
--- a/frontend/src/mocks/fixtures/_preflight.json
+++ b/frontend/src/mocks/fixtures/_preflight.json
@@ -9,6 +9,7 @@
"initiated": true,
"cloud": false,
"demo": false,
+ "object_storage": true,
"realm": "hosted-clickhouse",
"available_social_auth_providers": {
"github": false,
diff --git a/frontend/src/scenes/PreflightCheck/__mocks__/preflight.initial.json b/frontend/src/scenes/PreflightCheck/__mocks__/preflight.initial.json
index 2162cdb0e67..d3bc8cd6f98 100644
--- a/frontend/src/scenes/PreflightCheck/__mocks__/preflight.initial.json
+++ b/frontend/src/scenes/PreflightCheck/__mocks__/preflight.initial.json
@@ -28,6 +28,7 @@
"clickhouse": false,
"kafka": false,
"realm": "hosted",
+ "object_storage": true,
"available_social_auth_providers": {
"github": false,
"gitlab": false,
diff --git a/frontend/src/scenes/PreflightCheck/preflightLogic.test.ts b/frontend/src/scenes/PreflightCheck/preflightLogic.test.ts
index 7e5d81edde1..c5bbbabb680 100644
--- a/frontend/src/scenes/PreflightCheck/preflightLogic.test.ts
+++ b/frontend/src/scenes/PreflightCheck/preflightLogic.test.ts
@@ -86,6 +86,11 @@ describe('preflightLogic', () => {
status: 'warning',
caption: 'Set up before ingesting real user data',
},
+ {
+ id: 'object_storage',
+ name: 'Object Storage',
+ status: 'validated',
+ },
],
})
})
@@ -144,6 +149,11 @@ describe('preflightLogic', () => {
status: 'optional',
caption: 'Not required for experimentation mode',
},
+ {
+ id: 'object_storage',
+ name: 'Object Storage',
+ status: 'validated',
+ },
],
})
})
@@ -156,7 +166,7 @@ describe('preflightLogic', () => {
.toDispatchActions(['loadPreflightSuccess'])
.toMatchValues({
checksSummary: {
- summaryString: '6 successful, 1 warning, 2 errors',
+ summaryString: '7 successful, 1 warning, 2 errors',
summaryStatus: 'error',
},
})
@@ -169,7 +179,7 @@ describe('preflightLogic', () => {
.toDispatchActions(['loadPreflightSuccess'])
.toMatchValues({
checksSummary: {
- summaryString: '6 successful, 1 warning, 1 error, 1 optional',
+ summaryString: '7 successful, 1 warning, 1 error, 1 optional',
summaryStatus: 'error',
},
})
diff --git a/frontend/src/scenes/PreflightCheck/preflightLogic.tsx b/frontend/src/scenes/PreflightCheck/preflightLogic.tsx
index 23d67cbabc7..dccc3c20723 100644
--- a/frontend/src/scenes/PreflightCheck/preflightLogic.tsx
+++ b/frontend/src/scenes/PreflightCheck/preflightLogic.tsx
@@ -69,7 +69,7 @@ export const preflightLogic = kea<
checks: [
(s) => [s.preflight, s.preflightMode],
(preflight, preflightMode) => {
- return [
+ const preflightItems = [
{
id: 'database',
name: 'Application database · Postgres',
@@ -132,7 +132,21 @@ export const preflightLogic = kea<
? 'Not required for experimentation mode'
: 'Set up before ingesting real user data',
},
- ] as PreflightItemInterface[]
+ ]
+
+ if (preflight?.object_storage || preflight?.is_debug) {
+ /** for now, only prompt debug users if object storage is unhealthy */
+ preflightItems.push({
+ id: 'object_storage',
+ name: 'Object Storage',
+ status: preflight?.object_storage ? 'validated' : 'warning',
+ caption: preflight?.object_storage
+ ? undefined
+ : 'Some features will not work without object storage',
+ })
+ }
+
+ return preflightItems as PreflightItemInterface[]
},
],
checksSummary: [
diff --git a/frontend/src/types.ts b/frontend/src/types.ts
index 72ff5a6d9ad..de5d2bdb84d 100644
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@@ -1391,6 +1391,7 @@ export interface PreflightStatus {
licensed_users_available?: number | null
site_url?: string
instance_preferences?: InstancePreferencesInterface
+ object_storage: boolean
}
export enum ItemMode { // todo: consolidate this and dashboardmode
diff --git a/package.json b/package.json
index a499b646e80..4733c7273c3 100644
--- a/package.json
+++ b/package.json
@@ -52,9 +52,9 @@
"arm64:ch-dev:start": "concurrently -n DOCKER,ESBUILD,TYPEGEN -c red,blue,green \"docker-compose -f docker-compose.arm64.yml pull && CH_WEB_SCRIPT=./ee/bin/docker-ch-dev-backend docker-compose -f docker-compose.arm64.yml up\" \"yarn run start-http --host 0.0.0.0\" \"yarn run typegen:watch\"",
"arm64:ch-dev:clear": "docker compose -f docker-compose.arm64.yml stop && docker compose -f docker-compose.arm64.yml rm -v && docker compose -f docker-compose.arm64.yml down",
"arm64:services": "yarn arm64:services:stop && yarn arm64:services:clean && yarn arm64:services:start",
- "arm64:services:start": "docker-compose -f docker-compose.arm64.yml up zookeeper kafka clickhouse",
+ "arm64:services:start": "docker-compose -f docker-compose.arm64.yml up zookeeper kafka clickhouse object_storage",
"arm64:services:stop": "docker-compose -f docker-compose.arm64.yml down",
- "arm64:services:clean": "docker-compose -f docker-compose.arm64.yml rm -v zookeeper kafka clickhouse",
+ "arm64:services:clean": "docker-compose -f docker-compose.arm64.yml rm -v zookeeper kafka clickhouse object_storage",
"dev:migrate:postgres": "export DEBUG=1 && source env/bin/activate && python manage.py migrate",
"dev:migrate:clickhouse": "export DEBUG=1 && source env/bin/activate && python manage.py migrate_clickhouse",
"prepare": "husky install"
diff --git a/plugin-server/package.json b/plugin-server/package.json
index 85381e1d07d..2883c16f6cd 100644
--- a/plugin-server/package.json
+++ b/plugin-server/package.json
@@ -32,9 +32,9 @@
"prepublishOnly": "yarn build",
"setup:dev:clickhouse": "cd .. && DEBUG=1 python manage.py migrate_clickhouse",
"setup:test": "cd .. && TEST=1 python manage.py setup_test_environment",
- "services:start": "cd .. && docker-compose -f docker-compose.dev.yml up zookeeper kafka clickhouse",
+ "services:start": "cd .. && docker-compose -f docker-compose.dev.yml up zookeeper kafka clickhouse object_storage",
"services:stop": "cd .. && docker-compose -f docker-compose.dev.yml down",
- "services:clean": "cd .. && docker-compose -f docker-compose.dev.yml rm -v zookeeper kafka clickhouse",
+ "services:clean": "cd .. && docker-compose -f docker-compose.dev.yml rm -v zookeeper kafka clickhouse object_storage",
"services": "yarn services:stop && yarn services:clean && yarn services:start"
},
"bin": {
diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts
index c6d64da37de..e311e84edb2 100644
--- a/plugin-server/src/config/config.ts
+++ b/plugin-server/src/config/config.ts
@@ -95,6 +95,12 @@ export function getDefaultConfig(): PluginsServerConfig {
PERSON_INFO_CACHE_TTL: 5 * 60, // 5 min
KAFKA_HEALTHCHECK_SECONDS: 20,
HISTORICAL_EXPORTS_ENABLED: true,
+ OBJECT_STORAGE_ENABLED: false,
+ OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000',
+ OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user',
+ OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password',
+ OBJECT_STORAGE_SESSION_RECORDING_FOLDER: 'session_recordings',
+ OBJECT_STORAGE_BUCKET: 'posthog',
}
}
@@ -169,6 +175,14 @@ export function getConfigHelp(): Record<string, string> {
CLICKHOUSE_DISABLE_EXTERNAL_SCHEMAS_TEAMS:
'(advanced) a comma separated list of teams to disable clickhouse external schemas for',
CLICKHOUSE_JSON_EVENTS_KAFKA_TOPIC: '(advanced) topic to send events to for clickhouse ingestion',
+ OBJECT_STORAGE_ENABLED:
'Enables or disables the use of object storage. Using object storage will eventually become mandatory',
+ OBJECT_STORAGE_ENDPOINT: 'minio endpoint',
+ OBJECT_STORAGE_ACCESS_KEY_ID: 'access key for minio',
+ OBJECT_STORAGE_SECRET_ACCESS_KEY: 'secret key for minio',
+ OBJECT_STORAGE_SESSION_RECORDING_FOLDER:
+ 'the top level folder for storing session recordings inside the storage bucket',
+ OBJECT_STORAGE_BUCKET: 'the object storage bucket name',
}
}
diff --git a/plugin-server/src/main/services/object_storage.ts b/plugin-server/src/main/services/object_storage.ts
new file mode 100644
index 00000000000..40515b06f9b
--- /dev/null
+++ b/plugin-server/src/main/services/object_storage.ts
@@ -0,0 +1,59 @@
+import { PluginsServerConfig } from '../../types'
+import { status } from '../../utils/status'
+
+const aws = require('aws-sdk')
+
+let S3: typeof aws.S3 | null = null
+
+export interface ObjectStorage {
+ healthCheck: () => Promise<boolean>
+}
+
+export const connectObjectStorage = (serverConfig: Partial<PluginsServerConfig>): ObjectStorage => {
+ let storage = {
+ healthCheck: async () => {
+ return Promise.resolve(false)
+ },
+ }
+ try {
+ const {
+ OBJECT_STORAGE_ENDPOINT,
+ OBJECT_STORAGE_ACCESS_KEY_ID,
+ OBJECT_STORAGE_SECRET_ACCESS_KEY,
+ OBJECT_STORAGE_ENABLED,
+ OBJECT_STORAGE_BUCKET,
+ } = serverConfig
+
+ if (OBJECT_STORAGE_ENABLED && !S3) {
+ S3 = new aws.S3({
+ endpoint: OBJECT_STORAGE_ENDPOINT,
+ accessKeyId: OBJECT_STORAGE_ACCESS_KEY_ID,
+ secretAccessKey: OBJECT_STORAGE_SECRET_ACCESS_KEY,
+ s3ForcePathStyle: true, // path-style bucket addressing is required for MinIO
+ signatureVersion: 'v4',
+ })
+ }
+
+ storage = {
+ healthCheck: async () => {
+ if (!OBJECT_STORAGE_BUCKET) {
+ status.error('😢', 'No object storage bucket configured')
+ return false
+ }
+
+ try {
+ await S3.headBucket({
+ Bucket: OBJECT_STORAGE_BUCKET,
+ }).promise()
+ return true
+ } catch (error) {
+ return false
+ }
+ },
+ }
+ } catch (e) {
+ status.warn('😢', `could not initialise storage: ${e}`)
+ }
+
+ return storage
+}
diff --git a/plugin-server/src/types.ts b/plugin-server/src/types.ts
index 5099254cece..eed2d26b4eb 100644
--- a/plugin-server/src/types.ts
+++ b/plugin-server/src/types.ts
@@ -17,6 +17,7 @@ import { Job } from 'node-schedule'
import { Pool } from 'pg'
import { VM } from 'vm2'
+import { ObjectStorage } from './main/services/object_storage'
import { DB } from './utils/db/db'
import { KafkaProducerWrapper } from './utils/db/kafka-producer-wrapper'
import { InternalMetrics } from './utils/internal-metrics'
@@ -142,6 +143,12 @@ export interface PluginsServerConfig extends Record<string, any> {
PERSON_INFO_CACHE_TTL: number
KAFKA_HEALTHCHECK_SECONDS: number
HISTORICAL_EXPORTS_ENABLED: boolean
+ OBJECT_STORAGE_ENABLED: boolean
+ OBJECT_STORAGE_ENDPOINT: string
+ OBJECT_STORAGE_ACCESS_KEY_ID: string
+ OBJECT_STORAGE_SECRET_ACCESS_KEY: string
+ OBJECT_STORAGE_SESSION_RECORDING_FOLDER: string
+ OBJECT_STORAGE_BUCKET: string
}
export interface Hub extends PluginsServerConfig {
@@ -155,6 +162,7 @@ export interface Hub extends PluginsServerConfig {
clickhouse?: ClickHouse
kafka?: Kafka
kafkaProducer?: KafkaProducerWrapper
+ objectStorage: ObjectStorage
// metrics
statsd?: StatsD
internalMetrics?: InternalMetrics
diff --git a/plugin-server/src/utils/db/hub.ts b/plugin-server/src/utils/db/hub.ts
index 319cbeefb17..95518414348 100644
--- a/plugin-server/src/utils/db/hub.ts
+++ b/plugin-server/src/utils/db/hub.ts
@@ -12,6 +12,7 @@ import { ConnectionOptions } from 'tls'
import { defaultConfig } from '../../config/config'
import { JobQueueManager } from '../../main/job-queues/job-queue-manager'
+import { connectObjectStorage, ObjectStorage } from '../../main/services/object_storage'
import { Hub, KafkaSecurityProtocol, PluginId, PluginServerCapabilities, PluginsServerConfig } from '../../types'
import { ActionManager } from '../../worker/ingestion/action-manager'
import { ActionMatcher } from '../../worker/ingestion/action-matcher'
@@ -205,6 +206,18 @@ export async function createHub(
)
status.info('👍', `Redis`)
+ status.info('🤔', `Storage`)
+ const objectStorage: ObjectStorage = connectObjectStorage(serverConfig)
+ try {
+ if (serverConfig.OBJECT_STORAGE_ENABLED && (await objectStorage.healthCheck())) {
+ status.info('👍', `storage 🪣`)
+ } else {
+ status.info('🪣', `storage not in use`)
+ }
+ } catch (e) {
+ status.warn('🪣', `storage failed healthcheck: ${e}`)
+ }
+
const db = new DB(
postgres,
redisPool,
diff --git a/posthog/api/instance_status.py b/posthog/api/instance_status.py
index 197b4ad3179..02e5b2e5d34 100644
--- a/posthog/api/instance_status.py
+++ b/posthog/api/instance_status.py
@@ -1,5 +1,6 @@
from typing import Any, Dict, List, Union
+from django.conf import settings
from django.db import connection
from rest_framework import viewsets
from rest_framework.decorators import action
@@ -11,6 +12,7 @@ from posthog.async_migrations.status import async_migrations_ok
from posthog.gitsha import GIT_SHA
from posthog.internal_metrics.team import get_internal_metrics_dashboards
from posthog.permissions import OrganizationAdminAnyPermissions, SingleTenancyOrAdmin
+from posthog.storage import object_storage
from posthog.utils import (
dict_from_cursor_fetchall,
get_helm_info_env,
@@ -129,6 +131,14 @@ class InstanceStatusViewSet(viewsets.ViewSet):
{"metric": "Redis metrics", "value": f"Redis connected but then failed to return metrics: {e}"}
)
+ metrics.append(
+ {"key": "object_storage", "metric": "Object Storage enabled", "value": settings.OBJECT_STORAGE_ENABLED}
+ )
+ if settings.OBJECT_STORAGE_ENABLED:
+ metrics.append(
+ {"key": "object_storage", "metric": "Object Storage healthy", "value": object_storage.health_check()}
+ )
+
return Response({"results": {"overview": metrics, "internal_metrics": get_internal_metrics_dashboards()}})
# Used to capture internal metrics shown on dashboards
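With the flag enabled and the bucket reachable, the two new entries in the /api/instance_status overview payload look like the following hypothetical excerpt (it mirrors the assertions in the tests below):

    # Hypothetical excerpt of json["results"]["overview"]; only the new
    # object_storage entries are shown.
    [
        {"key": "object_storage", "metric": "Object Storage enabled", "value": True},
        {"key": "object_storage", "metric": "Object Storage healthy", "value": True},
    ]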
diff --git a/posthog/api/test/test_instance_status.py b/posthog/api/test/test_instance_status.py
index a2b93935976..8f4bb1f5439 100644
--- a/posthog/api/test/test_instance_status.py
+++ b/posthog/api/test/test_instance_status.py
@@ -22,3 +22,47 @@ class TestInstanceStatus(APIBaseTest):
"/api/instance_status/capture", {"method": "timing", "metric": "bar", "value": 15.2, "tags": {"team_id": 1}}
)
timing_mock.assert_called_with("bar", 15.2, {"team_id": 1})
+
+ def test_object_storage_when_disabled(self):
+ with self.settings(OBJECT_STORAGE_ENABLED=False,):
+ response = self.client.get("/api/instance_status")
+ json = response.json()
+
+ object_storage_metrics = [o for o in json["results"]["overview"] if o.get("key", None) == "object_storage"]
+ self.assertEqual(
+ object_storage_metrics, [{"key": "object_storage", "metric": "Object Storage enabled", "value": False}]
+ )
+
+ @patch("posthog.storage.object_storage.s3_client")
+ def test_object_storage_when_enabled_but_unhealthy(self, patched_s3_client):
+ patched_s3_client.head_bucket.return_value = False
+
+ with self.settings(OBJECT_STORAGE_ENABLED=True,):
+ response = self.client.get("/api/instance_status")
+ json = response.json()
+
+ object_storage_metrics = [o for o in json["results"]["overview"] if o.get("key", None) == "object_storage"]
+ self.assertEqual(
+ object_storage_metrics,
+ [
+ {"key": "object_storage", "metric": "Object Storage enabled", "value": True},
+ {"key": "object_storage", "metric": "Object Storage healthy", "value": False},
+ ],
+ )
+
+ @patch("posthog.storage.object_storage.s3_client")
+ def test_object_storage_when_enabled_and_healthy(self, patched_s3_client):
+ patched_s3_client.head_bucket.return_value = True
+
+ with self.settings(OBJECT_STORAGE_ENABLED=True,):
+ response = self.client.get("/api/instance_status")
+ json = response.json()
+
+ object_storage_metrics = [o for o in json["results"]["overview"] if o.get("key", None) == "object_storage"]
+ self.assertEqual(
+ object_storage_metrics,
+ [
+ {"key": "object_storage", "metric": "Object Storage enabled", "value": True},
+ {"key": "object_storage", "metric": "Object Storage healthy", "value": True},
+ ],
+ )
diff --git a/posthog/api/test/test_preflight.py b/posthog/api/test/test_preflight.py
index 85ce0e979f5..606fb6a7e3c 100644
--- a/posthog/api/test/test_preflight.py
+++ b/posthog/api/test/test_preflight.py
@@ -1,4 +1,5 @@
from typing import cast
+from unittest.mock import patch
import pytest
from constance.test import override_config
@@ -23,7 +24,7 @@ class TestPreflight(APIBaseTest):
For security purposes, the information contained in an unauthenticated preflight request is minimal.
"""
self.client.logout()
- with self.settings(MULTI_TENANCY=False):
+ with self.settings(MULTI_TENANCY=False, OBJECT_STORAGE_ENABLED=False):
response = self.client.get("/_preflight/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@@ -44,12 +45,15 @@ class TestPreflight(APIBaseTest):
"available_social_auth_providers": {"google-oauth2": False, "github": False, "gitlab": False,},
"can_create_org": False,
"email_service_available": False,
+ "object_storage": False,
},
)
def test_preflight_request(self):
with self.settings(
- MULTI_TENANCY=False, INSTANCE_PREFERENCES=self.instance_preferences(debug_queries=True),
+ MULTI_TENANCY=False,
+ INSTANCE_PREFERENCES=self.instance_preferences(debug_queries=True),
+ OBJECT_STORAGE_ENABLED=False,
):
response = self.client.get("/_preflight/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@@ -80,6 +84,50 @@ class TestPreflight(APIBaseTest):
"site_url": "http://localhost:8000",
"can_create_org": False,
"instance_preferences": {"debug_queries": True, "disable_paid_fs": False,},
+ "object_storage": False,
+ },
+ )
+ self.assertDictContainsSubset({"Europe/Moscow": 3, "UTC": 0}, available_timezones)
+
+ @patch("posthog.storage.object_storage.s3_client")
+ def test_preflight_request_with_object_storage_available(self, patched_s3_client):
+ patched_s3_client.head_bucket.return_value = True
+
+ with self.settings(
+ MULTI_TENANCY=False,
+ INSTANCE_PREFERENCES=self.instance_preferences(debug_queries=True),
+ OBJECT_STORAGE_ENABLED=True,
+ ):
+ response = self.client.get("/_preflight/")
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ response = response.json()
+ available_timezones = cast(dict, response).pop("available_timezones")
+
+ self.assertEqual(
+ response,
+ {
+ "django": True,
+ "redis": True,
+ "plugins": True,
+ "celery": True,
+ "db": True,
+ "initiated": True,
+ "cloud": False,
+ "demo": False,
+ "clickhouse": True,
+ "kafka": True,
+ "realm": "hosted-clickhouse",
+ "available_social_auth_providers": {"google-oauth2": False, "github": False, "gitlab": False,},
+ "opt_out_capture": False,
+ "posthog_version": VERSION,
+ "email_service_available": False,
+ "is_debug": False,
+ "is_event_property_usage_enabled": True,
+ "licensed_users_available": None,
+ "site_url": "http://localhost:8000",
+ "can_create_org": False,
+ "instance_preferences": {"debug_queries": True, "disable_paid_fs": False,},
+ "object_storage": True,
},
)
self.assertDictContainsSubset({"Europe/Moscow": 3, "UTC": 0}, available_timezones)
@@ -90,7 +138,7 @@ class TestPreflight(APIBaseTest):
self.client.logout() # make sure it works anonymously
- with self.settings(MULTI_TENANCY=True):
+ with self.settings(MULTI_TENANCY=True, OBJECT_STORAGE_ENABLED=False):
response = self.client.get("/_preflight/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@@ -111,12 +159,13 @@ class TestPreflight(APIBaseTest):
"available_social_auth_providers": {"google-oauth2": False, "github": False, "gitlab": False,},
"can_create_org": True,
"email_service_available": True,
+ "object_storage": False,
},
)
@pytest.mark.ee
def test_cloud_preflight_request(self):
- with self.settings(MULTI_TENANCY=True, SITE_URL="https://app.posthog.com"):
+ with self.settings(MULTI_TENANCY=True, SITE_URL="https://app.posthog.com", OBJECT_STORAGE_ENABLED=False):
response = self.client.get("/_preflight/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
response = response.json()
@@ -146,6 +195,7 @@ class TestPreflight(APIBaseTest):
"site_url": "https://app.posthog.com",
"can_create_org": True,
"instance_preferences": {"debug_queries": False, "disable_paid_fs": False,},
+ "object_storage": False,
},
)
self.assertDictContainsSubset({"Europe/Moscow": 3, "UTC": 0}, available_timezones)
@@ -158,6 +208,7 @@ class TestPreflight(APIBaseTest):
SOCIAL_AUTH_GOOGLE_OAUTH2_SECRET="test_secret",
MULTI_TENANCY=True,
INSTANCE_PREFERENCES=self.instance_preferences(disable_paid_fs=True),
+ OBJECT_STORAGE_ENABLED=False,
):
response = self.client.get("/_preflight/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@@ -188,6 +239,7 @@ class TestPreflight(APIBaseTest):
"site_url": "http://localhost:8000",
"can_create_org": True,
"instance_preferences": {"debug_queries": False, "disable_paid_fs": True,},
+ "object_storage": False,
},
)
self.assertDictContainsSubset({"Europe/Moscow": 3, "UTC": 0}, available_timezones)
@@ -196,7 +248,7 @@ class TestPreflight(APIBaseTest):
def test_demo(self):
self.client.logout() # make sure it works anonymously
- with self.settings(DEMO=True):
+ with self.settings(DEMO=True, OBJECT_STORAGE_ENABLED=False):
response = self.client.get("/_preflight/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@@ -217,6 +269,7 @@ class TestPreflight(APIBaseTest):
"available_social_auth_providers": {"google-oauth2": False, "github": False, "gitlab": False,},
"can_create_org": True,
"email_service_available": False,
+ "object_storage": False,
},
)
diff --git a/posthog/settings/__init__.py b/posthog/settings/__init__.py
index bfbd20ba87d..f4b2d00cc08 100644
--- a/posthog/settings/__init__.py
+++ b/posthog/settings/__init__.py
@@ -31,6 +31,7 @@ from posthog.settings.sentry import *
from posthog.settings.shell_plus import *
from posthog.settings.service_requirements import *
from posthog.settings.statsd import *
+from posthog.settings.object_storage import *
from posthog.settings.web import *
diff --git a/posthog/settings/object_storage.py b/posthog/settings/object_storage.py
new file mode 100644
index 00000000000..4ae6e60eb51
--- /dev/null
+++ b/posthog/settings/object_storage.py
@@ -0,0 +1,18 @@
+import os
+
+from posthog.settings import get_from_env
+from posthog.settings.base_variables import DEBUG, TEST
+from posthog.utils import str_to_bool
+
+if TEST or DEBUG:
+ OBJECT_STORAGE_ENDPOINT = os.getenv("OBJECT_STORAGE_ENDPOINT", "http://localhost:19000")
+ OBJECT_STORAGE_ACCESS_KEY_ID = os.getenv("OBJECT_STORAGE_ACCESS_KEY_ID", "object_storage_root_user")
+ OBJECT_STORAGE_SECRET_ACCESS_KEY = os.getenv("OBJECT_STORAGE_SECRET_ACCESS_KEY", "object_storage_root_password")
+else:
+ OBJECT_STORAGE_ENDPOINT = os.getenv("OBJECT_STORAGE_ENDPOINT", "")
+ OBJECT_STORAGE_ACCESS_KEY_ID = os.getenv("OBJECT_STORAGE_ACCESS_KEY_ID", "")
+ OBJECT_STORAGE_SECRET_ACCESS_KEY = os.getenv("OBJECT_STORAGE_SECRET_ACCESS_KEY", "")
+
+OBJECT_STORAGE_ENABLED = get_from_env("OBJECT_STORAGE_ENABLED", True if DEBUG else False, type_cast=str_to_bool)
+OBJECT_STORAGE_BUCKET = os.getenv("OBJECT_STORAGE_BUCKET", "posthog")
+OBJECT_STORAGE_SESSION_RECORDING_FOLDER = os.getenv("OBJECT_STORAGE_SESSION_RECORDING_FOLDER", "session_recordings")
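These settings are only ever consumed behind the OBJECT_STORAGE_ENABLED flag (see posthog/utils.py and posthog/api/instance_status.py in this diff). A minimal sketch of that gating pattern, assuming Django settings are already configured:

    from django.conf import settings

    # Gate every object storage interaction on the feature flag so deployments
    # without MinIO/S3 configured are unaffected.
    def object_storage_status() -> str:
        if not settings.OBJECT_STORAGE_ENABLED:
            return "disabled"
        from posthog.storage import object_storage  # imported lazily, as in posthog/utils.py

        return "healthy" if object_storage.health_check() else "unhealthy"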
diff --git a/posthog/storage/object_storage.py b/posthog/storage/object_storage.py
new file mode 100644
index 00000000000..d08de2269b0
--- /dev/null
+++ b/posthog/storage/object_storage.py
@@ -0,0 +1,40 @@
+import structlog
+from boto3 import client
+from botocore.client import Config
+
+logger = structlog.get_logger(__name__)
+
+from posthog.settings import (
+ OBJECT_STORAGE_ACCESS_KEY_ID,
+ OBJECT_STORAGE_BUCKET,
+ OBJECT_STORAGE_ENDPOINT,
+ OBJECT_STORAGE_SECRET_ACCESS_KEY,
+)
+
+s3_client = None
+
+
+# boto does some magic and gets confused if this is type-hinted as BaseClient
+# noinspection PyMissingTypeHints
+def storage_client():
+ global s3_client
+ if not s3_client:
+ s3_client = client(
+ "s3",
+ endpoint_url=OBJECT_STORAGE_ENDPOINT,
+ aws_access_key_id=OBJECT_STORAGE_ACCESS_KEY_ID,
+ aws_secret_access_key=OBJECT_STORAGE_SECRET_ACCESS_KEY,
+ config=Config(signature_version="s3v4"),
+ region_name="us-east-1",
+ )
+ return s3_client
+
+
+def health_check() -> bool:
+ # noinspection PyBroadException
+ try:
+ response = storage_client().head_bucket(Bucket=OBJECT_STORAGE_BUCKET)
+ return bool(response)
+ except Exception as e:
+ logger.warn("object_storage.health_check_failed", error=e)
+ return False
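Beyond health_check(), the client returned by storage_client() is an ordinary boto3 S3 client, so writes into the configured bucket use the usual boto3 calls. A minimal usage sketch, assuming the dev MinIO service from docker-compose.dev.yml is running; the object key is illustrative only:

    from posthog.settings import OBJECT_STORAGE_BUCKET, OBJECT_STORAGE_SESSION_RECORDING_FOLDER
    from posthog.storage import object_storage

    if object_storage.health_check():
        client = object_storage.storage_client()
        # Illustrative write; this key is a made-up example, not a path the app uses.
        client.put_object(
            Bucket=OBJECT_STORAGE_BUCKET,
            Key=f"{OBJECT_STORAGE_SESSION_RECORDING_FOLDER}/example.txt",
            Body=b"hello from object storage",
        )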
diff --git a/posthog/utils.py b/posthog/utils.py
index 90acf0df84a..13d102b473b 100644
--- a/posthog/utils.py
+++ b/posthog/utils.py
@@ -581,6 +581,18 @@ def get_plugin_server_job_queues() -> Optional[List[str]]:
return None
+def is_object_storage_available() -> bool:
+ from posthog.storage import object_storage
+
+ try:
+ if settings.OBJECT_STORAGE_ENABLED:
+ return object_storage.health_check()
+ else:
+ return False
+ except BaseException:
+ return False
+
+
def get_redis_info() -> Mapping[str, Any]:
return get_client().info()
diff --git a/posthog/views.py b/posthog/views.py
index 9fab160b1cb..c103d4a11d2 100644
--- a/posthog/views.py
+++ b/posthog/views.py
@@ -22,6 +22,7 @@ from posthog.utils import (
get_instance_available_sso_providers,
get_instance_realm,
is_celery_alive,
+ is_object_storage_available,
is_plugin_server_alive,
is_postgres_alive,
is_redis_alive,
@@ -105,6 +106,7 @@ def preflight_check(request: HttpRequest) -> JsonResponse:
"available_social_auth_providers": get_instance_available_sso_providers(),
"can_create_org": get_can_create_org(request.user),
"email_service_available": is_email_available(with_absolute_urls=True),
+ "object_storage": is_object_storage_available(),
}
if request.user.is_authenticated:
diff --git a/requirements.in b/requirements.in
index d9d16daf6a7..7f99a794e85 100644
--- a/requirements.in
+++ b/requirements.in
@@ -8,6 +8,7 @@ django-rest-hooks@ git+https://github.com/zapier/django-rest-hooks.git@v1.6.0
amqp==2.5.2
asgiref==3.3.2
aioch==0.0.2
+boto3==1.21.29
celery==4.4.2
celery-redbeat==2.0.0
clickhouse-driver==0.2.1
diff --git a/requirements.txt b/requirements.txt
index b9707fc49a8..75b042e9626 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
#
-# This file is autogenerated by pip-compile
+# This file is autogenerated by pip-compile with python 3.8
# To update, run:
#
# pip-compile requirements.in
@@ -20,12 +20,18 @@ backoff==1.6.0
# via posthoganalytics
billiard==3.6.3.0
# via celery
-celery-redbeat==2.0.0
+boto3==1.21.29
# via -r requirements.in
+botocore==1.24.46
+ # via
+ # boto3
+ # s3transfer
celery==4.4.2
# via
# -r requirements.in
# celery-redbeat
+celery-redbeat==2.0.0
+ # via -r requirements.in
certifi==2019.11.28
# via
# requests
@@ -49,7 +55,7 @@ cssselect==1.1.0
# via toronado
cssutils==1.0.2
# via toronado
-dataclasses_json==0.5.7
+dataclasses-json==0.5.7
# via -r requirements.in
defusedxml==0.6.0
# via
@@ -58,6 +64,20 @@ defusedxml==0.6.0
# social-auth-core
dj-database-url==0.5.0
# via -r requirements.in
+django==3.2.12
+ # via
+ # -r requirements.in
+ # django-axes
+ # django-cors-headers
+ # django-deprecate-fields
+ # django-extensions
+ # django-filter
+ # django-picklefield
+ # django-redis
+ # django-rest-hooks
+ # django-structlog
+ # djangorestframework
+ # drf-spectacular
django-axes==5.9.0
# via -r requirements.in
django-constance==2.8.0
@@ -80,28 +100,12 @@ django-picklefield==3.0.1
# via -r requirements.in
django-redis==4.12.1
# via -r requirements.in
-git+https://github.com/zapier/django-rest-hooks.git@v1.6.0
+django-rest-hooks @ git+https://github.com/zapier/django-rest-hooks.git@v1.6.0
# via -r requirements.in
django-statsd==2.5.2
# via -r requirements.in
django-structlog==2.1.3
# via -r requirements.in
-django==3.2.12
- # via
- # -r requirements.in
- # django-axes
- # django-cors-headers
- # django-deprecate-fields
- # django-extensions
- # django-filter
- # django-picklefield
- # django-redis
- # django-rest-hooks
- # django-structlog
- # djangorestframework
- # drf-spectacular
-djangorestframework-csv==2.1.0
- # via -r requirements.in
djangorestframework==3.12.2
# via
# -r requirements.in
@@ -109,6 +113,8 @@ djangorestframework==3.12.2
# drf-exceptions-hog
# drf-extensions
# drf-spectacular
+djangorestframework-csv==2.1.0
+ # via -r requirements.in
dnspython==2.2.1
# via -r requirements.in
drf-exceptions-hog==0.2.0
@@ -129,7 +135,9 @@ idna==2.8
# requests
importlib-metadata==1.6.0
# via -r requirements.in
-git+https://github.com/PostHog/infi.clickhouse_orm@de88ffaecda8c36c693cc51909a3598cb9725fde
+importlib-resources==5.7.1
+ # via jsonschema
+infi-clickhouse-orm @ git+https://github.com/PostHog/infi.clickhouse_orm@de88ffaecda8c36c693cc51909a3598cb9725fde
# via -r requirements.in
inflection==0.5.1
# via drf-spectacular
@@ -137,6 +145,10 @@ iso8601==0.1.12
# via infi-clickhouse-orm
isodate==0.6.1
# via python3-saml
+jmespath==1.0.0
+ # via
+ # boto3
+ # botocore
jsonschema==4.4.0
# via drf-spectacular
kafka-helper==0.2
@@ -154,12 +166,12 @@ lxml==4.7.1
# xmlsec
lzstring==1.0.4
# via -r requirements.in
-marshmallow-enum==1.5.1
- # via dataclasses-json
marshmallow==3.15.0
# via
# dataclasses-json
# marshmallow-enum
+marshmallow-enum==1.5.1
+ # via dataclasses-json
mimesis==5.2.1
# via -r requirements.in
monotonic==1.5
@@ -199,6 +211,7 @@ pyrsistent==0.18.1
python-dateutil==2.8.1
# via
# -r requirements.in
+ # botocore
# celery-redbeat
# posthoganalytics
python-statsd==2.1.0
@@ -222,10 +235,6 @@ redis==3.4.1
# -r requirements.in
# celery-redbeat
# django-redis
-requests-oauthlib==1.3.0
- # via
- # -r requirements.in
- # social-auth-core
requests==2.25.1
# via
# -r requirements.in
@@ -234,7 +243,13 @@ requests==2.25.1
# posthoganalytics
# requests-oauthlib
# social-auth-core
-semantic_version==2.8.5
+requests-oauthlib==1.3.0
+ # via
+ # -r requirements.in
+ # social-auth-core
+s3transfer==0.5.2
+ # via boto3
+semantic-version==2.8.5
# via -r requirements.in
sentry-sdk==1.3.1
# via -r requirements.in
@@ -275,6 +290,7 @@ uritemplate==4.1.1
# via drf-spectacular
urllib3==1.26.5
# via
+ # botocore
# requests
# sentry-sdk
vine==1.3.0
@@ -286,7 +302,9 @@ whitenoise==5.2.0
xmlsec==1.3.12
# via python3-saml
zipp==3.1.0
- # via importlib-metadata
+ # via
+ # importlib-metadata
+ # importlib-resources
# The following packages are considered to be unsafe in a requirements file:
# setuptools