
feat(object storage): add unused object storage (#9846)

* feat(object_storage): add unused object storage with health checks

* only prompt debug users if object storage not available at preflight

* safe plugin server health check for unused object storage

* explicit object storage settings

* explicit object storage settings

* explicit object storage settings

* downgrade pip tools

* without spaces?

* like this?

* without updating pip?

* remove object_storage from dev volumes

* named volume on hobby

* lazily init object storage

* simplify conditional check

* reproduced error locally

* reproduced error locally

* object_storage_endpoint not host and port

* log more when checking kafka and clickhouse

* don't filter docker output

* add kafka to hosts before starting stack?

* silly cloud tests (not my brain)
Committed by Paul D'Ambra on 2022-05-20 09:56:50 +01:00 (via GitHub)
parent 3c2f9aa9b5 · commit 49e3ceef5c
32 changed files with 464 additions and 60 deletions
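
The feature is surfaced to clients through the preflight endpoint as a new object_storage flag (see the frontend and Django changes below). A quick way to read it against a local instance — a sketch, assuming PostHog is running on localhost:8000 and the third-party requests package is installed:

    # Sketch: read the new preflight flag (assumes a local PostHog on port 8000
    # and the 'requests' package installed).
    import requests

    preflight = requests.get("http://localhost:8000/_preflight/").json()
    print("object_storage available:", preflight["object_storage"])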

View File

@ -37,7 +37,7 @@ runs:
run: |
export CLICKHOUSE_SERVER_IMAGE=${{ inputs.clickhouse-server-image }}
docker-compose -f docker-compose.dev.yml down
docker-compose -f docker-compose.dev.yml up -d db clickhouse zookeeper kafka redis &
docker-compose -f docker-compose.dev.yml up -d db clickhouse zookeeper kafka redis object_storage &
- name: Set up Python
uses: actions/setup-python@v2

View File

@ -24,6 +24,10 @@ env:
CLICKHOUSE_VERIFY: 'False'
TEST: 1
CLICKHOUSE_SERVER_IMAGE_VERSION: ${{ github.event.inputs.clickhouseServerVersion || '' }}
OBJECT_STORAGE_ENABLED: 'True'
OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000'
OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user'
OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password'
jobs:
# Job to decide if we should run backend ci
@ -293,6 +297,10 @@ jobs:
cp -r messaging deploy/
cat multi_tenancy_settings.py > deploy/posthog/settings/cloud.py
cat requirements.txt >> deploy/requirements.txt
- name: Add kafka host to /etc/hosts for kafka connectivity
run: sudo echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts
- name: Start stack with Docker Compose
run: |
docker-compose -f deploy/docker-compose.dev.yml down
@ -335,6 +343,11 @@ jobs:
with:
path: 'deploy/'
# just while object_storage isn't in master
- name: Start stack with object_storage
run: |
docker-compose -f deploy/docker-compose.dev.yml up -d object_storage
- name: Install requirements.txt dependencies with pip at current branch
run: |
cd deploy
@ -357,9 +370,6 @@ jobs:
python manage.py makemigrations --check --dry-run
python manage.py migrate
- name: Add kafka host to /etc/hosts for kafka connectivity
run: sudo echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts
- name: Set up needed files
run: |
cd deploy

View File

@ -11,6 +11,14 @@ on:
- 'docker*.yml'
- '*Dockerfile'
env:
OBJECT_STORAGE_ENABLED: true
OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000'
OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user'
OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password'
OBJECT_STORAGE_SESSION_RECORDING_FOLDER: 'session_recordings'
OBJECT_STORAGE_BUCKET: 'posthog'
jobs:
code-quality:
name: Code quality
@ -73,8 +81,8 @@ jobs:
sudo bash -c 'echo "127.0.0.1 kafka zookeeper" >> /etc/hosts'
ping -c 1 kafka
ping -c 1 zookeeper
- name: Start Kafka, ClickHouse, Zookeeper
run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse
- name: Start Kafka, ClickHouse, Zookeeper, Object Storage
run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse object_storage
- name: Set up Python 3.8.12
uses: actions/setup-python@v2
@ -154,8 +162,8 @@ jobs:
sudo bash -c 'echo "127.0.0.1 kafka zookeeper" >> /etc/hosts'
ping -c 1 kafka
ping -c 1 zookeeper
- name: Start Kafka, ClickHouse, Zookeeper
run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse
- name: Start Kafka, ClickHouse, Zookeeper, Object Storage
run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse object_storage
- name: Set up Python 3.8.12
uses: actions/setup-python@v2
@ -236,8 +244,8 @@ jobs:
sudo bash -c 'echo "127.0.0.1 kafka zookeeper" >> /etc/hosts'
ping -c 1 kafka
ping -c 1 zookeeper
- name: Start Kafka, ClickHouse, Zookeeper
run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse
- name: Start Kafka, ClickHouse, Zookeeper, Object Storage
run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse object_storage
- name: Set up Python 3.8.12
uses: actions/setup-python@v2
@ -318,8 +326,8 @@ jobs:
sudo bash -c 'echo "127.0.0.1 kafka zookeeper" >> /etc/hosts'
ping -c 1 kafka
ping -c 1 zookeeper
- name: Start Kafka, ClickHouse, Zookeeper
run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse
- name: Start Kafka, ClickHouse, Zookeeper, Object Storage
run: docker-compose -f docker-compose.dev.yml up -d zookeeper kafka clickhouse object_storage
- name: Set up Python 3.8.12
uses: actions/setup-python@v2

View File

@ -18,6 +18,10 @@ env:
SITE_URL: 'test.posthog.net' # used to test password resets
NO_RESTART_LOOP: 1
CLICKHOUSE_SECURE: 0
OBJECT_STORAGE_ENABLED: 1
OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000'
OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user'
OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password'
jobs:
# Job that lists and chunks spec file names and caches node modules
@ -69,7 +73,7 @@ jobs:
- name: Start stack with Docker Compose
run: |
docker-compose -f docker-compose.dev.yml down
docker-compose -f docker-compose.dev.yml up -d db clickhouse zookeeper kafka redis &
docker-compose -f docker-compose.dev.yml up -d db clickhouse zookeeper kafka redis object_storage &
- name: Add kafka host to /etc/hosts for kafka connectivity
run: sudo echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts

View File

@ -7,12 +7,13 @@
</scripts>
<node-interpreter value="project" />
<envs>
<env name="WORKER_CONCURRENCY" value="2" />
<env name="CLICKHOUSE_SECURE" value="False" />
<env name="DATABASE_URL" value="postgres://posthog:posthog@localhost:5432/posthog" />
<env name="KAFKA_ENABLED" value="true" />
<env name="CLICKHOUSE_SECURE" value="False" />
<env name="KAFKA_HOSTS" value="localhost:9092" />
<env name="WORKER_CONCURRENCY" value="2" />
<env name="OBJECT_STORAGE_ENABLED" value="True" />
</envs>
<method v="2" />
</configuration>
</component>
</component>

View File

@ -4,10 +4,10 @@ set -e
# Check Kafka
while true; do
nc -z localhost 9092 && break || echo "Checking Kafka status..." && sleep 1
nc -z localhost 9092 && break || echo 'Checking Kafka status...' && sleep 1
done
# Check ClickHouse
while true; do
curl -s -o /dev/null -I 'http://localhost:8123/' && break || echo "Checking ClickHouse status..." && sleep 1
curl -s -o /dev/null -I 'http://localhost:8123/' && break || echo 'Checking ClickHouse status...' && sleep 1
done

View File

@ -78,6 +78,7 @@ services:
- redis
- clickhouse
- kafka
- object_storage
web:
<<: *worker
command: '${CH_WEB_SCRIPT:-./ee/bin/docker-ch-dev-web}'
@ -103,3 +104,17 @@ services:
- redis
- clickhouse
- kafka
- object_storage
object_storage:
image: minio/minio
ports:
- '19000:19000'
- '19001:19001'
volumes:
- ./object_storage:/data
environment:
MINIO_ROOT_USER: object_storage_root_user
MINIO_ROOT_PASSWORD: object_storage_root_password
entrypoint: sh
command: -c 'mkdir -p /data/posthog && minio server --address ":19000" --console-address ":19001" /data' # create the 'posthog' bucket before starting the service
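
The MinIO entrypoint above creates the posthog bucket before the server starts, so a fresh stack is immediately usable. A host-side sanity check — a sketch assuming boto3 is installed and the object_storage service is up — mirrors the health checks added later in this commit:

    # Sketch: confirm the dev MinIO container and its pre-created bucket from the host
    # (assumes boto3 is installed and docker-compose has started object_storage).
    import boto3
    from botocore.client import Config

    s3 = boto3.client(
        "s3",
        endpoint_url="http://localhost:19000",  # host port mapped in the compose file
        aws_access_key_id="object_storage_root_user",
        aws_secret_access_key="object_storage_root_password",
        config=Config(signature_version="s3v4"),
        region_name="us-east-1",
    )
    s3.head_bucket(Bucket="posthog")  # raises botocore.exceptions.ClientError if unreachable
    print("MinIO is up and the 'posthog' bucket exists")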

View File

@ -75,6 +75,7 @@ services:
- redis
- clickhouse
- kafka
- object_storage
web:
<<: *worker
command: '${CH_WEB_SCRIPT:-./ee/bin/docker-ch-dev-web}'
@ -100,3 +101,15 @@ services:
- redis
- clickhouse
- kafka
- object_storage
object_storage:
image: minio/minio
ports:
- '19000:19000'
- '19001:19001'
environment:
MINIO_ROOT_USER: object_storage_root_user
MINIO_ROOT_PASSWORD: object_storage_root_password
entrypoint: sh
command: -c 'mkdir -p /data/posthog && chmod u+rxw /data/posthog && minio server --address ":19000" --console-address ":19001" /data' # create the 'posthog' bucket before starting the service

View File

@ -84,6 +84,7 @@ services:
- redis
- clickhouse
- kafka
- object_storage
web:
<<: *worker
command: /compose/start
@ -117,6 +118,21 @@ services:
- redis
- clickhouse
- kafka
- object_storage
object_storage:
image: minio/minio
ports:
- '19000:19000'
- '19001:19001'
volumes:
- object_storage:/data
environment:
MINIO_ROOT_USER: object_storage_root_user
MINIO_ROOT_PASSWORD: object_storage_root_password
entrypoint: sh
command: -c 'mkdir -p /data/posthog && minio server --address ":19000" --console-address ":19001" /data' # create the 'posthog' bucket before starting the service
asyncmigrationscheck:
<<: *worker
command: python manage.py run_async_migrations --check
@ -127,3 +143,4 @@ volumes:
zookeeper-data:
zookeeper-datalog:
zookeeper-logs:
object_storage:

View File

@ -23,3 +23,4 @@ services:
- redis
- clickhouse
- kafka
- object_storage

View File

@ -54,6 +54,7 @@ services:
- redis
- clickhouse
- kafka
- object_storage
environment:
DATABASE_URL: postgres://posthog:posthog@db:5432/posthog
REDIS_URL: redis://redis:6379/
@ -70,6 +71,20 @@ services:
ports:
- 8000:8000
- 80:8000
object_storage:
image: minio/minio
ports:
- '19000:19000'
- '19001:19001'
volumes:
- ./object_storage:/data
environment:
MINIO_ROOT_USER: object_storage_root_user
MINIO_ROOT_PASSWORD: object_storage_root_password
entrypoint: sh
command: -c 'mkdir -p /data/posthog && minio server --address ":19000" --console-address ":19001" /data' # create the 'posthog' bucket before starting the service
volumes:
postgres-data:
version: '3'

View File

@ -9,6 +9,7 @@
"initiated": true,
"cloud": false,
"demo": false,
"object_storage": true,
"realm": "hosted-clickhouse",
"available_social_auth_providers": {
"github": false,

View File

@ -28,6 +28,7 @@
"clickhouse": false,
"kafka": false,
"realm": "hosted",
"object_storage": true,
"available_social_auth_providers": {
"github": false,
"gitlab": false,

View File

@ -86,6 +86,11 @@ describe('preflightLogic', () => {
status: 'warning',
caption: 'Set up before ingesting real user data',
},
{
id: 'object_storage',
name: 'Object Storage',
status: 'validated',
},
],
})
})
@ -144,6 +149,11 @@ describe('preflightLogic', () => {
status: 'optional',
caption: 'Not required for experimentation mode',
},
{
id: 'object_storage',
name: 'Object Storage',
status: 'validated',
},
],
})
})
@ -156,7 +166,7 @@ describe('preflightLogic', () => {
.toDispatchActions(['loadPreflightSuccess'])
.toMatchValues({
checksSummary: {
summaryString: '6 successful, 1 warning, 2 errors',
summaryString: '7 successful, 1 warning, 2 errors',
summaryStatus: 'error',
},
})
@ -169,7 +179,7 @@ describe('preflightLogic', () => {
.toDispatchActions(['loadPreflightSuccess'])
.toMatchValues({
checksSummary: {
summaryString: '6 successful, 1 warning, 1 error, 1 optional',
summaryString: '7 successful, 1 warning, 1 error, 1 optional',
summaryStatus: 'error',
},
})

View File

@ -69,7 +69,7 @@ export const preflightLogic = kea<
checks: [
(s) => [s.preflight, s.preflightMode],
(preflight, preflightMode) => {
return [
const preflightItems = [
{
id: 'database',
name: 'Application database · Postgres',
@ -132,7 +132,21 @@ export const preflightLogic = kea<
? 'Not required for experimentation mode'
: 'Set up before ingesting real user data',
},
] as PreflightItemInterface[]
]
if (preflight?.object_storage || preflight?.is_debug) {
/** __for now__, only prompt debug users if object storage is unhealthy **/
preflightItems.push({
id: 'object_storage',
name: 'Object Storage',
status: preflight?.object_storage ? 'validated' : 'warning',
caption: preflight?.object_storage
? undefined
: 'Some features will not work without object storage',
})
}
return preflightItems as PreflightItemInterface[]
},
],
checksSummary: [

View File

@ -1391,6 +1391,7 @@ export interface PreflightStatus {
licensed_users_available?: number | null
site_url?: string
instance_preferences?: InstancePreferencesInterface
object_storage: boolean
}
export enum ItemMode { // todo: consolidate this and dashboardmode

View File

@ -52,9 +52,9 @@
"arm64:ch-dev:start": "concurrently -n DOCKER,ESBUILD,TYPEGEN -c red,blue,green \"docker-compose -f docker-compose.arm64.yml pull && CH_WEB_SCRIPT=./ee/bin/docker-ch-dev-backend docker-compose -f docker-compose.arm64.yml up\" \"yarn run start-http --host 0.0.0.0\" \"yarn run typegen:watch\"",
"arm64:ch-dev:clear": "docker compose -f docker-compose.arm64.yml stop && docker compose -f docker-compose.arm64.yml rm -v && docker compose -f docker-compose.arm64.yml down",
"arm64:services": "yarn arm64:services:stop && yarn arm64:services:clean && yarn arm64:services:start",
"arm64:services:start": "docker-compose -f docker-compose.arm64.yml up zookeeper kafka clickhouse",
"arm64:services:start": "docker-compose -f docker-compose.arm64.yml up zookeeper kafka clickhouse object_storage",
"arm64:services:stop": "docker-compose -f docker-compose.arm64.yml down",
"arm64:services:clean": "docker-compose -f docker-compose.arm64.yml rm -v zookeeper kafka clickhouse",
"arm64:services:clean": "docker-compose -f docker-compose.arm64.yml rm -v zookeeper kafka clickhouse object_storage",
"dev:migrate:postgres": "export DEBUG=1 && source env/bin/activate && python manage.py migrate",
"dev:migrate:clickhouse": "export DEBUG=1 && source env/bin/activate && python manage.py migrate_clickhouse",
"prepare": "husky install"

View File

@ -32,9 +32,9 @@
"prepublishOnly": "yarn build",
"setup:dev:clickhouse": "cd .. && DEBUG=1 python manage.py migrate_clickhouse",
"setup:test": "cd .. && TEST=1 python manage.py setup_test_environment",
"services:start": "cd .. && docker-compose -f docker-compose.dev.yml up zookeeper kafka clickhouse",
"services:start": "cd .. && docker-compose -f docker-compose.dev.yml up zookeeper kafka clickhouse object_storage",
"services:stop": "cd .. && docker-compose -f docker-compose.dev.yml down",
"services:clean": "cd .. && docker-compose -f docker-compose.dev.yml rm -v zookeeper kafka clickhouse",
"services:clean": "cd .. && docker-compose -f docker-compose.dev.yml rm -v zookeeper kafka clickhouse object_storage",
"services": "yarn services:stop && yarn services:clean && yarn services:start"
},
"bin": {

View File

@ -95,6 +95,12 @@ export function getDefaultConfig(): PluginsServerConfig {
PERSON_INFO_CACHE_TTL: 5 * 60, // 5 min
KAFKA_HEALTHCHECK_SECONDS: 20,
HISTORICAL_EXPORTS_ENABLED: true,
OBJECT_STORAGE_ENABLED: false,
OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000',
OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user',
OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password',
OBJECT_STORAGE_SESSION_RECORDING_FOLDER: 'session_recordings',
OBJECT_STORAGE_BUCKET: 'posthog',
}
}
@ -169,6 +175,14 @@ export function getConfigHelp(): Record<keyof PluginsServerConfig, string> {
CLICKHOUSE_DISABLE_EXTERNAL_SCHEMAS_TEAMS:
'(advanced) a comma separated list of teams to disable clickhouse external schemas for',
CLICKHOUSE_JSON_EVENTS_KAFKA_TOPIC: '(advanced) topic to send events to for clickhouse ingestion',
OBJECT_STORAGE_ENABLED:
'Disables or enables the use of object storage. It will become mandatory to use object storage',
OBJECT_STORAGE_ENDPOINT: 'minio endpoint',
OBJECT_STORAGE_ACCESS_KEY_ID: 'access key for minio',
OBJECT_STORAGE_SECRET_ACCESS_KEY: 'secret key for minio',
OBJECT_STORAGE_SESSION_RECORDING_FOLDER:
'the top level folder for storing session recordings inside the storage bucket',
OBJECT_STORAGE_BUCKET: 'the object storage bucket name',
}
}

View File

@ -0,0 +1,59 @@
import { PluginsServerConfig } from '../../types'
import { status } from '../../utils/status'
const aws = require('aws-sdk')
let S3: typeof aws.S3 | null = null
export interface ObjectStorage {
healthCheck: () => Promise<boolean>
}
export const connectObjectStorage = (serverConfig: Partial<PluginsServerConfig>): ObjectStorage => {
let storage = {
healthCheck: async () => {
return Promise.resolve(false)
},
}
try {
const {
OBJECT_STORAGE_ENDPOINT,
OBJECT_STORAGE_ACCESS_KEY_ID,
OBJECT_STORAGE_SECRET_ACCESS_KEY,
OBJECT_STORAGE_ENABLED,
OBJECT_STORAGE_BUCKET,
} = serverConfig
if (OBJECT_STORAGE_ENABLED && !S3) {
S3 = new aws.S3({
endpoint: OBJECT_STORAGE_ENDPOINT,
accessKeyId: OBJECT_STORAGE_ACCESS_KEY_ID,
secretAccessKey: OBJECT_STORAGE_SECRET_ACCESS_KEY,
s3ForcePathStyle: true, // needed with minio?
signatureVersion: 'v4',
})
}
storage = {
healthCheck: async () => {
if (!OBJECT_STORAGE_BUCKET) {
status.error('😢', 'No object storage bucket configured')
return false
}
try {
await S3.headBucket({
Bucket: OBJECT_STORAGE_BUCKET,
}).promise()
return true
} catch (error) {
return false
}
},
}
} catch (e) {
status.warn('😢', `could not initialise storage: ${e}`)
}
return storage
}

View File

@ -17,6 +17,7 @@ import { Job } from 'node-schedule'
import { Pool } from 'pg'
import { VM } from 'vm2'
import { ObjectStorage } from './main/services/object_storage'
import { DB } from './utils/db/db'
import { KafkaProducerWrapper } from './utils/db/kafka-producer-wrapper'
import { InternalMetrics } from './utils/internal-metrics'
@ -142,6 +143,12 @@ export interface PluginsServerConfig extends Record<string, any> {
PERSON_INFO_CACHE_TTL: number
KAFKA_HEALTHCHECK_SECONDS: number
HISTORICAL_EXPORTS_ENABLED: boolean
OBJECT_STORAGE_ENABLED: boolean
OBJECT_STORAGE_ENDPOINT: string
OBJECT_STORAGE_ACCESS_KEY_ID: string
OBJECT_STORAGE_SECRET_ACCESS_KEY: string
OBJECT_STORAGE_SESSION_RECORDING_FOLDER: string
OBJECT_STORAGE_BUCKET: string
}
export interface Hub extends PluginsServerConfig {
@ -155,6 +162,7 @@ export interface Hub extends PluginsServerConfig {
clickhouse?: ClickHouse
kafka?: Kafka
kafkaProducer?: KafkaProducerWrapper
objectStorage: ObjectStorage
// metrics
statsd?: StatsD
internalMetrics?: InternalMetrics

View File

@ -12,6 +12,7 @@ import { ConnectionOptions } from 'tls'
import { defaultConfig } from '../../config/config'
import { JobQueueManager } from '../../main/job-queues/job-queue-manager'
import { connectObjectStorage, ObjectStorage } from '../../main/services/object_storage'
import { Hub, KafkaSecurityProtocol, PluginId, PluginServerCapabilities, PluginsServerConfig } from '../../types'
import { ActionManager } from '../../worker/ingestion/action-manager'
import { ActionMatcher } from '../../worker/ingestion/action-matcher'
@ -205,6 +206,18 @@ export async function createHub(
)
status.info('👍', `Redis`)
status.info('🤔', `Storage`)
const objectStorage: ObjectStorage = connectObjectStorage(serverConfig)
try {
if (serverConfig.OBJECT_STORAGE_ENABLED && (await objectStorage.healthCheck())) {
status.info('👍', `storage 🪣`)
} else {
status.info('🪣', `storage not in use`)
}
} catch (e) {
status.warn('🪣', `storage failed healthcheck: ${e}`)
}
const db = new DB(
postgres,
redisPool,

View File

@ -1,5 +1,6 @@
from typing import Any, Dict, List, Union
from django.conf import settings
from django.db import connection
from rest_framework import viewsets
from rest_framework.decorators import action
@ -11,6 +12,7 @@ from posthog.async_migrations.status import async_migrations_ok
from posthog.gitsha import GIT_SHA
from posthog.internal_metrics.team import get_internal_metrics_dashboards
from posthog.permissions import OrganizationAdminAnyPermissions, SingleTenancyOrAdmin
from posthog.storage import object_storage
from posthog.utils import (
dict_from_cursor_fetchall,
get_helm_info_env,
@ -129,6 +131,14 @@ class InstanceStatusViewSet(viewsets.ViewSet):
{"metric": "Redis metrics", "value": f"Redis connected but then failed to return metrics: {e}"}
)
metrics.append(
{"key": "object_storage", "metric": "Object Storage enabled", "value": settings.OBJECT_STORAGE_ENABLED}
)
if settings.OBJECT_STORAGE_ENABLED:
metrics.append(
{"key": "object_storage", "metric": "Object Storage healthy", "value": object_storage.health_check()}
)
return Response({"results": {"overview": metrics, "internal_metrics": get_internal_metrics_dashboards()}})
# Used to capture internal metrics shown on dashboards

View File

@ -22,3 +22,47 @@ class TestInstanceStatus(APIBaseTest):
"/api/instance_status/capture", {"method": "timing", "metric": "bar", "value": 15.2, "tags": {"team_id": 1}}
)
timing_mock.assert_called_with("bar", 15.2, {"team_id": 1})
def test_object_storage_when_disabled(self):
with self.settings(OBJECT_STORAGE_ENABLED=False,):
response = self.client.get("/api/instance_status")
json = response.json()
object_storage_metrics = [o for o in json["results"]["overview"] if o.get("key", None) == "object_storage"]
self.assertEqual(
object_storage_metrics, [{"key": "object_storage", "metric": "Object Storage enabled", "value": False}]
)
@patch("posthog.storage.object_storage.s3_client")
def test_object_storage_when_enabled_but_unhealthy(self, patched_s3_client):
patched_s3_client.head_bucket.return_value = False
with self.settings(OBJECT_STORAGE_ENABLED=True,):
response = self.client.get("/api/instance_status")
json = response.json()
object_storage_metrics = [o for o in json["results"]["overview"] if o.get("key", None) == "object_storage"]
self.assertEqual(
object_storage_metrics,
[
{"key": "object_storage", "metric": "Object Storage enabled", "value": True},
{"key": "object_storage", "metric": "Object Storage healthy", "value": False},
],
)
@patch("posthog.storage.object_storage.s3_client")
def test_object_storage_when_enabled_and_healthy(self, patched_s3_client):
patched_s3_client.head_bucket.return_value = True
with self.settings(OBJECT_STORAGE_ENABLED=True,):
response = self.client.get("/api/instance_status")
json = response.json()
object_storage_metrics = [o for o in json["results"]["overview"] if o.get("key", None) == "object_storage"]
self.assertEqual(
object_storage_metrics,
[
{"key": "object_storage", "metric": "Object Storage enabled", "value": True},
{"key": "object_storage", "metric": "Object Storage healthy", "value": True},
],
)

View File

@ -1,4 +1,5 @@
from typing import cast
from unittest.mock import patch
import pytest
from constance.test import override_config
@ -23,7 +24,7 @@ class TestPreflight(APIBaseTest):
For security purposes, the information contained in an unauthenticated preflight request is minimal.
"""
self.client.logout()
with self.settings(MULTI_TENANCY=False):
with self.settings(MULTI_TENANCY=False, OBJECT_STORAGE_ENABLED=False):
response = self.client.get("/_preflight/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@ -44,12 +45,15 @@ class TestPreflight(APIBaseTest):
"available_social_auth_providers": {"google-oauth2": False, "github": False, "gitlab": False,},
"can_create_org": False,
"email_service_available": False,
"object_storage": False,
},
)
def test_preflight_request(self):
with self.settings(
MULTI_TENANCY=False, INSTANCE_PREFERENCES=self.instance_preferences(debug_queries=True),
MULTI_TENANCY=False,
INSTANCE_PREFERENCES=self.instance_preferences(debug_queries=True),
OBJECT_STORAGE_ENABLED=False,
):
response = self.client.get("/_preflight/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@ -80,6 +84,50 @@ class TestPreflight(APIBaseTest):
"site_url": "http://localhost:8000",
"can_create_org": False,
"instance_preferences": {"debug_queries": True, "disable_paid_fs": False,},
"object_storage": False,
},
)
self.assertDictContainsSubset({"Europe/Moscow": 3, "UTC": 0}, available_timezones)
@patch("posthog.storage.object_storage.s3_client")
def test_preflight_request_with_object_storage_available(self, patched_s3_client):
patched_s3_client.head_bucket.return_value = True
with self.settings(
MULTI_TENANCY=False,
INSTANCE_PREFERENCES=self.instance_preferences(debug_queries=True),
OBJECT_STORAGE_ENABLED=True,
):
response = self.client.get("/_preflight/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
response = response.json()
available_timezones = cast(dict, response).pop("available_timezones")
self.assertEqual(
response,
{
"django": True,
"redis": True,
"plugins": True,
"celery": True,
"db": True,
"initiated": True,
"cloud": False,
"demo": False,
"clickhouse": True,
"kafka": True,
"realm": "hosted-clickhouse",
"available_social_auth_providers": {"google-oauth2": False, "github": False, "gitlab": False,},
"opt_out_capture": False,
"posthog_version": VERSION,
"email_service_available": False,
"is_debug": False,
"is_event_property_usage_enabled": True,
"licensed_users_available": None,
"site_url": "http://localhost:8000",
"can_create_org": False,
"instance_preferences": {"debug_queries": True, "disable_paid_fs": False,},
"object_storage": True,
},
)
self.assertDictContainsSubset({"Europe/Moscow": 3, "UTC": 0}, available_timezones)
@ -90,7 +138,7 @@ class TestPreflight(APIBaseTest):
self.client.logout() # make sure it works anonymously
with self.settings(MULTI_TENANCY=True):
with self.settings(MULTI_TENANCY=True, OBJECT_STORAGE_ENABLED=False):
response = self.client.get("/_preflight/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@ -111,12 +159,13 @@ class TestPreflight(APIBaseTest):
"available_social_auth_providers": {"google-oauth2": False, "github": False, "gitlab": False,},
"can_create_org": True,
"email_service_available": True,
"object_storage": False,
},
)
@pytest.mark.ee
def test_cloud_preflight_request(self):
with self.settings(MULTI_TENANCY=True, SITE_URL="https://app.posthog.com"):
with self.settings(MULTI_TENANCY=True, SITE_URL="https://app.posthog.com", OBJECT_STORAGE_ENABLED=False):
response = self.client.get("/_preflight/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
response = response.json()
@ -146,6 +195,7 @@ class TestPreflight(APIBaseTest):
"site_url": "https://app.posthog.com",
"can_create_org": True,
"instance_preferences": {"debug_queries": False, "disable_paid_fs": False,},
"object_storage": False,
},
)
self.assertDictContainsSubset({"Europe/Moscow": 3, "UTC": 0}, available_timezones)
@ -158,6 +208,7 @@ class TestPreflight(APIBaseTest):
SOCIAL_AUTH_GOOGLE_OAUTH2_SECRET="test_secret",
MULTI_TENANCY=True,
INSTANCE_PREFERENCES=self.instance_preferences(disable_paid_fs=True),
OBJECT_STORAGE_ENABLED=False,
):
response = self.client.get("/_preflight/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@ -188,6 +239,7 @@ class TestPreflight(APIBaseTest):
"site_url": "http://localhost:8000",
"can_create_org": True,
"instance_preferences": {"debug_queries": False, "disable_paid_fs": True,},
"object_storage": False,
},
)
self.assertDictContainsSubset({"Europe/Moscow": 3, "UTC": 0}, available_timezones)
@ -196,7 +248,7 @@ class TestPreflight(APIBaseTest):
def test_demo(self):
self.client.logout() # make sure it works anonymously
with self.settings(DEMO=True):
with self.settings(DEMO=True, OBJECT_STORAGE_ENABLED=False):
response = self.client.get("/_preflight/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@ -217,6 +269,7 @@ class TestPreflight(APIBaseTest):
"available_social_auth_providers": {"google-oauth2": False, "github": False, "gitlab": False,},
"can_create_org": True,
"email_service_available": False,
"object_storage": False,
},
)

View File

@ -31,6 +31,7 @@ from posthog.settings.sentry import *
from posthog.settings.shell_plus import *
from posthog.settings.service_requirements import *
from posthog.settings.statsd import *
from posthog.settings.object_storage import *
from posthog.settings.web import *

View File

@ -0,0 +1,18 @@
import os
from posthog.settings import get_from_env
from posthog.settings.base_variables import DEBUG, TEST
from posthog.utils import str_to_bool
if TEST or DEBUG:
OBJECT_STORAGE_ENDPOINT = os.getenv("OBJECT_STORAGE_ENDPOINT", "http://localhost:19000")
OBJECT_STORAGE_ACCESS_KEY_ID = os.getenv("OBJECT_STORAGE_ACCESS_KEY_ID", "object_storage_root_user")
OBJECT_STORAGE_SECRET_ACCESS_KEY = os.getenv("OBJECT_STORAGE_SECRET_ACCESS_KEY", "object_storage_root_password")
else:
OBJECT_STORAGE_ENDPOINT = os.getenv("OBJECT_STORAGE_ENDPOINT", "")
OBJECT_STORAGE_ACCESS_KEY_ID = os.getenv("OBJECT_STORAGE_ACCESS_KEY_ID", "")
OBJECT_STORAGE_SECRET_ACCESS_KEY = os.getenv("OBJECT_STORAGE_SECRET_ACCESS_KEY", "")
OBJECT_STORAGE_ENABLED = get_from_env("OBJECT_STORAGE_ENABLED", True if DEBUG else False, type_cast=str_to_bool)
OBJECT_STORAGE_BUCKET = os.getenv("OBJECT_STORAGE_BUCKET", "posthog")
OBJECT_STORAGE_SESSION_RECORDING_FOLDER = os.getenv("OBJECT_STORAGE_SESSION_RECORDING_FOLDER", "session_recordings")
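
Note the split defaults: under DEBUG or TEST the settings fall back to the local MinIO credentials, while production defaults are empty strings, so storage stays off unless explicitly configured; OBJECT_STORAGE_ENABLED itself defaults to the DEBUG flag. A minimal sketch of the boolean parsing, assuming posthog.utils.str_to_bool follows the usual truthy-string convention:

    # Sketch: OBJECT_STORAGE_ENABLED is parsed with str_to_bool (assumed truthy-string
    # semantics: values like 'true'/'1'/'yes' map to True, anything else to False).
    from posthog.utils import str_to_bool

    assert str_to_bool("True") is True
    assert str_to_bool("1") is True
    assert str_to_bool("off") is False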

View File

@ -0,0 +1,40 @@
import structlog
from boto3 import client
from botocore.client import Config
logger = structlog.get_logger(__name__)
from posthog.settings import (
OBJECT_STORAGE_ACCESS_KEY_ID,
OBJECT_STORAGE_BUCKET,
OBJECT_STORAGE_ENDPOINT,
OBJECT_STORAGE_SECRET_ACCESS_KEY,
)
s3_client = None
# boto doing some magic and gets confused if this is hinted as BaseClient
# noinspection PyMissingTypeHints
def storage_client():
global s3_client
if not s3_client:
s3_client = client(
"s3",
endpoint_url=OBJECT_STORAGE_ENDPOINT,
aws_access_key_id=OBJECT_STORAGE_ACCESS_KEY_ID,
aws_secret_access_key=OBJECT_STORAGE_SECRET_ACCESS_KEY,
config=Config(signature_version="s3v4"),
region_name="us-east-1",
)
return s3_client
def health_check() -> bool:
# noinspection PyBroadException
try:
response = storage_client().head_bucket(Bucket=OBJECT_STORAGE_BUCKET)
return bool(response)
except Exception as e:
logger.warn("object_storage.health_check_failed", error=e)
return False
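
The boto3 client is built once and memoized in the module-level s3_client, which is also why the tests elsewhere in this commit can patch posthog.storage.object_storage.s3_client directly: storage_client() returns the already-set global. Direct use is a one-liner — a sketch assuming Django settings are configured as in posthog/settings/object_storage.py:

    # Sketch: calling the new helper directly (assumes Django settings are loaded
    # with the object storage values shown above).
    from posthog.storage import object_storage

    if object_storage.health_check():
        print("object storage bucket reachable")
    else:
        print("object storage unavailable; dependent features will not work")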

View File

@ -581,6 +581,18 @@ def get_plugin_server_job_queues() -> Optional[List[str]]:
return None
def is_object_storage_available() -> bool:
from posthog.storage import object_storage
try:
if settings.OBJECT_STORAGE_ENABLED:
return object_storage.health_check()
else:
return False
except BaseException:
return False
def get_redis_info() -> Mapping[str, Any]:
return get_client().info()

View File

@ -22,6 +22,7 @@ from posthog.utils import (
get_instance_available_sso_providers,
get_instance_realm,
is_celery_alive,
is_object_storage_available,
is_plugin_server_alive,
is_postgres_alive,
is_redis_alive,
@ -105,6 +106,7 @@ def preflight_check(request: HttpRequest) -> JsonResponse:
"available_social_auth_providers": get_instance_available_sso_providers(),
"can_create_org": get_can_create_org(request.user),
"email_service_available": is_email_available(with_absolute_urls=True),
"object_storage": is_object_storage_available(),
}
if request.user.is_authenticated:

View File

@ -8,6 +8,7 @@ django-rest-hooks@ git+https://github.com/zapier/django-rest-hooks.git@v1.6.0
amqp==2.5.2
asgiref==3.3.2
aioch==0.0.2
boto3==1.21.29
celery==4.4.2
celery-redbeat==2.0.0
clickhouse-driver==0.2.1

View File

@ -1,5 +1,5 @@
#
# This file is autogenerated by pip-compile
# This file is autogenerated by pip-compile with python 3.8
# To update, run:
#
# pip-compile requirements.in
@ -20,12 +20,18 @@ backoff==1.6.0
# via posthoganalytics
billiard==3.6.3.0
# via celery
celery-redbeat==2.0.0
boto3==1.21.29
# via -r requirements.in
botocore==1.24.46
# via
# boto3
# s3transfer
celery==4.4.2
# via
# -r requirements.in
# celery-redbeat
celery-redbeat==2.0.0
# via -r requirements.in
certifi==2019.11.28
# via
# requests
@ -49,7 +55,7 @@ cssselect==1.1.0
# via toronado
cssutils==1.0.2
# via toronado
dataclasses_json==0.5.7
dataclasses-json==0.5.7
# via -r requirements.in
defusedxml==0.6.0
# via
@ -58,6 +64,20 @@ defusedxml==0.6.0
# social-auth-core
dj-database-url==0.5.0
# via -r requirements.in
django==3.2.12
# via
# -r requirements.in
# django-axes
# django-cors-headers
# django-deprecate-fields
# django-extensions
# django-filter
# django-picklefield
# django-redis
# django-rest-hooks
# django-structlog
# djangorestframework
# drf-spectacular
django-axes==5.9.0
# via -r requirements.in
django-constance==2.8.0
@ -80,28 +100,12 @@ django-picklefield==3.0.1
# via -r requirements.in
django-redis==4.12.1
# via -r requirements.in
git+https://github.com/zapier/django-rest-hooks.git@v1.6.0
django-rest-hooks @ git+https://github.com/zapier/django-rest-hooks.git@v1.6.0
# via -r requirements.in
django-statsd==2.5.2
# via -r requirements.in
django-structlog==2.1.3
# via -r requirements.in
django==3.2.12
# via
# -r requirements.in
# django-axes
# django-cors-headers
# django-deprecate-fields
# django-extensions
# django-filter
# django-picklefield
# django-redis
# django-rest-hooks
# django-structlog
# djangorestframework
# drf-spectacular
djangorestframework-csv==2.1.0
# via -r requirements.in
djangorestframework==3.12.2
# via
# -r requirements.in
@ -109,6 +113,8 @@ djangorestframework==3.12.2
# drf-exceptions-hog
# drf-extensions
# drf-spectacular
djangorestframework-csv==2.1.0
# via -r requirements.in
dnspython==2.2.1
# via -r requirements.in
drf-exceptions-hog==0.2.0
@ -129,7 +135,9 @@ idna==2.8
# requests
importlib-metadata==1.6.0
# via -r requirements.in
git+https://github.com/PostHog/infi.clickhouse_orm@de88ffaecda8c36c693cc51909a3598cb9725fde
importlib-resources==5.7.1
# via jsonschema
infi-clickhouse-orm @ git+https://github.com/PostHog/infi.clickhouse_orm@de88ffaecda8c36c693cc51909a3598cb9725fde
# via -r requirements.in
inflection==0.5.1
# via drf-spectacular
@ -137,6 +145,10 @@ iso8601==0.1.12
# via infi-clickhouse-orm
isodate==0.6.1
# via python3-saml
jmespath==1.0.0
# via
# boto3
# botocore
jsonschema==4.4.0
# via drf-spectacular
kafka-helper==0.2
@ -154,12 +166,12 @@ lxml==4.7.1
# xmlsec
lzstring==1.0.4
# via -r requirements.in
marshmallow-enum==1.5.1
# via dataclasses-json
marshmallow==3.15.0
# via
# dataclasses-json
# marshmallow-enum
marshmallow-enum==1.5.1
# via dataclasses-json
mimesis==5.2.1
# via -r requirements.in
monotonic==1.5
@ -199,6 +211,7 @@ pyrsistent==0.18.1
python-dateutil==2.8.1
# via
# -r requirements.in
# botocore
# celery-redbeat
# posthoganalytics
python-statsd==2.1.0
@ -222,10 +235,6 @@ redis==3.4.1
# -r requirements.in
# celery-redbeat
# django-redis
requests-oauthlib==1.3.0
# via
# -r requirements.in
# social-auth-core
requests==2.25.1
# via
# -r requirements.in
@ -234,7 +243,13 @@ requests==2.25.1
# posthoganalytics
# requests-oauthlib
# social-auth-core
semantic_version==2.8.5
requests-oauthlib==1.3.0
# via
# -r requirements.in
# social-auth-core
s3transfer==0.5.2
# via boto3
semantic-version==2.8.5
# via -r requirements.in
sentry-sdk==1.3.1
# via -r requirements.in
@ -275,6 +290,7 @@ uritemplate==4.1.1
# via drf-spectacular
urllib3==1.26.5
# via
# botocore
# requests
# sentry-sdk
vine==1.3.0
@ -286,7 +302,9 @@ whitenoise==5.2.0
xmlsec==1.3.12
# via python3-saml
zipp==3.1.0
# via importlib-metadata
# via
# importlib-metadata
# importlib-resources
# The following packages are considered to be unsafe in a requirements file:
# setuptools