From 8e5d1da3aae594bca650d6ab50431deda27a3e0c Mon Sep 17 00:00:00 2001 From: James Greenhill Date: Mon, 25 Jul 2022 17:20:11 -0700 Subject: [PATCH] feat: Add GeoIP2 capability to Django app (for feature flags) (#10890) * feat: add libmaxminddb0 as dependency. C library will speed things up significantly * pin libmaxminddb to 1.5 for whats available from APK * get geolite2 db during build * add settings for geoip2 django contrib library * black formatting * consistently use share directory * isort fixes * remove GeoLite2-City.mmdb from git and add script to ./bin/start to download it if file does not exist * remove GeoLite2-City.mmdb from git * add doc for share directory explaining why it exists * relative path for curl in build * shared vs share consistency * Update snapshots * brotli decompress * ..everywhere Co-authored-by: Neil Kakkar Co-authored-by: neilkakkar --- .dockerignore | 1 + .github/actions/run-backend-tests/action.yml | 1 + .gitignore | 1 + bin/start | 2 ++ plugin-server/src/main/services/mmdb.ts | 1 + posthog/settings/__init__.py | 1 + posthog/settings/geoip.py | 5 ++++ production.Dockerfile | 19 ++++++++++++++ requirements.in | 1 + requirements.txt | 26 ++++++++++++++++++++ share/share.md | 9 +++++++ 11 files changed, 67 insertions(+) create mode 100644 posthog/settings/geoip.py create mode 100644 share/share.md diff --git a/.dockerignore b/.dockerignore index 10d4f1254fb..534dce2d5c0 100644 --- a/.dockerignore +++ b/.dockerignore @@ -29,3 +29,4 @@ !plugin-server/src !plugin-server/.eslintrc.js !plugin-server/.prettierrc +!share/GeoLite2-City.mmdb \ No newline at end of file diff --git a/.github/actions/run-backend-tests/action.yml b/.github/actions/run-backend-tests/action.yml index 40d9bcc7b05..9fac7dff5dd 100644 --- a/.github/actions/run-backend-tests/action.yml +++ b/.github/actions/run-backend-tests/action.yml @@ -76,6 +76,7 @@ runs: touch frontend/dist/index.html touch frontend/dist/layout.html touch frontend/dist/exporter.html + [ ! 
-f ./share/GeoLite2-City.mmdb ] && ( curl -L "https://mmdbcdn.posthog.net/" | brotli --decompress --output=./share/GeoLite2-City.mmdb ) - name: Wait for Clickhouse & Kafka shell: bash diff --git a/.gitignore b/.gitignore index e17bc7d93f5..07bb82adaa3 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,4 @@ ee/benchmarks/results coverage-*.xml object_storage/ __emails__ +share/GeoLite2-City.mmdb \ No newline at end of file diff --git a/bin/start b/bin/start index 3679cc13472..c918ad7f1c1 100755 --- a/bin/start +++ b/bin/start @@ -14,6 +14,8 @@ service_warning() { nc -z localhost 9092 || ( service_warning 'Kafka'; bin/check_kafka_clickhouse_up ) curl -s 'http://localhost:8123/ping' || ( service_warning 'ClickHouse'; bin/check_kafka_clickhouse_up ) +[ ! -f ./share/GeoLite2-City.mmdb ] && ( curl -L "https://mmdbcdn.posthog.net/" | brotli --decompress --output=./share/GeoLite2-City.mmdb ) + ./bin/start-worker & ./bin/start-backend & ./bin/start-frontend & diff --git a/plugin-server/src/main/services/mmdb.ts b/plugin-server/src/main/services/mmdb.ts index 53524f5cb23..45fa8d38b34 100644 --- a/plugin-server/src/main/services/mmdb.ts +++ b/plugin-server/src/main/services/mmdb.ts @@ -61,6 +61,7 @@ async function decompressAndOpenMmdb(brotliContents: Buffer, filename: string): async function fetchAndInsertFreshMmdb(hub: Hub): Promise { const { db } = hub + // TODO: use local GeoLite2 on container at share/GeoLite2-City.mmdb instead of downloading it each time status.info('⏳', 'Downloading GeoLite2 database from PostHog servers...') const response = await fetch(MMDB_ENDPOINT, { compress: false }) const contentType = response.headers.get('content-type') diff --git a/posthog/settings/__init__.py b/posthog/settings/__init__.py index a2c3917b889..0c9000284de 100644 --- a/posthog/settings/__init__.py +++ b/posthog/settings/__init__.py @@ -27,6 +27,7 @@ from posthog.settings.dynamic_settings import * from posthog.settings.ee import * from posthog.settings.ingestion import * from 
posthog.settings.feature_flags import * +from posthog.settings.geoip import * from posthog.settings.logs import * from posthog.settings.sentry import * from posthog.settings.shell_plus import * diff --git a/posthog/settings/geoip.py b/posthog/settings/geoip.py new file mode 100644 index 00000000000..f479b227908 --- /dev/null +++ b/posthog/settings/geoip.py @@ -0,0 +1,5 @@ +import os + +from django.conf import settings + +GEOIP_PATH = os.path.join(settings.BASE_DIR, "share") diff --git a/production.Dockerfile b/production.Dockerfile index de741a3c24b..e5401857237 100644 --- a/production.Dockerfile +++ b/production.Dockerfile @@ -73,6 +73,24 @@ RUN apk --update --no-cache add \ "chromium-chromedriver~=93" \ "xmlsec~=1.2" +# Curl the GeoLite2-City database that will be used for IP geolocation within Django +# +# Notes: +# +# - We are doing this here because it makes sense to ensure the stack will work +# even if the database is not available at the time of boot. +# It's better here to fail at build time than it is to fail at boot time. + +RUN apk --update --no-cache --virtual .geolite-deps add \ + "curl~=7" \ + "brotli~=1.0.9" \ + && \ + mkdir share \ + && \ + ( curl -L "https://mmdbcdn.posthog.net/" | brotli --decompress --output=./share/GeoLite2-City.mmdb ) \ + && \ + apk del .geolite-deps + # Compile and install Python dependencies. 
# @@ -96,6 +114,7 @@ RUN apk --update --no-cache --virtual .build-deps add \ "libxslt-dev~=1.1" \ "xmlsec-dev~=1.2" \ "postgresql-dev~=13" \ + "libmaxminddb~=1.5" \ && \ pip install -r requirements.txt --compile --no-cache-dir \ && \ diff --git a/requirements.in b/requirements.in index 8f3b4120e66..6a6e3130997 100644 --- a/requirements.in +++ b/requirements.in @@ -32,6 +32,7 @@ dnspython==2.2.1 drf-exceptions-hog==0.2.0 drf-extensions==0.7.0 drf-spectacular==0.21.1 +geoip2==4.6.0 google-cloud-sqlcommenter==2.0.0 gunicorn==20.1.0 idna==2.8 diff --git a/requirements.txt b/requirements.txt index 2c191b527fe..970c87dc1a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,10 @@ # # pip-compile requirements.in # +aiohttp==3.8.1 + # via geoip2 +aiosignal==1.2.0 + # via aiohttp amqp==2.6.0 # via # -r requirements.in @@ -14,8 +18,11 @@ async-generator==1.10 # via # trio # trio-websocket +async-timeout==4.0.2 + # via aiohttp attrs==21.4.0 # via + # aiohttp # jsonschema # outcome # trio @@ -44,6 +51,8 @@ cffi==1.14.5 # via cryptography chardet==3.0.4 # via requests +charset-normalizer==2.1.0 + # via aiohttp clickhouse-driver==0.2.1 # via # -r requirements.in @@ -130,8 +139,14 @@ drf-extensions==0.7.0 # via -r requirements.in drf-spectacular==0.21.1 # via -r requirements.in +frozenlist==1.3.0 + # via + # aiohttp + # aiosignal future==0.18.2 # via lzstring +geoip2==4.6.0 + # via -r requirements.in google-cloud-sqlcommenter==2.0.0 # via -r requirements.in gunicorn==20.1.0 @@ -144,6 +159,7 @@ idna==2.8 # requests # trio # urllib3 + # yarl importlib-metadata==1.6.0 # via -r requirements.in importlib-resources==5.8.0 @@ -183,10 +199,16 @@ marshmallow==3.15.0 # marshmallow-enum marshmallow-enum==1.5.1 # via dataclasses-json +maxminddb==2.2.0 + # via geoip2 mimesis==5.2.1 # via -r requirements.in monotonic==1.5 # via posthoganalytics +multidict==6.0.2 + # via + # aiohttp + # yarl mypy-extensions==0.4.3 # via typing-inspect numpy==1.21.4 @@ -258,6 +280,7 @@ 
requests==2.25.1 # via # -r requirements.in # django-rest-hooks + # geoip2 # infi-clickhouse-orm # posthoganalytics # requests-oauthlib @@ -324,6 +347,7 @@ uritemplate==4.1.1 urllib3[secure,socks]==1.26.5 # via # botocore + # geoip2 # requests # selenium # sentry-sdk @@ -339,6 +363,8 @@ wsproto==1.1.0 # via trio-websocket xmlsec==1.3.12 # via python3-saml +yarl==1.7.2 + # via aiohttp zipp==3.1.0 # via # importlib-metadata diff --git a/share/share.md b/share/share.md new file mode 100644 index 00000000000..3497826e87a --- /dev/null +++ b/share/share.md @@ -0,0 +1,9 @@ +# Share folder + +Put any resources here that should be shared across all projects (events, web, worker, plugins, etc.). Most likely this will be things like small static databases or other resources. + +Examples: +- GeoLite2-City.mmdb +- Some small lookup SQLite database +- random data? +