0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-21 13:39:22 +01:00

feat: Add GeoIP2 capability to Django app (for feature flags) (#10890)

* feat: add libmaxminddb0 as dependency. C library will speed things up significantly

* pin libmaxminddb to 1.5 to match what's available from APK

* get geolite2 db during build

* add settings for geoip2 django contrib library

* black formatting

* consistently use share directory

* isort fixes

* remove GeoLite2-City.mmdb from git and add script to ./bin/start to download it if file does not exist

* remove GeoLite2-City.mmdb from git

* add doc for share directory explaining why it exists

* relative path for curl in build

* shared vs share consistency

* Update snapshots

* brotli decompress

* ..everywhere

Co-authored-by: Neil Kakkar <neilkakkar@gmail.com>
Co-authored-by: neilkakkar <neilkakkar@users.noreply.github.com>
This commit is contained in:
James Greenhill 2022-07-25 17:20:11 -07:00 committed by GitHub
parent ed49d1338b
commit 8e5d1da3aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 67 additions and 0 deletions

View File

@ -29,3 +29,4 @@
!plugin-server/src
!plugin-server/.eslintrc.js
!plugin-server/.prettierrc
!share/GeoLite2-City.mmdb

View File

@ -76,6 +76,7 @@ runs:
touch frontend/dist/index.html
touch frontend/dist/layout.html
touch frontend/dist/exporter.html
[ ! -f ./share/GeoLite2-City.mmdb ] && ( curl -L "https://mmdbcdn.posthog.net/" | brotli --decompress --output=./share/GeoLite2-City.mmdb )
- name: Wait for Clickhouse & Kafka
shell: bash

1
.gitignore vendored
View File

@ -41,3 +41,4 @@ ee/benchmarks/results
coverage-*.xml
object_storage/
__emails__
share/GeoLite2-City.mmdb

View File

@ -14,6 +14,8 @@ service_warning() {
nc -z localhost 9092 || ( service_warning 'Kafka'; bin/check_kafka_clickhouse_up )
curl -s 'http://localhost:8123/ping' || ( service_warning 'ClickHouse'; bin/check_kafka_clickhouse_up )
[ ! -f ./share/GeoLite2-City.mmdb ] && ( curl -L "https://mmdbcdn.posthog.net/" | brotli --decompress --output=./share/GeoLite2-City.mmdb )
./bin/start-worker &
./bin/start-backend &
./bin/start-frontend &

View File

@ -61,6 +61,7 @@ async function decompressAndOpenMmdb(brotliContents: Buffer, filename: string):
async function fetchAndInsertFreshMmdb(hub: Hub): Promise<ReaderModel> {
const { db } = hub
// TODO: use local GeoLite2 on container at share/GeoLite2-City.mmdb instead of downloading it each time
status.info('⏳', 'Downloading GeoLite2 database from PostHog servers...')
const response = await fetch(MMDB_ENDPOINT, { compress: false })
const contentType = response.headers.get('content-type')

View File

@ -27,6 +27,7 @@ from posthog.settings.dynamic_settings import *
from posthog.settings.ee import *
from posthog.settings.ingestion import *
from posthog.settings.feature_flags import *
from posthog.settings.geoip import *
from posthog.settings.logs import *
from posthog.settings.sentry import *
from posthog.settings.shell_plus import *

View File

@ -0,0 +1,5 @@
import os
from django.conf import settings
# Directory holding the GeoLite2 database (share/GeoLite2-City.mmdb); exposed as
# GEOIP_PATH, the setting added for Django's contrib GeoIP2 library.
_share_dirname = "share"
GEOIP_PATH = os.path.join(settings.BASE_DIR, _share_dirname)

View File

@ -73,6 +73,24 @@ RUN apk --update --no-cache add \
"chromium-chromedriver~=93" \
"xmlsec~=1.2"
# Curl the GeoLite2-City database that will be used for IP geolocation within Django
#
# Notes:
#
# - We are doing this here because it makes sense to ensure the stack will work
# even if the database is not available at the time of boot.
# It's better here to fail at build than it is to fail at boot time.
# - curl and brotli are installed as the virtual package `.geolite-deps` and
# removed again at the end of this same RUN instruction, after the database
# has been downloaded and decompressed into ./share/GeoLite2-City.mmdb.
RUN apk --update --no-cache --virtual .geolite-deps add \
"curl~=7" \
"brotli~=1.0.9" \
&& \
mkdir share \
&& \
( curl -L "https://mmdbcdn.posthog.net/" | brotli --decompress --output=./share/GeoLite2-City.mmdb ) \
&& \
apk del .geolite-deps
# Compile and install Python dependencies.
#
@ -96,6 +114,7 @@ RUN apk --update --no-cache --virtual .build-deps add \
"libxslt-dev~=1.1" \
"xmlsec-dev~=1.2" \
"postgresql-dev~=13" \
"libmaxminddb~=1.5" \
&& \
pip install -r requirements.txt --compile --no-cache-dir \
&& \

View File

@ -32,6 +32,7 @@ dnspython==2.2.1
drf-exceptions-hog==0.2.0
drf-extensions==0.7.0
drf-spectacular==0.21.1
geoip2==4.6.0
google-cloud-sqlcommenter==2.0.0
gunicorn==20.1.0
idna==2.8

View File

@ -4,6 +4,10 @@
#
# pip-compile requirements.in
#
aiohttp==3.8.1
# via geoip2
aiosignal==1.2.0
# via aiohttp
amqp==2.6.0
# via
# -r requirements.in
@ -14,8 +18,11 @@ async-generator==1.10
# via
# trio
# trio-websocket
async-timeout==4.0.2
# via aiohttp
attrs==21.4.0
# via
# aiohttp
# jsonschema
# outcome
# trio
@ -44,6 +51,8 @@ cffi==1.14.5
# via cryptography
chardet==3.0.4
# via requests
charset-normalizer==2.1.0
# via aiohttp
clickhouse-driver==0.2.1
# via
# -r requirements.in
@ -130,8 +139,14 @@ drf-extensions==0.7.0
# via -r requirements.in
drf-spectacular==0.21.1
# via -r requirements.in
frozenlist==1.3.0
# via
# aiohttp
# aiosignal
future==0.18.2
# via lzstring
geoip2==4.6.0
# via -r requirements.in
google-cloud-sqlcommenter==2.0.0
# via -r requirements.in
gunicorn==20.1.0
@ -144,6 +159,7 @@ idna==2.8
# requests
# trio
# urllib3
# yarl
importlib-metadata==1.6.0
# via -r requirements.in
importlib-resources==5.8.0
@ -183,10 +199,16 @@ marshmallow==3.15.0
# marshmallow-enum
marshmallow-enum==1.5.1
# via dataclasses-json
maxminddb==2.2.0
# via geoip2
mimesis==5.2.1
# via -r requirements.in
monotonic==1.5
# via posthoganalytics
multidict==6.0.2
# via
# aiohttp
# yarl
mypy-extensions==0.4.3
# via typing-inspect
numpy==1.21.4
@ -258,6 +280,7 @@ requests==2.25.1
# via
# -r requirements.in
# django-rest-hooks
# geoip2
# infi-clickhouse-orm
# posthoganalytics
# requests-oauthlib
@ -324,6 +347,7 @@ uritemplate==4.1.1
urllib3[secure,socks]==1.26.5
# via
# botocore
# geoip2
# requests
# selenium
# sentry-sdk
@ -339,6 +363,8 @@ wsproto==1.1.0
# via trio-websocket
xmlsec==1.3.12
# via python3-saml
yarl==1.7.2
# via aiohttp
zipp==3.1.0
# via
# importlib-metadata

9
share/share.md Normal file
View File

@ -0,0 +1,9 @@
# Share folder
Put any resources here that should be shared across all projects (events, web, worker, plugins, etc.). Most likely these will be things like small static databases or other resources.
Examples:
- GeoLite2-City.mmdb
- Some small lookup SQLite db
- random data?