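# ClickHouse-side person model helpers: mirror Person / PersonDistinctId writes from
# Postgres into ClickHouse through the producers in ee.kafka.client, and read rows
# back through lightweight serializers.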
import datetime
import json
from typing import Any, Dict, List, Optional
from uuid import UUID

from django.db.models.query import QuerySet
from django.db.models.signals import post_delete, post_save
from django.dispatch import receiver
from django.utils.timezone import now
from rest_framework import serializers

from ee.clickhouse.client import sync_execute
from ee.clickhouse.sql.person import (
    DELETE_PERSON_BY_ID,
    DELETE_PERSON_DISTINCT_ID_BY_PERSON_ID,
    GET_DISTINCT_IDS_SQL,
    GET_DISTINCT_IDS_SQL_BY_ID,
    GET_PERSON_BY_DISTINCT_ID,
    GET_PERSON_SQL,
    GET_PERSONS_BY_DISTINCT_IDS,
    INSERT_PERSON_DISTINCT_ID,
    INSERT_PERSON_SQL,
    PERSON_DISTINCT_ID_EXISTS_SQL,
    PERSON_EXISTS_SQL,
    UPDATE_PERSON_ATTACHED_DISTINCT_ID,
    UPDATE_PERSON_IS_IDENTIFIED,
    UPDATE_PERSON_PROPERTIES,
)
from ee.kafka.client import ClickhouseProducer, KafkaProducer
from ee.kafka.topics import KAFKA_OMNI_PERSON, KAFKA_PERSON, KAFKA_PERSON_UNIQUE_ID
from posthog import settings
from posthog.ee import check_ee_enabled
from posthog.models.person import Person, PersonDistinctId
from posthog.models.utils import UUIDT


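# Register the signal receivers only when the EE ClickHouse backend is enabled, so
# that person writes in Postgres are mirrored into ClickHouse.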
if settings.EE_AVAILABLE and check_ee_enabled():

    @receiver(post_save, sender=Person)
    def person_created(sender, instance: Person, created, **kwargs):
        create_person(
            team_id=instance.team.pk,
            properties=instance.properties,
            uuid=str(instance.uuid),
            is_identified=instance.is_identified,
        )

    @receiver(post_save, sender=PersonDistinctId)
    def person_distinct_id_created(sender, instance: PersonDistinctId, created, **kwargs):
        create_person_distinct_id(instance.pk, instance.team.pk, instance.distinct_id, str(instance.person.uuid))

    @receiver(post_delete, sender=Person)
    def person_deleted(sender, instance: Person, **kwargs):
        delete_person(instance.uuid)


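# Publishes a person snapshot for an event to the KAFKA_OMNI_PERSON topic and returns
# the person UUID, generating one when the caller does not supply it.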
def emit_omni_person(
    event_uuid: UUID,
    team_id: int,
    distinct_id: str,
    uuid: Optional[UUID] = None,
    properties: Optional[Dict] = {},
    sync: bool = False,
    is_identified: bool = False,
    timestamp: Optional[datetime.datetime] = None,
) -> UUID:
    if not uuid:
        uuid = UUIDT()

    if not timestamp:
        timestamp = now()

    data = {
        "event_uuid": str(event_uuid),
        "uuid": str(uuid),
        "distinct_id": distinct_id,
        "team_id": team_id,
        "properties": json.dumps(properties),
        "is_identified": int(is_identified),
        "ts": timestamp.strftime("%Y-%m-%d %H:%M:%S.%f"),
    }
    p = KafkaProducer()
    p.produce(topic=KAFKA_OMNI_PERSON, data=data)
    return uuid


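# Inserts a person row for ClickHouse via ClickhouseProducer (KAFKA_PERSON topic /
# INSERT_PERSON_SQL) and returns the person UUID as a string.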
def create_person(
    team_id: int,
    uuid: Optional[str] = None,
    properties: Optional[Dict] = {},
    sync: bool = False,
    is_identified: bool = False,
    timestamp: Optional[datetime.datetime] = None,
) -> str:
    if uuid:
        uuid = str(uuid)
    else:
        uuid = str(UUIDT())
    if not timestamp:
        timestamp = now()

    data = {
        "id": str(uuid),
        "team_id": team_id,
        "properties": json.dumps(properties),
        "is_identified": int(is_identified),
        "created_at": timestamp.strftime("%Y-%m-%d %H:%M:%S.%f"),
    }
    p = ClickhouseProducer()
    p.produce(topic=KAFKA_PERSON, sql=INSERT_PERSON_SQL, data=data, sync=sync)
    return uuid


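# The UPDATE_* statements below run synchronously against ClickHouse via sync_execute.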
def update_person_properties(team_id: int, id: str, properties: Dict) -> None:
    sync_execute(UPDATE_PERSON_PROPERTIES, {"team_id": team_id, "id": id, "properties": json.dumps(properties)})


def update_person_is_identified(team_id: int, id: str, is_identified: bool) -> None:
    sync_execute(
        UPDATE_PERSON_IS_IDENTIFIED, {"team_id": team_id, "id": id, "is_identified": "1" if is_identified else "0"}
    )


def create_person_distinct_id(id: int, team_id: int, distinct_id: str, person_id: str) -> None:
    data = {"id": id, "distinct_id": distinct_id, "person_id": person_id, "team_id": team_id}
    p = ClickhouseProducer()
    p.produce(topic=KAFKA_PERSON_UNIQUE_ID, sql=INSERT_PERSON_DISTINCT_ID, data=data)


def distinct_ids_exist(team_id: int, ids: List[str]) -> bool:
    return bool(sync_execute(PERSON_DISTINCT_ID_EXISTS_SQL.format([str(id) for id in ids]), {"team_id": team_id})[0][0])


def person_exists(id: int) -> bool:
    return bool(sync_execute(PERSON_EXISTS_SQL, {"id": id})[0][0])


def get_persons(team_id: int):
    result = sync_execute(GET_PERSON_SQL, {"team_id": team_id})
    return ClickhousePersonSerializer(result, many=True).data


def get_person_distinct_ids(team_id: int):
    result = sync_execute(GET_DISTINCT_IDS_SQL, {"team_id": team_id})
    return ClickhousePersonDistinctIdSerializer(result, many=True).data


def get_person_by_distinct_id(team_id: int, distinct_id: str) -> Dict[str, Any]:
    result = sync_execute(GET_PERSON_BY_DISTINCT_ID, {"team_id": team_id, "distinct_id": str(distinct_id)})
    if len(result) > 0:
        return ClickhousePersonSerializer(result[0], many=False).data
    return {}


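# Unlike the ClickHouse reads above, this looks persons up in Postgres via the Django ORM.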
def get_persons_by_distinct_ids(team_id: int, distinct_ids: List[str]) -> QuerySet:
    return Person.objects.filter(team_id=team_id, persondistinctid__distinct_id__in=distinct_ids)


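# Merges the person identified by old_id into target: combines properties (target wins
# on key conflicts), re-points the old person's distinct_ids at the target, and finally
# deletes the old person.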
def merge_people(team_id: int, target: Dict, old_id: int, old_props: Dict) -> None:
    # merge the properties
    properties = {**old_props, **target["properties"]}

    update_person_properties(team_id=team_id, id=target["id"], properties=properties)

    other_person_distinct_ids = sync_execute(
        GET_DISTINCT_IDS_SQL_BY_ID, {"person_id": old_id, "team_id": target["team_id"]}
    )

    parsed_other_person_distinct_ids = ClickhousePersonDistinctIdSerializer(other_person_distinct_ids, many=True).data

    for person_distinct_id in parsed_other_person_distinct_ids:
        sync_execute(
            UPDATE_PERSON_ATTACHED_DISTINCT_ID,
            {"person_id": target["id"], "distinct_id": person_distinct_id["distinct_id"]},
        )

    delete_person(old_id)


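# Removes both the person row and its distinct_id rows from ClickHouse.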
def delete_person(person_id):
    sync_execute(DELETE_PERSON_BY_ID, {"id": person_id})
    sync_execute(DELETE_PERSON_DISTINCT_ID_BY_PERSON_ID, {"id": person_id})


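# Serializes raw ClickHouse result rows; the index-based getters assume the column
# order returned by the person queries:
#   (id, created_at, team_id, properties, is_identified[, distinct_ids])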
class ClickhousePersonSerializer(serializers.Serializer):
    id = serializers.SerializerMethodField()
    created_at = serializers.SerializerMethodField()
    team_id = serializers.SerializerMethodField()
    properties = serializers.SerializerMethodField()
    is_identified = serializers.SerializerMethodField()
    name = serializers.SerializerMethodField()
    distinct_ids = serializers.SerializerMethodField()

    def get_name(self, person):
        props = json.loads(person[3])
        email = props.get("email", None)
        return email or person[0]

    def get_id(self, person):
        return person[0]

    def get_created_at(self, person):
        return person[1]

    def get_team_id(self, person):
        return person[2]

    def get_properties(self, person):
        return json.loads(person[3])

    def get_is_identified(self, person):
        return person[4]

    # all queries might not retrieve distinct_ids
    def get_distinct_ids(self, person):
        return person[5] if len(person) > 5 else []


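# Serializes raw person_distinct_id rows, assumed ordered as (id, distinct_id, person_id, team_id).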
class ClickhousePersonDistinctIdSerializer(serializers.Serializer):
    id = serializers.SerializerMethodField()
    distinct_id = serializers.SerializerMethodField()
    person_id = serializers.SerializerMethodField()
    team_id = serializers.SerializerMethodField()

    def get_id(self, pid):
        return pid[0]

    def get_distinct_id(self, pid):
        return pid[1]

    def get_person_id(self, pid):
        return pid[2]

    def get_team_id(self, pid):
        return pid[3]