2020-09-09 06:00:37 +02:00
|
|
|
import datetime
|
2020-10-21 20:35:07 +02:00
|
|
|
import json
|
2020-09-29 15:39:18 +02:00
|
|
|
from typing import Dict, Optional
|
|
|
|
from uuid import UUID
|
2020-09-09 06:00:37 +02:00
|
|
|
|
2020-11-04 02:06:52 +01:00
|
|
|
import statsd
|
2020-09-09 06:00:37 +02:00
|
|
|
from celery import shared_task
|
2020-11-04 02:06:52 +01:00
|
|
|
from dateutil import parser
|
|
|
|
from dateutil.relativedelta import relativedelta
|
|
|
|
from django.conf import settings
|
2020-10-30 20:08:23 +01:00
|
|
|
from django.db.utils import IntegrityError
|
2020-09-09 06:00:37 +02:00
|
|
|
|
|
|
|
from ee.clickhouse.models.event import create_event
|
2020-10-28 21:22:16 +01:00
|
|
|
from ee.clickhouse.models.session_recording_event import create_session_recording_event
|
2020-10-21 20:35:07 +02:00
|
|
|
from ee.kafka_client.client import KafkaProducer
|
|
|
|
from ee.kafka_client.topics import KAFKA_EVENTS_WAL
|
2020-09-09 06:00:37 +02:00
|
|
|
from posthog.ee import check_ee_enabled
|
|
|
|
from posthog.models.element import Element
|
2020-10-30 20:08:23 +01:00
|
|
|
from posthog.models.person import Person
|
2020-09-09 06:00:37 +02:00
|
|
|
from posthog.models.team import Team
|
2020-09-29 15:39:18 +02:00
|
|
|
from posthog.models.utils import UUIDT
|
2020-11-04 02:06:52 +01:00
|
|
|
from posthog.tasks.process_event import handle_identify_or_alias, store_names_and_properties
|
|
|
|
|
|
|
|
# Module-import side effect: point the global statsd connection at the host/port
# configured in Django settings so the Timers below report to the right place.
statsd.Connection.set_defaults(host=settings.STATSD_HOST, port=settings.STATSD_PORT)
|
2020-09-09 06:00:37 +02:00
|
|
|
|
|
|
|
|
|
|
|
def _capture_ee(
    event_uuid: UUID,
    person_uuid: UUID,
    ip: str,
    site_url: str,
    team_id: int,
    event: str,
    distinct_id: str,
    properties: Dict,
    timestamp: datetime.datetime,
) -> None:
    """Write a single event into ClickHouse (EE pipeline).

    Converts the ``$elements`` property (if present) into ``Element`` rows,
    attaches the client IP when the team allows it, records event/property
    names, ensures a ``Person`` exists for ``distinct_id``, and finally emits
    the event via ``create_event``.

    NOTE(review): ``person_uuid`` is currently unused inside this function —
    kept in the signature for caller compatibility.
    """
    # DOM elements travel inside properties but are stored separately.
    elements = properties.get("$elements")
    elements_list = []
    if elements:
        del properties["$elements"]
        # Fixed: previous version used enumerate() and never read the index.
        elements_list = [
            Element(
                text=el["$el_text"][0:400] if el.get("$el_text") else None,
                tag_name=el["tag_name"],
                href=el["attr__href"][0:2048] if el.get("attr__href") else None,
                attr_class=el["attr__class"].split(" ") if el.get("attr__class") else None,
                attr_id=el.get("attr__id"),
                nth_child=el.get("nth_child"),
                nth_of_type=el.get("nth_of_type"),
                attributes={key: value for key, value in el.items() if key.startswith("attr__")},
            )
            for el in elements
        ]

    # Load only the columns used below to keep the query light.
    team = Team.objects.only("slack_incoming_webhook", "event_names", "event_properties", "anonymize_ips").get(
        pk=team_id
    )

    # Record the client IP unless the team opted out or the client already sent one.
    if not team.anonymize_ips and "$ip" not in properties:
        properties["$ip"] = ip

    store_names_and_properties(team=team, event=event, properties=properties)

    if not Person.objects.distinct_ids_exist(team_id=team_id, distinct_ids=[str(distinct_id)]):
        # Catch race condition where in between getting and creating,
        # another request already created this user
        try:
            Person.objects.create(team_id=team_id, distinct_ids=[str(distinct_id)])
        except IntegrityError:
            pass

    create_event(
        event_uuid=event_uuid,
        event=event,
        properties=properties,
        timestamp=timestamp,
        team=team,
        distinct_id=distinct_id,
        elements=elements_list,
        site_url=site_url,
    )
|
|
|
|
|
|
|
|
|
2020-11-04 02:06:52 +01:00
|
|
|
def handle_timestamp(data: dict, now: datetime.datetime, sent_at: Optional[datetime.datetime]) -> datetime.datetime:
    """Resolve the effective timestamp for an incoming event.

    Preference order:
    1. ``data["timestamp"]``, skew-corrected against ``sent_at`` when possible;
    2. ``now`` shifted back by ``data["offset"]`` milliseconds;
    3. ``now`` unchanged.
    """
    if data.get("timestamp"):
        if sent_at:
            # sent_at - timestamp == now - x
            # x = now + (timestamp - sent_at)
            try:
                # timestamp and sent_at must both be in the same format: either both with
                # or both without timezones, otherwise we can't get a diff to add to now.
                return now + (parser.isoparse(data["timestamp"]) - sent_at)
            except TypeError:
                # Mixed naive/aware datetimes: fall through to the raw client timestamp.
                # (Fixed: the exception was previously bound to an unused variable.)
                pass
        return parser.isoparse(data["timestamp"])
    if data.get("offset"):
        # Offset is milliseconds elapsed since the event happened, relative to now.
        return now - relativedelta(microseconds=data["offset"] * 1000)
    return now
|
|
|
|
|
|
|
|
|
2020-09-09 06:00:37 +02:00
|
|
|
if check_ee_enabled():

    def process_event_ee(
        distinct_id: str,
        ip: str,
        site_url: str,
        data: dict,
        team_id: int,
        now: datetime.datetime,
        sent_at: Optional[datetime.datetime],
    ) -> None:
        """Ingest one raw event through the ClickHouse-backed (EE) pipeline."""
        timing = statsd.Timer("%s_posthog_cloud" % (settings.STATSD_PREFIX,))
        timing.start()

        props = data.get("properties", {})
        set_payload = data.get("$set")
        if set_payload:
            props["$set"] = set_payload

        new_person_uuid = UUIDT()
        new_event_uuid = UUIDT()
        event_ts = handle_timestamp(data, now, sent_at)
        handle_identify_or_alias(data["event"], props, distinct_id, team_id)

        # Session recordings live in their own table and skip the event pipeline.
        if data["event"] == "$snapshot":
            create_session_recording_event(
                uuid=new_event_uuid,
                team_id=team_id,
                distinct_id=distinct_id,
                session_id=props["$session_id"],
                snapshot_data=props["$snapshot_data"],
                timestamp=event_ts,
            )
            return

        _capture_ee(
            event_uuid=new_event_uuid,
            person_uuid=new_person_uuid,
            ip=ip,
            site_url=site_url,
            team_id=team_id,
            event=data["event"],
            distinct_id=distinct_id,
            properties=props,
            timestamp=event_ts,
        )
        timing.stop("process_event_ee")


else:

    def process_event_ee(
        distinct_id: str,
        ip: str,
        site_url: str,
        data: dict,
        team_id: int,
        now: datetime.datetime,
        sent_at: Optional[datetime.datetime],
    ) -> None:
        # Noop if ee is not enabled
        return
|
2020-10-21 20:35:07 +02:00
|
|
|
|
|
|
|
|
|
|
|
def log_event(
    distinct_id: str,
    ip: str,
    site_url: str,
    data: dict,
    team_id: int,
    now: datetime.datetime,
    sent_at: Optional[datetime.datetime],
) -> None:
    """Serialize the raw event and append it to the Kafka events write-ahead log."""
    # Build the WAL record without shadowing the `data` parameter.
    payload = {
        "distinct_id": distinct_id,
        "ip": ip,
        "site_url": site_url,
        "data": json.dumps(data),
        "team_id": team_id,
        "now": now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        "sent_at": sent_at.strftime("%Y-%m-%d %H:%M:%S.%f") if sent_at else "",
    }
    producer = KafkaProducer()
    producer.produce(topic=KAFKA_EVENTS_WAL, data=payload)
|