mirror of https://github.com/PostHog/posthog.git synced 2024-11-24 09:14:46 +01:00

feat(err): Emit to clickhouse from cymbal (#26244)

Oliver Browne 2024-11-19 11:00:08 +02:00 committed by GitHub
parent 1466949318
commit 6a424dfc0a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 145 additions and 60 deletions

View File

@@ -1,4 +1,4 @@
-import { Person, PreIngestionEvent, RawClickHouseEvent } from '../../../types'
+import { Person, PreIngestionEvent, RawKafkaEvent } from '../../../types'
import { EventPipelineRunner } from './runner'
export function createEventStep(
@@ -6,6 +6,6 @@ export function createEventStep(
    event: PreIngestionEvent,
    person: Person,
    processPerson: boolean
-): [RawClickHouseEvent, Promise<void>] {
+): RawKafkaEvent {
    return runner.eventsProcessor.createEvent(event, person, processPerson)
}

View File

@@ -0,0 +1,6 @@
+import { RawKafkaEvent } from '../../../types'
+import { EventPipelineRunner } from './runner'
+export function emitEventStep(runner: EventPipelineRunner, event: RawKafkaEvent): [Promise<void>] {
+    return [runner.eventsProcessor.emitEvent(event)]
+}
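Reviewer note: the new step deliberately returns the ack promise in a one-element tuple instead of awaiting it, so the runner can collect every Kafka ack and await them in one batch per event. A minimal sketch of that contract, using illustrative stand-in types rather than the repo's real ones:

    // Stand-ins for the real types, just to show the shape of the contract.
    type RawKafkaEventSketch = { uuid: string; team_id: number }
    interface EventsProcessorLike {
        emitEvent(event: RawKafkaEventSketch): Promise<void>
    }

    // As in emitEventStep above: start the produce, hand back the ack unawaited.
    function emitEventStepSketch(processor: EventsProcessorLike, event: RawKafkaEventSketch): [Promise<void>] {
        return [processor.emitEvent(event)]
    }

    // The caller collects acks and blocks on delivery once, at the end.
    const kafkaAcks: Array<Promise<void>> = []
    const [ack] = emitEventStepSketch({ emitEvent: async () => {} }, { uuid: 'uuid1', team_id: 2 })
    kafkaAcks.push(ack)
    void Promise.all(kafkaAcks)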

View File

@@ -1,10 +1,10 @@
-import { RawClickHouseEvent } from '../../../types'
+import { RawKafkaEvent } from '../../../types'
import { status } from '../../../utils/status'
import { EventPipelineRunner } from './runner'
export function produceExceptionSymbolificationEventStep(
    runner: EventPipelineRunner,
-    event: RawClickHouseEvent
+    event: RawKafkaEvent
): Promise<[Promise<void>]> {
    const ack = runner.hub.kafkaProducer
        .produce({

View File

@@ -11,6 +11,7 @@ import { status } from '../../../utils/status'
import { EventsProcessor } from '../process-event'
import { captureIngestionWarning, generateEventDeadLetterQueueMessage } from '../utils'
import { createEventStep } from './createEventStep'
+import { emitEventStep } from './emitEventStep'
import { enrichExceptionEventStep } from './enrichExceptionEventStep'
import { extractHeatmapDataStep } from './extractHeatmapDataStep'
import {
@@ -259,24 +260,25 @@ export class EventPipelineRunner {
            event.team_id
        )
-        const [rawClickhouseEvent, eventAck] = await this.runStep(
+        const rawEvent = await this.runStep(
            createEventStep,
            [this, enrichedIfErrorEvent, person, processPerson],
            event.team_id
        )
-        kafkaAcks.push(eventAck)
        if (event.event === '$exception' && event.team_id == 2) {
            const [exceptionAck] = await this.runStep(
                produceExceptionSymbolificationEventStep,
-                [this, rawClickhouseEvent],
+                [this, rawEvent],
                event.team_id
            )
            kafkaAcks.push(exceptionAck)
-            return this.registerLastStep('produceExceptionSymbolificationEventStep', [rawClickhouseEvent], kafkaAcks)
+            return this.registerLastStep('produceExceptionSymbolificationEventStep', [rawEvent], kafkaAcks)
+        } else {
+            const [clickhouseAck] = await this.runStep(emitEventStep, [this, rawEvent], event.team_id)
+            kafkaAcks.push(clickhouseAck)
+            return this.registerLastStep('emitEventStep', [rawEvent], kafkaAcks)
        }
-        return this.registerLastStep('createEventStep', [rawClickhouseEvent], kafkaAcks)
    }
    registerLastStep(stepName: string, args: any[], ackPromises?: Array<Promise<void>>): EventPipelineResult {
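The net effect of this hunk, reduced to a sketch: createEventStep now only builds the row, and the runner routes it either to cymbal's input topic or straight to the ClickHouse events topic. The team-id check mirrors the hard-coded rollout gate above; the parameters here are illustrative stand-ins, not the repo's real signatures:

    type RoutedEvent = { uuid: string; team_id: number; event: string }

    async function routeSketch(
        raw: RoutedEvent,
        produceToSymbolificationTopic: (e: RoutedEvent) => Promise<void>,
        emitToClickHouseTopic: (e: RoutedEvent) => Promise<void>
    ): Promise<string> {
        if (raw.event === '$exception' && raw.team_id === 2) {
            // cymbal consumes this topic, symbolicates the stack, and emits
            // the finished event to the ClickHouse topic itself (see the Rust
            // changes further down).
            await produceToSymbolificationTopic(raw)
            return 'produceExceptionSymbolificationEventStep'
        }
        await emitToClickHouseTopic(raw)
        return 'emitEventStep'
    }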

View File

@@ -204,11 +204,7 @@ export class EventsProcessor {
        return res
    }
-    createEvent(
-        preIngestionEvent: PreIngestionEvent,
-        person: Person,
-        processPerson: boolean
-    ): [RawKafkaEvent, Promise<void>] {
+    createEvent(preIngestionEvent: PreIngestionEvent, person: Person, processPerson: boolean): RawKafkaEvent {
        const { eventUuid: uuid, event, teamId, projectId, distinctId, properties, timestamp } = preIngestionEvent
        let elementsChain = ''
@@ -264,10 +260,14 @@ export class EventsProcessor {
            person_mode: personMode,
        }
+        return rawEvent
+    }
+    emitEvent(rawEvent: RawKafkaEvent): Promise<void> {
        const ack = this.kafkaProducer
            .produce({
                topic: this.pluginsServer.CLICKHOUSE_JSON_EVENTS_KAFKA_TOPIC,
-                key: uuid,
+                key: rawEvent.uuid,
                value: Buffer.from(JSON.stringify(rawEvent)),
                waitForAck: true,
            })
@@ -275,16 +275,16 @@
            // Some messages end up significantly larger than the original
            // after plugin processing, person & group enrichment, etc.
            if (error instanceof MessageSizeTooLarge) {
-                await captureIngestionWarning(this.db.kafkaProducer, teamId, 'message_size_too_large', {
-                    eventUuid: uuid,
-                    distinctId: distinctId,
+                await captureIngestionWarning(this.db.kafkaProducer, rawEvent.team_id, 'message_size_too_large', {
+                    eventUuid: rawEvent.uuid,
+                    distinctId: rawEvent.distinct_id,
                })
            } else {
                throw error
            }
        })
-        return [rawEvent, ack]
+        return ack
    }
    private async upsertGroup(
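With the produce moved out, createEvent is a pure event builder and emitEvent owns delivery and its error handling. A hedged sketch of that error path — the error class and warning helper below are stand-ins for the real MessageSizeTooLarge and captureIngestionWarning:

    class MessageSizeTooLargeSketch extends Error {}

    async function emitWithWarningSketch(
        produce: () => Promise<void>,
        captureWarning: (type: string) => Promise<void>
    ): Promise<void> {
        try {
            await produce()
        } catch (error) {
            if (error instanceof MessageSizeTooLargeSketch) {
                // Enrichment can push a payload past the broker's size limit:
                // record an ingestion warning and drop the event rather than
                // failing the whole batch.
                await captureWarning('message_size_too_large')
            } else {
                throw error
            }
        }
    }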

View File

@@ -144,5 +144,24 @@ Array [
        true,
    ],
],
+Array [
+    "emitEventStep",
+    Array [
+        Object {
+            "created_at": "2024-11-18 14:54:33.606",
+            "distinct_id": "my_id",
+            "elements_chain": "",
+            "event": "$pageview",
+            "person_created_at": "2024-11-18 14:54:33",
+            "person_mode": "full",
+            "person_properties": "{}",
+            "project_id": 1,
+            "properties": "{}",
+            "team_id": 2,
+            "timestamp": "2020-02-23 02:15:00.000",
+            "uuid": "uuid1",
+        },
+    ],
+],
]
`;

View File

@@ -120,7 +120,7 @@ describe('prepareEventStep()', () => {
    it('extracts elements_chain from properties', async () => {
        const event: PluginEvent = { ...pluginEvent, ip: null, properties: { $elements_chain: 'random string', a: 1 } }
        const preppedEvent = await prepareEventStep(runner, event)
-        const [chEvent, _] = runner.eventsProcessor.createEvent(preppedEvent, person)
+        const chEvent = runner.eventsProcessor.createEvent(preppedEvent, person)
        expect(chEvent.elements_chain).toEqual('random string')
        expect(chEvent.properties).toEqual('{"a":1}')
@@ -137,7 +137,7 @@
            },
        }
        const preppedEvent = await prepareEventStep(runner, event)
-        const [chEvent, _] = runner.eventsProcessor.createEvent(preppedEvent, person)
+        const chEvent = runner.eventsProcessor.createEvent(preppedEvent, person)
        expect(chEvent.elements_chain).toEqual('random string')
        expect(chEvent.properties).toEqual('{"a":1}')
@@ -151,7 +151,7 @@
            properties: { a: 1, $elements: [{ tag_name: 'div', nth_child: 1, nth_of_type: 2, $el_text: 'text' }] },
        }
        const preppedEvent = await prepareEventStep(runner, event)
-        const [chEvent, _] = runner.eventsProcessor.createEvent(preppedEvent, person)
+        const chEvent = runner.eventsProcessor.createEvent(preppedEvent, person)
        expect(chEvent.elements_chain).toEqual('div:nth-child="1"nth-of-type="2"text="text"')
        expect(chEvent.properties).toEqual('{"a":1}')

View File

@@ -1,9 +1,10 @@
import { PluginEvent } from '@posthog/plugin-scaffold'
import { DateTime } from 'luxon'
-import { ISOTimestamp, Person, PipelineEvent, PreIngestionEvent } from '../../../../src/types'
+import { ISOTimestamp, Person, PipelineEvent, PreIngestionEvent, RawKafkaEvent } from '../../../../src/types'
import { createEventsToDropByToken } from '../../../../src/utils/db/hub'
import { createEventStep } from '../../../../src/worker/ingestion/event-pipeline/createEventStep'
+import { emitEventStep } from '../../../../src/worker/ingestion/event-pipeline/emitEventStep'
import * as metrics from '../../../../src/worker/ingestion/event-pipeline/metrics'
import { pluginsProcessEventStep } from '../../../../src/worker/ingestion/event-pipeline/pluginsProcessEventStep'
import { populateTeamDataStep } from '../../../../src/worker/ingestion/event-pipeline/populateTeamDataStep'
@@ -18,6 +19,7 @@ jest.mock('../../../../src/worker/ingestion/event-pipeline/pluginsProcessEventSt
jest.mock('../../../../src/worker/ingestion/event-pipeline/processPersonsStep')
jest.mock('../../../../src/worker/ingestion/event-pipeline/prepareEventStep')
jest.mock('../../../../src/worker/ingestion/event-pipeline/createEventStep')
+jest.mock('../../../../src/worker/ingestion/event-pipeline/emitEventStep')
jest.mock('../../../../src/worker/ingestion/event-pipeline/runAsyncHandlersStep')
class TestEventPipelineRunner extends EventPipelineRunner {
@@ -74,6 +76,21 @@ const preIngestionEvent: PreIngestionEvent = {
    elementsList: [],
}
+const createdEvent: RawKafkaEvent = {
+    created_at: '2024-11-18 14:54:33.606',
+    distinct_id: 'my_id',
+    elements_chain: '',
+    event: '$pageview',
+    person_created_at: '2024-11-18 14:54:33',
+    person_mode: 'full',
+    person_properties: '{}',
+    project_id: 1,
+    properties: '{}',
+    team_id: 2,
+    timestamp: '2020-02-23 02:15:00.000',
+    uuid: 'uuid1',
+}
const person: Person = {
    id: 123,
    team_id: 2,
@@ -112,7 +129,8 @@ describe('EventPipelineRunner', () => {
            { person, personUpdateProperties: {}, get: () => Promise.resolve(person) } as any,
        ])
        jest.mocked(prepareEventStep).mockResolvedValue(preIngestionEvent)
-        jest.mocked(createEventStep).mockResolvedValue([null, Promise.resolve()])
+        jest.mocked(createEventStep).mockResolvedValue(createdEvent)
+        jest.mocked(emitEventStep).mockResolvedValue([Promise.resolve()])
        jest.mocked(processOnEventStep).mockResolvedValue(null)
    })
@@ -129,6 +147,7 @@
            'extractHeatmapDataStep',
            'enrichExceptionEventStep',
            'createEventStep',
+            'emitEventStep',
        ])
        expect(runner.stepsWithArgs).toMatchSnapshot()
    })
@@ -158,6 +177,7 @@
            'extractHeatmapDataStep',
            'enrichExceptionEventStep',
            'createEventStep',
+            'emitEventStep',
        ])
    })
@@ -179,11 +199,11 @@
        const result = await runner.runEventPipeline(pipelineEvent)
        expect(result.error).toBeUndefined()
-        expect(pipelineStepMsSummarySpy).toHaveBeenCalledTimes(8)
+        expect(pipelineStepMsSummarySpy).toHaveBeenCalledTimes(9)
        expect(pipelineLastStepCounterSpy).toHaveBeenCalledTimes(1)
        expect(eventProcessedAndIngestedCounterSpy).toHaveBeenCalledTimes(1)
-        expect(pipelineStepMsSummarySpy).toHaveBeenCalledWith('createEventStep')
-        expect(pipelineLastStepCounterSpy).toHaveBeenCalledWith('createEventStep')
+        expect(pipelineStepMsSummarySpy).toHaveBeenCalledWith('emitEventStep')
+        expect(pipelineLastStepCounterSpy).toHaveBeenCalledWith('emitEventStep')
        expect(pipelineStepErrorCounterSpy).not.toHaveBeenCalled()
    })
@@ -380,6 +400,7 @@
            'extractHeatmapDataStep',
            'enrichExceptionEventStep',
            'createEventStep',
+            'emitEventStep',
        ])
    })
})

View File

@@ -89,7 +89,7 @@ describe('EventsProcessor#createEvent()', () => {
    it('emits event with person columns, re-using event properties', async () => {
        const processPerson = true
-        eventsProcessor.createEvent(preIngestionEvent, person, processPerson)
+        eventsProcessor.emitEvent(eventsProcessor.createEvent(preIngestionEvent, person, processPerson))
        await eventsProcessor.kafkaProducer.flush()
@@ -147,10 +147,12 @@
        )
        const processPerson = true
-        eventsProcessor.createEvent(
-            { ...preIngestionEvent, properties: { $group_0: 'group_key' } },
-            person,
-            processPerson
+        eventsProcessor.emitEvent(
+            eventsProcessor.createEvent(
+                { ...preIngestionEvent, properties: { $group_0: 'group_key' } },
+                person,
+                processPerson
+            )
        )
        const events = await delayUntilEventIngested(() => hub.db.fetchEvents())
@@ -169,10 +171,12 @@
    it('when $process_person_profile=false, emits event with without person properties or groups', async () => {
        const processPerson = false
-        eventsProcessor.createEvent(
-            { ...preIngestionEvent, properties: { $group_0: 'group_key' } },
-            person,
-            processPerson
+        eventsProcessor.emitEvent(
+            eventsProcessor.createEvent(
+                { ...preIngestionEvent, properties: { $group_0: 'group_key' } },
+                person,
+                processPerson
+            )
        )
        await eventsProcessor.kafkaProducer.flush()
@@ -199,10 +203,12 @@
    it('force_upgrade persons are recorded as such', async () => {
        const processPerson = false
        person.force_upgrade = true
-        eventsProcessor.createEvent(
-            { ...preIngestionEvent, properties: { $group_0: 'group_key' } },
-            person,
-            processPerson
+        eventsProcessor.emitEvent(
+            eventsProcessor.createEvent(
+                { ...preIngestionEvent, properties: { $group_0: 'group_key' } },
+                person,
+                processPerson
+            )
        )
        await eventsProcessor.kafkaProducer.flush()
@@ -236,10 +242,12 @@
            uuid: uuid,
        }
        const processPerson = true
-        eventsProcessor.createEvent(
-            { ...preIngestionEvent, distinctId: 'no-such-person' },
-            nonExistingPerson,
-            processPerson
+        eventsProcessor.emitEvent(
+            eventsProcessor.createEvent(
+                { ...preIngestionEvent, distinctId: 'no-such-person' },
+                nonExistingPerson,
+                processPerson
+            )
        )
        await eventsProcessor.kafkaProducer.flush()

View File

@@ -90,15 +90,28 @@ pub async fn send_iter_to_kafka<T>(
    topic: &str,
    iter: impl IntoIterator<Item = T>,
) -> Result<(), KafkaProduceError>
where
    T: Serialize,
{
+    send_keyed_iter_to_kafka(kafka_producer, topic, |_| None, iter).await
+}
+
+pub async fn send_keyed_iter_to_kafka<T>(
+    kafka_producer: &FutureProducer<KafkaContext>,
+    topic: &str,
+    key_extractor: impl Fn(&T) -> Option<String>,
+    iter: impl IntoIterator<Item = T>,
+) -> Result<(), KafkaProduceError>
+where
+    T: Serialize,
+{
    let mut payloads = Vec::new();
    for i in iter {
+        let key = key_extractor(&i);
        let payload = serde_json::to_string(&i)
            .map_err(|e| KafkaProduceError::SerializationError { error: e })?;
-        payloads.push(payload);
+        payloads.push((key, payload));
    }
    if payloads.is_empty() {
@@ -107,12 +120,12 @@ where
    let mut delivery_futures = Vec::new();
-    for payload in payloads {
+    for (key, payload) in payloads {
        match kafka_producer.send_result(FutureRecord {
            topic,
            payload: Some(&payload),
            partition: None,
-            key: None::<&str>,
+            key: key.as_deref(),
            timestamp: None,
            headers: None,
        }) {
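Why keying matters here: Kafka derives a record's partition from its key, so keying by event uuid keeps any retries or duplicates of the same event on the same partition. A dependency-free sketch of the extractor pattern (the real helper wraps rdkafka's FutureProducer; the names below are illustrative):

    // Build (key, payload) pairs the way send_keyed_iter_to_kafka does,
    // minus the actual producer.
    fn to_records<T>(
        key_extractor: impl Fn(&T) -> Option<String>,
        serialize: impl Fn(&T) -> String,
        items: impl IntoIterator<Item = T>,
    ) -> Vec<(Option<String>, String)> {
        items
            .into_iter()
            .map(|i| (key_extractor(&i), serialize(&i)))
            .collect()
    }

    fn main() {
        // The unkeyed helper is just the keyed one with a `|_| None` extractor.
        let unkeyed = to_records(|_| None, |n: &u32| n.to_string(), [1u32, 2]);
        assert!(unkeyed[0].0.is_none());

        // Keyed, the way main.rs keys by `ev.uuid.to_string()` further down.
        let keyed = to_records(|n: &u32| Some(format!("key-{n}")), |n: &u32| n.to_string(), [1u32, 2]);
        assert_eq!(keyed[0].0.as_deref(), Some("key-1"));
    }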

View File

@@ -39,6 +39,7 @@ pub enum PersonMode
pub struct ClickHouseEvent {
    pub uuid: Uuid,
    pub team_id: i32,
+    pub project_id: i32,
    pub event: String,
    pub distinct_id: String,
    #[serde(skip_serializing_if = "Option::is_none")]

View File

@@ -31,6 +31,7 @@ pub struct AppContext {
    pub pool: PgPool,
    pub catalog: Catalog,
    pub resolver: Resolver,
+    pub config: Config,
}
impl AppContext {
@@ -103,6 +104,7 @@ impl AppContext {
            pool,
            catalog,
            resolver,
+            config: config.clone(),
        })
    }
}

View File

@@ -12,6 +12,12 @@ pub struct Config {
    #[envconfig(nested = true)]
    pub kafka: KafkaConfig,
+    #[envconfig(default = "clickhouse_events_json")]
+    pub events_topic: String,
+
+    #[envconfig(default = "clickhouse_error_tracking_issue_fingerprint")]
+    pub issue_overrides_topic: String,
+
    #[envconfig(nested = true)]
    pub consumer: ConsumerConfig,

View File

@@ -179,7 +179,6 @@ where
        conn.rollback().await?;
    } else {
        conn.commit().await?;
-        // TODO - emit new issue and override to kafka
    }
    Ok(fingerprinted.to_output(issue_override.issue_id))

View File

@@ -22,13 +22,13 @@ pub mod types;
pub async fn handle_event(
    context: Arc<AppContext>,
    mut event: ClickHouseEvent,
-) -> Result<Option<ClickHouseEvent>, UnhandledError> {
+) -> Result<ClickHouseEvent, UnhandledError> {
    let mut props = match get_props(&event) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to get props: {}", e);
            add_error_to_event(&mut event, e)?;
-            return Ok(Some(event));
+            return Ok(event);
        }
    };
@@ -37,7 +37,7 @@ pub async fn handle_event(
    if exceptions.is_empty() {
        props.add_error_message("No exceptions found on exception event");
        event.properties = Some(serde_json::to_string(&props).unwrap());
-        return Ok(Some(event));
+        return Ok(event);
    }
    let mut results = Vec::new();
@@ -56,7 +56,7 @@ pub async fn handle_event(
    event.properties = Some(serde_json::to_string(&output).unwrap());
-    Ok(Some(event))
+    Ok(event)
}
fn get_props(event: &ClickHouseEvent) -> Result<RawErrProps, EventError> {
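Dropping the Option from handle_event's return encodes the new policy: a failure to process no longer means the event is dropped; the error is written onto the event and the event still flows downstream. A reduced sketch of that shape (the field and error message below are illustrative, not the crate's real ones):

    struct EventSketch {
        properties: Option<String>,
    }

    // Annotate-and-forward: errors become event properties, never a lost event.
    fn handle(mut event: EventSketch, parse_ok: bool) -> Result<EventSketch, String> {
        if !parse_ok {
            event.properties = Some(r#"{"errors":["failed to get props"]}"#.to_string());
            return Ok(event);
        }
        Ok(event)
    }

    fn main() {
        let event = handle(EventSketch { properties: None }, false).unwrap();
        assert!(event.properties.is_some()); // still emitted, with the error attached
    }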

View File

@@ -1,7 +1,7 @@
use std::{future::ready, sync::Arc};
use axum::{routing::get, Router};
-use common_kafka::kafka_consumer::RecvErr;
+use common_kafka::{kafka_consumer::RecvErr, kafka_producer::send_keyed_iter_to_kafka};
use common_metrics::{serve, setup_metrics_routes};
use common_types::ClickHouseEvent;
use cymbal::{
@@ -78,11 +78,8 @@ async fn main() {
        };
        metrics::counter!(EVENT_RECEIVED).increment(1);
-        let _processed_event = match handle_event(context.clone(), event).await {
-            Ok(r) => {
-                offset.store().unwrap();
-                r
-            }
+        let event = match handle_event(context.clone(), event).await {
+            Ok(e) => e,
            Err(e) => {
                error!("Error handling event: {:?}", e);
                // If we get an unhandled error, it means we have some logical error in the code, or a
@@ -91,7 +88,16 @@ async fn main() {
            }
        };
-        // TODO - emit the event to the next Kafka topic
+        send_keyed_iter_to_kafka(
+            &context.kafka_producer,
+            &context.config.events_topic,
+            |ev| Some(ev.uuid.to_string()),
+            &[event],
+        )
+        .await
+        .expect("Failed to send event to Kafka");
+        offset.store().unwrap();
        metrics::counter!(STACK_PROCESSED).increment(1);
        whole_loop.label("finished", "true").fin();
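Ordering note on this hunk: offset.store() used to run as soon as handle_event returned Ok; it now runs only after the produce to the events topic succeeds. That ordering trades possible duplicates for no lost events, i.e. at-least-once delivery. A tiny sketch of the invariant:

    // Emit first, commit second: a crash between the two steps re-delivers
    // the event on restart instead of losing it.
    fn process_one(emit: impl FnOnce() -> Result<(), String>, store_offset: impl FnOnce()) -> Result<(), String> {
        emit()?; // 1. make the event durable downstream
        store_offset(); // 2. only then mark it consumed
        Ok(())
    }

    fn main() {
        let mut stored = false;
        process_one(|| Ok(()), || stored = true).unwrap();
        assert!(stored);
    }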

File diff suppressed because one or more lines are too long

View File

@@ -4,6 +4,7 @@
"properties": "{\"$os\":\"Mac OS X\",\"$os_version\":\"10.15.7\",\"$browser\":\"Chrome\",\"$device_type\":\"Desktop\",\"$current_url\":\"https://eu.posthog.com/project/12557/feature_flags/31624\",\"$host\":\"eu.posthog.com\",\"$pathname\":\"/project/12557/feature_flags/31624\",\"$raw_user_agent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36\",\"$browser_version\":129,\"$browser_language\":\"en-GB\",\"$screen_height\":1080,\"$screen_width\":1920,\"$viewport_height\":934,\"$viewport_width\":1920,\"$lib\":\"web\",\"$lib_version\":\"1.170.1\",\"$insert_id\":\"xjfjg606eo2x7n4x\",\"$time\":1729088278.943,\"distinct_id\":\"pQC9X9Fe7BPzJXVxpY0fx37UwFOCd1vXHzh8rjUPv1G\",\"$device_id\":\"018ccedb-d598-79bb-94e0-4751a3b956f4\",\"$console_log_recording_enabled_server_side\":true,\"$autocapture_disabled_server_side\":false,\"$web_vitals_enabled_server_side\":true,\"$exception_capture_enabled_server_side\":true,\"$exception_capture_endpoint\":\"/e/\",\"realm\":\"cloud\",\"email_service_available\":true,\"slack_service_available\":true,\"commit_sha\":\"bafa32953e\",\"$user_id\":\"pQC9X9Fe7BPzJXVxpY0fx37UwFOCd1vXHzh8rjUPv1G\",\"is_demo_project\":false,\"$groups\":{\"project\":\"018c1057-288d-0000-93bb-3bd44c845f22\",\"organization\":\"018afaa6-8b2e-0000-2311-d58d2df832ad\",\"customer\":\"cus_P5B9QmoUKLAUlx\",\"instance\":\"https://eu.posthog.com\"},\"has_billing_plan\":true,\"$referrer\":\"$direct\",\"$referring_domain\":\"$direct\",\"$session_recording_start_reason\":\"session_id_changed\",\"$exception_list\":[{\"type\":\"UnhandledRejection\",\"value\":\"Unexpected usage\",\"stacktrace\":{\"type\": \"raw\", \"frames\":[{\"filename\":\"https://app-static.eu.posthog.com/static/chunk-PGUQKT6S.js\",\"function\":\"?\",\"in_app\":true,\"lineno\":64,\"colno\":25112},{\"filename\":\"https://app-static.eu.posthog.com/static/chunk-PGUQKT6S.js\",\"function\":\"n.loadForeignModule\",\"in_app\":true,\"lineno\":64,\"colno\":15003}]},\"mechanism\":{\"handled\":false,\"synthetic\":false}}],\"$exception_level\":\"error\",\"$exception_personURL\":\"https://us.posthog.com/project/sTMFPsFhdP1Ssg/person/pQC9X9Fe7BPzJXVxpY0fx37UwFOCd1vXHzh8rjUPv1G\",\"token\":\"sTMFPsFhdP1Ssg\",\"$session_id\":\"019295b0-db2b-7e02-8010-0a1c4db680df\",\"$window_id\":\"019295b0-db2b-7e02-8010-0a1dee88e5f5\",\"$lib_custom_api_host\":\"https://internal-t.posthog.com\",\"$is_identified\":true,\"$lib_rate_limit_remaining_tokens\":97.28999999999999,\"$sent_at\":\"2024-10-16T14:17:59.543000+00:00\",\"$geoip_city_name\":\"Lisbon\",\"$geoip_city_confidence\":null,\"$geoip_country_name\":\"Portugal\",\"$geoip_country_code\":\"PT\",\"$geoip_country_confidence\":null,\"$geoip_continent_name\":\"Europe\",\"$geoip_continent_code\":\"EU\",\"$geoip_postal_code\":\"1269-001\",\"$geoip_postal_code_confidence\":null,\"$geoip_latitude\":38.731,\"$geoip_longitude\":-9.1373,\"$geoip_accuracy_radius\":100,\"$geoip_time_zone\":\"Europe/Lisbon\",\"$geoip_subdivision_1_code\":\"11\",\"$geoip_subdivision_1_name\":\"Lisbon\",\"$geoip_subdivision_1_confidence\":null,\"$lib_version__major\":1,\"$lib_version__minor\":170,\"$lib_version__patch\":1,\"$group_2\":\"018c1057-288d-0000-93bb-3bd44c845f22\",\"$group_0\":\"018afaa6-8b2e-0000-2311-d58d2df832ad\",\"$group_3\":\"cus_P5B9QmoUKLAUlx\",\"$group_1\":\"https://eu.posthog.com\"}",
"timestamp": "2024-10-16T07:17:59.088000-07:00",
"team_id": 2,
"project_id": 2,
"distinct_id": "pQC9X9Fe7BPzJXVxpY0fx37UwFOCd1vXHzh8rjUPv1G",
"elements_chain": "",
"created_at": "2024-10-16T07:18:00.100000-07:00",