Mirror of https://github.com/PostHog/posthog.git, synced 2024-11-25 11:17:50 +01:00
7ba6fa7148

chore(plugin-server): remove piscina workers

Using Piscina workers introduces complexity that we would rather avoid. Piscina does offer the ability to scale work across multiple CPUs, but we can achieve the same by starting multiple processes instead. It may also provide some protection against deadlocking the worker process, which I believe Piscina handles by killing and respawning worker processes, but our K8s liveness checks handle that case as well. This should simplify 1. prom metrics exporting, and 2. using node-rdkafka.

* remove piscina from package.json
* use createWorker
* wip
* wip
* wip
* wip
* fix export test
* wip
* wip
* fix server stop tests
* wip
* mock process.exit everywhere
* fix health server tests
* Remove collectMetrics
* wip
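For illustration, here is a minimal sketch of the multi-process approach the commit message describes, using Node's built-in cluster module. This is a hypothetical sketch, not the actual PostHog plugin-server code:

import cluster from 'node:cluster'
import os from 'node:os'

if (cluster.isPrimary) {
    // One worker process per CPU replaces Piscina's worker threads.
    for (let i = 0; i < os.cpus().length; i++) {
        cluster.fork()
    }
    // Respawn on exit; in production, K8s liveness checks would also
    // restart a deadlocked or crashed process.
    cluster.on('exit', (worker, code) => {
        console.log(`worker ${worker.process.pid} exited (code ${code}), respawning`)
        cluster.fork()
    })
} else {
    // Each worker runs the full server loop in an ordinary process, so
    // prom metrics exporting and node-rdkafka need no thread-awareness.
    console.log(`worker ${process.pid} started`)
}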
149 lines · 5.2 KiB · TypeScript
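// Integration test for the "Export historical events V2" plugin job: it
// ingests events into ClickHouse, triggers the export job through the
// piscina worker pool, and asserts that only events inside the requested
// date range are exported.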
import { PluginEvent } from '@posthog/plugin-scaffold'

import { defaultConfig } from '../src/config/config'
import { startPluginsServer } from '../src/main/pluginsServer'
import { EnqueuedPluginJob, Hub, LogLevel, PluginsServerConfig } from '../src/types'
import { UUIDT } from '../src/utils/utils'
import { EventPipelineRunner } from '../src/worker/ingestion/event-pipeline/runner'
import Piscina, { makePiscina } from '../src/worker/piscina'
import { writeToFile } from '../src/worker/vm/extensions/test-utils'
import { delayUntilEventIngested, resetTestDatabaseClickhouse } from './helpers/clickhouse'
import { resetGraphileWorkerSchema } from './helpers/graphile-worker'
import { resetKafka } from './helpers/kafka'
import { pluginConfig39 } from './helpers/plugins'
import { resetTestDatabase } from './helpers/sql'

jest.mock('../src/utils/status')
jest.setTimeout(60000) // 60 sec timeout

const { console: testConsole } = writeToFile

const extraServerConfig: Partial<PluginsServerConfig> = {
    WORKER_CONCURRENCY: 2,
    LOG_LEVEL: LogLevel.Log,
    CONVERSION_BUFFER_ENABLED: false,
    HISTORICAL_EXPORTS_ENABLED: true,
    HISTORICAL_EXPORTS_FETCH_WINDOW_MULTIPLIER: 2,
    HISTORICAL_EXPORTS_INITIAL_FETCH_TIME_WINDOW: 8 * 60 * 60 * 1000, // 8 hours
}

// Test plugin: logs every event flagged as a historical export to the
// in-memory test console, so the test can assert on what was exported.
const indexJs = `
import { console as testConsole } from 'test-utils/write-to-file'

export async function exportEvents(events) {
    for (const event of events) {
        if (event.properties && event.properties['$$is_historical_export_event']) {
            testConsole.log('exported historical event', event)
        }
    }
}
`

describe('Historical Export (v2)', () => {
    let hub: Hub
    let stopServer: () => Promise<void>
    let piscina: Piscina

    beforeAll(async () => {
        await resetKafka(extraServerConfig)
    })

    beforeEach(async () => {
        console.info = jest.fn()

        testConsole.reset()
        // Reset all backing stores concurrently (no inner awaits, so the
        // resets actually run in parallel).
        await Promise.all([
            resetTestDatabase(indexJs),
            resetTestDatabaseClickhouse(extraServerConfig),
            resetGraphileWorkerSchema(defaultConfig),
        ])

        const startResponse = await startPluginsServer(extraServerConfig, makePiscina, undefined)
        hub = startResponse.hub!
        piscina = startResponse.piscina!
        stopServer = startResponse.stop!
    })

    afterEach(async () => {
        await stopServer()
    })

    afterAll(async () => {
        await resetGraphileWorkerSchema(defaultConfig)
    })

    // Build a minimal event at the given timestamp and run it through the
    // full ingestion pipeline.
    async function ingestEvent(timestamp: string, overrides: Partial<PluginEvent> = {}) {
        const pluginEvent: PluginEvent = {
            event: 'some_event',
            distinct_id: 'some_user',
            site_url: '',
            team_id: 2,
            timestamp: timestamp,
            now: timestamp,
            ip: '',
            uuid: new UUIDT().toString(),
            ...overrides,
        } as any as PluginEvent

        const runner = new EventPipelineRunner(hub, pluginEvent)
        await runner.runEventPipeline(pluginEvent)
    }

    it('exports a batch of events in a time range', async () => {
        // Ingest events both inside and outside the export date range.
        await Promise.all([
            ingestEvent('2021-07-28T00:00:00.000Z'),
            ingestEvent('2021-08-01T00:00:00.000Z', { properties: { foo: 'bar' } }),
            ingestEvent('2021-08-02T02:00:00.000Z'),
            ingestEvent('2021-08-03T09:00:00.000Z'),
            ingestEvent('2021-08-03T15:00:00.000Z'),
            ingestEvent('2021-08-04T23:00:00.000Z'),
            ingestEvent('2021-08-04T23:59:59.000Z'),
            ingestEvent('2021-08-05T00:00:00.000Z'),
            ingestEvent('2021-08-05T01:00:00.000Z'),
        ])

        // Wait until all 9 events have landed in ClickHouse.
        await hub.kafkaProducer.flush()
        await delayUntilEventIngested(() => hub.db.fetchEvents(), 9)

        // Kick off the historical export job on a worker.
        await piscina.run({
            task: 'runPluginJob',
            args: {
                job: {
                    type: 'Export historical events V2',
                    payload: {
                        dateRange: ['2021-08-01', '2021-08-04'],
                        parallelism: 5,
                        $operation: 'start',
                    },
                    pluginConfigId: pluginConfig39.id,
                    pluginConfigTeam: pluginConfig39.team_id,
                    timestamp: 0,
                } as EnqueuedPluginJob,
            },
        })

        // Wait until the test plugin has logged all 6 in-range events.
        await delayUntilEventIngested(() => Promise.resolve(testConsole.read()), 6, 1000, 50)

        const exportedEventLogs = testConsole.read() as Array<[string, any]>
        exportedEventLogs.sort((e1, e2) => e1[1].timestamp.localeCompare(e2[1].timestamp))

        // Only events from 2021-08-01 00:00 through the end of 2021-08-04
        // are exported; the two events outside the range are skipped.
        const timestamps = exportedEventLogs.map(([, event]) => event.timestamp)
        expect(timestamps).toEqual([
            '2021-08-01T00:00:00.000Z',
            '2021-08-02T02:00:00.000Z',
            '2021-08-03T09:00:00.000Z',
            '2021-08-03T15:00:00.000Z',
            '2021-08-04T23:00:00.000Z',
            '2021-08-04T23:59:59.000Z',
        ])
        expect(exportedEventLogs[0][1].properties).toEqual(
            expect.objectContaining({
                foo: 'bar',
                $$historical_export_source_db: 'clickhouse',
                $$is_historical_export_event: true,
                $$historical_export_timestamp: expect.any(String),
            })
        )
    })
})