diff --git a/frontend/src/scenes/pipeline/hogfunctions/hogFunctionConfigurationLogic.tsx b/frontend/src/scenes/pipeline/hogfunctions/hogFunctionConfigurationLogic.tsx index 16f244ac67c..0865cf18d45 100644 --- a/frontend/src/scenes/pipeline/hogfunctions/hogFunctionConfigurationLogic.tsx +++ b/frontend/src/scenes/pipeline/hogfunctions/hogFunctionConfigurationLogic.tsx @@ -18,7 +18,7 @@ import { userLogic } from 'scenes/userLogic' import { groupsModel } from '~/models/groupsModel' import { performQuery } from '~/queries/query' import { EventsNode, EventsQuery, NodeKind, TrendsQuery } from '~/queries/schema' -import { hogql } from '~/queries/utils' +import { escapePropertyAsHogQlIdentifier, hogql } from '~/queries/utils' import { AnyPropertyFilter, AvailableFeature, @@ -642,7 +642,7 @@ export const hogFunctionConfigurationLogic = kea { - const name = groupType.group_type + const name = escapePropertyAsHogQlIdentifier(groupType.group_type) query.select.push( `tuple(${name}.created_at, ${name}.index, ${name}.key, ${name}.properties, ${name}.updated_at)` ) diff --git a/hogvm/typescript/package.json b/hogvm/typescript/package.json index e9118c46261..9677f021d1a 100644 --- a/hogvm/typescript/package.json +++ b/hogvm/typescript/package.json @@ -1,6 +1,6 @@ { "name": "@posthog/hogvm", - "version": "1.0.50", + "version": "1.0.52", "description": "PostHog Hog Virtual Machine", "types": "dist/index.d.ts", "source": "src/index.ts", diff --git a/hogvm/typescript/src/__tests__/execute.test.ts b/hogvm/typescript/src/__tests__/execute.test.ts index ae32c39a89d..430198e726b 100644 --- a/hogvm/typescript/src/__tests__/execute.test.ts +++ b/hogvm/typescript/src/__tests__/execute.test.ts @@ -636,6 +636,7 @@ describe('hogvm execute', () => { ops: 3, stack: [], upvalues: [], + telemetry: undefined, throwStack: [], syncDuration: expect.any(Number), }, @@ -677,10 +678,15 @@ describe('hogvm execute', () => { ).toEqual(map({ key: map({ otherKey: 'value' }) })) // // return {key: 'value'}; - expect( - () => exec(['_h', op.STRING, 'key', op.GET_GLOBAL, 1, op.STRING, 'value', op.DICT, 1, op.RETURN]).result + expect(() => + execSync(['_h', op.STRING, 'key', op.GET_GLOBAL, 1, op.STRING, 'value', op.DICT, 1, op.RETURN]) ).toThrow('Global variable not found: key') + // // return {key: 'value'}; + expect( + exec(['_h', op.STRING, 'key', op.GET_GLOBAL, 1, op.STRING, 'value', op.DICT, 1, op.RETURN]).error.message + ).toEqual('Global variable not found: key') + // var key := 3; return {key: 'value'}; expect( exec(['_h', op.INTEGER, 3, op.GET_LOCAL, 0, op.STRING, 'value', op.DICT, 1, op.RETURN, op.POP]).result @@ -2105,7 +2111,8 @@ describe('hogvm execute', () => { finished: true, state: { bytecode: [], - stack: [], + stack: expect.any(Array), + telemetry: undefined, upvalues: [], callStack: [], throwStack: [], @@ -2443,7 +2450,7 @@ describe('hogvm execute', () => { finished: true, state: { bytecode: [], - stack: [], + stack: expect.any(Array), upvalues: [], callStack: [], throwStack: [], @@ -2455,4 +2462,59 @@ describe('hogvm execute', () => { }, }) }) + + test('logs telemetry', () => { + const bytecode = ['_h', op.INTEGER, 1, op.INTEGER, 2, op.PLUS, op.RETURN] + const result = exec(bytecode, { telemetry: true }) + expect(result).toEqual({ + result: 3, + finished: true, + state: { + bytecode: [], + stack: [], + upvalues: [], + callStack: [], + throwStack: [], + declaredFunctions: {}, + ops: 4, + asyncSteps: 0, + syncDuration: expect.any(Number), + maxMemUsed: 16, + telemetry: [ + [expect.any(Number), 'root', 0, 'START', ''], + [expect.any(Number), '', 1, '33/INTEGER', '1'], + [expect.any(Number), '', 3, '33/INTEGER', '2'], + [expect.any(Number), '', 5, '6/PLUS', ''], + [expect.any(Number), '', 6, '38/RETURN', ''], + ], + }, + }) + }) + + test('logs telemetry for calls', () => { + const bytecode = ['_h', op.FALSE, op.TRUE, op.CALL_GLOBAL, 'concat', 2] + const result = exec(bytecode, { telemetry: true }) + expect(result).toEqual({ + result: 'truefalse', + finished: true, + state: { + bytecode: [], + stack: [], + upvalues: [], + callStack: [], + throwStack: [], + declaredFunctions: {}, + ops: 3, + asyncSteps: 0, + syncDuration: expect.any(Number), + maxMemUsed: 17, + telemetry: [ + [expect.any(Number), 'root', 0, 'START', ''], + [expect.any(Number), '', 1, '30/FALSE', ''], + [expect.any(Number), '', 2, '29/TRUE', ''], + [expect.any(Number), '', 3, '2/CALL_GLOBAL', 'concat'], + ], + }, + }) + }) }) diff --git a/hogvm/typescript/src/__tests__/utils.test.ts b/hogvm/typescript/src/__tests__/utils.test.ts index b02f6dd16fd..62ca155dbea 100644 --- a/hogvm/typescript/src/__tests__/utils.test.ts +++ b/hogvm/typescript/src/__tests__/utils.test.ts @@ -1,4 +1,4 @@ -import { calculateCost } from '../utils' +import { calculateCost, convertJSToHog, convertHogToJS } from '../utils' const PTR_COST = 8 @@ -29,4 +29,26 @@ describe('hogvm utils', () => { obj['key'] = obj expect(calculateCost(obj)).toBe(PTR_COST * 3 + 3) }) + + test('convertJSToHog preserves circular references', () => { + const obj: any = { a: null, b: true } + obj.a = obj + const hog = convertJSToHog(obj) + expect(hog.get('a') === hog).toBe(true) + }) + + test('convertHogToJs preserves circular references', () => { + const obj: any = { a: null, b: true } + obj.a = obj + const js = convertHogToJS(obj) + expect(js.a === js).toBe(true) + + const map: any = new Map([ + ['a', null], + ['b', true], + ]) + map.set('a', map) + const js2 = convertHogToJS(map) + expect(js2.a === js2).toBe(true) + }) }) diff --git a/hogvm/typescript/src/execute.ts b/hogvm/typescript/src/execute.ts index 6e2c71b7a02..7433c189a84 100644 --- a/hogvm/typescript/src/execute.ts +++ b/hogvm/typescript/src/execute.ts @@ -1,9 +1,9 @@ import { DEFAULT_MAX_ASYNC_STEPS, DEFAULT_MAX_MEMORY, DEFAULT_TIMEOUT_MS, MAX_FUNCTION_ARGS_LENGTH } from './constants' import { isHogCallable, isHogClosure, isHogError, isHogUpValue, newHogCallable, newHogClosure } from './objects' -import { Operation } from './operation' +import { Operation, operations } from './operation' import { BYTECODE_STL } from './stl/bytecode' import { ASYNC_STL, STL } from './stl/stl' -import { CallFrame, ExecOptions, ExecResult, HogUpValue, ThrowFrame, VMState } from './types' +import { CallFrame, ExecOptions, ExecResult, HogUpValue, Telemetry, ThrowFrame, VMState } from './types' import { calculateCost, convertHogToJS, @@ -16,21 +16,27 @@ import { unifyComparisonTypes, } from './utils' -export function execSync(bytecode: any[], options?: ExecOptions): any { +export function execSync(bytecode: any[] | VMState, options?: ExecOptions): any { const response = exec(bytecode, options) if (response.finished) { return response.result } + if (response.error) { + throw response.error + } throw new HogVMException('Unexpected async function call: ' + response.asyncFunctionName) } -export async function execAsync(bytecode: any[], options?: ExecOptions): Promise { +export async function execAsync(bytecode: any[] | VMState, options?: ExecOptions): Promise { let vmState: VMState | undefined = undefined while (true) { const response = exec(vmState ?? bytecode, options) if (response.finished) { return response.result } + if (response.error) { + throw response.error + } if (response.state && response.asyncFunctionName && response.asyncFunctionArgs) { vmState = response.state if (options?.asyncFunctions && response.asyncFunctionName in options.asyncFunctions) { @@ -53,6 +59,7 @@ export async function execAsync(bytecode: any[], options?: ExecOptions): Promise } export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult { + const startTime = Date.now() let vmState: VMState | undefined = undefined let bytecode: any[] if (!Array.isArray(code)) { @@ -66,7 +73,6 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult { } const version = bytecode[0] === '_H' ? bytecode[1] ?? 0 : 0 - const startTime = Date.now() let temp: any let temp2: any let tempArray: any[] @@ -81,7 +87,7 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult { for (const upvalue of sortedUpValues) { upvaluesById[upvalue.id] = upvalue } - const stack: any[] = vmState ? vmState.stack.map(convertJSToHog) : [] + const stack: any[] = vmState ? vmState.stack.map((v) => convertJSToHog(v)) : [] const memStack: number[] = stack.map((s) => calculateCost(s)) const callStack: CallFrame[] = vmState ? vmState.callStack.map((v) => ({ ...v, closure: convertJSToHog(v.closure) })) @@ -114,6 +120,11 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult { } let frame: CallFrame = callStack[callStack.length - 1] let chunkBytecode: any[] = bytecode + let lastChunk = frame.chunk + let lastTime = startTime + + const telemetry: Telemetry[] = [[startTime, lastChunk, 0, 'START', '']] + const setChunkBytecode = (): void => { if (!frame.chunk || frame.chunk === 'root') { chunkBytecode = bytecode @@ -126,7 +137,8 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult { function popStack(): any { if (stack.length === 0) { - throw new HogVMException('Invalid HogQL bytecode, stack is empty') + logTelemetry() + throw new HogVMException('Invalid HogQL bytecode, stack is empty, can not pop') } memUsed -= memStack.pop() ?? 0 return stack.pop() @@ -146,6 +158,7 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult { memUsed -= memStack.splice(start, deleteCount).reduce((acc, val) => acc + val, 0) return stack.splice(start, deleteCount) } + function stackKeepFirstElements(count: number): any[] { if (count < 0 || stack.length < count) { throw new HogVMException('Stack underflow') @@ -177,20 +190,26 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult { throw new HogVMException(`Execution timed out after ${timeout / 1000} seconds. Performed ${ops} ops.`) } } - function getFinishedState(): VMState { + + function getVMState(): VMState { return { - bytecode: [], - stack: [], - upvalues: [], - callStack: [], - throwStack: [], - declaredFunctions: {}, + bytecode, + stack: stack.map((v) => convertHogToJS(v)), + upvalues: sortedUpValues.map((v) => ({ ...v, value: convertHogToJS(v.value) })), + callStack: callStack.map((v) => ({ + ...v, + closure: convertHogToJS(v.closure), + })), + throwStack, + declaredFunctions, ops, asyncSteps, syncDuration: syncDuration + (Date.now() - startTime), maxMemUsed, + telemetry: options?.telemetry ? telemetry : undefined, } } + function captureUpValue(index: number): HogUpValue { for (let i = sortedUpValues.length - 1; i >= 0; i--) { if (sortedUpValues[i].location < index) { @@ -220,467 +239,424 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult { return options.external.regex.match } - while (frame.ip < chunkBytecode.length) { - ops += 1 - if ((ops & 127) === 0) { - checkTimeout() + const logTelemetry = (): void => { + const op = chunkBytecode[frame.ip] + const newTime = new Date().getTime() + let debug = '' + if (op === Operation.CALL_LOCAL || op === Operation.GET_PROPERTY || op === Operation.GET_PROPERTY_NULLISH) { + debug = String(stack[stack.length - 1]) + } else if ( + op === Operation.GET_GLOBAL || + op === Operation.CALL_GLOBAL || + op === Operation.STRING || + op === Operation.INTEGER || + op === Operation.FLOAT + ) { + debug = String(chunkBytecode[frame.ip + 1]) } - switch (chunkBytecode[frame.ip]) { - case null: - break - case Operation.STRING: - pushStack(next()) - break - case Operation.FLOAT: - pushStack(next()) - break - case Operation.INTEGER: - pushStack(next()) - break - case Operation.TRUE: - pushStack(true) - break - case Operation.FALSE: - pushStack(false) - break - case Operation.NULL: - pushStack(null) - break - case Operation.NOT: - pushStack(!popStack()) - break - case Operation.AND: - temp = next() - temp2 = true - for (let i = 0; i < temp; i++) { - temp2 = !!popStack() && temp2 - } - pushStack(temp2) - break - case Operation.OR: - temp = next() - temp2 = false - for (let i = 0; i < temp; i++) { - temp2 = !!popStack() || temp2 - } - pushStack(temp2) - break - case Operation.PLUS: - pushStack(Number(popStack()) + Number(popStack())) - break - case Operation.MINUS: - pushStack(Number(popStack()) - Number(popStack())) - break - case Operation.DIVIDE: - pushStack(Number(popStack()) / Number(popStack())) - break - case Operation.MULTIPLY: - pushStack(Number(popStack()) * Number(popStack())) - break - case Operation.MOD: - pushStack(Number(popStack()) % Number(popStack())) - break - case Operation.EQ: - ;[temp, temp2] = unifyComparisonTypes(popStack(), popStack()) - pushStack(temp === temp2) - break - case Operation.NOT_EQ: - ;[temp, temp2] = unifyComparisonTypes(popStack(), popStack()) - pushStack(temp !== temp2) - break - case Operation.GT: - ;[temp, temp2] = unifyComparisonTypes(popStack(), popStack()) - pushStack(temp > temp2) - break - case Operation.GT_EQ: - ;[temp, temp2] = unifyComparisonTypes(popStack(), popStack()) - pushStack(temp >= temp2) - break - case Operation.LT: - ;[temp, temp2] = unifyComparisonTypes(popStack(), popStack()) - pushStack(temp < temp2) - break - case Operation.LT_EQ: - ;[temp, temp2] = unifyComparisonTypes(popStack(), popStack()) - pushStack(temp <= temp2) - break - case Operation.LIKE: - pushStack(like(popStack(), popStack())) - break - case Operation.ILIKE: - pushStack(like(popStack(), popStack(), true)) - break - case Operation.NOT_LIKE: - pushStack(!like(popStack(), popStack())) - break - case Operation.NOT_ILIKE: - pushStack(!like(popStack(), popStack(), true)) - break - case Operation.IN: - temp = popStack() - pushStack(popStack().includes(temp)) - break - case Operation.NOT_IN: - temp = popStack() - pushStack(!popStack().includes(temp)) - break - case Operation.REGEX: - temp = popStack() - pushStack(regexMatch()(popStack(), temp)) - break - case Operation.NOT_REGEX: - temp = popStack() - pushStack(!regexMatch()(popStack(), temp)) - break - case Operation.IREGEX: - temp = popStack() - pushStack(regexMatch()('(?i)' + popStack(), temp)) - break - case Operation.NOT_IREGEX: - temp = popStack() - pushStack(!regexMatch()('(?i)' + popStack(), temp)) - break - case Operation.GET_GLOBAL: { - const count = next() - const chain = [] - for (let i = 0; i < count; i++) { - chain.push(popStack()) - } - if (options?.globals && chain[0] in options.globals && Object.hasOwn(options.globals, chain[0])) { - pushStack(convertJSToHog(getNestedValue(options.globals, chain, true))) - } else if ( - options?.asyncFunctions && - chain.length == 1 && - Object.hasOwn(options.asyncFunctions, chain[0]) && - options.asyncFunctions[chain[0]] - ) { - pushStack( - newHogClosure( - newHogCallable('async', { - name: chain[0], - argCount: 0, // TODO - upvalueCount: 0, - ip: -1, - chunk: 'async', - }) - ) - ) - } else if (chain.length == 1 && chain[0] in ASYNC_STL && Object.hasOwn(ASYNC_STL, chain[0])) { - pushStack( - newHogClosure( - newHogCallable('async', { - name: chain[0], - argCount: ASYNC_STL[chain[0]].maxArgs ?? 0, - upvalueCount: 0, - ip: -1, - chunk: 'async', - }) - ) - ) - } else if (chain.length == 1 && chain[0] in STL && Object.hasOwn(STL, chain[0])) { - pushStack( - newHogClosure( - newHogCallable('stl', { - name: chain[0], - argCount: STL[chain[0]].maxArgs ?? 0, - upvalueCount: 0, - ip: -1, - chunk: 'stl', - }) - ) - ) - } else if (chain.length == 1 && chain[0] in BYTECODE_STL && Object.hasOwn(BYTECODE_STL, chain[0])) { - pushStack( - newHogClosure( - newHogCallable('stl', { - name: chain[0], - argCount: BYTECODE_STL[chain[0]][0].length, - upvalueCount: 0, - ip: 0, - chunk: `stl/${chain[0]}`, - }) - ) - ) - } else { - throw new HogVMException(`Global variable not found: ${chain.join('.')}`) - } - break - } - case Operation.POP: - popStack() - break - case Operation.CLOSE_UPVALUE: - stackKeepFirstElements(stack.length - 1) - break - case Operation.RETURN: { - const result = popStack() - const lastCallFrame = callStack.pop() - if (callStack.length === 0 || !lastCallFrame) { - return { result, finished: true, state: getFinishedState() } satisfies ExecResult - } - const stackStart = lastCallFrame.stackStart - stackKeepFirstElements(stackStart) - pushStack(result) - frame = callStack[callStack.length - 1] - setChunkBytecode() - continue // resume the loop without incrementing frame.ip - } - case Operation.GET_LOCAL: - temp = callStack.length > 0 ? callStack[callStack.length - 1].stackStart : 0 - pushStack(stack[next() + temp]) - break - case Operation.SET_LOCAL: - temp = (callStack.length > 0 ? callStack[callStack.length - 1].stackStart : 0) + next() - stack[temp] = popStack() - temp2 = memStack[temp] - memStack[temp] = calculateCost(stack[temp]) - memUsed += memStack[temp] - temp2 - maxMemUsed = Math.max(maxMemUsed, memUsed) - break - case Operation.GET_PROPERTY: - temp = popStack() // property - pushStack(getNestedValue(popStack(), [temp])) - break - case Operation.GET_PROPERTY_NULLISH: - temp = popStack() // property - pushStack(getNestedValue(popStack(), [temp], true)) - break - case Operation.SET_PROPERTY: - temp = popStack() // value - temp2 = popStack() // field - setNestedValue(popStack(), [temp2], temp) - break - case Operation.DICT: - temp = next() * 2 // number of elements to remove from the stack - tempArray = spliceStack2(stack.length - temp, temp) - tempMap = new Map() - for (let i = 0; i < tempArray.length; i += 2) { - tempMap.set(tempArray[i], tempArray[i + 1]) - } - pushStack(tempMap) - break - case Operation.ARRAY: - temp = next() - tempArray = spliceStack2(stack.length - temp, temp) - pushStack(tempArray) - break - case Operation.TUPLE: - temp = next() - tempArray = spliceStack2(stack.length - temp, temp) - ;(tempArray as any).__isHogTuple = true - pushStack(tempArray) - break - case Operation.JUMP: - temp = next() - frame.ip += temp - break - case Operation.JUMP_IF_FALSE: - temp = next() - if (!popStack()) { - frame.ip += temp - } - break - case Operation.JUMP_IF_STACK_NOT_NULL: - temp = next() - if (stack.length > 0 && stack[stack.length - 1] !== null) { - frame.ip += temp - } - break - case Operation.DECLARE_FN: { - // DEPRECATED - const name = next() - const argCount = next() - const bodyLength = next() - declaredFunctions[name] = [frame.ip + 1, argCount] - frame.ip += bodyLength - break - } - case Operation.CALLABLE: { - const name = next() - const argCount = next() - const upvalueCount = next() - const bodyLength = next() - const callable = newHogCallable('local', { - name, - argCount, - upvalueCount, - ip: frame.ip + 1, - chunk: frame.chunk, - }) - pushStack(callable) - frame.ip += bodyLength - break - } - case Operation.CLOSURE: { - const callable = popStack() - if (!isHogCallable(callable)) { - throw new HogVMException(`Invalid callable: ${JSON.stringify(callable)}`) - } - const upvalueCount = next() - const closureUpValues: number[] = [] - if (upvalueCount !== callable.upvalueCount) { - throw new HogVMException( - `Invalid upvalue count. Expected ${callable.upvalueCount}, got ${upvalueCount}` - ) - } - const stackStart = frame.stackStart - for (let i = 0; i < callable.upvalueCount; i++) { - const [isLocal, index] = [next(), next()] - if (isLocal) { - closureUpValues.push(captureUpValue(stackStart + index).id) - } else { - closureUpValues.push(frame.closure.upvalues[index]) - } - } - pushStack(newHogClosure(callable, closureUpValues)) - break - } - case Operation.GET_UPVALUE: { - const index = next() - if (index >= frame.closure.upvalues.length) { - throw new HogVMException(`Invalid upvalue index: ${index}`) - } - const upvalue = upvaluesById[frame.closure.upvalues[index]] - if (!isHogUpValue(upvalue)) { - throw new HogVMException(`Invalid upvalue: ${upvalue}`) - } - if (upvalue.closed) { - pushStack(upvalue.value) - } else { - pushStack(stack[upvalue.location]) - } - break - } - case Operation.SET_UPVALUE: { - const index = next() - if (index >= frame.closure.upvalues.length) { - throw new HogVMException(`Invalid upvalue index: ${index}`) - } - const upvalue = upvaluesById[frame.closure.upvalues[index]] - if (!isHogUpValue(upvalue)) { - throw new HogVMException(`Invalid upvalue: ${upvalue}`) - } - if (upvalue.closed) { - upvalue.value = popStack() - } else { - stack[upvalue.location] = popStack() - } - break - } - case Operation.CALL_GLOBAL: { - checkTimeout() - const name = next() - temp = next() // args.length - if (name in declaredFunctions && name !== 'toString') { - // This is for backwards compatibility. We use a closure on the stack with local functions now. - const [funcIp, argLen] = declaredFunctions[name] - frame.ip += 1 // advance for when we return - if (argLen > temp) { - for (let i = temp; i < argLen; i++) { - pushStack(null) - } - } - frame = { - ip: funcIp, - chunk: frame.chunk, - stackStart: stack.length - argLen, - argCount: argLen, - closure: newHogClosure( - newHogCallable('local', { - name: name, - argCount: argLen, - upvalueCount: 0, - ip: funcIp, - chunk: frame.chunk, - }) - ), - } satisfies CallFrame - setChunkBytecode() - callStack.push(frame) - continue // resume the loop without incrementing frame.ip - } else { - if (temp > stack.length) { - throw new HogVMException('Not enough arguments on the stack') - } - if (temp > MAX_FUNCTION_ARGS_LENGTH) { - throw new HogVMException('Too many arguments') - } + telemetry.push([ + newTime !== lastTime ? newTime - lastTime : 0, + frame.chunk !== lastChunk ? frame.chunk : '', + frame.ip, + typeof chunkBytecode[frame.ip] === 'number' + ? String(chunkBytecode[frame.ip]) + + (operations[chunkBytecode[frame.ip]] ? `/${operations[chunkBytecode[frame.ip]]}` : '') + : '???', + debug, + ]) + lastChunk = frame.chunk + lastTime = newTime + } - if (options?.functions && Object.hasOwn(options.functions, name) && options.functions[name]) { - const args = - version === 0 - ? Array(temp) - .fill(null) - .map(() => popStack()) - : stackKeepFirstElements(stack.length - temp) - pushStack(convertJSToHog(options.functions[name](...args.map(convertHogToJS)))) + const nextOp = options?.telemetry + ? () => { + ops += 1 + logTelemetry() + if ((ops & 31) === 0) { + checkTimeout() + } + } + : () => { + ops += 1 + if ((ops & 31) === 0) { + checkTimeout() + } + } + + try { + while (frame.ip < chunkBytecode.length) { + nextOp() + switch (chunkBytecode[frame.ip]) { + case null: + break + case Operation.STRING: + pushStack(next()) + break + case Operation.FLOAT: + pushStack(next()) + break + case Operation.INTEGER: + pushStack(next()) + break + case Operation.TRUE: + pushStack(true) + break + case Operation.FALSE: + pushStack(false) + break + case Operation.NULL: + pushStack(null) + break + case Operation.NOT: + pushStack(!popStack()) + break + case Operation.AND: + temp = next() + temp2 = true + for (let i = 0; i < temp; i++) { + temp2 = !!popStack() && temp2 + } + pushStack(temp2) + break + case Operation.OR: + temp = next() + temp2 = false + for (let i = 0; i < temp; i++) { + temp2 = !!popStack() || temp2 + } + pushStack(temp2) + break + case Operation.PLUS: + pushStack(Number(popStack()) + Number(popStack())) + break + case Operation.MINUS: + pushStack(Number(popStack()) - Number(popStack())) + break + case Operation.DIVIDE: + pushStack(Number(popStack()) / Number(popStack())) + break + case Operation.MULTIPLY: + pushStack(Number(popStack()) * Number(popStack())) + break + case Operation.MOD: + pushStack(Number(popStack()) % Number(popStack())) + break + case Operation.EQ: + ;[temp, temp2] = unifyComparisonTypes(popStack(), popStack()) + pushStack(temp === temp2) + break + case Operation.NOT_EQ: + ;[temp, temp2] = unifyComparisonTypes(popStack(), popStack()) + pushStack(temp !== temp2) + break + case Operation.GT: + ;[temp, temp2] = unifyComparisonTypes(popStack(), popStack()) + pushStack(temp > temp2) + break + case Operation.GT_EQ: + ;[temp, temp2] = unifyComparisonTypes(popStack(), popStack()) + pushStack(temp >= temp2) + break + case Operation.LT: + ;[temp, temp2] = unifyComparisonTypes(popStack(), popStack()) + pushStack(temp < temp2) + break + case Operation.LT_EQ: + ;[temp, temp2] = unifyComparisonTypes(popStack(), popStack()) + pushStack(temp <= temp2) + break + case Operation.LIKE: + pushStack(like(popStack(), popStack())) + break + case Operation.ILIKE: + pushStack(like(popStack(), popStack(), true)) + break + case Operation.NOT_LIKE: + pushStack(!like(popStack(), popStack())) + break + case Operation.NOT_ILIKE: + pushStack(!like(popStack(), popStack(), true)) + break + case Operation.IN: + temp = popStack() + pushStack(popStack().includes(temp)) + break + case Operation.NOT_IN: + temp = popStack() + pushStack(!popStack().includes(temp)) + break + case Operation.REGEX: + temp = popStack() + pushStack(regexMatch()(popStack(), temp)) + break + case Operation.NOT_REGEX: + temp = popStack() + pushStack(!regexMatch()(popStack(), temp)) + break + case Operation.IREGEX: + temp = popStack() + pushStack(regexMatch()('(?i)' + popStack(), temp)) + break + case Operation.NOT_IREGEX: + temp = popStack() + pushStack(!regexMatch()('(?i)' + popStack(), temp)) + break + case Operation.GET_GLOBAL: { + const count = next() + const chain = [] + for (let i = 0; i < count; i++) { + chain.push(popStack()) + } + if (options?.globals && chain[0] in options.globals && Object.hasOwn(options.globals, chain[0])) { + pushStack(convertJSToHog(getNestedValue(options.globals, chain, true))) } else if ( - name !== 'toString' && - ((options?.asyncFunctions && - Object.hasOwn(options.asyncFunctions, name) && - options.asyncFunctions[name]) || - name in ASYNC_STL) + options?.asyncFunctions && + chain.length == 1 && + Object.hasOwn(options.asyncFunctions, chain[0]) && + options.asyncFunctions[chain[0]] ) { - if (asyncSteps >= maxAsyncSteps) { - throw new HogVMException(`Exceeded maximum number of async steps: ${maxAsyncSteps}`) - } - - const args = - version === 0 - ? Array(temp) - .fill(null) - .map(() => popStack()) - : stackKeepFirstElements(stack.length - temp) - - frame.ip += 1 // resume at the next address after async returns - - return { - result: undefined, - finished: false, - asyncFunctionName: name, - asyncFunctionArgs: args.map(convertHogToJS), - state: { - bytecode, - stack: stack.map(convertHogToJS), - upvalues: sortedUpValues, - callStack: callStack.map((v) => ({ - ...v, - closure: convertHogToJS(v.closure), - })), - throwStack, - declaredFunctions, - ops, - asyncSteps: asyncSteps + 1, - syncDuration: syncDuration + (Date.now() - startTime), - maxMemUsed, - }, - } satisfies ExecResult - } else if (name in STL) { - const args = - version === 0 - ? Array(temp) - .fill(null) - .map(() => popStack()) - : stackKeepFirstElements(stack.length - temp) - pushStack(STL[name].fn(args, name, options)) - } else if (name in BYTECODE_STL) { - const argNames = BYTECODE_STL[name][0] - if (argNames.length !== temp) { - throw new HogVMException(`Function ${name} requires exactly ${argNames.length} arguments`) - } - frame.ip += 1 // advance for when we return - frame = { - ip: 0, - chunk: `stl/${name}`, - stackStart: stack.length - temp, - argCount: temp, - closure: newHogClosure( + pushStack( + newHogClosure( + newHogCallable('async', { + name: chain[0], + argCount: 0, // TODO + upvalueCount: 0, + ip: -1, + chunk: 'async', + }) + ) + ) + } else if (chain.length == 1 && chain[0] in ASYNC_STL && Object.hasOwn(ASYNC_STL, chain[0])) { + pushStack( + newHogClosure( + newHogCallable('async', { + name: chain[0], + argCount: ASYNC_STL[chain[0]].maxArgs ?? 0, + upvalueCount: 0, + ip: -1, + chunk: 'async', + }) + ) + ) + } else if (chain.length == 1 && chain[0] in STL && Object.hasOwn(STL, chain[0])) { + pushStack( + newHogClosure( newHogCallable('stl', { - name, - argCount: temp, + name: chain[0], + argCount: STL[chain[0]].maxArgs ?? 0, + upvalueCount: 0, + ip: -1, + chunk: 'stl', + }) + ) + ) + } else if (chain.length == 1 && chain[0] in BYTECODE_STL && Object.hasOwn(BYTECODE_STL, chain[0])) { + pushStack( + newHogClosure( + newHogCallable('stl', { + name: chain[0], + argCount: BYTECODE_STL[chain[0]][0].length, upvalueCount: 0, ip: 0, - chunk: `stl/${name}`, + chunk: `stl/${chain[0]}`, + }) + ) + ) + } else { + throw new HogVMException(`Global variable not found: ${chain.join('.')}`) + } + break + } + case Operation.POP: + popStack() + break + case Operation.CLOSE_UPVALUE: + stackKeepFirstElements(stack.length - 1) + break + case Operation.RETURN: { + const result = popStack() + const lastCallFrame = callStack.pop() + if (callStack.length === 0 || !lastCallFrame) { + return { + result, + finished: true, + state: { ...getVMState(), bytecode: [], stack: [], callStack: [], upvalues: [] }, + } satisfies ExecResult + } + const stackStart = lastCallFrame.stackStart + stackKeepFirstElements(stackStart) + pushStack(result) + frame = callStack[callStack.length - 1] + setChunkBytecode() + continue // resume the loop without incrementing frame.ip + } + case Operation.GET_LOCAL: + temp = callStack.length > 0 ? callStack[callStack.length - 1].stackStart : 0 + pushStack(stack[next() + temp]) + break + case Operation.SET_LOCAL: + temp = (callStack.length > 0 ? callStack[callStack.length - 1].stackStart : 0) + next() + stack[temp] = popStack() + temp2 = memStack[temp] + memStack[temp] = calculateCost(stack[temp]) + memUsed += memStack[temp] - temp2 + maxMemUsed = Math.max(maxMemUsed, memUsed) + break + case Operation.GET_PROPERTY: + temp = popStack() // property + pushStack(getNestedValue(popStack(), [temp])) + break + case Operation.GET_PROPERTY_NULLISH: + temp = popStack() // property + pushStack(getNestedValue(popStack(), [temp], true)) + break + case Operation.SET_PROPERTY: + temp = popStack() // value + temp2 = popStack() // field + setNestedValue(popStack(), [temp2], temp) + break + case Operation.DICT: + temp = next() * 2 // number of elements to remove from the stack + tempArray = spliceStack2(stack.length - temp, temp) + tempMap = new Map() + for (let i = 0; i < tempArray.length; i += 2) { + tempMap.set(tempArray[i], tempArray[i + 1]) + } + pushStack(tempMap) + break + case Operation.ARRAY: + temp = next() + tempArray = spliceStack2(stack.length - temp, temp) + pushStack(tempArray) + break + case Operation.TUPLE: + temp = next() + tempArray = spliceStack2(stack.length - temp, temp) + ;(tempArray as any).__isHogTuple = true + pushStack(tempArray) + break + case Operation.JUMP: + temp = next() + frame.ip += temp + break + case Operation.JUMP_IF_FALSE: + temp = next() + if (!popStack()) { + frame.ip += temp + } + break + case Operation.JUMP_IF_STACK_NOT_NULL: + temp = next() + if (stack.length > 0 && stack[stack.length - 1] !== null) { + frame.ip += temp + } + break + case Operation.DECLARE_FN: { + // DEPRECATED + const name = next() + const argCount = next() + const bodyLength = next() + declaredFunctions[name] = [frame.ip + 1, argCount] + frame.ip += bodyLength + break + } + case Operation.CALLABLE: { + const name = next() + const argCount = next() + const upvalueCount = next() + const bodyLength = next() + const callable = newHogCallable('local', { + name, + argCount, + upvalueCount, + ip: frame.ip + 1, + chunk: frame.chunk, + }) + pushStack(callable) + frame.ip += bodyLength + break + } + case Operation.CLOSURE: { + const callable = popStack() + if (!isHogCallable(callable)) { + throw new HogVMException(`Invalid callable: ${JSON.stringify(callable)}`) + } + const upvalueCount = next() + const closureUpValues: number[] = [] + if (upvalueCount !== callable.upvalueCount) { + throw new HogVMException( + `Invalid upvalue count. Expected ${callable.upvalueCount}, got ${upvalueCount}` + ) + } + const stackStart = frame.stackStart + for (let i = 0; i < callable.upvalueCount; i++) { + const [isLocal, index] = [next(), next()] + if (isLocal) { + closureUpValues.push(captureUpValue(stackStart + index).id) + } else { + closureUpValues.push(frame.closure.upvalues[index]) + } + } + pushStack(newHogClosure(callable, closureUpValues)) + break + } + case Operation.GET_UPVALUE: { + const index = next() + if (index >= frame.closure.upvalues.length) { + throw new HogVMException(`Invalid upvalue index: ${index}`) + } + const upvalue = upvaluesById[frame.closure.upvalues[index]] + if (!isHogUpValue(upvalue)) { + throw new HogVMException(`Invalid upvalue: ${upvalue}`) + } + if (upvalue.closed) { + pushStack(upvalue.value) + } else { + pushStack(stack[upvalue.location]) + } + break + } + case Operation.SET_UPVALUE: { + const index = next() + if (index >= frame.closure.upvalues.length) { + throw new HogVMException(`Invalid upvalue index: ${index}`) + } + const upvalue = upvaluesById[frame.closure.upvalues[index]] + if (!isHogUpValue(upvalue)) { + throw new HogVMException(`Invalid upvalue: ${upvalue}`) + } + if (upvalue.closed) { + upvalue.value = popStack() + } else { + stack[upvalue.location] = popStack() + } + break + } + case Operation.CALL_GLOBAL: { + checkTimeout() + const name = next() + temp = next() // args.length + if (name in declaredFunctions && name !== 'toString') { + // This is for backwards compatibility. We use a closure on the stack with local functions now. + const [funcIp, argLen] = declaredFunctions[name] + frame.ip += 1 // advance for when we return + if (argLen > temp) { + for (let i = temp; i < argLen; i++) { + pushStack(null) + } + } + frame = { + ip: funcIp, + chunk: frame.chunk, + stackStart: stack.length - argLen, + argCount: argLen, + closure: newHogClosure( + newHogCallable('local', { + name: name, + argCount: argLen, + upvalueCount: 0, + ip: funcIp, + chunk: frame.chunk, }) ), } satisfies CallFrame @@ -688,158 +664,235 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult { callStack.push(frame) continue // resume the loop without incrementing frame.ip } else { - throw new HogVMException(`Unsupported function call: ${name}`) - } - } - break - } - case Operation.CALL_LOCAL: { - checkTimeout() - const closure = popStack() - if (!isHogClosure(closure)) { - throw new HogVMException(`Invalid closure: ${JSON.stringify(closure)}`) - } - if (!isHogCallable(closure.callable)) { - throw new HogVMException(`Invalid callable: ${JSON.stringify(closure.callable)}`) - } - temp = next() // args.length - if (temp > stack.length) { - throw new HogVMException('Not enough arguments on the stack') - } - if (temp > MAX_FUNCTION_ARGS_LENGTH) { - throw new HogVMException('Too many arguments') - } - if (closure.callable.__hogCallable__ === 'local') { - if (closure.callable.argCount > temp) { - for (let i = temp; i < closure.callable.argCount; i++) { - pushStack(null) + if (temp > stack.length) { + throw new HogVMException('Not enough arguments on the stack') + } + if (temp > MAX_FUNCTION_ARGS_LENGTH) { + throw new HogVMException('Too many arguments') + } + + if (options?.functions && Object.hasOwn(options.functions, name) && options.functions[name]) { + const args = + version === 0 + ? Array(temp) + .fill(null) + .map(() => popStack()) + : stackKeepFirstElements(stack.length - temp) + pushStack(convertJSToHog(options.functions[name](...args.map((v) => convertHogToJS(v))))) + } else if ( + name !== 'toString' && + ((options?.asyncFunctions && + Object.hasOwn(options.asyncFunctions, name) && + options.asyncFunctions[name]) || + name in ASYNC_STL) + ) { + if (asyncSteps >= maxAsyncSteps) { + throw new HogVMException(`Exceeded maximum number of async steps: ${maxAsyncSteps}`) + } + + const args = + version === 0 + ? Array(temp) + .fill(null) + .map(() => popStack()) + : stackKeepFirstElements(stack.length - temp) + + frame.ip += 1 // resume at the next address after async returns + + return { + result: undefined, + finished: false, + asyncFunctionName: name, + asyncFunctionArgs: args.map((v) => convertHogToJS(v)), + state: { + ...getVMState(), + asyncSteps: asyncSteps + 1, + }, + } satisfies ExecResult + } else if (name in STL) { + const args = + version === 0 + ? Array(temp) + .fill(null) + .map(() => popStack()) + : stackKeepFirstElements(stack.length - temp) + pushStack(STL[name].fn(args, name, options)) + } else if (name in BYTECODE_STL) { + const argNames = BYTECODE_STL[name][0] + if (argNames.length !== temp) { + throw new HogVMException( + `Function ${name} requires exactly ${argNames.length} arguments` + ) + } + frame.ip += 1 // advance for when we return + frame = { + ip: 0, + chunk: `stl/${name}`, + stackStart: stack.length - temp, + argCount: temp, + closure: newHogClosure( + newHogCallable('stl', { + name, + argCount: temp, + upvalueCount: 0, + ip: 0, + chunk: `stl/${name}`, + }) + ), + } satisfies CallFrame + setChunkBytecode() + callStack.push(frame) + continue // resume the loop without incrementing frame.ip + } else { + throw new HogVMException(`Unsupported function call: ${name}`) } - } else if (closure.callable.argCount < temp) { - throw new HogVMException( - `Too many arguments. Passed ${temp}, expected ${closure.callable.argCount}` - ) } - frame.ip += 1 // advance for when we return - frame = { - ip: closure.callable.ip, - chunk: closure.callable.chunk, - stackStart: stack.length - closure.callable.argCount, - argCount: closure.callable.argCount, - closure, - } satisfies CallFrame - setChunkBytecode() - callStack.push(frame) - continue // resume the loop without incrementing frame.ip - } else if (closure.callable.__hogCallable__ === 'stl') { - if (!closure.callable.name || !(closure.callable.name in STL)) { + break + } + case Operation.CALL_LOCAL: { + checkTimeout() + const closure = popStack() + if (!isHogClosure(closure)) { + throw new HogVMException(`Invalid closure: ${JSON.stringify(closure)}`) + } + if (!isHogCallable(closure.callable)) { + throw new HogVMException(`Invalid callable: ${JSON.stringify(closure.callable)}`) + } + temp = next() // args.length + if (temp > stack.length) { + throw new HogVMException('Not enough arguments on the stack') + } + if (temp > MAX_FUNCTION_ARGS_LENGTH) { + throw new HogVMException('Too many arguments') + } + if (closure.callable.__hogCallable__ === 'local') { + if (closure.callable.argCount > temp) { + for (let i = temp; i < closure.callable.argCount; i++) { + pushStack(null) + } + } else if (closure.callable.argCount < temp) { + throw new HogVMException( + `Too many arguments. Passed ${temp}, expected ${closure.callable.argCount}` + ) + } + frame.ip += 1 // advance for when we return + frame = { + ip: closure.callable.ip, + chunk: closure.callable.chunk, + stackStart: stack.length - closure.callable.argCount, + argCount: closure.callable.argCount, + closure, + } satisfies CallFrame + setChunkBytecode() + callStack.push(frame) + continue // resume the loop without incrementing frame.ip + } else if (closure.callable.__hogCallable__ === 'stl') { + if (!closure.callable.name || !(closure.callable.name in STL)) { + throw new HogVMException(`Unsupported function call: ${closure.callable.name}`) + } + const stlFn = STL[closure.callable.name] + if (stlFn.minArgs !== undefined && temp < stlFn.minArgs) { + throw new HogVMException( + `Function ${closure.callable.name} requires at least ${stlFn.minArgs} arguments` + ) + } + if (stlFn.maxArgs !== undefined && temp > stlFn.maxArgs) { + throw new HogVMException( + `Function ${closure.callable.name} requires at most ${stlFn.maxArgs} arguments` + ) + } + const args = Array(temp) + .fill(null) + .map(() => popStack()) + if (version > 0) { + args.reverse() + } + if (stlFn.maxArgs !== undefined && args.length < stlFn.maxArgs) { + for (let i = args.length; i < stlFn.maxArgs; i++) { + args.push(null) + } + } + pushStack(stlFn.fn(args, closure.callable.name, options)) + } else if (closure.callable.__hogCallable__ === 'async') { + if (asyncSteps >= maxAsyncSteps) { + throw new HogVMException(`Exceeded maximum number of async steps: ${maxAsyncSteps}`) + } + const args = Array(temp) + .fill(null) + .map(() => popStack()) + return { + result: undefined, + finished: false, + asyncFunctionName: closure.callable.name, + asyncFunctionArgs: args.map((v) => convertHogToJS(v)), + state: { ...getVMState(), asyncSteps: asyncSteps + 1 }, + } satisfies ExecResult + } else { throw new HogVMException(`Unsupported function call: ${closure.callable.name}`) } - const stlFn = STL[closure.callable.name] - if (stlFn.minArgs !== undefined && temp < stlFn.minArgs) { - throw new HogVMException( - `Function ${closure.callable.name} requires at least ${stlFn.minArgs} arguments` - ) - } - if (stlFn.maxArgs !== undefined && temp > stlFn.maxArgs) { - throw new HogVMException( - `Function ${closure.callable.name} requires at most ${stlFn.maxArgs} arguments` - ) - } - const args = Array(temp) - .fill(null) - .map(() => popStack()) - if (version > 0) { - args.reverse() - } - if (stlFn.maxArgs !== undefined && args.length < stlFn.maxArgs) { - for (let i = args.length; i < stlFn.maxArgs; i++) { - args.push(null) - } - } - pushStack(stlFn.fn(args, closure.callable.name, options)) - } else if (closure.callable.__hogCallable__ === 'async') { - if (asyncSteps >= maxAsyncSteps) { - throw new HogVMException(`Exceeded maximum number of async steps: ${maxAsyncSteps}`) - } - const args = Array(temp) - .fill(null) - .map(() => popStack()) - return { - result: undefined, - finished: false, - asyncFunctionName: closure.callable.name, - asyncFunctionArgs: args.map(convertHogToJS), - state: { - bytecode, - stack: stack.map(convertHogToJS), - upvalues: sortedUpValues, - callStack: callStack.map((v) => ({ - ...v, - closure: convertHogToJS(v.closure), - })), - throwStack, - declaredFunctions, - ops, - asyncSteps: asyncSteps + 1, - syncDuration: syncDuration + (Date.now() - startTime), - maxMemUsed, - }, - } satisfies ExecResult - } else { - throw new HogVMException(`Unsupported function call: ${closure.callable.name}`) + break } - break + case Operation.TRY: + throwStack.push({ + callStackLen: callStack.length, + stackLen: stack.length, + catchIp: frame.ip + 1 + next(), + }) + break + case Operation.POP_TRY: + if (throwStack.length > 0) { + throwStack.pop() + } else { + throw new HogVMException('Invalid operation POP_TRY: no try block to pop') + } + break + case Operation.THROW: { + const exception = popStack() + if (!isHogError(exception)) { + throw new HogVMException('Can not throw: value is not of type Error') + } + if (throwStack.length > 0) { + const { callStackLen, stackLen, catchIp } = throwStack.pop()! + stackKeepFirstElements(stackLen) + memUsed -= memStack.splice(stackLen).reduce((acc, val) => acc + val, 0) + callStack.splice(callStackLen) + pushStack(exception) + frame = callStack[callStack.length - 1] + setChunkBytecode() + frame.ip = catchIp + continue // resume the loop without incrementing frame.ip + } else { + throw new UncaughtHogVMException(exception.type, exception.message, exception.payload) + } + } + default: + throw new HogVMException( + `Unexpected node while running bytecode in chunk "${frame.chunk}": ${chunkBytecode[frame.ip]}` + ) } - case Operation.TRY: - throwStack.push({ - callStackLen: callStack.length, - stackLen: stack.length, - catchIp: frame.ip + 1 + next(), - }) - break - case Operation.POP_TRY: - if (throwStack.length > 0) { - throwStack.pop() - } else { - throw new HogVMException('Invalid operation POP_TRY: no try block to pop') - } - break - case Operation.THROW: { - const exception = popStack() - if (!isHogError(exception)) { - throw new HogVMException('Can not throw: value is not of type Error') - } - if (throwStack.length > 0) { - const { callStackLen, stackLen, catchIp } = throwStack.pop()! - stackKeepFirstElements(stackLen) - memUsed -= memStack.splice(stackLen).reduce((acc, val) => acc + val, 0) - callStack.splice(callStackLen) - pushStack(exception) - frame = callStack[callStack.length - 1] - setChunkBytecode() - frame.ip = catchIp - continue // resume the loop without incrementing frame.ip - } else { - throw new UncaughtHogVMException(exception.type, exception.message, exception.payload) - } - } - default: - throw new HogVMException( - `Unexpected node while running bytecode in chunk "${frame.chunk}": ${chunkBytecode[frame.ip]}` - ) + + // use "continue" to skip incrementing frame.ip each iteration + frame.ip++ } - // use "continue" to skip incrementing frame.ip each iteration - frame.ip++ - } - - if (stack.length > 1) { - throw new HogVMException('Invalid bytecode. More than one value left on stack') + if (stack.length > 1) { + throw new HogVMException('Invalid bytecode. More than one value left on stack') + } + } catch (e) { + return { result: null, finished: false, error: e, state: getVMState() } satisfies ExecResult } if (stack.length === 0) { - return { result: null, finished: true, state: getFinishedState() } satisfies ExecResult + return { + result: null, + finished: true, + state: { ...getVMState(), bytecode: [], stack: [], callStack: [], upvalues: [] }, + } satisfies ExecResult } - return { result: popStack() ?? null, finished: true, state: getFinishedState() } satisfies ExecResult + return { + result: popStack() ?? null, + finished: true, + state: { ...getVMState(), bytecode: [], stack: [], callStack: [], upvalues: [] }, + } satisfies ExecResult } diff --git a/hogvm/typescript/src/operation.ts b/hogvm/typescript/src/operation.ts index cd195059836..88303a38f86 100644 --- a/hogvm/typescript/src/operation.ts +++ b/hogvm/typescript/src/operation.ts @@ -58,3 +58,64 @@ export const enum Operation { SET_UPVALUE = 56, CLOSE_UPVALUE = 57, } + +export const operations = [ + '', + 'GET_GLOBAL', + 'CALL_GLOBAL', + 'AND', + 'OR', + 'NOT', + 'PLUS', + 'MINUS', + 'MULTIPLY', + 'DIVIDE', + 'MOD', + 'EQ', + 'NOT_EQ', + 'GT', + 'GT_EQ', + 'LT', + 'LT_EQ', + 'LIKE', + 'ILIKE', + 'NOT_LIKE', + 'NOT_ILIKE', + 'IN', + 'NOT_IN', + 'REGEX', + 'NOT_REGEX', + 'IREGEX', + 'NOT_IREGEX', + 'IN_COHORT', + 'NOT_IN_COHORT', + 'TRUE', + 'FALSE', + 'NULL', + 'STRING', + 'INTEGER', + 'FLOAT', + 'POP', + 'GET_LOCAL', + 'SET_LOCAL', + 'RETURN', + 'JUMP', + 'JUMP_IF_FALSE', + 'DECLARE_FN', + 'DICT', + 'ARRAY', + 'TUPLE', + 'GET_PROPERTY', + 'SET_PROPERTY', + 'JUMP_IF_STACK_NOT_NULL', + 'GET_PROPERTY_NULLISH', + 'THROW', + 'TRY', + 'POP_TRY', + 'CALLABLE', + 'CLOSURE', + 'CALL_LOCAL', + 'GET_UPVALUE', + 'SET_UPVALUE', + 'CLOSE_UPVALUE', +] diff --git a/hogvm/typescript/src/types.ts b/hogvm/typescript/src/types.ts index 8be269b1239..8cde60ce9cd 100644 --- a/hogvm/typescript/src/types.ts +++ b/hogvm/typescript/src/types.ts @@ -21,6 +21,8 @@ export interface VMState { syncDuration: number /** Max memory used */ maxMemUsed: number + /** Telemetry data */ + telemetry?: Telemetry[] } export interface ExecOptions { @@ -43,14 +45,31 @@ export interface ExecOptions { /** NodeJS crypto */ crypto?: typeof crypto } + /** Collecte telemetry data */ + telemetry?: boolean } +export type Telemetry = [ + /** Time from epoch in milliseconds */ + number, + /** Current chunk */ + string, + /** Current position in chunk */ + number, + /** Opcode */ + string, + /** Debug */ + string +] + export interface ExecResult { result: any finished: boolean + error?: any asyncFunctionName?: string asyncFunctionArgs?: any[] state?: VMState + telemetry?: Telemetry[] } export interface CallFrame { diff --git a/hogvm/typescript/src/utils.ts b/hogvm/typescript/src/utils.ts index 37e66259fef..03d603f0e51 100644 --- a/hogvm/typescript/src/utils.ts +++ b/hogvm/typescript/src/utils.ts @@ -89,9 +89,19 @@ export function setNestedValue(obj: any, chain: any[], value: any): void { } // Recursively convert objects to maps -export function convertJSToHog(x: any): any { +export function convertJSToHog(x: any, found?: Map): any { + if (!found) { + found = new Map() + } + if (found.has(x)) { + return found.get(x) + } if (Array.isArray(x)) { - return x.map(convertJSToHog) + const obj: any[] = [] + found.set(x, obj) + x.forEach((v) => obj.push(convertJSToHog(v, found))) + found.delete(x) + return obj } else if (typeof x === 'object' && x !== null) { if (x.__hogDateTime__) { return toHogDateTime(x.dt, x.zone) @@ -101,31 +111,47 @@ export function convertJSToHog(x: any): any { return x } const map = new Map() + found.set(x, map) for (const key in x) { - map.set(key, convertJSToHog(x[key])) + map.set(key, convertJSToHog(x[key], found)) } + found.delete(x) return map } return x } -export function convertHogToJS(x: any): any { +export function convertHogToJS(x: any, found?: Map): any { + if (!found) { + found = new Map() + } + if (found.has(x)) { + return found.get(x) + } if (x instanceof Map) { const obj: Record = {} + found.set(x, obj) x.forEach((value, key) => { - obj[key] = convertHogToJS(value) + obj[key] = convertHogToJS(value, found) }) + found.delete(x) return obj } else if (typeof x === 'object' && Array.isArray(x)) { - return x.map(convertHogToJS) + const obj: any[] = [] + found.set(x, obj) + x.forEach((v) => obj.push(convertHogToJS(v, found))) + found.delete(x) + return obj } else if (typeof x === 'object' && x !== null) { if (x.__hogDateTime__ || x.__hogDate__ || x.__hogClosure__ || x.__hogCallable__) { return x } const obj: Record = {} + found.set(x, obj) for (const key in x) { - obj[key] = convertHogToJS(x[key]) + obj[key] = convertHogToJS(x[key], found) } + found.delete(x) return obj } return x diff --git a/package.json b/package.json index d44d9449291..381d53bafd2 100644 --- a/package.json +++ b/package.json @@ -76,7 +76,7 @@ "@medv/finder": "^3.1.0", "@microlink/react-json-view": "^1.21.3", "@monaco-editor/react": "4.6.0", - "@posthog/hogvm": "^1.0.50", + "@posthog/hogvm": "^1.0.52", "@posthog/icons": "0.8.1", "@posthog/plugin-scaffold": "^1.4.4", "@react-hook/size": "^2.1.2", diff --git a/plugin-server/package.json b/plugin-server/package.json index 85cacf54c61..75b16774773 100644 --- a/plugin-server/package.json +++ b/plugin-server/package.json @@ -54,7 +54,7 @@ "@maxmind/geoip2-node": "^3.4.0", "@posthog/clickhouse": "^1.7.0", "@posthog/cyclotron": "file:../rust/cyclotron-node", - "@posthog/hogvm": "^1.0.50", + "@posthog/hogvm": "^1.0.52", "@posthog/plugin-scaffold": "1.4.4", "@sentry/node": "^7.49.0", "@sentry/profiling-node": "^0.3.0", diff --git a/plugin-server/pnpm-lock.yaml b/plugin-server/pnpm-lock.yaml index 513f5a70456..49c204f3984 100644 --- a/plugin-server/pnpm-lock.yaml +++ b/plugin-server/pnpm-lock.yaml @@ -47,8 +47,8 @@ dependencies: specifier: file:../rust/cyclotron-node version: file:../rust/cyclotron-node '@posthog/hogvm': - specifier: ^1.0.50 - version: 1.0.50(luxon@3.4.4) + specifier: ^1.0.52 + version: 1.0.52(luxon@3.4.4) '@posthog/plugin-scaffold': specifier: 1.4.4 version: 1.4.4 @@ -3119,8 +3119,8 @@ packages: engines: {node: '>=12'} dev: false - /@posthog/hogvm@1.0.50(luxon@3.4.4): - resolution: {integrity: sha512-dkq/46mkVO6xpvUzzxxJ5IrSvDwTTU/pUPJk/0DHnmLBuzAwlpwzMhNLA2XdxJhCC0giXjN7NIg9PERg615Wlw==} + /@posthog/hogvm@1.0.52(luxon@3.4.4): + resolution: {integrity: sha512-Mn/tLjgZbeKQcp1OTYkCiGD9mNybGeg07baCPJj4EHXFz1EdfSlDzjfYPOcRlpTnAHhF037eYrIocsYbMRdhSg==} peerDependencies: luxon: ^3.4.4 dependencies: diff --git a/plugin-server/src/cdp/cdp-consumers.ts b/plugin-server/src/cdp/cdp-consumers.ts index f50ae67104e..ed1c98d307d 100644 --- a/plugin-server/src/cdp/cdp-consumers.ts +++ b/plugin-server/src/cdp/cdp-consumers.ts @@ -121,7 +121,7 @@ abstract class CdpConsumerBase { void this.captureInternalPostHogEvent(id, 'hog function state changed', { state }) }) this.hogMasker = new HogMasker(this.redis) - this.hogExecutor = new HogExecutor(this.hogFunctionManager) + this.hogExecutor = new HogExecutor(this.hub, this.hogFunctionManager) const rustyHook = this.hub?.rustyHook ?? new RustyHook(this.hub) this.fetchExecutor = new FetchExecutor(this.hub, rustyHook) this.groupsManager = new GroupsManager(this.hub) diff --git a/plugin-server/src/cdp/hog-executor.ts b/plugin-server/src/cdp/hog-executor.ts index fe6365f195c..b0ed5515485 100644 --- a/plugin-server/src/cdp/hog-executor.ts +++ b/plugin-server/src/cdp/hog-executor.ts @@ -4,6 +4,8 @@ import { DateTime } from 'luxon' import { Histogram } from 'prom-client' import RE2 from 're2' +import { buildIntegerMatcher } from '../config/config' +import { Hub, ValueMatcher } from '../types' import { status } from '../utils/status' import { HogFunctionManager } from './hog-function-manager' import { @@ -65,6 +67,9 @@ export const formatInput = (bytecode: any, globals: HogFunctionInvocation['globa // NOT ALLOWED throw new Error('Input fields must be simple sync values') } + if (res.error) { + throw res.error + } return convertHogToJS(res.result) } @@ -93,7 +98,11 @@ const sanitizeLogMessage = (args: any[], sensitiveValues?: string[]): string => } export class HogExecutor { - constructor(private hogFunctionManager: HogFunctionManager) {} + private telemetryMatcher: ValueMatcher + + constructor(private hub: Hub, private hogFunctionManager: HogFunctionManager) { + this.telemetryMatcher = buildIntegerMatcher(this.hub.CDP_HOG_FILTERS_TELEMETRY_TEAMS, true) + } findMatchingFunctions(event: HogFunctionInvocationGlobals): { matchingFunctions: HogFunctionType[] @@ -112,15 +121,30 @@ export class HogExecutor { if (hogFunction.filters?.bytecode) { const start = performance.now() try { - const filterResult = execHog(hogFunction.filters.bytecode, { globals: filtersGlobals }) + const filterResult = execHog(hogFunction.filters.bytecode, { + globals: filtersGlobals, + telemetry: this.telemetryMatcher(hogFunction.team_id), + }) if (typeof filterResult.result === 'boolean' && filterResult.result) { matchingFunctions.push(hogFunction) return } + if (filterResult.error) { + status.error('🦔', `[HogExecutor] Error filtering function`, { + hogFunctionId: hogFunction.id, + hogFunctionName: hogFunction.name, + teamId: hogFunction.team_id, + error: filterResult.error.message, + result: filterResult, + }) + erroredFunctions.push(hogFunction) + return + } } catch (error) { status.error('🦔', `[HogExecutor] Error filtering function`, { hogFunctionId: hogFunction.id, hogFunctionName: hogFunction.name, + teamId: hogFunction.team_id, error: error.message, }) erroredFunctions.push(hogFunction) @@ -315,6 +339,9 @@ export class HogExecutor { }, }, }) + if (execRes.error) { + throw execRes.error + } } catch (e) { result.logs.push({ level: 'error', diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index 7f27258a61a..6a50dd81995 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -187,6 +187,7 @@ export function getDefaultConfig(): PluginsServerConfig { CDP_WATCHER_DISABLED_TEMPORARY_MAX_COUNT: 3, CDP_ASYNC_FUNCTIONS_RUSTY_HOOK_TEAMS: '', CDP_CYCLOTRON_ENABLED_TEAMS: '', + CDP_HOG_FILTERS_TELEMETRY_TEAMS: '', CDP_REDIS_PASSWORD: '', CDP_EVENT_PROCESSOR_EXECUTE_FIRST_STEP: true, CDP_REDIS_HOST: '', diff --git a/plugin-server/src/types.ts b/plugin-server/src/types.ts index 3abdcb496be..0cd322c3709 100644 --- a/plugin-server/src/types.ts +++ b/plugin-server/src/types.ts @@ -114,6 +114,7 @@ export type CdpConfig = { CDP_WATCHER_DISABLED_TEMPORARY_TTL: number // How long a function should be temporarily disabled for CDP_WATCHER_DISABLED_TEMPORARY_MAX_COUNT: number // How many times a function can be disabled before it is disabled permanently CDP_ASYNC_FUNCTIONS_RUSTY_HOOK_TEAMS: string + CDP_HOG_FILTERS_TELEMETRY_TEAMS: string CDP_CYCLOTRON_ENABLED_TEAMS: string CDP_CYCLOTRON_BATCH_SIZE: number CDP_CYCLOTRON_BATCH_DELAY_MS: number diff --git a/plugin-server/tests/cdp/cdp-api.test.ts b/plugin-server/tests/cdp/cdp-api.test.ts index ea179cc50c1..3808657e6ba 100644 --- a/plugin-server/tests/cdp/cdp-api.test.ts +++ b/plugin-server/tests/cdp/cdp-api.test.ts @@ -174,7 +174,7 @@ describe('CDP API', () => { }, { level: 'debug', - message: "Suspending function due to async function call 'fetch'. Payload: 2039 bytes", + message: "Suspending function due to async function call 'fetch'. Payload: 2064 bytes", }, { level: 'info', @@ -222,7 +222,7 @@ describe('CDP API', () => { }, { level: 'debug', - message: "Suspending function due to async function call 'fetch'. Payload: 2039 bytes", + message: "Suspending function due to async function call 'fetch'. Payload: 2064 bytes", }, { level: 'debug', diff --git a/plugin-server/tests/cdp/cdp-function-processor.test.ts b/plugin-server/tests/cdp/cdp-function-processor.test.ts index 9d8a9285e69..10b32193775 100644 --- a/plugin-server/tests/cdp/cdp-function-processor.test.ts +++ b/plugin-server/tests/cdp/cdp-function-processor.test.ts @@ -209,7 +209,7 @@ describe('CDP Function Processor', () => { ]) expect(kafkaMessages.logs.map((x) => x.value.message)).toEqual([ 'Executing function', - "Suspending function due to async function call 'fetch'. Payload: 1931 bytes", + "Suspending function due to async function call 'fetch'. Payload: 1956 bytes", 'Resuming function', 'Fetch response:, {"status":200,"body":{"success":true}}', expect.stringContaining('Function completed'), diff --git a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts index b58ed7bf366..12ae16879ea 100644 --- a/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts +++ b/plugin-server/tests/cdp/cdp-processed-events-consumer.test.ts @@ -173,7 +173,7 @@ describe('CDP Processed Events Consumer', () => { { topic: 'log_entries_test', value: { - message: "Suspending function due to async function call 'fetch'. Payload: 1931 bytes", + message: "Suspending function due to async function call 'fetch'. Payload: 1956 bytes", log_source_id: fnFetchNoFilters.id, }, }, diff --git a/plugin-server/tests/cdp/examples.ts b/plugin-server/tests/cdp/examples.ts index 831e1ef8975..d948f85e674 100644 --- a/plugin-server/tests/cdp/examples.ts +++ b/plugin-server/tests/cdp/examples.ts @@ -370,6 +370,7 @@ export const HOG_INPUTS_EXAMPLES: Record> = { no_filters: { filters: { events: [], actions: [], bytecode: ['_h', 29] } }, + broken_filters: { filters: { events: [], actions: [], bytecode: ['_H', 1, 29, 35, 35, 35] } }, pageview_or_autocapture_filter: { filters: { events: [ diff --git a/plugin-server/tests/cdp/hog-executor.test.ts b/plugin-server/tests/cdp/hog-executor.test.ts index d24183c7d68..04585d74657 100644 --- a/plugin-server/tests/cdp/hog-executor.test.ts +++ b/plugin-server/tests/cdp/hog-executor.test.ts @@ -3,9 +3,23 @@ import { DateTime } from 'luxon' import { HogExecutor } from '../../src/cdp/hog-executor' import { HogFunctionManager } from '../../src/cdp/hog-function-manager' import { HogFunctionInvocation, HogFunctionType } from '../../src/cdp/types' +import { Hub } from '../../src/types' +import { createHub } from '../../src/utils/db/hub' +import { status } from '../../src/utils/status' +import { truth } from '../helpers/truth' import { HOG_EXAMPLES, HOG_FILTERS_EXAMPLES, HOG_INPUTS_EXAMPLES } from './examples' import { createHogExecutionGlobals, createHogFunction, createInvocation } from './fixtures' +jest.mock('../../src/utils/status', () => ({ + status: { + error: jest.fn(), + info: jest.fn(), + warn: jest.fn(), + debug: jest.fn(), + updatePrompt: jest.fn(), + }, +})) + const setupFetchResponse = (invocation: HogFunctionInvocation, options?: { status?: number; body?: string }): void => { invocation.queue = 'hog' invocation.queueParameters = { @@ -25,6 +39,7 @@ const setupFetchResponse = (invocation: HogFunctionInvocation, options?: { statu describe('Hog Executor', () => { jest.setTimeout(1000) let executor: HogExecutor + let hub: Hub const mockFunctionManager = { reloadAllHogFunctions: jest.fn(), @@ -32,10 +47,11 @@ describe('Hog Executor', () => { getTeamHogFunction: jest.fn(), } - beforeEach(() => { + beforeEach(async () => { jest.useFakeTimers() jest.setSystemTime(new Date('2024-06-07T12:00:00.000Z').getTime()) - executor = new HogExecutor(mockFunctionManager as any as HogFunctionManager) + hub = await createHub() + executor = new HogExecutor(hub, mockFunctionManager as any as HogFunctionManager) }) describe('general event processing', () => { @@ -90,7 +106,7 @@ describe('Hog Executor', () => { { timestamp: expect.any(DateTime), level: 'debug', - message: "Suspending function due to async function call 'fetch'. Payload: 1847 bytes", + message: "Suspending function due to async function call 'fetch'. Payload: 1872 bytes", }, ]) }) @@ -171,7 +187,7 @@ describe('Hog Executor', () => { expect(logs.map((log) => log.message)).toMatchInlineSnapshot(` Array [ "Executing function", - "Suspending function due to async function call 'fetch'. Payload: 1847 bytes", + "Suspending function due to async function call 'fetch'. Payload: 1872 bytes", "Resuming function", "Fetch response:, {\\"status\\":200,\\"body\\":\\"success\\"}", "Function completed in 100ms. Sync: 0ms. Mem: 779 bytes. Ops: 22.", @@ -190,7 +206,7 @@ describe('Hog Executor', () => { expect(logs.map((log) => log.message)).toMatchInlineSnapshot(` Array [ "Executing function", - "Suspending function due to async function call 'fetch'. Payload: 1847 bytes", + "Suspending function due to async function call 'fetch'. Payload: 1872 bytes", "Resuming function", "Fetch response:, {\\"status\\":200,\\"body\\":{\\"foo\\":\\"bar\\"}}", "Function completed in 100ms. Sync: 0ms. Mem: 779 bytes. Ops: 22.", @@ -228,6 +244,40 @@ describe('Hog Executor', () => { expect(resultsShouldMatch.nonMatchingFunctions).toHaveLength(0) }) + it('logs telemetry', async () => { + hub = await createHub({ CDP_HOG_FILTERS_TELEMETRY_TEAMS: '*' }) + executor = new HogExecutor(hub, mockFunctionManager as any as HogFunctionManager) + + const fn = createHogFunction({ + ...HOG_EXAMPLES.simple_fetch, + ...HOG_INPUTS_EXAMPLES.simple_fetch, + ...HOG_FILTERS_EXAMPLES.broken_filters, + }) + mockFunctionManager.getTeamHogFunctions.mockReturnValue([fn]) + const resultsShouldMatch = executor.findMatchingFunctions( + createHogExecutionGlobals({ + groups: {}, + event: { + event: '$pageview', + properties: { + $current_url: 'https://posthog.com', + }, + } as any, + }) + ) + expect(resultsShouldMatch.erroredFunctions).toHaveLength(1) + expect(status.error).toHaveBeenCalledWith( + '🦔', + expect.stringContaining('Error filtering function'), + truth( + (obj) => + 'telemetry' in obj.result.state && + Array.isArray(obj.result.state.telemetry) && + obj.result.state.telemetry[0][3] === 'START' + ) + ) + }) + it('can use elements_chain_texts', () => { const fn = createHogFunction({ ...HOG_EXAMPLES.simple_fetch, diff --git a/plugin-server/tests/helpers/truth.ts b/plugin-server/tests/helpers/truth.ts new file mode 100644 index 00000000000..057e539c363 --- /dev/null +++ b/plugin-server/tests/helpers/truth.ts @@ -0,0 +1,44 @@ +export class AsymmetricMatcher { + protected sample: T + $$typeof: symbol + inverse?: boolean + + constructor(sample: T) { + this.$$typeof = Symbol.for('jest.asymmetricMatcher') + this.sample = sample + } +} + +class Truth extends AsymmetricMatcher<(value: any) => boolean> { + constructor(sample: (value: any) => boolean, inverse: boolean = false) { + if (typeof sample !== 'function') { + throw new Error('Expected is not a function') + } + super(sample) + this.inverse = inverse + } + + asymmetricMatch(other: any): boolean { + const result = this.sample(other) + + return this.inverse ? !result : result + } + + toString(): string { + return `${this.inverse ? 'Not' : ''}Truth` + } + + toAsymmetricMatcher(): string { + return `Truth<${this.sample}>` + } +} + +export const truth = (sample: (value: any) => boolean): Truth => new Truth(sample) + +export function partial(objectOrArray: T): T { + if (Array.isArray(objectOrArray)) { + return expect.arrayContaining(objectOrArray) + } else { + return expect.objectContaining(objectOrArray) + } +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 77c30fd274b..e6085ff4401 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -50,8 +50,8 @@ dependencies: specifier: 4.6.0 version: 4.6.0(monaco-editor@0.49.0)(react-dom@18.2.0)(react@18.2.0) '@posthog/hogvm': - specifier: ^1.0.50 - version: 1.0.50(luxon@3.5.0) + specifier: ^1.0.52 + version: 1.0.52(luxon@3.5.0) '@posthog/icons': specifier: 0.8.1 version: 0.8.1(react-dom@18.2.0)(react@18.2.0) @@ -5394,8 +5394,8 @@ packages: resolution: {integrity: sha512-50/17A98tWUfQ176raKiOGXuYpLyyVMkxxG6oylzL3BPOlA6ADGdK7EYunSa4I064xerltq9TGXs8HmOk5E+vw==} dev: false - /@posthog/hogvm@1.0.50(luxon@3.5.0): - resolution: {integrity: sha512-dkq/46mkVO6xpvUzzxxJ5IrSvDwTTU/pUPJk/0DHnmLBuzAwlpwzMhNLA2XdxJhCC0giXjN7NIg9PERg615Wlw==} + /@posthog/hogvm@1.0.52(luxon@3.5.0): + resolution: {integrity: sha512-Mn/tLjgZbeKQcp1OTYkCiGD9mNybGeg07baCPJj4EHXFz1EdfSlDzjfYPOcRlpTnAHhF037eYrIocsYbMRdhSg==} peerDependencies: luxon: ^3.4.4 dependencies: @@ -18314,7 +18314,7 @@ packages: react: '>=15' dependencies: react: 18.2.0 - unlayer-types: 1.95.0 + unlayer-types: 1.103.0 dev: false /react-error-boundary@3.1.4(react@18.2.0): @@ -20861,8 +20861,8 @@ packages: resolution: {integrity: sha512-hAZsKq7Yy11Zu1DE0OzWjw7nnLZmJZYTDZZyEFHZdUhV8FkH5MCfoU1XMaxXovpyW5nq5scPqq0ZDP9Zyl04oQ==} engines: {node: '>= 10.0.0'} - /unlayer-types@1.95.0: - resolution: {integrity: sha512-WsvQp85+Xl8Gggkt+dSSePawoVAnGqjJzJJcDhIQswlPejARBRzAhl5dOF1Q+LjQckhWhKNDaJ5tcOeT+PV4ew==} + /unlayer-types@1.103.0: + resolution: {integrity: sha512-aVZS7g5F6dWEoxc0dhSDqYYncu+LIMB/SerJi6u5FKVSfTWnzA2MTpjFCbGkOOi8rUiIOabeuEOfyO/WDnarJg==} dev: false /unpipe@1.0.0: