From 558b3b0e304468c12f2cdafd7c94c8f1a057e0fa Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Wed, 9 Aug 2023 00:01:01 +0200 Subject: [PATCH] feat(hogvm): remove hogvm cohort matching (#16931) --- hogvm/CHANGELOG.md | 16 ++++++++-------- hogvm/python/execute.py | 16 ++-------------- hogvm/python/operation.py | 2 +- hogvm/python/test/test_execute.py | 27 --------------------------- hogvm/typescript/package.json | 2 +- hogvm/typescript/src/bytecode.ts | 13 +------------ posthog/hogql/bytecode.py | 9 ++++++--- posthog/hogql/test/test_bytecode.py | 6 ++++-- production.Dockerfile | 3 ++- 9 files changed, 25 insertions(+), 69 deletions(-) diff --git a/hogvm/CHANGELOG.md b/hogvm/CHANGELOG.md index 7238a16429a..36b5fe14d3e 100644 --- a/hogvm/CHANGELOG.md +++ b/hogvm/CHANGELOG.md @@ -1,15 +1,19 @@ # HogQL bytecode changelog -## 2023-06-28 - In Cohort +## 2023-06-30 - 1.0.2 -### New async operations +Rolled back cohort matching instructions. + +## 2023-06-28 - 1.0.1 + +Added cohort matching instructions. ```bash IN_COHORT = 27 # [val2, val1, IREGEX] # val1 in cohort val2 NOT_IN_COHORT = 28 # [val2, val1, NOT_IREGEX] # val1 not in cohort val2 ``` -## 2023-06-28 - First version +## 2023-06-28 - 1.0.0 - First version ### Operations added @@ -46,10 +50,6 @@ NULL = 31 # [NULL] # null STRING = 32 # [STRING, 'text'] # 'text' INTEGER = 33 # [INTEGER, 123] # 123 FLOAT = 34 # [FLOAT, 123.12] # 123.01 - -# Added for completion, but not yet implemented. Stay tuned! -IN_COHORT = 27 # [val2, val1, IREGEX] # val1 in cohort val2 -NOT_IN_COHORT = 28 # [val2, val1, NOT_IREGEX] # val1 not in cohort val2 ``` ### Functions added @@ -61,4 +61,4 @@ toString(val) # toString(true) == 'true' toInt(val) # toInt('123') == 123 toFloat(val) # toFloat('123.2') == 123.2 toUUID(val) # toUUID('string') == 'string' -``` +``` \ No newline at end of file diff --git a/hogvm/python/execute.py b/hogvm/python/execute.py index 492b973a3d2..15bbbace68a 100644 --- a/hogvm/python/execute.py +++ b/hogvm/python/execute.py @@ -1,5 +1,5 @@ import re -from typing import List, Any, Dict, Callable, Optional +from typing import List, Any, Dict from hogvm.python.operation import Operation, HOGQL_BYTECODE_IDENTIFIER @@ -33,9 +33,7 @@ def to_concat_arg(arg) -> str: return str(arg) -def execute_bytecode( - bytecode: List[Any], fields: Dict[str, Any], async_operation: Optional[Callable[..., Any]] = None -) -> Any: +def execute_bytecode(bytecode: List[Any], fields: Dict[str, Any]) -> Any: try: stack = [] iterator = iter(bytecode) @@ -96,16 +94,6 @@ def execute_bytecode( stack.append(stack.pop() in stack.pop()) case Operation.NOT_IN: stack.append(stack.pop() not in stack.pop()) - case Operation.IN_COHORT: - if async_operation is None: - raise HogVMException("HogVM async_operation IN_COHORT not provided") - args = [Operation.IN_COHORT, stack.pop(), stack.pop()] - stack.append(async_operation(*args)) - case Operation.NOT_IN_COHORT: - if async_operation is None: - raise HogVMException("HogVM async_operation NOT_IN_COHORT not provided") - args = [Operation.NOT_IN_COHORT, stack.pop(), stack.pop()] - stack.append(async_operation(*args)) case Operation.REGEX: args = [stack.pop(), stack.pop()] stack.append(bool(re.search(re.compile(args[1]), args[0]))) diff --git a/hogvm/python/operation.py b/hogvm/python/operation.py index 48252f4aacc..26d6ea3e587 100644 --- a/hogvm/python/operation.py +++ b/hogvm/python/operation.py @@ -6,7 +6,7 @@ HOGQL_BYTECODE_IDENTIFIER = "_h" SUPPORTED_FUNCTIONS = ("concat", "match", "toString", "toInt", "toFloat", "toUUID") -class Operation(str, Enum): +class Operation(int, Enum): FIELD = 1 CALL = 2 AND = 3 diff --git a/hogvm/python/test/test_execute.py b/hogvm/python/test/test_execute.py index 9a59cf0535f..c99ed451f13 100644 --- a/hogvm/python/test/test_execute.py +++ b/hogvm/python/test/test_execute.py @@ -104,30 +104,3 @@ class TestBytecodeExecute(BaseTest): with self.assertRaises(Exception) as e: execute_bytecode([_H, op.TRUE, op.TRUE, op.NOT], {}) self.assertEqual(str(e.exception), "Invalid bytecode. More than one value left on stack") - - def test_async_operations(self): - def async_operation(*args): - if args[0] == op.IN_COHORT: - return args[1] == "my_id" or args[2] == 2 - elif args[0] == op.NOT_IN_COHORT: - return not (args[1] == "my_id" or args[2] == 2) - return False - - self.assertEqual( - execute_bytecode([_H, op.INTEGER, 1, op.STRING, "my_id", op.IN_COHORT], {}, async_operation), True - ) - self.assertEqual( - execute_bytecode([_H, op.INTEGER, 1, op.STRING, "other_id", op.IN_COHORT], {}, async_operation), False - ) - self.assertEqual( - execute_bytecode([_H, op.INTEGER, 2, op.STRING, "other_id", op.IN_COHORT], {}, async_operation), True - ) - self.assertEqual( - execute_bytecode([_H, op.INTEGER, 1, op.STRING, "my_id", op.NOT_IN_COHORT], {}, async_operation), False - ) - self.assertEqual( - execute_bytecode([_H, op.INTEGER, 1, op.STRING, "other_id", op.NOT_IN_COHORT], {}, async_operation), True - ) - self.assertEqual( - execute_bytecode([_H, op.INTEGER, 2, op.STRING, "other_id", op.NOT_IN_COHORT], {}, async_operation), False - ) diff --git a/hogvm/typescript/package.json b/hogvm/typescript/package.json index 09090b737b7..e0c68ff5723 100644 --- a/hogvm/typescript/package.json +++ b/hogvm/typescript/package.json @@ -1,6 +1,6 @@ { "name": "@posthog/hogvm", - "version": "1.0.0", + "version": "1.0.2", "description": "PostHog HogQL Virtual Machine", "types": "dist/bytecode.d.ts", "main": "dist/bytecode.js", diff --git a/hogvm/typescript/src/bytecode.ts b/hogvm/typescript/src/bytecode.ts index f656f25555e..d35049a298c 100644 --- a/hogvm/typescript/src/bytecode.ts +++ b/hogvm/typescript/src/bytecode.ts @@ -58,11 +58,7 @@ function toConcatArg(arg: any): string { return arg === null ? '' : String(arg) } -export async function executeHogQLBytecode( - bytecode: any[], - fields: Record, - asyncOperation?: (...args: any[]) => any | Promise -): Promise { +export function executeHogQLBytecode(bytecode: any[], fields: Record): any { let temp: any const stack: any[] = [] @@ -176,13 +172,6 @@ export async function executeHogQLBytecode( temp = popStack() stack.push(!popStack().includes(temp)) break - case Operation.IN_COHORT: - case Operation.NOT_IN_COHORT: - if (!asyncOperation) { - throw new Error('HogVM async operation IN_COHORT not provided') - } - stack.push(await asyncOperation(bytecode[i], popStack(), popStack())) - break case Operation.REGEX: temp = popStack() stack.push(new RegExp(popStack()).test(temp)) diff --git a/posthog/hogql/bytecode.py b/posthog/hogql/bytecode.py index 497195fae80..591b01f07bd 100644 --- a/posthog/hogql/bytecode.py +++ b/posthog/hogql/bytecode.py @@ -68,7 +68,10 @@ class BytecodeBuilder(Visitor): return [*self.visit(node.expr), Operation.NOT] def visit_compare_operation(self, node: ast.CompareOperation): - return [*self.visit(node.right), *self.visit(node.left), COMPARE_OPERATIONS[node.op]] + operation = COMPARE_OPERATIONS[node.op] + if operation in [Operation.IN_COHORT, Operation.NOT_IN_COHORT]: + raise NotImplementedException("Cohort operations are not supported") + return [*self.visit(node.right), *self.visit(node.left), operation] def visit_arithmetic_operation(self, node: ast.ArithmeticOperation): return [*self.visit(node.right), *self.visit(node.left), ARITHMETIC_OPERATIONS[node.op]] @@ -99,11 +102,11 @@ class BytecodeBuilder(Visitor): elif isinstance(node.value, str): return [Operation.STRING, node.value] else: - raise NotImplementedException(f"Unsupported constant type: {type(node.value)}") + raise NotImplementedException(f"Constant type `{type(node.value)}` is not supported") def visit_call(self, node: ast.Call): if node.name not in SUPPORTED_FUNCTIONS: - raise NotImplementedException(f"Unsupported function: {node.name}") + raise NotImplementedException(f"HogQL function `{node.name}` is not supported") response = [] for expr in reversed(node.args): response.extend(self.visit(expr)) diff --git a/posthog/hogql/test/test_bytecode.py b/posthog/hogql/test/test_bytecode.py index 3aff8fc325b..1f937995686 100644 --- a/posthog/hogql/test/test_bytecode.py +++ b/posthog/hogql/test/test_bytecode.py @@ -39,8 +39,6 @@ class TestBytecode(BaseTest): self.assertEqual(to_bytecode("1 not ilike 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.NOT_ILIKE]) self.assertEqual(to_bytecode("1 in 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.IN]) self.assertEqual(to_bytecode("1 not in 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.NOT_IN]) - self.assertEqual(to_bytecode("1 in cohort 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.IN_COHORT]) - self.assertEqual(to_bytecode("1 not in cohort 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.NOT_IN_COHORT]) self.assertEqual(to_bytecode("'string' ~ 'regex'"), [_H, op.STRING, "regex", op.STRING, "string", op.REGEX]) self.assertEqual(to_bytecode("'string' =~ 'regex'"), [_H, op.STRING, "regex", op.STRING, "string", op.REGEX]) self.assertEqual( @@ -65,3 +63,7 @@ class TestBytecode(BaseTest): with self.assertRaises(NotImplementedException) as e: to_bytecode("(select 1)") self.assertEqual(str(e.exception), "Visitor has no method visit_select_query") + + with self.assertRaises(NotImplementedException) as e: + to_bytecode("1 in cohort 2") + self.assertEqual(str(e.exception), "Cohort operations are not supported") diff --git a/production.Dockerfile b/production.Dockerfile index 0c6245675c2..5075ef0671b 100644 --- a/production.Dockerfile +++ b/production.Dockerfile @@ -192,6 +192,7 @@ COPY --chown=posthog:posthog ./bin ./bin/ COPY --chown=posthog:posthog manage.py manage.py COPY --chown=posthog:posthog posthog posthog/ COPY --chown=posthog:posthog ee ee/ +COPY --chown=posthog:posthog hogvm hogvm/ # Setup ENV. ENV NODE_ENV=production \ @@ -202,7 +203,7 @@ ENV NODE_ENV=production \ # Expose container port and run entry point script. EXPOSE 8000 -# Expose the port from which we serve OpenMetrics data. +# Expose the port from which we serve OpenMetrics data. EXPOSE 8001 CMD ["./bin/docker"]