0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-21 13:39:22 +01:00

feat(hogvm): remove hogvm cohort matching (#16931)

This commit is contained in:
Marius Andra 2023-08-09 00:01:01 +02:00 committed by GitHub
parent 1012a0c771
commit 558b3b0e30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 25 additions and 69 deletions

View File

@ -1,15 +1,19 @@
# HogQL bytecode changelog
## 2023-06-28 - In Cohort
## 2023-06-30 - 1.0.2
### New async operations
Rolled back cohort matching instructions.
## 2023-06-28 - 1.0.1
Added cohort matching instructions.
```bash
IN_COHORT = 27 # [val2, val1, IN_COHORT] # val1 in cohort val2
NOT_IN_COHORT = 28 # [val2, val1, NOT_IN_COHORT] # val1 not in cohort val2
```
## 2023-06-28 - First version
## 2023-06-28 - 1.0.0 - First version
### Operations added
@ -46,10 +50,6 @@ NULL = 31 # [NULL] # null
STRING = 32 # [STRING, 'text'] # 'text'
INTEGER = 33 # [INTEGER, 123] # 123
FLOAT = 34 # [FLOAT, 123.12] # 123.12
# Added for completion, but not yet implemented. Stay tuned!
IN_COHORT = 27 # [val2, val1, IN_COHORT] # val1 in cohort val2
NOT_IN_COHORT = 28 # [val2, val1, NOT_IN_COHORT] # val1 not in cohort val2
```
### Functions added
@ -61,4 +61,4 @@ toString(val) # toString(true) == 'true'
toInt(val) # toInt('123') == 123
toFloat(val) # toFloat('123.2') == 123.2
toUUID(val) # toUUID('string') == 'string'
```
```

View File

@ -1,5 +1,5 @@
import re
from typing import List, Any, Dict, Callable, Optional
from typing import List, Any, Dict
from hogvm.python.operation import Operation, HOGQL_BYTECODE_IDENTIFIER
@ -33,9 +33,7 @@ def to_concat_arg(arg) -> str:
return str(arg)
def execute_bytecode(
bytecode: List[Any], fields: Dict[str, Any], async_operation: Optional[Callable[..., Any]] = None
) -> Any:
def execute_bytecode(bytecode: List[Any], fields: Dict[str, Any]) -> Any:
try:
stack = []
iterator = iter(bytecode)
@ -96,16 +94,6 @@ def execute_bytecode(
stack.append(stack.pop() in stack.pop())
case Operation.NOT_IN:
stack.append(stack.pop() not in stack.pop())
case Operation.IN_COHORT:
if async_operation is None:
raise HogVMException("HogVM async_operation IN_COHORT not provided")
args = [Operation.IN_COHORT, stack.pop(), stack.pop()]
stack.append(async_operation(*args))
case Operation.NOT_IN_COHORT:
if async_operation is None:
raise HogVMException("HogVM async_operation NOT_IN_COHORT not provided")
args = [Operation.NOT_IN_COHORT, stack.pop(), stack.pop()]
stack.append(async_operation(*args))
case Operation.REGEX:
args = [stack.pop(), stack.pop()]
stack.append(bool(re.search(re.compile(args[1]), args[0])))

View File

@ -6,7 +6,7 @@ HOGQL_BYTECODE_IDENTIFIER = "_h"
SUPPORTED_FUNCTIONS = ("concat", "match", "toString", "toInt", "toFloat", "toUUID")
class Operation(str, Enum):
class Operation(int, Enum):
FIELD = 1
CALL = 2
AND = 3

View File

@ -104,30 +104,3 @@ class TestBytecodeExecute(BaseTest):
with self.assertRaises(Exception) as e:
execute_bytecode([_H, op.TRUE, op.TRUE, op.NOT], {})
self.assertEqual(str(e.exception), "Invalid bytecode. More than one value left on stack")
def test_async_operations(self):
def async_operation(*args):
if args[0] == op.IN_COHORT:
return args[1] == "my_id" or args[2] == 2
elif args[0] == op.NOT_IN_COHORT:
return not (args[1] == "my_id" or args[2] == 2)
return False
self.assertEqual(
execute_bytecode([_H, op.INTEGER, 1, op.STRING, "my_id", op.IN_COHORT], {}, async_operation), True
)
self.assertEqual(
execute_bytecode([_H, op.INTEGER, 1, op.STRING, "other_id", op.IN_COHORT], {}, async_operation), False
)
self.assertEqual(
execute_bytecode([_H, op.INTEGER, 2, op.STRING, "other_id", op.IN_COHORT], {}, async_operation), True
)
self.assertEqual(
execute_bytecode([_H, op.INTEGER, 1, op.STRING, "my_id", op.NOT_IN_COHORT], {}, async_operation), False
)
self.assertEqual(
execute_bytecode([_H, op.INTEGER, 1, op.STRING, "other_id", op.NOT_IN_COHORT], {}, async_operation), True
)
self.assertEqual(
execute_bytecode([_H, op.INTEGER, 2, op.STRING, "other_id", op.NOT_IN_COHORT], {}, async_operation), False
)

View File

@ -1,6 +1,6 @@
{
"name": "@posthog/hogvm",
"version": "1.0.0",
"version": "1.0.2",
"description": "PostHog HogQL Virtual Machine",
"types": "dist/bytecode.d.ts",
"main": "dist/bytecode.js",

View File

@ -58,11 +58,7 @@ function toConcatArg(arg: any): string {
return arg === null ? '' : String(arg)
}
export async function executeHogQLBytecode(
bytecode: any[],
fields: Record<string, any>,
asyncOperation?: (...args: any[]) => any | Promise<any>
): Promise<any> {
export function executeHogQLBytecode(bytecode: any[], fields: Record<string, any>): any {
let temp: any
const stack: any[] = []
@ -176,13 +172,6 @@ export async function executeHogQLBytecode(
temp = popStack()
stack.push(!popStack().includes(temp))
break
case Operation.IN_COHORT:
case Operation.NOT_IN_COHORT:
if (!asyncOperation) {
throw new Error('HogVM async operation IN_COHORT not provided')
}
stack.push(await asyncOperation(bytecode[i], popStack(), popStack()))
break
case Operation.REGEX:
temp = popStack()
stack.push(new RegExp(popStack()).test(temp))

View File

@ -68,7 +68,10 @@ class BytecodeBuilder(Visitor):
return [*self.visit(node.expr), Operation.NOT]
def visit_compare_operation(self, node: ast.CompareOperation):
return [*self.visit(node.right), *self.visit(node.left), COMPARE_OPERATIONS[node.op]]
operation = COMPARE_OPERATIONS[node.op]
if operation in [Operation.IN_COHORT, Operation.NOT_IN_COHORT]:
raise NotImplementedException("Cohort operations are not supported")
return [*self.visit(node.right), *self.visit(node.left), operation]
def visit_arithmetic_operation(self, node: ast.ArithmeticOperation):
return [*self.visit(node.right), *self.visit(node.left), ARITHMETIC_OPERATIONS[node.op]]
@ -99,11 +102,11 @@ class BytecodeBuilder(Visitor):
elif isinstance(node.value, str):
return [Operation.STRING, node.value]
else:
raise NotImplementedException(f"Unsupported constant type: {type(node.value)}")
raise NotImplementedException(f"Constant type `{type(node.value)}` is not supported")
def visit_call(self, node: ast.Call):
if node.name not in SUPPORTED_FUNCTIONS:
raise NotImplementedException(f"Unsupported function: {node.name}")
raise NotImplementedException(f"HogQL function `{node.name}` is not supported")
response = []
for expr in reversed(node.args):
response.extend(self.visit(expr))

View File

@ -39,8 +39,6 @@ class TestBytecode(BaseTest):
self.assertEqual(to_bytecode("1 not ilike 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.NOT_ILIKE])
self.assertEqual(to_bytecode("1 in 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.IN])
self.assertEqual(to_bytecode("1 not in 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.NOT_IN])
self.assertEqual(to_bytecode("1 in cohort 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.IN_COHORT])
self.assertEqual(to_bytecode("1 not in cohort 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.NOT_IN_COHORT])
self.assertEqual(to_bytecode("'string' ~ 'regex'"), [_H, op.STRING, "regex", op.STRING, "string", op.REGEX])
self.assertEqual(to_bytecode("'string' =~ 'regex'"), [_H, op.STRING, "regex", op.STRING, "string", op.REGEX])
self.assertEqual(
@ -65,3 +63,7 @@ class TestBytecode(BaseTest):
with self.assertRaises(NotImplementedException) as e:
to_bytecode("(select 1)")
self.assertEqual(str(e.exception), "Visitor has no method visit_select_query")
with self.assertRaises(NotImplementedException) as e:
to_bytecode("1 in cohort 2")
self.assertEqual(str(e.exception), "Cohort operations are not supported")

View File

@ -192,6 +192,7 @@ COPY --chown=posthog:posthog ./bin ./bin/
COPY --chown=posthog:posthog manage.py manage.py
COPY --chown=posthog:posthog posthog posthog/
COPY --chown=posthog:posthog ee ee/
COPY --chown=posthog:posthog hogvm hogvm/
# Setup ENV.
ENV NODE_ENV=production \
@ -202,7 +203,7 @@ ENV NODE_ENV=production \
# Expose container port and run entry point script.
EXPOSE 8000
# Expose the port from which we serve OpenMetrics data.
# Expose the port from which we serve OpenMetrics data.
EXPOSE 8001
CMD ["./bin/docker"]