mirror of
https://github.com/PostHog/posthog.git
synced 2024-11-21 13:39:22 +01:00
feat(hogvm): remove hogvm cohort matching (#16931)
This commit is contained in:
parent
1012a0c771
commit
558b3b0e30
@ -1,15 +1,19 @@
|
||||
# HogQL bytecode changelog
|
||||
|
||||
## 2023-06-28 - In Cohort
|
||||
## 2023-06-30 - 1.0.2
|
||||
|
||||
### New async operations
|
||||
Rolled back cohort matching instructions.
|
||||
|
||||
## 2023-06-28 - 1.0.1
|
||||
|
||||
Added cohort matching instructions.
|
||||
|
||||
```bash
|
||||
IN_COHORT = 27 # [val2, val1, IN_COHORT] # val1 in cohort val2
|
||||
NOT_IN_COHORT = 28 # [val2, val1, NOT_IN_COHORT] # val1 not in cohort val2
|
||||
```
|
||||
|
||||
## 2023-06-28 - First version
|
||||
## 2023-06-28 - 1.0.0 - First version
|
||||
|
||||
### Operations added
|
||||
|
||||
@ -46,10 +50,6 @@ NULL = 31 # [NULL] # null
|
||||
STRING = 32 # [STRING, 'text'] # 'text'
|
||||
INTEGER = 33 # [INTEGER, 123] # 123
|
||||
FLOAT = 34 # [FLOAT, 123.12] # 123.12
|
||||
|
||||
# Added for completion, but not yet implemented. Stay tuned!
|
||||
IN_COHORT = 27 # [val2, val1, IN_COHORT] # val1 in cohort val2
|
||||
NOT_IN_COHORT = 28 # [val2, val1, NOT_IN_COHORT] # val1 not in cohort val2
|
||||
```
|
||||
|
||||
### Functions added
|
||||
@ -61,4 +61,4 @@ toString(val) # toString(true) == 'true'
|
||||
toInt(val) # toInt('123') == 123
|
||||
toFloat(val) # toFloat('123.2') == 123.2
|
||||
toUUID(val) # toUUID('string') == 'string'
|
||||
```
|
||||
```
|
@ -1,5 +1,5 @@
|
||||
import re
|
||||
from typing import List, Any, Dict, Callable, Optional
|
||||
from typing import List, Any, Dict
|
||||
|
||||
from hogvm.python.operation import Operation, HOGQL_BYTECODE_IDENTIFIER
|
||||
|
||||
@ -33,9 +33,7 @@ def to_concat_arg(arg) -> str:
|
||||
return str(arg)
|
||||
|
||||
|
||||
def execute_bytecode(
|
||||
bytecode: List[Any], fields: Dict[str, Any], async_operation: Optional[Callable[..., Any]] = None
|
||||
) -> Any:
|
||||
def execute_bytecode(bytecode: List[Any], fields: Dict[str, Any]) -> Any:
|
||||
try:
|
||||
stack = []
|
||||
iterator = iter(bytecode)
|
||||
@ -96,16 +94,6 @@ def execute_bytecode(
|
||||
stack.append(stack.pop() in stack.pop())
|
||||
case Operation.NOT_IN:
|
||||
stack.append(stack.pop() not in stack.pop())
|
||||
case Operation.IN_COHORT:
|
||||
if async_operation is None:
|
||||
raise HogVMException("HogVM async_operation IN_COHORT not provided")
|
||||
args = [Operation.IN_COHORT, stack.pop(), stack.pop()]
|
||||
stack.append(async_operation(*args))
|
||||
case Operation.NOT_IN_COHORT:
|
||||
if async_operation is None:
|
||||
raise HogVMException("HogVM async_operation NOT_IN_COHORT not provided")
|
||||
args = [Operation.NOT_IN_COHORT, stack.pop(), stack.pop()]
|
||||
stack.append(async_operation(*args))
|
||||
case Operation.REGEX:
|
||||
args = [stack.pop(), stack.pop()]
|
||||
stack.append(bool(re.search(re.compile(args[1]), args[0])))
|
||||
|
@ -6,7 +6,7 @@ HOGQL_BYTECODE_IDENTIFIER = "_h"
|
||||
SUPPORTED_FUNCTIONS = ("concat", "match", "toString", "toInt", "toFloat", "toUUID")
|
||||
|
||||
|
||||
class Operation(str, Enum):
|
||||
class Operation(int, Enum):
|
||||
FIELD = 1
|
||||
CALL = 2
|
||||
AND = 3
|
||||
|
@ -104,30 +104,3 @@ class TestBytecodeExecute(BaseTest):
|
||||
with self.assertRaises(Exception) as e:
|
||||
execute_bytecode([_H, op.TRUE, op.TRUE, op.NOT], {})
|
||||
self.assertEqual(str(e.exception), "Invalid bytecode. More than one value left on stack")
|
||||
|
||||
def test_async_operations(self):
|
||||
def async_operation(*args):
|
||||
if args[0] == op.IN_COHORT:
|
||||
return args[1] == "my_id" or args[2] == 2
|
||||
elif args[0] == op.NOT_IN_COHORT:
|
||||
return not (args[1] == "my_id" or args[2] == 2)
|
||||
return False
|
||||
|
||||
self.assertEqual(
|
||||
execute_bytecode([_H, op.INTEGER, 1, op.STRING, "my_id", op.IN_COHORT], {}, async_operation), True
|
||||
)
|
||||
self.assertEqual(
|
||||
execute_bytecode([_H, op.INTEGER, 1, op.STRING, "other_id", op.IN_COHORT], {}, async_operation), False
|
||||
)
|
||||
self.assertEqual(
|
||||
execute_bytecode([_H, op.INTEGER, 2, op.STRING, "other_id", op.IN_COHORT], {}, async_operation), True
|
||||
)
|
||||
self.assertEqual(
|
||||
execute_bytecode([_H, op.INTEGER, 1, op.STRING, "my_id", op.NOT_IN_COHORT], {}, async_operation), False
|
||||
)
|
||||
self.assertEqual(
|
||||
execute_bytecode([_H, op.INTEGER, 1, op.STRING, "other_id", op.NOT_IN_COHORT], {}, async_operation), True
|
||||
)
|
||||
self.assertEqual(
|
||||
execute_bytecode([_H, op.INTEGER, 2, op.STRING, "other_id", op.NOT_IN_COHORT], {}, async_operation), False
|
||||
)
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@posthog/hogvm",
|
||||
"version": "1.0.0",
|
||||
"version": "1.0.2",
|
||||
"description": "PostHog HogQL Virtual Machine",
|
||||
"types": "dist/bytecode.d.ts",
|
||||
"main": "dist/bytecode.js",
|
||||
|
@ -58,11 +58,7 @@ function toConcatArg(arg: any): string {
|
||||
return arg === null ? '' : String(arg)
|
||||
}
|
||||
|
||||
export async function executeHogQLBytecode(
|
||||
bytecode: any[],
|
||||
fields: Record<string, any>,
|
||||
asyncOperation?: (...args: any[]) => any | Promise<any>
|
||||
): Promise<any> {
|
||||
export function executeHogQLBytecode(bytecode: any[], fields: Record<string, any>): any {
|
||||
let temp: any
|
||||
const stack: any[] = []
|
||||
|
||||
@ -176,13 +172,6 @@ export async function executeHogQLBytecode(
|
||||
temp = popStack()
|
||||
stack.push(!popStack().includes(temp))
|
||||
break
|
||||
case Operation.IN_COHORT:
|
||||
case Operation.NOT_IN_COHORT:
|
||||
if (!asyncOperation) {
|
||||
throw new Error('HogVM async operation IN_COHORT not provided')
|
||||
}
|
||||
stack.push(await asyncOperation(bytecode[i], popStack(), popStack()))
|
||||
break
|
||||
case Operation.REGEX:
|
||||
temp = popStack()
|
||||
stack.push(new RegExp(popStack()).test(temp))
|
||||
|
@ -68,7 +68,10 @@ class BytecodeBuilder(Visitor):
|
||||
return [*self.visit(node.expr), Operation.NOT]
|
||||
|
||||
def visit_compare_operation(self, node: ast.CompareOperation):
|
||||
return [*self.visit(node.right), *self.visit(node.left), COMPARE_OPERATIONS[node.op]]
|
||||
operation = COMPARE_OPERATIONS[node.op]
|
||||
if operation in [Operation.IN_COHORT, Operation.NOT_IN_COHORT]:
|
||||
raise NotImplementedException("Cohort operations are not supported")
|
||||
return [*self.visit(node.right), *self.visit(node.left), operation]
|
||||
|
||||
def visit_arithmetic_operation(self, node: ast.ArithmeticOperation):
|
||||
return [*self.visit(node.right), *self.visit(node.left), ARITHMETIC_OPERATIONS[node.op]]
|
||||
@ -99,11 +102,11 @@ class BytecodeBuilder(Visitor):
|
||||
elif isinstance(node.value, str):
|
||||
return [Operation.STRING, node.value]
|
||||
else:
|
||||
raise NotImplementedException(f"Unsupported constant type: {type(node.value)}")
|
||||
raise NotImplementedException(f"Constant type `{type(node.value)}` is not supported")
|
||||
|
||||
def visit_call(self, node: ast.Call):
|
||||
if node.name not in SUPPORTED_FUNCTIONS:
|
||||
raise NotImplementedException(f"Unsupported function: {node.name}")
|
||||
raise NotImplementedException(f"HogQL function `{node.name}` is not supported")
|
||||
response = []
|
||||
for expr in reversed(node.args):
|
||||
response.extend(self.visit(expr))
|
||||
|
@ -39,8 +39,6 @@ class TestBytecode(BaseTest):
|
||||
self.assertEqual(to_bytecode("1 not ilike 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.NOT_ILIKE])
|
||||
self.assertEqual(to_bytecode("1 in 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.IN])
|
||||
self.assertEqual(to_bytecode("1 not in 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.NOT_IN])
|
||||
self.assertEqual(to_bytecode("1 in cohort 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.IN_COHORT])
|
||||
self.assertEqual(to_bytecode("1 not in cohort 2"), [_H, op.INTEGER, 2, op.INTEGER, 1, op.NOT_IN_COHORT])
|
||||
self.assertEqual(to_bytecode("'string' ~ 'regex'"), [_H, op.STRING, "regex", op.STRING, "string", op.REGEX])
|
||||
self.assertEqual(to_bytecode("'string' =~ 'regex'"), [_H, op.STRING, "regex", op.STRING, "string", op.REGEX])
|
||||
self.assertEqual(
|
||||
@ -65,3 +63,7 @@ class TestBytecode(BaseTest):
|
||||
with self.assertRaises(NotImplementedException) as e:
|
||||
to_bytecode("(select 1)")
|
||||
self.assertEqual(str(e.exception), "Visitor has no method visit_select_query")
|
||||
|
||||
with self.assertRaises(NotImplementedException) as e:
|
||||
to_bytecode("1 in cohort 2")
|
||||
self.assertEqual(str(e.exception), "Cohort operations are not supported")
|
||||
|
@ -192,6 +192,7 @@ COPY --chown=posthog:posthog ./bin ./bin/
|
||||
COPY --chown=posthog:posthog manage.py manage.py
|
||||
COPY --chown=posthog:posthog posthog posthog/
|
||||
COPY --chown=posthog:posthog ee ee/
|
||||
COPY --chown=posthog:posthog hogvm hogvm/
|
||||
|
||||
# Setup ENV.
|
||||
ENV NODE_ENV=production \
|
||||
@ -202,7 +203,7 @@ ENV NODE_ENV=production \
|
||||
# Expose container port and run entry point script.
|
||||
EXPOSE 8000
|
||||
|
||||
# Expose the port from which we serve OpenMetrics data.
|
||||
# Expose the port from which we serve OpenMetrics data.
|
||||
EXPOSE 8001
|
||||
|
||||
CMD ["./bin/docker"]
|
||||
|
Loading…
Reference in New Issue
Block a user