mirror of
https://github.com/PostHog/posthog.git
synced 2024-11-21 13:39:22 +01:00
feat(hog): memory limits (#23564)
This commit is contained in:
parent
cf575b0129
commit
043aa16e5b
4
hogvm/__tests__/__snapshots__/printLoops.hoge
Normal file
4
hogvm/__tests__/__snapshots__/printLoops.hoge
Normal file
@ -0,0 +1,4 @@
|
||||
["_h", 32, "key", 32, "value", 32, "key2", 32, "value2", 42, 2, 32, "na", 33, 0, 33, 100, 36, 2, 15, 40, 45, 32, "na",
|
||||
36, 1, 2, "concat", 2, 37, 1, 36, 0, 36, 2, 32, "key_", 2, "concat", 2, 32, "wasted", 32, " batman!", 36, 1, 32,
|
||||
"memory: ", 2, "concat", 3, 32, "something", 36, 0, 42, 2, 46, 33, 1, 36, 2, 6, 37, 2, 39, -52, 35, 36, 0, 2, "print",
|
||||
1, 35, 36, 0, 2, "jsonStringify", 1, 36, 2, 2, "jsonParse", 1, 2, "print", 1, 35, 35, 35, 35]
|
2
hogvm/__tests__/__snapshots__/printLoops.stdout
Normal file
2
hogvm/__tests__/__snapshots__/printLoops.stdout
Normal file
File diff suppressed because one or more lines are too long
4
hogvm/__tests__/__snapshots__/printLoops2.hoge
Normal file
4
hogvm/__tests__/__snapshots__/printLoops2.hoge
Normal file
@ -0,0 +1,4 @@
|
||||
["_h", 32, "key", 32, "value", 32, "key2", 32, "value2", 42, 2, 32, "key", 32, "value", 32, "key2", 32, "value2", 42, 2,
|
||||
33, 0, 33, 30, 36, 2, 15, 40, 25, 36, 0, 36, 2, 32, "key_", 2, "concat", 2, 32, "something", 36, 1, 42, 1, 46, 33, 1,
|
||||
36, 2, 6, 37, 2, 39, -32, 35, 36, 0, 2, "print", 1, 35, 36, 0, 2, "jsonStringify", 1, 2, "jsonParse", 1, 2, "print", 1,
|
||||
35, 35, 35]
|
2
hogvm/__tests__/__snapshots__/printLoops2.stdout
Normal file
2
hogvm/__tests__/__snapshots__/printLoops2.stdout
Normal file
@ -0,0 +1,2 @@
|
||||
{'key': 'value', 'key2': 'value2', 'key_0': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_1': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_2': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_3': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_4': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_5': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_6': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_7': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_8': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_9': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_10': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_11': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_12': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_13': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_14': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_15': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_16': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_17': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_18': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_19': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_20': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_21': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_22': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_23': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_24': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_25': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_26': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_27': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_28': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_29': {'something': {'key': 'value', 'key2': 'value2'}}}
|
||||
{'key': 'value', 'key2': 'value2', 'key_0': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_1': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_2': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_3': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_4': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_5': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_6': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_7': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_8': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_9': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_10': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_11': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_12': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_13': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_14': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_15': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_16': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_17': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_18': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_19': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_20': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_21': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_22': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_23': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_24': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_25': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_26': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_27': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_28': {'something': {'key': 'value', 'key2': 'value2'}}, 'key_29': {'something': {'key': 'value', 'key2': 'value2'}}}
|
22
hogvm/__tests__/printLoops.hog
Normal file
22
hogvm/__tests__/printLoops.hog
Normal file
@ -0,0 +1,22 @@
|
||||
// Printing recursive objects.
|
||||
let obj := {'key': 'value', 'key2': 'value2'}
|
||||
let str := 'na'
|
||||
for (let i := 0; i < 100; i := i + 1) {
|
||||
str := str || 'na'
|
||||
obj[f'key_{i}'] := {
|
||||
'wasted': 'memory: ' || str || ' batman!',
|
||||
'something': obj, // something links to obj
|
||||
}
|
||||
}
|
||||
|
||||
// printing a cyclic object terminates: recursive references are printed as null instead of looping forever
|
||||
print(obj)
|
||||
|
||||
// jsonStringify doesn't crash on a cyclic object (recursive references become null)
|
||||
let json := jsonStringify(obj)
|
||||
|
||||
// Commented out because JSON output is slightly different in python vs nodejs
|
||||
// print(json)
|
||||
|
||||
// Should be equal to the original printed object -> nulls instead of recursive nodes
|
||||
print(jsonParse(json))
|
11
hogvm/__tests__/printLoops2.hog
Normal file
11
hogvm/__tests__/printLoops2.hog
Normal file
@ -0,0 +1,11 @@
|
||||
// Printing objects that reference the same (non-recursive) leaf many times.
|
||||
let root := {'key': 'value', 'key2': 'value2'}
|
||||
let leaf := {'key': 'value', 'key2': 'value2'}
|
||||
for (let i := 0; i < 30; i := i + 1) {
|
||||
root[f'key_{i}'] := {
|
||||
'something': leaf,
|
||||
}
|
||||
}
|
||||
// Should NOT replace all leaves with nulls.
|
||||
print(root)
|
||||
print(jsonParse(jsonStringify(root)))
|
@ -10,11 +10,13 @@ from hogvm.python.operation import Operation, HOGQL_BYTECODE_IDENTIFIER
|
||||
from hogvm.python.stl import STL
|
||||
from dataclasses import dataclass
|
||||
|
||||
from hogvm.python.utils import HogVMException, get_nested_value, like, set_nested_value
|
||||
from hogvm.python.utils import HogVMException, get_nested_value, like, set_nested_value, calculate_cost
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from posthog.models import Team
|
||||
|
||||
MAX_MEMORY = 64 * 1024 * 1024 # 64 MB
|
||||
|
||||
|
||||
@dataclass
|
||||
class BytecodeResult:
|
||||
@ -35,8 +37,11 @@ def execute_bytecode(
|
||||
start_time = time.time()
|
||||
last_op = len(bytecode) - 1
|
||||
stack: list = []
|
||||
mem_stack: list = []
|
||||
call_stack: list[tuple[int, int, int]] = [] # (ip, stack_start, arg_len)
|
||||
declared_functions: dict[str, tuple[int, int]] = {}
|
||||
mem_used = 0
|
||||
max_mem_used = 0
|
||||
ip = -1
|
||||
ops = 0
|
||||
stdout: list[str] = []
|
||||
@ -52,8 +57,20 @@ def execute_bytecode(
|
||||
def pop_stack():
|
||||
if not stack:
|
||||
raise HogVMException("Stack underflow")
|
||||
nonlocal mem_used
|
||||
mem_used -= mem_stack.pop()
|
||||
return stack.pop()
|
||||
|
||||
def push_stack(value):
|
||||
stack.append(value)
|
||||
mem_stack.append(calculate_cost(value))
|
||||
nonlocal mem_used
|
||||
mem_used += mem_stack[-1]
|
||||
nonlocal max_mem_used
|
||||
max_mem_used = max(mem_used, max_mem_used)
|
||||
if mem_used > MAX_MEMORY:
|
||||
raise HogVMException(f"Memory limit of {MAX_MEMORY} bytes exceeded. Tried to allocate {mem_used} bytes.")
|
||||
|
||||
if next_token() != HOGQL_BYTECODE_IDENTIFIER:
|
||||
raise HogVMException(f"Invalid bytecode. Must start with '{HOGQL_BYTECODE_IDENTIFIER}'")
|
||||
|
||||
@ -75,72 +92,72 @@ def execute_bytecode(
|
||||
case None:
|
||||
break
|
||||
case Operation.STRING:
|
||||
stack.append(next_token())
|
||||
push_stack(next_token())
|
||||
case Operation.INTEGER:
|
||||
stack.append(next_token())
|
||||
push_stack(next_token())
|
||||
case Operation.FLOAT:
|
||||
stack.append(next_token())
|
||||
push_stack(next_token())
|
||||
case Operation.TRUE:
|
||||
stack.append(True)
|
||||
push_stack(True)
|
||||
case Operation.FALSE:
|
||||
stack.append(False)
|
||||
push_stack(False)
|
||||
case Operation.NULL:
|
||||
stack.append(None)
|
||||
push_stack(None)
|
||||
case Operation.NOT:
|
||||
stack.append(not pop_stack())
|
||||
push_stack(not pop_stack())
|
||||
case Operation.AND:
|
||||
stack.append(all([pop_stack() for _ in range(next_token())])) # noqa: C419
|
||||
push_stack(all([pop_stack() for _ in range(next_token())])) # noqa: C419
|
||||
case Operation.OR:
|
||||
stack.append(any([pop_stack() for _ in range(next_token())])) # noqa: C419
|
||||
push_stack(any([pop_stack() for _ in range(next_token())])) # noqa: C419
|
||||
case Operation.PLUS:
|
||||
stack.append(pop_stack() + pop_stack())
|
||||
push_stack(pop_stack() + pop_stack())
|
||||
case Operation.MINUS:
|
||||
stack.append(pop_stack() - pop_stack())
|
||||
push_stack(pop_stack() - pop_stack())
|
||||
case Operation.DIVIDE:
|
||||
stack.append(pop_stack() / pop_stack())
|
||||
push_stack(pop_stack() / pop_stack())
|
||||
case Operation.MULTIPLY:
|
||||
stack.append(pop_stack() * pop_stack())
|
||||
push_stack(pop_stack() * pop_stack())
|
||||
case Operation.MOD:
|
||||
stack.append(pop_stack() % pop_stack())
|
||||
push_stack(pop_stack() % pop_stack())
|
||||
case Operation.EQ:
|
||||
stack.append(pop_stack() == pop_stack())
|
||||
push_stack(pop_stack() == pop_stack())
|
||||
case Operation.NOT_EQ:
|
||||
stack.append(pop_stack() != pop_stack())
|
||||
push_stack(pop_stack() != pop_stack())
|
||||
case Operation.GT:
|
||||
stack.append(pop_stack() > pop_stack())
|
||||
push_stack(pop_stack() > pop_stack())
|
||||
case Operation.GT_EQ:
|
||||
stack.append(pop_stack() >= pop_stack())
|
||||
push_stack(pop_stack() >= pop_stack())
|
||||
case Operation.LT:
|
||||
stack.append(pop_stack() < pop_stack())
|
||||
push_stack(pop_stack() < pop_stack())
|
||||
case Operation.LT_EQ:
|
||||
stack.append(pop_stack() <= pop_stack())
|
||||
push_stack(pop_stack() <= pop_stack())
|
||||
case Operation.LIKE:
|
||||
stack.append(like(pop_stack(), pop_stack()))
|
||||
push_stack(like(pop_stack(), pop_stack()))
|
||||
case Operation.ILIKE:
|
||||
stack.append(like(pop_stack(), pop_stack(), re.IGNORECASE))
|
||||
push_stack(like(pop_stack(), pop_stack(), re.IGNORECASE))
|
||||
case Operation.NOT_LIKE:
|
||||
stack.append(not like(pop_stack(), pop_stack()))
|
||||
push_stack(not like(pop_stack(), pop_stack()))
|
||||
case Operation.NOT_ILIKE:
|
||||
stack.append(not like(pop_stack(), pop_stack(), re.IGNORECASE))
|
||||
push_stack(not like(pop_stack(), pop_stack(), re.IGNORECASE))
|
||||
case Operation.IN:
|
||||
stack.append(pop_stack() in pop_stack())
|
||||
push_stack(pop_stack() in pop_stack())
|
||||
case Operation.NOT_IN:
|
||||
stack.append(pop_stack() not in pop_stack())
|
||||
push_stack(pop_stack() not in pop_stack())
|
||||
case Operation.REGEX:
|
||||
args = [pop_stack(), pop_stack()]
|
||||
stack.append(bool(re.search(re.compile(args[1]), args[0])))
|
||||
push_stack(bool(re.search(re.compile(args[1]), args[0])))
|
||||
case Operation.NOT_REGEX:
|
||||
args = [pop_stack(), pop_stack()]
|
||||
stack.append(not bool(re.search(re.compile(args[1]), args[0])))
|
||||
push_stack(not bool(re.search(re.compile(args[1]), args[0])))
|
||||
case Operation.IREGEX:
|
||||
args = [pop_stack(), pop_stack()]
|
||||
stack.append(bool(re.search(re.compile(args[1], re.RegexFlag.IGNORECASE), args[0])))
|
||||
push_stack(bool(re.search(re.compile(args[1], re.RegexFlag.IGNORECASE), args[0])))
|
||||
case Operation.NOT_IREGEX:
|
||||
args = [pop_stack(), pop_stack()]
|
||||
stack.append(not bool(re.search(re.compile(args[1], re.RegexFlag.IGNORECASE), args[0])))
|
||||
push_stack(not bool(re.search(re.compile(args[1], re.RegexFlag.IGNORECASE), args[0])))
|
||||
case Operation.GET_GLOBAL:
|
||||
chain = [pop_stack() for _ in range(next_token())]
|
||||
stack.append(deepcopy(get_nested_value(globals, chain)))
|
||||
push_stack(deepcopy(get_nested_value(globals, chain)))
|
||||
case Operation.POP:
|
||||
pop_stack()
|
||||
case Operation.RETURN:
|
||||
@ -148,22 +165,29 @@ def execute_bytecode(
|
||||
ip, stack_start, arg_len = call_stack.pop()
|
||||
response = pop_stack()
|
||||
stack = stack[0:stack_start]
|
||||
stack.append(response)
|
||||
mem_used -= sum(mem_stack[stack_start:])
|
||||
mem_stack = mem_stack[0:stack_start]
|
||||
push_stack(response)
|
||||
else:
|
||||
return BytecodeResult(result=pop_stack(), stdout=stdout, bytecode=bytecode)
|
||||
case Operation.GET_LOCAL:
|
||||
stack_start = 0 if not call_stack else call_stack[-1][1]
|
||||
stack.append(stack[next_token() + stack_start])
|
||||
push_stack(stack[next_token() + stack_start])
|
||||
case Operation.SET_LOCAL:
|
||||
stack_start = 0 if not call_stack else call_stack[-1][1]
|
||||
value = pop_stack()
|
||||
stack[next_token() + stack_start] = value
|
||||
index = next_token() + stack_start
|
||||
stack[index] = value
|
||||
last_cost = mem_stack[index]
|
||||
mem_stack[index] = calculate_cost(value)
|
||||
mem_used += mem_stack[index] - last_cost
|
||||
max_mem_used = max(mem_used, max_mem_used)
|
||||
case Operation.GET_PROPERTY:
|
||||
property = pop_stack()
|
||||
stack.append(get_nested_value(pop_stack(), [property]))
|
||||
push_stack(get_nested_value(pop_stack(), [property]))
|
||||
case Operation.GET_PROPERTY_NULLISH:
|
||||
property = pop_stack()
|
||||
stack.append(get_nested_value(pop_stack(), [property], nullish=True))
|
||||
push_stack(get_nested_value(pop_stack(), [property], nullish=True))
|
||||
case Operation.SET_PROPERTY:
|
||||
value = pop_stack()
|
||||
field = pop_stack()
|
||||
@ -173,19 +197,25 @@ def execute_bytecode(
|
||||
if count > 0:
|
||||
elems = stack[-(count * 2) :]
|
||||
stack = stack[: -(count * 2)]
|
||||
stack.append({elems[i]: elems[i + 1] for i in range(0, len(elems), 2)})
|
||||
mem_used -= sum(mem_stack[-(count * 2) :])
|
||||
mem_stack = mem_stack[: -(count * 2)]
|
||||
push_stack({elems[i]: elems[i + 1] for i in range(0, len(elems), 2)})
|
||||
else:
|
||||
stack.append({})
|
||||
push_stack({})
|
||||
case Operation.ARRAY:
|
||||
count = next_token()
|
||||
elems = stack[-count:]
|
||||
stack = stack[:-count]
|
||||
stack.append(elems)
|
||||
mem_used -= sum(mem_stack[-count:])
|
||||
mem_stack = mem_stack[:-count]
|
||||
push_stack(elems)
|
||||
case Operation.TUPLE:
|
||||
count = next_token()
|
||||
elems = stack[-count:]
|
||||
stack = stack[:-count]
|
||||
stack.append(tuple(elems))
|
||||
mem_used -= sum(mem_stack[-count:])
|
||||
mem_stack = mem_stack[:-count]
|
||||
push_stack(tuple(elems))
|
||||
case Operation.JUMP:
|
||||
count = next_token()
|
||||
ip += count
|
||||
@ -214,13 +244,13 @@ def execute_bytecode(
|
||||
args = [pop_stack() for _ in range(next_token())]
|
||||
|
||||
if functions is not None and name in functions:
|
||||
stack.append(functions[name](*args))
|
||||
push_stack(functions[name](*args))
|
||||
continue
|
||||
|
||||
if name not in STL:
|
||||
raise HogVMException(f"Unsupported function call: {name}")
|
||||
|
||||
stack.append(STL[name](name, args, team, stdout, timeout))
|
||||
push_stack(STL[name](name, args, team, stdout, timeout))
|
||||
if ip == last_op:
|
||||
break
|
||||
if debug:
|
||||
|
@ -107,9 +107,28 @@ def jsonParse(name: str, args: list[Any], team: Optional["Team"], stdout: Option
|
||||
|
||||
|
||||
def jsonStringify(name: str, args: list[Any], team: Optional["Team"], stdout: Optional[list[str]], timeout: int) -> str:
    """Serialize args[0] to a JSON string, replacing recursive references with null.

    Args:
        name: unused here; part of the common STL function signature.
        args: args[0] is the value to serialize. args[1], when it is a positive
            int, is passed to json.dumps as the indent width.
        team, stdout, timeout: unused here; part of the common STL function signature.

    Returns:
        The JSON text produced by json.dumps.
    """
    # ids of the containers on the path from the root to the value being visited
    marked = set()

    def json_safe(obj):
        """Copy obj, cutting off any container that (transitively) contains itself with None."""
        if not isinstance(obj, (dict, list, tuple)):
            return obj
        if id(obj) in marked:
            # obj is one of its own ancestors: a cycle, which json.dumps would reject.
            return None
        marked.add(id(obj))
        try:
            if isinstance(obj, dict):
                return {json_safe(k): json_safe(v) for k, v in obj.items()}
            if isinstance(obj, list):
                return [json_safe(v) for v in obj]
            return tuple(json_safe(v) for v in obj)
        finally:
            # Unmark on the way out so a container that is merely referenced
            # several times (not recursively) is still serialized in full.
            marked.remove(id(obj))

    # Every return path must go through json_safe; dumping args[0] directly
    # raises on circular references.
    if len(args) > 1 and isinstance(args[1], int) and args[1] > 0:
        return json.dumps(json_safe(args[0]), indent=args[1])
    return json.dumps(json_safe(args[0]))
|
||||
|
||||
|
||||
def base64Encode(name: str, args: list[Any], team: Optional["Team"], stdout: Optional[list[str]], timeout: int) -> str:
|
||||
|
@ -31,15 +31,24 @@ def escape_identifier(identifier: str | int) -> str:
|
||||
return "`{}`".format("".join(backquote_escape_chars_map.get(c, c) for c in identifier))
|
||||
|
||||
|
||||
def print_hog_value(obj):
|
||||
if isinstance(obj, list):
|
||||
return f"[{', '.join(map(print_hog_value, obj))}]"
|
||||
if isinstance(obj, dict):
|
||||
return f"{{{', '.join([f'{print_hog_value(key)}: {print_hog_value(value)}' for key, value in obj.items()])}}}"
|
||||
if isinstance(obj, tuple):
|
||||
if len(obj) < 2:
|
||||
return f"tuple({', '.join(map(print_hog_value, obj))})"
|
||||
return f"({', '.join(map(print_hog_value, obj))})"
|
||||
def print_hog_value(obj, marked: set | None = None):
|
||||
if marked is None:
|
||||
marked = set()
|
||||
if isinstance(obj, list) or isinstance(obj, dict) or isinstance(obj, tuple):
|
||||
if id(obj) in marked:
|
||||
return "null"
|
||||
marked.add(id(obj))
|
||||
try:
|
||||
if isinstance(obj, list):
|
||||
return f"[{', '.join([print_hog_value(o, marked) for o in obj])}]"
|
||||
if isinstance(obj, dict):
|
||||
return f"{{{', '.join([f'{print_hog_value(key, marked)}: {print_hog_value(value, marked)}' for key, value in obj.items()])}}}"
|
||||
if isinstance(obj, tuple):
|
||||
if len(obj) < 2:
|
||||
return f"tuple({', '.join([print_hog_value(o, marked) for o in obj])})"
|
||||
return f"({', '.join([print_hog_value(o, marked) for o in obj])})"
|
||||
finally:
|
||||
marked.remove(id(obj))
|
||||
if obj is True:
|
||||
return "true"
|
||||
if obj is False:
|
||||
|
@ -134,6 +134,143 @@ class TestBytecodeExecute:
|
||||
else:
|
||||
raise AssertionError("Expected Exception not raised")
|
||||
|
||||
def test_memory_limits_1(self):
    """Doubling a string in a loop must trip the VM memory limit."""
    # Hog source the bytecode below was compiled from:
    #
    #   let string := 'banana'
    #   for (let i := 0; i < 100; i := i + 1) {
    #     string := string || string
    #   }
    bytecode = [
        "_h",
        32, "banana",
        33, 0,
        33, 100, 36, 1, 15, 40, 18,
        36, 0, 36, 0, 2, "concat", 2, 37, 0,
        33, 1, 36, 1, 6, 37, 1,
        39, -25,
        35,
        35,
    ]  # fmt: skip
    expected_message = "Memory limit of 67108864 bytes exceeded. Tried to allocate 75497504 bytes."
    try:
        execute_bytecode(bytecode, {})
    except Exception as e:
        assert str(e) == expected_message
    else:
        raise AssertionError("Expected Exception not raised")
|
||||
|
||||
def test_memory_limits_2(self):
    """Filling a self-referencing dict with large strings must trip the VM memory limit."""
    # Hog source the bytecode below was compiled from (this is NOT the same
    # program as test_memory_limits_1):
    #
    #   let obj := {'key': 'value', 'key2': 'value2'}
    #   let str := 'na'
    #   for (let i := 0; i < 10000; i := i + 1) {
    #     if (i < 16) {
    #       str := str || str
    #     }
    #     obj[f'key_{i}'] := {
    #       'wasted': 'memory: ' || str || ' batman!',
    #       'something': obj, // something links to obj
    #     }
    #   }
    bytecode = [
        "_h",
        32, "key", 32, "value", 32, "key2", 32, "value2", 42, 2,
        32, "na",
        33, 0,
        33, 10000, 36, 2, 15, 40, 52,
        33, 16, 36, 2, 15, 40, 9,
        36, 1, 36, 1, 2, "concat", 2, 37, 1,
        36, 0,
        36, 2, 32, "key_", 2, "concat", 2,
        32, "wasted", 32, " batman!", 36, 1, 32, "memory: ", 2, "concat", 3,
        32, "something", 36, 0,
        42, 2,
        46,
        33, 1, 36, 2, 6, 37, 2,
        39, -59,
        35,
        35,
        35,
    ]  # fmt: skip
    expected_message = "Memory limit of 67108864 bytes exceeded. Tried to allocate 67155164 bytes."
    try:
        execute_bytecode(bytecode, {})
    except Exception as e:
        assert str(e) == expected_message
    else:
        raise AssertionError("Expected Exception not raised")
|
||||
|
||||
def test_functions(self):
|
||||
def stringify(*args):
|
||||
if args[0] == 1:
|
||||
|
@ -2,6 +2,9 @@ import re
|
||||
from typing import Any
|
||||
|
||||
|
||||
COST_PER_UNIT = 8
|
||||
|
||||
|
||||
class HogVMException(Exception):
|
||||
pass
|
||||
|
||||
@ -46,3 +49,24 @@ def set_nested_value(obj, chain, value) -> Any:
|
||||
raise HogVMException(f'Can not set property "{chain[-1]}" on object of type "{type(obj).__name__}"')
|
||||
|
||||
return obj
|
||||
|
||||
|
||||
def calculate_cost(object, marked: set | None = None) -> int:
    """Estimate the cost of a value for the VM's memory accounting.

    Every value is charged COST_PER_UNIT. Strings add their length. Dicts add the
    cost of every key and value; lists and tuples add the cost of every element.
    A container that is reached again while it is still being measured (a cycle)
    is charged COST_PER_UNIT only.

    Args:
        object: the value to measure.
        marked: ids of the containers currently being measured; callers normally
            omit it and the recursion threads it through.

    Returns:
        The estimated cost as an int.
    """
    if isinstance(object, str):
        return COST_PER_UNIT + len(object)
    if not isinstance(object, (dict, list, tuple)):
        return COST_PER_UNIT

    visiting = set() if marked is None else marked
    identity = id(object)
    if identity in visiting:
        # Already on the current path: count the reference, not the contents again.
        return COST_PER_UNIT

    visiting.add(identity)
    try:
        contents = 0
        if isinstance(object, dict):
            for key, value in object.items():
                contents += calculate_cost(key, visiting) + calculate_cost(value, visiting)
        else:
            for val in object:
                contents += calculate_cost(val, visiting)
        return COST_PER_UNIT + contents
    finally:
        # Unmark on exit, so a container referenced from several places is
        # charged once per reference rather than treated as a cycle.
        visiting.remove(identity)
|
||||
|
@ -1,8 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
cd typescript
|
||||
pnpm run build
|
||||
cd ..
|
||||
|
||||
cd ..
|
||||
|
||||
rm -f hogvm/__tests__/__snapshots__/*.stdout.nodejs
|
||||
rm -f hogvm/__tests__/__snapshots__/*.stdout.python
|
||||
|
||||
for file in hogvm/__tests__/*.hog; do
|
||||
echo "Testing $file"
|
||||
|
||||
@ -21,7 +27,6 @@ for file in hogvm/__tests__/*.hog; do
|
||||
rm $basename.stdout.python
|
||||
else
|
||||
echo "Test failed"
|
||||
rm $basename.stdout.nodejs $basename.stdout.python
|
||||
fi
|
||||
set -e
|
||||
done
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@posthog/hogvm",
|
||||
"version": "1.0.18",
|
||||
"version": "1.0.20",
|
||||
"description": "PostHog Hog Virtual Machine",
|
||||
"types": "dist/index.d.ts",
|
||||
"main": "dist/index.js",
|
||||
|
@ -13,7 +13,7 @@ const tuple = (array: any[]): any[] => {
|
||||
return array
|
||||
}
|
||||
|
||||
describe('HogQL Bytecode', () => {
|
||||
describe('hogvm execute', () => {
|
||||
test('execution results', async () => {
|
||||
const globals = { properties: { foo: 'bar', nullValue: null } }
|
||||
const options = { globals }
|
||||
@ -133,6 +133,149 @@ describe('HogQL Bytecode', () => {
|
||||
expect(() => execSync(bytecode2)).toThrow('Too many arguments')
|
||||
})
|
||||
|
||||
test('memory limits 1', async () => {
|
||||
// let string := 'banana'
|
||||
// for (let i := 0; i < 100; i := i + 1) {
|
||||
// string := string || string
|
||||
// }
|
||||
const bytecode: any[] = [
|
||||
'_h',
|
||||
32,
|
||||
'banana',
|
||||
33,
|
||||
0,
|
||||
33,
|
||||
100,
|
||||
36,
|
||||
1,
|
||||
15,
|
||||
40,
|
||||
18,
|
||||
36,
|
||||
0,
|
||||
36,
|
||||
0,
|
||||
2,
|
||||
'concat',
|
||||
2,
|
||||
37,
|
||||
0,
|
||||
33,
|
||||
1,
|
||||
36,
|
||||
1,
|
||||
6,
|
||||
37,
|
||||
1,
|
||||
39,
|
||||
-25,
|
||||
35,
|
||||
35,
|
||||
]
|
||||
|
||||
await expect(execAsync(bytecode)).rejects.toThrow(
|
||||
'Memory limit of 67108864 bytes exceeded. Tried to allocate 75497504 bytes.'
|
||||
)
|
||||
})
|
||||
|
||||
test('memory limits 2', async () => {
|
||||
// // Printing recursive objects.
|
||||
// let obj := {'key': 'value', 'key2': 'value2'}
|
||||
// let str := 'na'
|
||||
// for (let i := 0; i < 10000; i := i + 1) {
|
||||
// if (i < 16) {
|
||||
// str := str || str
|
||||
// }
|
||||
// obj[f'key_{i}'] := {
|
||||
// 'wasted': 'memory: ' || str || ' batman!',
|
||||
// 'something': obj, // something links to obj
|
||||
// }
|
||||
// }
|
||||
const bytecode: any[] = [
|
||||
'_h',
|
||||
32,
|
||||
'key',
|
||||
32,
|
||||
'value',
|
||||
32,
|
||||
'key2',
|
||||
32,
|
||||
'value2',
|
||||
42,
|
||||
2,
|
||||
32,
|
||||
'na',
|
||||
33,
|
||||
0,
|
||||
33,
|
||||
10000,
|
||||
36,
|
||||
2,
|
||||
15,
|
||||
40,
|
||||
52,
|
||||
33,
|
||||
16,
|
||||
36,
|
||||
2,
|
||||
15,
|
||||
40,
|
||||
9,
|
||||
36,
|
||||
1,
|
||||
36,
|
||||
1,
|
||||
2,
|
||||
'concat',
|
||||
2,
|
||||
37,
|
||||
1,
|
||||
36,
|
||||
0,
|
||||
36,
|
||||
2,
|
||||
32,
|
||||
'key_',
|
||||
2,
|
||||
'concat',
|
||||
2,
|
||||
32,
|
||||
'wasted',
|
||||
32,
|
||||
' batman!',
|
||||
36,
|
||||
1,
|
||||
32,
|
||||
'memory: ',
|
||||
2,
|
||||
'concat',
|
||||
3,
|
||||
32,
|
||||
'something',
|
||||
36,
|
||||
0,
|
||||
42,
|
||||
2,
|
||||
46,
|
||||
33,
|
||||
1,
|
||||
36,
|
||||
2,
|
||||
6,
|
||||
37,
|
||||
2,
|
||||
39,
|
||||
-59,
|
||||
35,
|
||||
35,
|
||||
35,
|
||||
]
|
||||
|
||||
await expect(execAsync(bytecode)).rejects.toThrow(
|
||||
'Memory limit of 67108864 bytes exceeded. Tried to allocate 67155164 bytes.'
|
||||
)
|
||||
})
|
||||
|
||||
test('should execute user-defined stringify function correctly', async () => {
|
||||
const functions = {
|
||||
stringify: (arg: any) => {
|
||||
@ -383,6 +526,7 @@ describe('HogQL Bytecode', () => {
|
||||
callStack: [],
|
||||
declaredFunctions: {},
|
||||
ip: 8,
|
||||
maxMemUsed: 16,
|
||||
ops: 3,
|
||||
stack: [4.2],
|
||||
syncDuration: 0,
|
||||
|
32
hogvm/typescript/src/__tests__/utils.test.ts
Normal file
32
hogvm/typescript/src/__tests__/utils.test.ts
Normal file
@ -0,0 +1,32 @@
|
||||
import { calculateCost } from '../utils'
|
||||
|
||||
const PTR_COST = 8
|
||||
|
||||
describe('hogvm utils', () => {
    // The expectations below encode the cost model under test: every value is
    // charged PTR_COST, strings add their length, and arrays, objects and Maps
    // add the cost of everything they contain.
    test('calculateCost', async () => {
        const nestedMap = new Map<any, any>([
            ['key', 'value'],
            ['key2', new Map<any, any>([['key', 'value']])],
        ])
        const expectations: [any, number][] = [
            [1, PTR_COST],
            ['hello', PTR_COST + 5],
            [true, PTR_COST],
            [null, PTR_COST],
            [[], PTR_COST],
            [[1], PTR_COST * 2],
            [['hello'], PTR_COST * 2 + 5],
            [{}, PTR_COST],
            [{ key: 'value' }, PTR_COST * 3 + 3 + 5],
            [new Map([['key', 'value']]), PTR_COST * 3 + 3 + 5],
            [nestedMap, PTR_COST * 7 + 3 + 5 + 4 + 3 + 5],
        ]
        for (const [value, expectedCost] of expectations) {
            expect(calculateCost(value)).toBe(expectedCost)
        }
    })

    // A self-referencing object must be measured without recursing forever:
    // the object, its key, and one unit for the reference back to itself.
    test('calculateCost with cycles', async () => {
        const selfReferencing: Record<string, any> = {}
        selfReferencing['key'] = selfReferencing
        expect(calculateCost(selfReferencing)).toBe(PTR_COST * 3 + 3)
    })
})
|
@ -2,10 +2,12 @@ import RE2 from 're2'
|
||||
|
||||
import { Operation } from './operation'
|
||||
import { ASYNC_STL, STL } from './stl/stl'
|
||||
import { convertHogToJS, convertJSToHog, getNestedValue, like, setNestedValue } from './utils'
|
||||
import { calculateCost, convertHogToJS, convertJSToHog, getNestedValue, like, setNestedValue } from './utils'
|
||||
|
||||
const DEFAULT_MAX_ASYNC_STEPS = 100
|
||||
const DEFAULT_MAX_MEMORY = 64 * 1024 * 1024 // 64 MB
|
||||
const DEFAULT_TIMEOUT_MS = 5000 // ms
|
||||
const MAX_FUNCTION_ARGS_LENGTH = 300
|
||||
|
||||
export interface VMState {
|
||||
/** Bytecode running in the VM */
|
||||
@ -24,6 +26,8 @@ export interface VMState {
|
||||
asyncSteps: number
|
||||
/** Combined duration of sync steps */
|
||||
syncDuration: number
|
||||
/** Max memory used */
|
||||
maxMemUsed: number
|
||||
}
|
||||
|
||||
export interface ExecOptions {
|
||||
@ -35,6 +39,8 @@ export interface ExecOptions {
|
||||
timeout?: number
|
||||
/** Max number of async function that can happen. When reached the function will throw */
|
||||
maxAsyncSteps?: number
|
||||
/** Memory limit in bytes. This is calculated based on the size of the VM stack. */
|
||||
memoryLimit?: number
|
||||
}
|
||||
|
||||
export interface ExecResult {
|
||||
@ -45,9 +51,6 @@ export interface ExecResult {
|
||||
state?: VMState
|
||||
}
|
||||
|
||||
/** Maximum function arguments allowed */
|
||||
const MAX_ARGS_LENGTH = 300
|
||||
|
||||
export function execSync(bytecode: any[], options?: ExecOptions): any {
|
||||
const response = exec(bytecode, options)
|
||||
if (response.finished) {
|
||||
@ -109,8 +112,12 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
|
||||
const asyncSteps = vmState ? vmState.asyncSteps : 0
|
||||
const syncDuration = vmState ? vmState.syncDuration : 0
|
||||
const stack: any[] = vmState ? vmState.stack : []
|
||||
const memStack: number[] = stack.map((s) => calculateCost(s))
|
||||
const callStack: [number, number, number][] = vmState ? vmState.callStack : []
|
||||
const declaredFunctions: Record<string, [number, number]> = vmState ? vmState.declaredFunctions : {}
|
||||
let memUsed = memStack.reduce((acc, val) => acc + val, 0)
|
||||
let maxMemUsed = Math.max(vmState ? vmState.maxMemUsed : 0, memUsed)
|
||||
const memLimit = options?.memoryLimit ?? DEFAULT_MAX_MEMORY
|
||||
let ip = vmState ? vmState.ip : 1
|
||||
let ops = vmState ? vmState.ops : 0
|
||||
const timeout = options?.timeout ?? DEFAULT_TIMEOUT_MS
|
||||
@ -120,15 +127,36 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
|
||||
if (stack.length === 0) {
|
||||
throw new Error('Invalid HogQL bytecode, stack is empty')
|
||||
}
|
||||
memUsed -= memStack.pop() ?? 0
|
||||
return stack.pop()
|
||||
}
|
||||
|
||||
/**
 * Push a value onto the VM stack and charge its cost against the memory limit.
 * Throws when the running total exceeds memLimit; a memLimit of 0 or less
 * disables the check. Returns whatever stack.push returns.
 */
function pushStack(value: any): any {
    const cost = calculateCost(value)
    memStack.push(cost)
    memUsed += cost
    maxMemUsed = Math.max(maxMemUsed, memUsed)
    const limitEnabled = memLimit > 0
    if (limitEnabled && memUsed > memLimit) {
        // The cost is already recorded at this point, but the value itself is not on the stack.
        throw new Error(`Memory limit of ${memLimit} bytes exceeded. Tried to allocate ${memUsed} bytes.`)
    }
    return stack.push(value)
}
|
||||
|
||||
/**
 * Remove `deleteCount` values from the stack starting at `start`, refunding
 * their tracked memory cost, and return the removed values.
 *
 * When `deleteCount` is omitted, everything from `start` to the end is removed.
 */
function spliceStack2(start: number, deleteCount?: number): any[] {
    // Passing an explicit `undefined` as the second argument makes
    // Array.prototype.splice remove NOTHING (it is coerced to 0), so an omitted
    // deleteCount has to be routed to the one-argument form.
    const removedCosts =
        deleteCount === undefined ? memStack.splice(start) : memStack.splice(start, deleteCount)
    memUsed -= removedCosts.reduce((acc, val) => acc + val, 0)
    return deleteCount === undefined ? stack.splice(start) : stack.splice(start, deleteCount)
}
|
||||
/**
 * Remove every value from `start` to the end of the stack, refunding their
 * tracked memory cost, and return the removed values.
 */
function spliceStack1(start: number): any[] {
    let refunded = 0
    for (const cost of memStack.splice(start)) {
        refunded += cost
    }
    memUsed -= refunded
    return stack.splice(start)
}
|
||||
|
||||
/**
 * Advance the instruction pointer and return the bytecode token it now points at.
 * Throws when the pointer is already on (or past) the last token.
 */
function next(): any {
    const lastIndex = bytecode!.length - 1
    if (ip >= lastIndex) {
        throw new Error('Unexpected end of bytecode')
    }
    ip += 1
    return bytecode![ip]
}
|
||||
|
||||
function checkTimeout(): void {
|
||||
if (syncDuration + Date.now() - startTime > timeout) {
|
||||
throw new Error(`Execution timed out after ${timeout / 1000} seconds. Performed ${ops} ops.`)
|
||||
@ -144,110 +172,110 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
|
||||
case null:
|
||||
break
|
||||
case Operation.STRING:
|
||||
stack.push(next())
|
||||
pushStack(next())
|
||||
break
|
||||
case Operation.FLOAT:
|
||||
stack.push(next())
|
||||
pushStack(next())
|
||||
break
|
||||
case Operation.INTEGER:
|
||||
stack.push(next())
|
||||
pushStack(next())
|
||||
break
|
||||
case Operation.TRUE:
|
||||
stack.push(true)
|
||||
pushStack(true)
|
||||
break
|
||||
case Operation.FALSE:
|
||||
stack.push(false)
|
||||
pushStack(false)
|
||||
break
|
||||
case Operation.NULL:
|
||||
stack.push(null)
|
||||
pushStack(null)
|
||||
break
|
||||
case Operation.NOT:
|
||||
stack.push(!popStack())
|
||||
pushStack(!popStack())
|
||||
break
|
||||
case Operation.AND:
|
||||
stack.push(
|
||||
Array(next())
|
||||
.fill(null)
|
||||
.map(() => popStack())
|
||||
.every(Boolean)
|
||||
)
|
||||
temp = next()
|
||||
temp2 = true
|
||||
for (let i = 0; i < temp; i++) {
|
||||
temp2 = !!popStack() && temp2
|
||||
}
|
||||
pushStack(temp2)
|
||||
break
|
||||
case Operation.OR:
|
||||
stack.push(
|
||||
Array(next())
|
||||
.fill(null)
|
||||
.map(() => popStack())
|
||||
.some(Boolean)
|
||||
)
|
||||
temp = next()
|
||||
temp2 = false
|
||||
for (let i = 0; i < temp; i++) {
|
||||
temp2 = !!popStack() || temp2
|
||||
}
|
||||
pushStack(temp2)
|
||||
break
|
||||
case Operation.PLUS:
|
||||
stack.push(Number(popStack()) + Number(popStack()))
|
||||
pushStack(Number(popStack()) + Number(popStack()))
|
||||
break
|
||||
case Operation.MINUS:
|
||||
stack.push(Number(popStack()) - Number(popStack()))
|
||||
pushStack(Number(popStack()) - Number(popStack()))
|
||||
break
|
||||
case Operation.DIVIDE:
|
||||
stack.push(Number(popStack()) / Number(popStack()))
|
||||
pushStack(Number(popStack()) / Number(popStack()))
|
||||
break
|
||||
case Operation.MULTIPLY:
|
||||
stack.push(Number(popStack()) * Number(popStack()))
|
||||
pushStack(Number(popStack()) * Number(popStack()))
|
||||
break
|
||||
case Operation.MOD:
|
||||
stack.push(Number(popStack()) % Number(popStack()))
|
||||
pushStack(Number(popStack()) % Number(popStack()))
|
||||
break
|
||||
case Operation.EQ:
|
||||
stack.push(popStack() === popStack())
|
||||
pushStack(popStack() === popStack())
|
||||
break
|
||||
case Operation.NOT_EQ:
|
||||
stack.push(popStack() !== popStack())
|
||||
pushStack(popStack() !== popStack())
|
||||
break
|
||||
case Operation.GT:
|
||||
stack.push(popStack() > popStack())
|
||||
pushStack(popStack() > popStack())
|
||||
break
|
||||
case Operation.GT_EQ:
|
||||
stack.push(popStack() >= popStack())
|
||||
pushStack(popStack() >= popStack())
|
||||
break
|
||||
case Operation.LT:
|
||||
stack.push(popStack() < popStack())
|
||||
pushStack(popStack() < popStack())
|
||||
break
|
||||
case Operation.LT_EQ:
|
||||
stack.push(popStack() <= popStack())
|
||||
pushStack(popStack() <= popStack())
|
||||
break
|
||||
case Operation.LIKE:
|
||||
stack.push(like(popStack(), popStack()))
|
||||
pushStack(like(popStack(), popStack()))
|
||||
break
|
||||
case Operation.ILIKE:
|
||||
stack.push(like(popStack(), popStack(), true))
|
||||
pushStack(like(popStack(), popStack(), true))
|
||||
break
|
||||
case Operation.NOT_LIKE:
|
||||
stack.push(!like(popStack(), popStack()))
|
||||
pushStack(!like(popStack(), popStack()))
|
||||
break
|
||||
case Operation.NOT_ILIKE:
|
||||
stack.push(!like(popStack(), popStack(), true))
|
||||
pushStack(!like(popStack(), popStack(), true))
|
||||
break
|
||||
case Operation.IN:
|
||||
temp = popStack()
|
||||
stack.push(popStack().includes(temp))
|
||||
pushStack(popStack().includes(temp))
|
||||
break
|
||||
case Operation.NOT_IN:
|
||||
temp = popStack()
|
||||
stack.push(!popStack().includes(temp))
|
||||
pushStack(!popStack().includes(temp))
|
||||
break
|
||||
case Operation.REGEX:
|
||||
temp = popStack()
|
||||
stack.push(new RE2(popStack()).test(temp))
|
||||
pushStack(new RE2(popStack()).test(temp))
|
||||
break
|
||||
case Operation.NOT_REGEX:
|
||||
temp = popStack()
|
||||
stack.push(!new RE2(popStack()).test(temp))
|
||||
pushStack(!new RE2(popStack()).test(temp))
|
||||
break
|
||||
case Operation.IREGEX:
|
||||
temp = popStack()
|
||||
stack.push(new RE2(popStack(), 'i').test(temp))
|
||||
pushStack(new RE2(popStack(), 'i').test(temp))
|
||||
break
|
||||
case Operation.NOT_IREGEX:
|
||||
temp = popStack()
|
||||
stack.push(!new RE2(popStack(), 'i').test(temp))
|
||||
pushStack(!new RE2(popStack(), 'i').test(temp))
|
||||
break
|
||||
case Operation.GET_GLOBAL: {
|
||||
const count = next()
|
||||
@ -255,7 +283,7 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
|
||||
for (let i = 0; i < count; i++) {
|
||||
chain.push(popStack())
|
||||
}
|
||||
stack.push(options?.globals ? convertJSToHog(getNestedValue(options.globals, chain)) : null)
|
||||
pushStack(options?.globals ? convertJSToHog(getNestedValue(options.globals, chain)) : null)
|
||||
break
|
||||
}
|
||||
case Operation.POP:
|
||||
@ -265,8 +293,8 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
|
||||
if (callStack.length > 0) {
|
||||
const [newIp, stackStart, _] = callStack.pop()!
|
||||
const response = popStack()
|
||||
stack.splice(stackStart)
|
||||
stack.push(response)
|
||||
spliceStack1(stackStart)
|
||||
pushStack(response)
|
||||
ip = newIp
|
||||
break
|
||||
} else {
|
||||
@ -277,19 +305,23 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
|
||||
}
|
||||
case Operation.GET_LOCAL:
|
||||
temp = callStack.length > 0 ? callStack[callStack.length - 1][1] : 0
|
||||
stack.push(stack[next() + temp])
|
||||
pushStack(stack[next() + temp])
|
||||
break
|
||||
case Operation.SET_LOCAL:
|
||||
temp = callStack.length > 0 ? callStack[callStack.length - 1][1] : 0
|
||||
stack[next() + temp] = popStack()
|
||||
temp = (callStack.length > 0 ? callStack[callStack.length - 1][1] : 0) + next()
|
||||
stack[temp] = popStack()
|
||||
temp2 = memStack[temp]
|
||||
memStack[temp] = calculateCost(stack[temp])
|
||||
memUsed += memStack[temp] - temp2
|
||||
maxMemUsed = Math.max(maxMemUsed, memUsed)
|
||||
break
|
||||
case Operation.GET_PROPERTY:
|
||||
temp = popStack() // property
|
||||
stack.push(getNestedValue(popStack(), [temp]))
|
||||
pushStack(getNestedValue(popStack(), [temp]))
|
||||
break
|
||||
case Operation.GET_PROPERTY_NULLISH:
|
||||
temp = popStack() // property
|
||||
stack.push(getNestedValue(popStack(), [temp], true))
|
||||
pushStack(getNestedValue(popStack(), [temp], true))
|
||||
break
|
||||
case Operation.SET_PROPERTY:
|
||||
temp = popStack() // value
|
||||
@ -298,23 +330,23 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
|
||||
break
|
||||
case Operation.DICT:
|
||||
temp = next() * 2 // number of elements to remove from the stack
|
||||
tempArray = stack.splice(stack.length - temp, temp)
|
||||
tempArray = spliceStack2(stack.length - temp, temp)
|
||||
tempMap = new Map()
|
||||
for (let i = 0; i < tempArray.length; i += 2) {
|
||||
tempMap.set(tempArray[i], tempArray[i + 1])
|
||||
}
|
||||
stack.push(tempMap)
|
||||
pushStack(tempMap)
|
||||
break
|
||||
case Operation.ARRAY:
|
||||
temp = next()
|
||||
tempArray = stack.splice(stack.length - temp, temp)
|
||||
stack.push(tempArray)
|
||||
tempArray = spliceStack2(stack.length - temp, temp)
|
||||
pushStack(tempArray)
|
||||
break
|
||||
case Operation.TUPLE:
|
||||
temp = next()
|
||||
tempArray = stack.splice(stack.length - temp, temp)
|
||||
tempArray = spliceStack2(stack.length - temp, temp)
|
||||
;(tempArray as any).__isHogTuple = true
|
||||
stack.push(tempArray)
|
||||
pushStack(tempArray)
|
||||
break
|
||||
case Operation.JUMP:
|
||||
temp = next()
|
||||
@ -353,14 +385,14 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
|
||||
if (temp > stack.length) {
|
||||
throw new Error('Not enough arguments on the stack')
|
||||
}
|
||||
if (temp > MAX_ARGS_LENGTH) {
|
||||
if (temp > MAX_FUNCTION_ARGS_LENGTH) {
|
||||
throw new Error('Too many arguments')
|
||||
}
|
||||
const args = Array(temp)
|
||||
.fill(null)
|
||||
.map(() => popStack())
|
||||
if (options?.functions && options.functions.hasOwnProperty(name) && options.functions[name]) {
|
||||
stack.push(convertJSToHog(options.functions[name](...args.map(convertHogToJS))))
|
||||
pushStack(convertJSToHog(options.functions[name](...args.map(convertHogToJS))))
|
||||
} else if (
|
||||
name !== 'toString' &&
|
||||
((options?.asyncFunctions &&
|
||||
@ -386,10 +418,11 @@ export function exec(code: any[] | VMState, options?: ExecOptions): ExecResult {
|
||||
ops,
|
||||
asyncSteps: asyncSteps + 1,
|
||||
syncDuration: syncDuration + (Date.now() - startTime),
|
||||
maxMemUsed,
|
||||
},
|
||||
} satisfies ExecResult
|
||||
} else if (name in STL) {
|
||||
stack.push(STL[name](args, name, timeout))
|
||||
pushStack(STL[name](args, name, timeout))
|
||||
} else {
|
||||
throw new Error(`Unsupported function call: ${name}`)
|
||||
}
|
||||
|
@ -39,34 +39,41 @@ export function escapeIdentifier(identifier: string | number): string {
|
||||
.join('')}\``
|
||||
}
|
||||
|
||||
export function printHogValue(obj: any): string {
|
||||
if (Array.isArray(obj)) {
|
||||
if ((obj as any).__isHogTuple) {
|
||||
if (obj.length < 2) {
|
||||
return `tuple(${obj.map(printHogValue).join(', ')})`
|
||||
}
|
||||
return `(${obj.map(printHogValue).join(', ')})`
|
||||
} else {
|
||||
return `[${obj.map(printHogValue).join(', ')}]`
|
||||
}
|
||||
}
|
||||
if (obj instanceof Map) {
|
||||
return `{${Array.from(obj.entries())
|
||||
.map(([key, value]) => `${printHogValue(key)}: ${printHogValue(value)}`)
|
||||
.join(', ')}}`
|
||||
export function printHogValue(obj: any, marked: Set<any> | undefined = undefined): string {
|
||||
if (!marked) {
|
||||
marked = new Set()
|
||||
}
|
||||
if (typeof obj === 'object' && obj !== null) {
|
||||
return `{${Object.entries(obj)
|
||||
.map(([key, value]) => `${printHogValue(key)}: ${printHogValue(value)}`)
|
||||
.join(', ')}}`
|
||||
}
|
||||
if (typeof obj === 'boolean') {
|
||||
if (marked.has(obj)) {
|
||||
return 'null'
|
||||
}
|
||||
marked.add(obj)
|
||||
try {
|
||||
if (Array.isArray(obj)) {
|
||||
if ((obj as any).__isHogTuple) {
|
||||
if (obj.length < 2) {
|
||||
return `tuple(${obj.map((o) => printHogValue(o, marked)).join(', ')})`
|
||||
}
|
||||
return `(${obj.map((o) => printHogValue(o, marked)).join(', ')})`
|
||||
}
|
||||
return `[${obj.map((o) => printHogValue(o, marked)).join(', ')}]`
|
||||
}
|
||||
if (obj instanceof Map) {
|
||||
return `{${Array.from(obj.entries())
|
||||
.map(([key, value]) => `${printHogValue(key, marked)}: ${printHogValue(value, marked)}`)
|
||||
.join(', ')}}`
|
||||
}
|
||||
return `{${Object.entries(obj)
|
||||
.map(([key, value]) => `${printHogValue(key, marked)}: ${printHogValue(value, marked)}`)
|
||||
.join(', ')}}`
|
||||
} finally {
|
||||
marked.delete(obj)
|
||||
}
|
||||
} else if (typeof obj === 'boolean') {
|
||||
return obj ? 'true' : 'false'
|
||||
}
|
||||
if (obj === null) {
|
||||
} else if (obj === null) {
|
||||
return 'null'
|
||||
}
|
||||
if (typeof obj === 'string') {
|
||||
} else if (typeof obj === 'string') {
|
||||
return escapeString(obj)
|
||||
}
|
||||
return obj.toString()
|
||||
|
@ -80,21 +80,35 @@ export const STL: Record<string, (args: any[], name: string, timeout: number) =>
|
||||
},
|
||||
jsonStringify: (args) => {
|
||||
// Recursively convert maps to objects
|
||||
function convert(x: any): any {
|
||||
if (x instanceof Map) {
|
||||
const obj: Record<string, any> = {}
|
||||
x.forEach((value, key) => {
|
||||
obj[key] = convert(value)
|
||||
})
|
||||
return obj
|
||||
} else if (typeof x === 'object' && Array.isArray(x)) {
|
||||
return x.map(convert)
|
||||
} else if (typeof x === 'object' && x !== null) {
|
||||
const obj: Record<string, any> = {}
|
||||
for (const key in x) {
|
||||
obj[key] = convert(x[key])
|
||||
function convert(x: any, marked?: Set<any>): any {
|
||||
if (!marked) {
|
||||
marked = new Set()
|
||||
}
|
||||
if (typeof x === 'object' && x !== null) {
|
||||
if (marked.has(x)) {
|
||||
return null
|
||||
}
|
||||
marked.add(x)
|
||||
try {
|
||||
if (x instanceof Map) {
|
||||
const obj: Record<string, any> = {}
|
||||
x.forEach((value, key) => {
|
||||
obj[convert(key, marked)] = convert(value, marked)
|
||||
})
|
||||
return obj
|
||||
}
|
||||
if (typeof x === 'object' && Array.isArray(x)) {
|
||||
return x.map((v) => convert(v, marked))
|
||||
}
|
||||
|
||||
const obj: Record<string, any> = {}
|
||||
for (const key in x) {
|
||||
obj[key] = convert(x[key], marked)
|
||||
}
|
||||
return obj
|
||||
} finally {
|
||||
marked.delete(x)
|
||||
}
|
||||
return obj
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
@ -1,3 +1,6 @@
|
||||
/** Fixed cost per object in memory */
|
||||
const COST_PER_UNIT = 8
|
||||
|
||||
export function like(string: string, pattern: string, caseInsensitive = false): boolean {
|
||||
pattern = String(pattern)
|
||||
.replaceAll(/[-/\\^$*+?.()|[\]{}]/g, '\\$&')
|
||||
@ -79,3 +82,40 @@ export function convertHogToJS(x: any): any {
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
/**
 * Estimate the memory cost of a value.
 *
 * - Strings cost `COST_PER_UNIT` plus their `length`.
 * - Maps, arrays and other non-null objects cost `COST_PER_UNIT` plus the cost of
 *   everything they contain (keys and values for Maps and plain objects, elements
 *   for arrays).
 * - Every other value (numbers, booleans, null, undefined, ...) costs `COST_PER_UNIT`.
 *
 * @param object - the value to measure
 * @param marked - objects currently being measured further up the recursion; an object
 *   found in this set is a cycle and is charged a flat `COST_PER_UNIT`. Callers normally
 *   leave this unset.
 * @returns the estimated cost
 */
export function calculateCost(object: any, marked: Set<any> | undefined = undefined): any {
    const visiting = marked ? marked : new Set()

    if (typeof object === 'string') {
        return COST_PER_UNIT + object.length
    }
    if (typeof object !== 'object' || object === null) {
        return COST_PER_UNIT
    }
    if (visiting.has(object)) {
        // Already being measured by an enclosing call: do not recurse into the cycle.
        return COST_PER_UNIT
    }

    visiting.add(object)
    try {
        let contents = 0
        if (object instanceof Map) {
            // Snapshot the keys before walking them.
            for (const key of Array.from(object.keys())) {
                contents = contents + calculateCost(key, visiting) + calculateCost(object.get(key), visiting)
            }
        } else if (Array.isArray(object)) {
            // forEach skips holes in sparse arrays, so empty slots are not charged.
            object.forEach((val) => {
                contents = contents + calculateCost(val, visiting)
            })
        } else {
            for (const key of Object.keys(object)) {
                contents = contents + calculateCost(key, visiting) + calculateCost(object[key], visiting)
            }
        }
        return COST_PER_UNIT + contents
    } finally {
        // Unmark on the way out so an object referenced from several places is charged each time.
        visiting.delete(object)
    }
}
|
||||
|
@ -50,7 +50,7 @@
|
||||
"@google-cloud/storage": "^5.8.5",
|
||||
"@maxmind/geoip2-node": "^3.4.0",
|
||||
"@posthog/clickhouse": "^1.7.0",
|
||||
"@posthog/hogvm": "^1.0.18",
|
||||
"@posthog/hogvm": "^1.0.20",
|
||||
"@posthog/plugin-scaffold": "1.4.4",
|
||||
"@sentry/node": "^7.49.0",
|
||||
"@sentry/profiling-node": "^0.3.0",
|
||||
|
@ -44,8 +44,8 @@ dependencies:
|
||||
specifier: ^1.7.0
|
||||
version: 1.7.0
|
||||
'@posthog/hogvm':
|
||||
specifier: ^1.0.18
|
||||
version: 1.0.18(re2@1.20.3)
|
||||
specifier: ^1.0.20
|
||||
version: 1.0.20(re2@1.20.3)
|
||||
'@posthog/plugin-scaffold':
|
||||
specifier: 1.4.4
|
||||
version: 1.4.4
|
||||
@ -3110,8 +3110,8 @@ packages:
|
||||
engines: {node: '>=12'}
|
||||
dev: false
|
||||
|
||||
/@posthog/hogvm@1.0.18(re2@1.20.3):
|
||||
resolution: {integrity: sha512-h0C9AlpfDRYlSzmXFOZKXR5x5UD+sgXkiQ6CUCoBQX0TjxMYE7hU1lZ7cgWQTYWJwSHeLp5RTUN0BjRDEFhj/Q==}
|
||||
/@posthog/hogvm@1.0.20(re2@1.20.3):
|
||||
resolution: {integrity: sha512-NSy4EbjR0SyNCkHHA2wU7psI9oVQtEkxl1Tr8NBYxa2QBnmAS+yWcST0MhFIX38GrkYVWTsgrZUeSnEQTsvMxg==}
|
||||
peerDependencies:
|
||||
re2: ^1.21.3
|
||||
dependencies:
|
||||
|
Loading…
Reference in New Issue
Block a user