0
0
mirror of https://github.com/PostHog/posthog.git synced 2024-11-22 08:40:03 +01:00
posthog/hogql_parser/parser.cpp
Sandy Spicer 447a18930f
feat: add "INTERSECT" and "EXCEPT" to HogQL (#25737)
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
2024-10-29 11:31:56 -07:00

2849 lines
97 KiB
C++

#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <boost/algorithm/string.hpp>
#include <string>
#include "HogQLLexer.h"
#include "HogQLParser.h"
#include "HogQLParserBaseVisitor.h"
#include "error.h"
#include "parser.h"
#include "string.h"
// Declares the visitor override for grammar rule RULE: `any visit<RULE>(HogQLParser::<RULE>Context* ctx) override`.
// The method body is expected to follow the macro invocation.
#define VISIT(RULE) any visit##RULE(HogQLParser::RULE##Context* ctx) override
// Defines the visitor for grammar rule RULE as one that always throws NotImplementedError naming the rule.
#define VISIT_UNSUPPORTED(RULE) \
VISIT(RULE) { \
throw NotImplementedError("Unsupported rule: " #RULE); \
}
// Expands to the parameter list and body of a `catch` clause for the C++ exception class TYPE (use as
// `catch HANDLE_HOGQL_ERROR(TYPE, cleanup)`). The body looks up the attribute named TYPE on
// `state->errors_module`, instantiates it with the C++ exception's message, copies the exception's `start` and
// `end` onto the Python error as attributes, and sets it as the current Python error. Whether or not any of
// these steps fails, every temporary reference (including the argument tuple) is released, OTHER_CLEANUP is
// run, and the enclosing function returns NULL.
#define HANDLE_HOGQL_ERROR(TYPE, OTHER_CLEANUP) \
  (const TYPE& e) { \
    string err_what = e.what(); \
    PyObject *error_type = NULL, *py_err_args = NULL, *py_err = NULL, *py_start = NULL, *py_end = NULL; \
    int err_indicator = 0; \
    error_type = PyObject_GetAttrString(state->errors_module, #TYPE); \
    if (!error_type) goto exit##TYPE; \
    py_err_args = Py_BuildValue("(s#)", err_what.data(), err_what.size()); \
    if (!py_err_args) goto exit##TYPE; \
    py_err = PyObject_CallObject(error_type, py_err_args); \
    if (!py_err) goto exit##TYPE; \
    py_start = PyLong_FromSize_t(e.start); \
    if (!py_start) goto exit##TYPE; \
    py_end = PyLong_FromSize_t(e.end); \
    if (!py_end) goto exit##TYPE; \
    err_indicator = PyObject_SetAttrString(py_err, "start", py_start); \
    if (err_indicator == -1) goto exit##TYPE; \
    err_indicator = PyObject_SetAttrString(py_err, "end", py_end); \
    if (err_indicator == -1) goto exit##TYPE; \
    PyErr_SetObject(error_type, py_err); \
    exit##TYPE :; \
    Py_XDECREF(py_end); \
    Py_XDECREF(py_start); \
    Py_XDECREF(py_err); \
    Py_XDECREF(py_err_args); \
    Py_XDECREF(error_type); \
    OTHER_CLEANUP \
    return NULL; \
  }
// Builds an AST node through build_ast_node(TYPE_NAME, KWARGS_FORMAT, ...) and returns it from the enclosing
// function; throws PyInternalError if the node could not be built. Declares a local named `ret`.
#define RETURN_NEW_AST_NODE(TYPE_NAME, KWARGS_FORMAT, ...) \
PyObject* ret = build_ast_node(TYPE_NAME, KWARGS_FORMAT, __VA_ARGS__); \
/* Fortunately we don't need to care about decrementing Py_BuildValue/Py_VaBuildValue args, */ \
/* so just throw is enough: https://github.com/python/cpython/blob/a254120f/Python/modsupport.c#L147-L148*/ \
if (!ret) throw PyInternalError(); \
return ret
using namespace std;
// PYTHON UTILS (`X_` stands for "extension")
// Append every element of `extension` to the end of `list`, modifying `list` in place.
// Both arguments are measured with PyList_Size. Returns 0 on success, -1 on error.
int X_PyList_Extend(PyObject* list, PyObject* extension) {
  const Py_ssize_t current_length = PyList_Size(list);
  if (current_length == -1) {
    return -1;
  }
  const Py_ssize_t added_length = PyList_Size(extension);
  if (added_length == -1) {
    return -1;
  }
  // Assigning to the slice that starts at the current end of the list inserts the new items there
  const Py_ssize_t slice_end = current_length + added_length;
  return PyList_SetSlice(list, current_length, slice_end, extension);
}
// Release one reference on each object stored in `objects`.
void X_Py_DECREF_ALL(vector<PyObject*> objects) {
  for (size_t index = 0; index < objects.size(); ++index) {
    Py_DECREF(objects[index]);
  }
}
// Build a Python list of `str` objects mirroring `items`, in order.
// Return value: New reference (or NULL on error, in which case the partially built list is released).
PyObject* X_PyList_FromStrings(const vector<string>& items) {
  PyObject* result = PyList_New(items.size());
  if (result == NULL) {
    return NULL;
  }
  size_t position = 0;
  for (const string& item : items) {
    PyObject* py_item = PyUnicode_FromStringAndSize(item.data(), item.size());
    if (py_item == NULL) {
      Py_DECREF(result);
      return NULL;
    }
    // PyList_SET_ITEM takes over the reference to py_item
    PyList_SET_ITEM(result, position, py_item);
    ++position;
  }
  return result;
}
// PARSING AND AST CONVERSION
// Visitor over the ANTLR-generated HogQL parse tree. Its visit methods build Python objects (AST nodes, lists,
// tuples) or plain C++ values such as strings, wrapped in `any`.
class HogQLParseTreeConverter : public HogQLParserBaseVisitor {
private:
// Extension module state; this class reads its `ast_module`, `errors_module` and `base_module` members
parser_state* state;
// When true, visit() neither attaches start/end attributes to AST nodes nor fills in spans on SyntaxErrors
bool is_internal;
// NOTE(review): not referenced in this part of the file - presumably names that identifiers may not take; confirm
// at the point of use
const vector<string> RESERVED_KEYWORDS = {"true", "false", "null", "team_id"};
// Instantiate the class named `type_name` from the Python `ast` module, passing keyword arguments only.
// `kwargs_format` and the variadic arguments are handed to Py_VaBuildValue and must produce the kwargs dict.
// Return value: New reference (or NULL on error).
PyObject* build_ast_node(const char* type_name, const char* kwargs_format, ...) {
  va_list kwargs_values;
  va_start(kwargs_values, kwargs_format);
  PyObject* new_node = NULL;
  PyObject* positional = NULL;
  PyObject* keywords = NULL;
  PyObject* node_class = PyObject_GetAttrString(state->ast_module, type_name);
  if (node_class) {
    positional = PyTuple_New(0);
    if (positional) {
      keywords = Py_VaBuildValue(kwargs_format, kwargs_values);
      if (keywords) {
        new_node = PyObject_Call(node_class, positional, keywords);
      }
    }
  }
  // Shared cleanup for the success path and every failure path
  va_end(kwargs_values);
  Py_XDECREF(keywords);
  Py_XDECREF(positional);
  Py_XDECREF(node_class);
  return new_node;
}
// Look up `enum_member_name` on the attribute `enum_name` of the Python `ast` module.
// Return value: New reference (or NULL if either lookup fails).
PyObject* get_ast_enum_member(const char* enum_name, const char* enum_member_name) {
  PyObject* enum_class = PyObject_GetAttrString(state->ast_module, enum_name);
  if (enum_class == NULL) {
    return NULL;
  }
  PyObject* member = PyObject_GetAttrString(enum_class, enum_member_name);
  // The enum class itself is no longer needed, whether or not the member lookup succeeded
  Py_DECREF(enum_class);
  return member;
}
// Shared body of the is_ast_node_instance overloads: fetches the attribute TYPE_NAME from
// `state-><HOGQL_MODULE>_module`, runs PyObject_IsInstance(obj, that type), releases the type and returns the
// result (-1 if the attribute lookup fails). Expects a variable named `obj` in scope.
#define IS_AST_NODE_INSTANCE_IMPL(HOGQL_MODULE, TYPE_NAME) \
PyObject* node_type = PyObject_GetAttrString(state->HOGQL_MODULE##_module, TYPE_NAME); \
if (!node_type) return -1; \
int ret = PyObject_IsInstance(obj, node_type); \
Py_DECREF(node_type); \
return ret;
// Return 1 if the passed object is an instance of the specified AST node type, 0 if not, -1 if an error occurred.
// The type is looked up by name on `state->ast_module`.
int is_ast_node_instance(PyObject* obj, const char* type_name) { IS_AST_NODE_INSTANCE_IMPL(ast, type_name) }
// Return 1 if the passed object is an instance of _any_ AST node type, 0 if not, -1 if an error occurred.
// The check is made against the `AST` attribute of `state->base_module`.
int is_ast_node_instance(PyObject* obj) { IS_AST_NODE_INSTANCE_IMPL(base, "AST") }
#undef IS_AST_NODE_INSTANCE_IMPL
public:
// Remember the module state and whether the input is an internal expression (see visit() for the effect).
HogQLParseTreeConverter(parser_state* state, bool is_internal)
    : state(state),
      is_internal(is_internal) {
}
// Visit a parse tree node and return whatever the rule-specific visitor produced.
// Unless `is_internal` is set, this wrapper additionally:
//  - rethrows a SyntaxError whose start and end are both 0 with this node's span filled in,
//  - sets `start` and `end` attributes on the result when it is a non-null PyObject* that is an AST node.
// Throws ParsingError if `tree` is neither a Token nor a ParserRuleContext, and PyInternalError (after
// releasing the result) if a Python call fails while attaching the span.
any visit(antlr4::tree::ParseTree* tree) override {
// Find the start and stop indices of the parse tree node
size_t start;
size_t stop;
auto token = dynamic_cast<antlr4::Token*>(tree);
if (token) {
start = token->getStartIndex();
stop = token->getStopIndex();
} else {
auto ctx = dynamic_cast<antlr4::ParserRuleContext*>(tree);
if (!ctx) {
throw ParsingError("Parse tree node is neither a Token nor a ParserRuleContext");
}
start = ctx->getStart()->getStartIndex();
stop = ctx->getStop()->getStopIndex();
}
// Visit the parse tree node (while making sure that nodes/errors have spans - except for internal expressions)
any node;
try {
node = tree->accept(this);
} catch (const SyntaxError& e) {
// If start and end are unset, rethrow with the current start and stop
if (!is_internal && e.start == 0 && e.end == 0) {
// `stop + 1` is used as the end position (presumably so that `end` is exclusive)
throw SyntaxError(e.what(), start, stop + 1);
}
throw;
}
// Only results that hold a PyObject* can be AST nodes; other result types (e.g. strings) pass through as-is
if (!is_internal && node.has_value() && node.type() == typeid(PyObject*)) {
PyObject* py_node = any_cast<PyObject*>(node);
if (py_node) {
int is_ast = is_ast_node_instance(py_node);
if (is_ast == -1) {
Py_DECREF(py_node);
throw PyInternalError();
}
if (is_ast) {
PyObject *py_start = NULL, *py_end = NULL;
int err_indicator = 0;
py_start = PyLong_FromSize_t(start);
if (!py_start) goto error;
py_end = PyLong_FromSize_t(stop + 1);
if (!py_end) goto error;
err_indicator = PyObject_SetAttrString(py_node, "start", py_start);
if (err_indicator == -1) goto error;
err_indicator = PyObject_SetAttrString(py_node, "end", py_end);
if (err_indicator == -1) goto error;
goto success;
error:
// The node is released here because it will never reach the caller
Py_XDECREF(py_start);
Py_XDECREF(py_end);
Py_DECREF(py_node);
throw PyInternalError();
success:
// Only the temporary integers are released; the node itself is returned below
Py_XDECREF(py_start);
Py_XDECREF(py_end);
}
}
}
return node;
}
// This is the only method that should actually be called from outside the class.
// Convert the parse tree to an AST node result. If an error has occurred in conversion, handle it gracefully:
//  - SyntaxError, NotImplementedError and ParsingError are turned into the Python exception of the same name
//    (see HANDLE_HOGQL_ERROR),
//  - PyInternalError just returns NULL (nothing is set here, the Python error is expected to be set already),
//  - bad_any_cast is reported as a Python ParsingError.
// Return value: New reference, or NULL on error.
PyObject* visitAsPyObjectFinal(antlr4::tree::ParseTree* tree) {
  try {
    return visitAsPyObject(tree);
  } catch HANDLE_HOGQL_ERROR(SyntaxError, ) catch HANDLE_HOGQL_ERROR(
      NotImplementedError,
  ) catch HANDLE_HOGQL_ERROR(ParsingError, ) catch (const PyInternalError& e) {
    return NULL;
  } catch (const bad_any_cast& e) {
    PyObject* error_type = PyObject_GetAttrString(state->errors_module, "ParsingError");
    if (error_type) {
      PyErr_SetString(error_type, "Parsing failed due to bad type casting");
      // PyObject_GetAttrString returned a new reference, and PyErr_SetString does not take it over
      Py_DECREF(error_type);
    }
    return NULL;
  }
}
// Visit `tree` and unwrap the result as a PyObject*. Throws bad_any_cast if the visitor produced a different
// type, and ParsingError if it produced a null pointer. Return value: New reference.
PyObject* visitAsPyObject(antlr4::tree::ParseTree* tree) {
  any visited = visit(tree);
  PyObject* py_result = any_cast<PyObject*>(visited);
  if (py_result != NULL) {
    return py_result;
  }
  throw ParsingError(
      "Rule resulted in a null PyObject pointer. A PyInternalError should have been raised instead."
  );
}
// Like visitAsPyObject, but a missing (null) parse tree yields Python's None. Return value: New reference.
PyObject* visitAsPyObjectOrNone(antlr4::tree::ParseTree* tree) {
  if (tree != NULL) {
    return visitAsPyObject(tree);
  }
  Py_RETURN_NONE;
}
// Like visitAsPyObject, but a missing (null) parse tree yields a new empty Python list.
// Throws PyInternalError if that list cannot be allocated. Return value: New reference.
PyObject* visitAsPyObjectOrEmptyList(antlr4::tree::ParseTree* tree) {
  if (tree != NULL) {
    return visitAsPyObject(tree);
  }
  PyObject* empty_list = PyList_New(0);
  if (empty_list == NULL) {
    throw PyInternalError();
  }
  return empty_list;
}
// Visit every element of `tree` with visitAsPyObject and collect the results, in order, into a new Python list.
// The element type is a template parameter because a vector of a child context type does not convert to a
// vector of antlr4::tree::ParseTree*.
// If any visit throws, the partially filled list is released and the exception propagates.
template <typename T>
PyObject* visitPyListOfObjects(vector<T> tree) {
  PyObject* py_list = PyList_New(tree.size());
  if (py_list == NULL) {
    throw PyInternalError();
  }
  try {
    size_t position = 0;
    for (auto child : tree) {
      PyObject* item = visitAsPyObject(child);
      // The list takes over the reference to item
      PyList_SET_ITEM(py_list, position, item);
      ++position;
    }
  } catch (...) {
    Py_DECREF(py_list);
    throw;
  }
  return py_list;
}
// Visit `tree` and unwrap the result as a string. Throws bad_any_cast if the visitor produced another type.
string visitAsString(antlr4::tree::ParseTree* tree) {
  any visited = visit(tree);
  return any_cast<string>(visited);
}
// Visit every element of `tree` with visitAsString and return the resulting strings in the same order.
template <typename T>
vector<string> visitAsVectorOfStrings(vector<T> tree) {
  vector<string> strings;
  strings.reserve(tree.size());
  for (size_t index = 0; index < tree.size(); ++index) {
    strings.push_back(visitAsString(tree[index]));
  }
  return strings;
}
// Convert a whole program into a `Program` node whose `declarations` list holds the converted declarations,
// leaving out declarations that are empty statements.
VISIT(Program) {
  PyObject* program_items = PyList_New(0);
  if (program_items == NULL) {
    throw PyInternalError();
  }
  auto children = ctx->declaration();
  for (auto child : children) {
    const bool is_empty_statement = child->statement() && child->statement()->emptyStmt();
    if (is_empty_statement) {
      continue;
    }
    try {
      PyObject* item = visitAsPyObject(child);
      // PyList_Append takes its own reference, so ours is dropped right away
      const int append_status = PyList_Append(program_items, item);
      Py_DECREF(item);
      if (append_status == -1) {
        throw PyInternalError();
      }
    } catch (...) {
      Py_DECREF(program_items);
      throw;
    }
  }
  PyObject* program = build_ast_node("Program", "{s:N}", "declarations", program_items);
  if (program == NULL) {
    Py_DECREF(program_items);
    throw PyInternalError();
  }
  return program;
}
// A declaration is either a variable declaration or a statement; delegate to whichever is present.
VISIT(Declaration) {
  if (auto variable_ctx = ctx->varDecl()) {
    return visit(variable_ctx);
  }
  if (auto inner_statement_ctx = ctx->statement()) {
    return visit(inner_statement_ctx);
  }
  throw ParsingError("Declaration must be either a varDecl or a statement");
}
// An expression is just its column expression; forward the visit to it.
VISIT(Expression) {
  auto column_expr_ctx = ctx->columnExpr();
  return visit(column_expr_ctx);
}
// Build a `VariableDeclaration` node from the declared name and its initializer expression (None if absent).
VISIT(VarDecl) {
  const string variable_name = visitAsString(ctx->identifier());
  PyObject* initial_value = visitAsPyObjectOrNone(ctx->expression());
  PyObject* declaration = build_ast_node(
      "VariableDeclaration", "{s:s#,s:N}", "name", variable_name.data(), variable_name.size(), "expr", initial_value
  );
  if (declaration) {
    return declaration;
  }
  Py_DECREF(initial_value);
  throw PyInternalError();
}
// Build a `VariableAssignment` node from the two expressions of the assignment (`left` and `right`).
VISIT(VarAssignment) {
  PyObject* target = NULL;
  PyObject* value = NULL;
  try {
    target = visitAsPyObject(ctx->expression(0));
    value = visitAsPyObject(ctx->expression(1));
  } catch (...) {
    // target is still NULL if the first visit was the one that failed
    Py_XDECREF(target);
    throw;
  }
  PyObject* assignment = build_ast_node("VariableAssignment", "{s:N,s:N}", "left", target, "right", value);
  if (assignment) {
    return assignment;
  }
  Py_DECREF(target);
  Py_DECREF(value);
  throw PyInternalError();
}
// Dispatch to the visitor of whichever concrete statement kind this statement wraps.
// The alternatives are probed in a fixed order and the first one present wins.
VISIT(Statement) {
  if (auto return_ctx = ctx->returnStmt()) return visit(return_ctx);
  if (auto throw_ctx = ctx->throwStmt()) return visit(throw_ctx);
  if (auto try_catch_ctx = ctx->tryCatchStmt()) return visit(try_catch_ctx);
  if (auto if_ctx = ctx->ifStmt()) return visit(if_ctx);
  if (auto while_ctx = ctx->whileStmt()) return visit(while_ctx);
  if (auto for_ctx = ctx->forStmt()) return visit(for_ctx);
  if (auto for_in_ctx = ctx->forInStmt()) return visit(for_in_ctx);
  if (auto func_ctx = ctx->funcStmt()) return visit(func_ctx);
  if (auto assignment_ctx = ctx->varAssignment()) return visit(assignment_ctx);
  if (auto nested_block_ctx = ctx->block()) return visit(nested_block_ctx);
  if (auto expr_ctx = ctx->exprStmt()) return visit(expr_ctx);
  if (auto empty_ctx = ctx->emptyStmt()) return visit(empty_ctx);
  throw ParsingError("Statement must be one of returnStmt, throwStmt, tryCatchStmt, ifStmt, whileStmt, forStmt, forInStmt, funcStmt, "
                     "varAssignment, block, exprStmt, or emptyStmt");
}
// Wrap the statement's expression in an `ExprStatement` node.
VISIT(ExprStmt) {
  // Nothing is owned yet at this point, so an exception from the visit can simply propagate
  PyObject* inner_expr = visitAsPyObject(ctx->expression());
  PyObject* statement = build_ast_node("ExprStatement", "{s:N}", "expr", inner_expr);
  if (statement) {
    return statement;
  }
  Py_DECREF(inner_expr);
  throw PyInternalError();
}
// Build a `ReturnStatement` node; `expr` is None when the statement has no expression.
VISIT(ReturnStmt) {
  // Nothing is owned yet at this point, so an exception from the visit can simply propagate
  PyObject* returned_value = visitAsPyObjectOrNone(ctx->expression());
  PyObject* statement = build_ast_node("ReturnStatement", "{s:N}", "expr", returned_value);
  if (statement) {
    return statement;
  }
  Py_DECREF(returned_value);
  throw PyInternalError();
}
// Build a `ThrowStatement` node; `expr` is None when the statement has no expression.
VISIT(ThrowStmt) {
  // Nothing is owned yet at this point, so an exception from the visit can simply propagate
  PyObject* thrown_value = visitAsPyObjectOrNone(ctx->expression());
  RETURN_NEW_AST_NODE("ThrowStatement", "{s:N}", "expr", thrown_value);
}
// Convert one catch block into a Python 3-tuple `(catch_var, catch_type, catch_stmt)`.
// `catch_var` and `catch_type` are Python strings, or None when the block does not specify them.
// Throws PyInternalError if a Python object cannot be created; exceptions from visiting children propagate.
// Every reference acquired so far is released on all failure paths.
VISIT(CatchBlock) {
  PyObject* catch_var_py;
  if (ctx->catchVar) {
    // Nothing is owned yet, so an exception from this visit can simply propagate
    string catch_var = visitAsString(ctx->catchVar);
    catch_var_py = PyUnicode_FromStringAndSize(catch_var.data(), catch_var.size());
    if (!catch_var_py) {
      throw PyInternalError();
    }
  } else {
    catch_var_py = Py_NewRef(Py_None);
  }
  PyObject* catch_type_py;
  if (ctx->catchType) {
    string catch_type;
    try {
      catch_type = visitAsString(ctx->catchType);
    } catch (...) {
      Py_DECREF(catch_var_py);
      throw;
    }
    catch_type_py = PyUnicode_FromStringAndSize(catch_type.data(), catch_type.size());
    if (!catch_type_py) {
      Py_DECREF(catch_var_py);
      throw PyInternalError();
    }
  } else {
    catch_type_py = Py_NewRef(Py_None);
  }
  PyObject* catch_stmt;
  try {
    catch_stmt = visitAsPyObject(ctx->catchStmt);
  } catch (...) {
    Py_DECREF(catch_var_py);
    Py_DECREF(catch_type_py);
    throw;
  }
  // PyTuple_Pack takes its own references, so ours are released whether or not it succeeds
  PyObject* ret = PyTuple_Pack(3, catch_var_py, catch_type_py, catch_stmt);
  Py_DECREF(catch_var_py);
  Py_DECREF(catch_type_py);
  Py_DECREF(catch_stmt);
  if (!ret) {
    throw PyInternalError();
  }
  return ret;
}
// Build a `TryCatchStatement` node from the try body, the list of converted catch blocks and the finally body
// (None when there is no finally).
VISIT(TryCatchStmt) {
  // Nothing is owned yet, so an exception from this visit can simply propagate
  PyObject* try_body = visitAsPyObject(ctx->tryStmt);
  PyObject* handlers = PyList_New(0);
  if (handlers == NULL) {
    Py_DECREF(try_body);
    throw PyInternalError();
  }
  PyObject* finally_body = NULL;
  try {
    auto handler_ctxs = ctx->catchBlock();
    for (auto handler_ctx : handler_ctxs) {
      PyObject* handler = visitAsPyObject(handler_ctx);
      // PyList_Append takes its own reference, so ours is dropped right away
      const int append_status = PyList_Append(handlers, handler);
      Py_DECREF(handler);
      if (append_status == -1) {
        throw PyInternalError();
      }
    }
    finally_body = visitAsPyObjectOrNone(ctx->finallyStmt);
  } catch (...) {
    Py_DECREF(try_body);
    Py_DECREF(handlers);
    throw;
  }
  PyObject* statement = build_ast_node(
      "TryCatchStatement", "{s:N,s:N,s:N}", "try_stmt", try_body, "catches", handlers, "finally_stmt", finally_body
  );
  if (statement) {
    return statement;
  }
  Py_DECREF(try_body);
  Py_DECREF(handlers);
  Py_DECREF(finally_body);
  throw PyInternalError();
}
// Build an `IfStatement` node from the condition, the first statement (`then`) and the optional second
// statement (`else_`, None when absent).
VISIT(IfStmt) {
  PyObject* condition = NULL;
  PyObject* then_branch = NULL;
  PyObject* else_branch = NULL;
  try {
    condition = visitAsPyObject(ctx->expression());
    then_branch = visitAsPyObject(ctx->statement(0));
    else_branch = visitAsPyObjectOrNone(ctx->statement(1));
  } catch (...) {
    // Pointers that were never assigned are still NULL, hence the X variant
    Py_XDECREF(condition);
    Py_XDECREF(then_branch);
    throw;
  }
  PyObject* statement =
      build_ast_node("IfStatement", "{s:N,s:N,s:N}", "expr", condition, "then", then_branch, "else_", else_branch);
  if (statement) {
    return statement;
  }
  Py_DECREF(condition);
  Py_DECREF(then_branch);
  Py_DECREF(else_branch);
  throw PyInternalError();
}
// Build a `WhileStatement` node from the loop condition and the loop body (None when there is no statement).
VISIT(WhileStmt) {
  PyObject* condition = NULL;
  PyObject* loop_body = NULL;
  try {
    condition = visitAsPyObject(ctx->expression());
    loop_body = visitAsPyObjectOrNone(ctx->statement());
  } catch (...) {
    // condition is still NULL if the first visit was the one that failed
    Py_XDECREF(condition);
    throw;
  }
  PyObject* statement = build_ast_node("WhileStatement", "{s:N,s:N}", "expr", condition, "body", loop_body);
  if (statement) {
    return statement;
  }
  Py_DECREF(condition);
  Py_DECREF(loop_body);
  throw PyInternalError();
}
// Build a `ForStatement` node. The initializer and the increment can each be a variable declaration, a variable
// assignment or an expression (None when absent); the condition is None when absent.
VISIT(ForStmt) {
  PyObject* init_node = NULL;
  PyObject* condition_node = NULL;
  PyObject* step_node = NULL;
  PyObject* body_node = NULL;
  try {
    if (ctx->initializerVarDeclr) {
      init_node = visitAsPyObject(ctx->initializerVarDeclr);
    } else if (ctx->initializerVarAssignment) {
      init_node = visitAsPyObject(ctx->initializerVarAssignment);
    } else if (ctx->initializerExpression) {
      init_node = visitAsPyObject(ctx->initializerExpression);
    } else {
      init_node = Py_NewRef(Py_None);
    }
    condition_node = visitAsPyObjectOrNone(ctx->condition);
    if (ctx->incrementVarDeclr) {
      step_node = visitAsPyObject(ctx->incrementVarDeclr);
    } else if (ctx->incrementVarAssignment) {
      step_node = visitAsPyObject(ctx->incrementVarAssignment);
    } else if (ctx->incrementExpression) {
      step_node = visitAsPyObject(ctx->incrementExpression);
    } else {
      step_node = Py_NewRef(Py_None);
    }
    body_node = visitAsPyObject(ctx->statement());
  } catch (...) {
    // Pointers that were never assigned are still NULL, hence the X variant
    Py_XDECREF(init_node);
    Py_XDECREF(condition_node);
    Py_XDECREF(step_node);
    throw;
  }
  PyObject* statement = build_ast_node(
      "ForStatement", "{s:N,s:N,s:N,s:N}", "initializer", init_node, "condition", condition_node, "increment",
      step_node, "body", body_node
  );
  if (statement) {
    return statement;
  }
  Py_DECREF(init_node);
  Py_DECREF(condition_node);
  Py_DECREF(step_node);
  Py_DECREF(body_node);
  throw PyInternalError();
}
// Build a `ForInStatement` node. With a single identifier it is the value variable and `keyVar` is None;
// with two identifiers the first is the key variable and the second the value variable.
VISIT(ForInStmt) {
  const string leading_name = visitAsString(ctx->identifier(0));
  string trailing_name;
  if (ctx->identifier(1)) {
    trailing_name = visitAsString(ctx->identifier(1));
  }
  PyObject* iterable = visitAsPyObject(ctx->expression());
  PyObject* loop_body;
  try {
    loop_body = visitAsPyObject(ctx->statement());
  } catch (...) {
    Py_DECREF(iterable);
    throw;
  }
  PyObject* statement;
  if (trailing_name.empty()) {
    statement = build_ast_node(
        "ForInStatement", "{s:O,s:s#,s:N,s:N}",
        "keyVar", Py_None,
        "valueVar", leading_name.data(), leading_name.size(),
        "expr", iterable,
        "body", loop_body
    );
  } else {
    statement = build_ast_node(
        "ForInStatement", "{s:s#,s:s#,s:N,s:N}",
        "keyVar", leading_name.data(), leading_name.size(),
        "valueVar", trailing_name.data(), trailing_name.size(),
        "expr", iterable,
        "body", loop_body
    );
  }
  if (statement) {
    return statement;
  }
  Py_DECREF(iterable);
  Py_DECREF(loop_body);
  throw PyInternalError();
}
// Build a `Function` node from the function name, the list of parameter names (empty when the function has
// no identifier list) and the body block.
VISIT(FuncStmt) {
  const string function_name = visitAsString(ctx->identifier());
  PyObject* py_params;
  if (ctx->identifierList()) {
    const vector<string> param_names = any_cast<vector<string>>(visit(ctx->identifierList()));
    py_params = X_PyList_FromStrings(param_names);
  } else {
    py_params = PyList_New(0);
  }
  if (py_params == NULL) {
    throw PyInternalError();
  }
  PyObject* function_body;
  try {
    function_body = visitAsPyObject(ctx->block());
  } catch (...) {
    Py_DECREF(py_params);
    throw;
  }
  PyObject* function_node = build_ast_node(
      "Function", "{s:s#,s:N,s:N}", "name", function_name.data(), function_name.size(), "params", py_params, "body",
      function_body
  );
  if (function_node) {
    return function_node;
  }
  Py_DECREF(py_params);
  Py_DECREF(function_body);
  throw PyInternalError();
}
// Convert every key-value pair and return them as a Python list.
VISIT(KvPairList) {
  auto pair_ctxs = ctx->kvPair();
  return visitPyListOfObjects(pair_ctxs);
}
// Convert a key-value pair into a Python 2-tuple `(key, value)` of the two converted expressions.
VISIT(KvPair) {
  PyObject* key = NULL;
  PyObject* value = NULL;
  try {
    key = visitAsPyObject(ctx->expression(0));
    value = visitAsPyObject(ctx->expression(1));
  } catch (...) {
    // key is still NULL if the first visit was the one that failed
    Py_XDECREF(key);
    throw;
  }
  // PyTuple_Pack takes its own references, so ours are released whether or not it succeeds
  PyObject* pair = PyTuple_Pack(2, key, value);
  Py_DECREF(key);
  Py_DECREF(value);
  if (pair == NULL) {
    throw PyInternalError();
  }
  return pair;
}
// Return the identifiers of the list as a vector of strings (not a Python object).
VISIT(IdentifierList) {
  auto identifier_ctxs = ctx->identifier();
  return visitAsVectorOfStrings(identifier_ctxs);
}
// An empty statement becomes an `ExprStatement` node whose `expr` is None.
VISIT(EmptyStmt) {
  PyObject* ret = build_ast_node("ExprStatement", "{s:O}", "expr", Py_None);
  if (!ret) {
    throw PyInternalError();
  }
  return ret;
}
// Convert a block into a `Block` node whose `declarations` list holds the converted declarations,
// leaving out declarations that are empty statements.
VISIT(Block) {
  PyObject* block_items = PyList_New(0);
  if (block_items == NULL) {
    throw PyInternalError();
  }
  auto children = ctx->declaration();
  for (auto child : children) {
    const bool is_empty_statement = child->statement() && child->statement()->emptyStmt();
    if (is_empty_statement) {
      continue;
    }
    try {
      PyObject* item = visitAsPyObject(child);
      // PyList_Append takes its own reference, so ours is dropped right away
      const int append_status = PyList_Append(block_items, item);
      Py_DECREF(item);
      if (append_status == -1) {
        throw PyInternalError();
      }
    } catch (...) {
      Py_DECREF(block_items);
      throw;
    }
  }
  PyObject* block_node = build_ast_node("Block", "{s:N}", "declarations", block_items);
  if (block_node == NULL) {
    Py_DECREF(block_items);
    throw PyInternalError();
  }
  return block_node;
}
// HogQL rules
// Top-level select: delegate to the set statement, else the plain select statement, else the HogQLX tag element.
VISIT(Select) {
  if (auto set_stmt_ctx = ctx->selectSetStmt()) {
    return visit(set_stmt_ctx);
  }
  if (auto plain_stmt_ctx = ctx->selectStmt()) {
    return visit(plain_stmt_ctx);
  }
  return visit(ctx->hogqlxTagElement());
}
// Possibly parenthesized select: delegate to the plain select statement, else the placeholder,
// else the nested set statement.
VISIT(SelectStmtWithParens) {
  if (auto plain_stmt_ctx = ctx->selectStmt()) {
    return visit(plain_stmt_ctx);
  }
  if (auto inner_placeholder_ctx = ctx->placeholder()) {
    return visitAsPyObject(inner_placeholder_ctx);
  }
  return visit(ctx->selectSetStmt());
}
// Convert a chain of select statements joined by set operators.
// When there are no subsequent clauses the initial query is returned unchanged. Otherwise a `SelectSetQuery`
// node is returned whose `subsequent_select_queries` is a list of `SelectSetNode`s, each pairing a query with
// its operator string ("UNION ALL", "INTERSECT" or "EXCEPT").
// Throws SyntaxError for any other operator and PyInternalError when a Python call fails.
VISIT(SelectSetStmt) {
  PyObject* initial_query = visitAsPyObject(ctx->selectStmtWithParens());
  PyObject* select_queries = PyList_New(0);
  if (!select_queries) {
    Py_DECREF(initial_query);
    throw PyInternalError();
  }
  try {
    for (auto subsequent : ctx->subsequentSelectSetClause()) {
      const char* set_operator;
      if (subsequent->UNION() && subsequent->ALL()) {
        set_operator = "UNION ALL";
      } else if (subsequent->INTERSECT()) {
        set_operator = "INTERSECT";
      } else if (subsequent->EXCEPT()) {
        set_operator = "EXCEPT";
      } else {
        throw SyntaxError("Set operator must be one of UNION ALL, INTERSECT, and EXCEPT");
      }
      PyObject* select_query = visitAsPyObject(subsequent->selectStmtWithParens());
      // The operator is passed with the `s` format, so the Python string is created (and owned) by the kwargs
      // dict itself and there is no separate object to check or release
      PyObject* query = build_ast_node(
          "SelectSetNode", "{s:N,s:s}", "select_query", select_query, "set_operator", set_operator
      );
      if (!query) {
        throw PyInternalError();
      }
      // PyList_Append takes its own reference, so ours has to be released whether or not the append succeeds
      int append_code = PyList_Append(select_queries, query);
      Py_DECREF(query);
      if (append_code == -1) {
        throw PyInternalError();
      }
    }
  } catch (...) {
    Py_DECREF(select_queries);
    Py_DECREF(initial_query);
    throw;
  }
  if (PyList_Size(select_queries) == 0) {
    Py_DECREF(select_queries);
    return initial_query;
  }
  RETURN_NEW_AST_NODE("SelectSetQuery", "{s:N, s:N}", "initial_select_query", initial_query, "subsequent_select_queries", select_queries);
}
// Convert a single SELECT statement into a `SelectQuery` node.
// The node is first constructed from the clauses passed as keyword arguments (ctes, select, distinct,
// select_from, where, prewhere, having, group_by, order_by); the remaining parts (window_exprs, limit, offset,
// limit_by, limit_with_ties, array_join_op, array_join_list) are then set as attributes on the node.
// Throws SyntaxError for ARRAY JOIN without FROM or with un-aliased arrays, NotImplementedError for TOP and
// SETTINGS clauses, ParsingError for a malformed window clause, and PyInternalError when a Python call fails.
// After the node exists, every failure path releases it before throwing.
VISIT(SelectStmt) {
// These are stolen by select_query
PyObject *ctes = NULL, *select = NULL, *select_from = NULL, *where = NULL, *prewhere = NULL, *having = NULL,
*group_by = NULL, *order_by = NULL;
try {
ctes = visitAsPyObjectOrNone(ctx->withClause());
select = visitAsPyObjectOrEmptyList(ctx->columnExprList());
select_from = visitAsPyObjectOrNone(ctx->fromClause());
where = visitAsPyObjectOrNone(ctx->whereClause());
prewhere = visitAsPyObjectOrNone(ctx->prewhereClause());
having = visitAsPyObjectOrNone(ctx->havingClause());
group_by = visitAsPyObjectOrNone(ctx->groupByClause());
order_by = visitAsPyObjectOrNone(ctx->orderByClause());
} catch (...) {
// Clauses that were not reached are still NULL, hence the X variant
Py_XDECREF(ctes);
Py_XDECREF(select);
Py_XDECREF(select_from);
Py_XDECREF(where);
Py_XDECREF(prewhere);
Py_XDECREF(having);
Py_XDECREF(group_by);
Py_XDECREF(order_by);
throw;
}
// `distinct` is True when the DISTINCT keyword is present and None otherwise
PyObject* select_query = build_ast_node(
"SelectQuery", "{s:N,s:N,s:N,s:N,s:N,s:N,s:N,s:N,s:N}", "ctes", ctes, "select", select, "distinct",
Py_NewRef(ctx->DISTINCT() ? Py_True : Py_None), "select_from", select_from, "where", where, "prewhere",
prewhere, "having", having, "group_by", group_by, "order_by", order_by
);
if (!select_query) {
throw PyInternalError();
}
int err_indicator = 0;
// WINDOW clause: a dict mapping each window identifier to its converted window expression
auto window_clause_ctx = ctx->windowClause();
if (window_clause_ctx) {
auto window_expr_ctxs = window_clause_ctx->windowExpr();
auto identifier_ctxs = window_clause_ctx->identifier();
if (window_expr_ctxs.size() != identifier_ctxs.size()) {
Py_DECREF(select_query);
throw ParsingError("WindowClause must have a matching number of window exprs and identifiers");
}
PyObject* window_exprs = PyDict_New();
if (!window_exprs) {
Py_DECREF(select_query);
throw PyInternalError();
}
for (size_t i = 0; i < window_expr_ctxs.size(); i++) {
string identifier;
PyObject* window_expr;
try {
identifier = visitAsString(identifier_ctxs[i]);
window_expr = visitAsPyObject(window_expr_ctxs[i]);
} catch (...) {
Py_DECREF(window_exprs);
Py_DECREF(select_query);
throw;
}
err_indicator = PyDict_SetItemString(window_exprs, identifier.c_str(), window_expr);
Py_DECREF(window_expr);
if (err_indicator == -1) {
Py_DECREF(window_exprs);
Py_DECREF(select_query);
throw PyInternalError();
}
}
err_indicator = PyObject_SetAttrString(select_query, "window_exprs", window_exprs);
Py_DECREF(window_exprs);
if (err_indicator == -1) {
Py_DECREF(select_query);
throw PyInternalError();
}
}
// LIMIT (with optional OFFSET, BY list and WITH TIES), or else a standalone OFFSET clause
auto limit_and_offset_clause_ctx = ctx->limitAndOffsetClause();
if (limit_and_offset_clause_ctx) {
PyObject* limit;
try {
limit = visitAsPyObject(limit_and_offset_clause_ctx->columnExpr(0));
} catch (...) {
Py_DECREF(select_query);
throw;
}
err_indicator = PyObject_SetAttrString(select_query, "limit", limit);
Py_DECREF(limit);
if (err_indicator == -1) {
Py_DECREF(select_query);
throw PyInternalError();
}
// The second column expression of the clause, when present, is used as the offset
auto offset_ctx = limit_and_offset_clause_ctx->columnExpr(1);
if (offset_ctx) {
PyObject* offset;
try {
offset = visitAsPyObject(offset_ctx);
} catch (...) {
Py_DECREF(select_query);
throw;
}
err_indicator = PyObject_SetAttrString(select_query, "offset", offset);
Py_DECREF(offset);
if (err_indicator == -1) {
Py_DECREF(select_query);
throw PyInternalError();
}
}
auto limit_by_exprs_ctx = limit_and_offset_clause_ctx->columnExprList();
if (limit_by_exprs_ctx) {
PyObject* limit_by_exprs;
try {
limit_by_exprs = visitAsPyObject(limit_by_exprs_ctx);
} catch (...) {
Py_DECREF(select_query);
throw;
}
err_indicator = PyObject_SetAttrString(select_query, "limit_by", limit_by_exprs);
Py_DECREF(limit_by_exprs);
if (err_indicator == -1) {
Py_DECREF(select_query);
throw PyInternalError();
}
}
if (limit_and_offset_clause_ctx->WITH() && limit_and_offset_clause_ctx->TIES()) {
err_indicator = PyObject_SetAttrString(select_query, "limit_with_ties", Py_True);
if (err_indicator == -1) {
Py_DECREF(select_query);
throw PyInternalError();
}
}
} else {
auto offset_only_clause_ctx = ctx->offsetOnlyClause();
if (offset_only_clause_ctx) {
PyObject* offset_only_clause;
try {
offset_only_clause = visitAsPyObject(offset_only_clause_ctx->columnExpr());
} catch (...) {
Py_DECREF(select_query);
throw;
}
err_indicator = PyObject_SetAttrString(select_query, "offset", offset_only_clause);
Py_DECREF(offset_only_clause);
if (err_indicator == -1) {
Py_DECREF(select_query);
throw PyInternalError();
}
}
}
// ARRAY JOIN: requires a FROM clause, and every joined array must be an `Alias` node
auto array_join_clause_ctx = ctx->arrayJoinClause();
if (array_join_clause_ctx) {
// select_from was handed over to select_query above; only the pointer value is inspected here
if (Py_IsNone(select_from)) {
Py_DECREF(select_query);
throw SyntaxError("Using ARRAY JOIN without a FROM clause is not permitted");
}
PyObject* join_op = PyUnicode_FromString(
array_join_clause_ctx->LEFT() ? "LEFT ARRAY JOIN"
: array_join_clause_ctx->INNER() ? "INNER ARRAY JOIN"
: "ARRAY JOIN"
);
if (!join_op) {
Py_DECREF(select_query);
throw PyInternalError();
}
err_indicator = PyObject_SetAttrString(select_query, "array_join_op", join_op);
Py_DECREF(join_op);
if (err_indicator == -1) {
Py_DECREF(select_query);
throw PyInternalError();
}
auto array_join_arrays_ctx = array_join_clause_ctx->columnExprList();
PyObject* array_join_list;
try {
array_join_list = visitAsPyObject(array_join_arrays_ctx);
} catch (...) {
Py_DECREF(select_query);
throw;
}
Py_ssize_t array_join_list_size = PyList_Size(array_join_list);
if (array_join_list_size == -1) {
Py_DECREF(select_query);
Py_DECREF(array_join_list);
throw PyInternalError();
}
for (Py_ssize_t i = 0; i < array_join_list_size; i++) {
// Borrowed reference, valid for as long as array_join_list is alive
PyObject* expr = PyList_GET_ITEM(array_join_list, i);
int is_alias = is_ast_node_instance(expr, "Alias");
if (is_alias == -1) {
Py_DECREF(array_join_list);
Py_DECREF(select_query);
throw PyInternalError();
}
if (!is_alias) {
Py_DECREF(array_join_list);
Py_DECREF(select_query);
// Report the span of the offending column expression rather than of the whole statement
auto relevant_column_expr_ctx = array_join_arrays_ctx->columnExpr(i);
throw SyntaxError(
"ARRAY JOIN arrays must have an alias", relevant_column_expr_ctx->getStart()->getStartIndex(),
relevant_column_expr_ctx->getStop()->getStopIndex() + 1
);
}
}
err_indicator = PyObject_SetAttrString(select_query, "array_join_list", array_join_list);
Py_DECREF(array_join_list);
if (err_indicator == -1) {
Py_DECREF(select_query);
throw PyInternalError();
}
}
// TOP and SETTINGS clauses are recognized but rejected
if (ctx->topClause()) {
Py_DECREF(select_query);
throw NotImplementedError("Unsupported: SelectStmt.topClause()");
}
if (ctx->settingsClause()) {
Py_DECREF(select_query);
throw NotImplementedError("Unsupported: SelectStmt.settingsClause()");
}
return select_query;
}
// Clause visitors. The supported ones simply forward to the single child that carries the clause's content.
// The unsupported ones throw NotImplementedError when visited; for ARRAY JOIN, WINDOW, LIMIT/OFFSET, TOP and
// SETTINGS, visitSelectStmt reads or rejects the clause context directly rather than visiting it.
VISIT(WithClause) { return visit(ctx->withExprList()); }
VISIT_UNSUPPORTED(TopClause)
VISIT(FromClause) { return visit(ctx->joinExpr()); }
VISIT_UNSUPPORTED(ArrayJoinClause)
VISIT_UNSUPPORTED(WindowClause)
VISIT(PrewhereClause) { return visit(ctx->columnExpr()); }
VISIT(WhereClause) { return visit(ctx->columnExpr()); }
VISIT(GroupByClause) { return visit(ctx->columnExprList()); }
VISIT(HavingClause) { return visit(ctx->columnExpr()); }
VISIT(OrderByClause) { return visit(ctx->orderExprList()); }
VISIT_UNSUPPORTED(ProjectionOrderByClause)
VISIT_UNSUPPORTED(LimitAndOffsetClause)
VISIT_UNSUPPORTED(SettingsClause)
// Tail of the join visitors. Expects two owned PyObject* locals named `join1` and `join2` in scope.
// Starting at `join1`, follows the `next_join` attribute until it finds a node whose `next_join` is None,
// sets that node's `next_join` to `join2`, releases the local reference to `join2` and returns `join1`.
// Gives up after 1500 hops with a Python RecursionError. On any failure both `join1` and `join2` are released
// and PyInternalError is thrown.
// NOTE(review): Py_IsNone is documented to return only 0 or 1, so the `reached_end_of_chain == -1` branch looks
// unreachable - confirm before relying on it.
#define RETURN_CHAINED_JOIN_EXPRS() \
PyObject* last_join = join1; \
PyObject* next_join = \
PyObject_GetAttrString(last_join, "next_join"); /* 1500 is Python's recursion limit (C_RECURSION_LIMIT) */ \
for (size_t i = 0; i < 1500; i++) { /* We can safely decref, because a reference is anyway held by join1 */ \
Py_XDECREF(next_join); \
if (!next_join) { \
Py_DECREF(join1); \
Py_DECREF(join2); \
throw PyInternalError(); \
} \
int reached_end_of_chain = Py_IsNone(next_join); \
if (reached_end_of_chain == -1) { \
Py_DECREF(join1); \
Py_DECREF(join2); \
throw PyInternalError(); \
} \
if (reached_end_of_chain) { \
int err_indicator = PyObject_SetAttrString(last_join, "next_join", join2); \
if (err_indicator == -1) { \
Py_DECREF(join1); \
Py_DECREF(join2); \
throw PyInternalError(); \
} \
Py_DECREF(join2); \
return join1; \
} \
last_join = next_join; \
next_join = PyObject_GetAttrString(last_join, "next_join"); \
} \
Py_DECREF(join1); \
Py_DECREF(join2); \
PyErr_SetString(PyExc_RecursionError, "maximum recursion depth exceeded during JOIN parsing"); \
throw PyInternalError(); /* This should never be reached, but `while (true)`s are scary, so better to be safe */
// Join two join expressions with an explicit operator. The right-hand side gets `join_type` (the operator text
// followed by " JOIN", or just "JOIN" when no operator is given) and `constraint` set on it, and is then
// appended to the end of the left-hand side's `next_join` chain. Returns the left-hand side.
VISIT(JoinExprOp) {
  PyObject* py_join_op;
  if (auto join_op_ctx = ctx->joinOp()) {
    string join_type_text = visitAsString(join_op_ctx);
    join_type_text.append(" JOIN");
    py_join_op = PyUnicode_FromStringAndSize(join_type_text.data(), join_type_text.size());
  } else {
    py_join_op = PyUnicode_FromString("JOIN");
  }
  if (py_join_op == NULL) {
    throw PyInternalError();
  }

  // Right-hand side first: it receives the join type and the constraint
  PyObject* join2;
  try {
    join2 = visitAsPyObject(ctx->joinExpr(1));
  } catch (...) {
    Py_DECREF(py_join_op);
    throw;
  }
  const int join_type_status = PyObject_SetAttrString(join2, "join_type", py_join_op);
  Py_DECREF(py_join_op);
  if (join_type_status == -1) {
    Py_DECREF(join2);
    throw PyInternalError();
  }

  PyObject* join_constraint;
  try {
    join_constraint = visitAsPyObject(ctx->joinConstraintClause());
  } catch (...) {
    Py_DECREF(join2);
    throw;
  }
  const int constraint_status = PyObject_SetAttrString(join2, "constraint", join_constraint);
  Py_DECREF(join_constraint);
  if (constraint_status == -1) {
    Py_DECREF(join2);
    throw PyInternalError();
  }

  // Left-hand side last; the macro below needs the names `join1` and `join2`
  PyObject* join1;
  try {
    join1 = visitAsPyObject(ctx->joinExpr(0));
  } catch (...) {
    Py_DECREF(join2);
    throw;
  }
  RETURN_CHAINED_JOIN_EXPRS();
}
// Convert a table expression with its optional SAMPLE clause and FINAL modifier.
// If the table expression already converted to a `JoinExpr`, `sample` and `table_final` are set on it and it
// is returned as-is; otherwise a new `JoinExpr` node wrapping the table is built.
// `table_final` is True when FINAL is present and None otherwise.
VISIT(JoinExprTable) {
  PyObject* table_node = visitAsPyObject(ctx->tableExpr());
  const int already_join_expr = is_ast_node_instance(table_node, "JoinExpr");
  if (already_join_expr == -1) {
    Py_DECREF(table_node);
    throw PyInternalError();
  }
  PyObject* sample_node;
  try {
    sample_node = visitAsPyObjectOrNone(ctx->sampleClause());
  } catch (...) {
    Py_DECREF(table_node);
    throw;
  }
  // Borrowed reference: it is only ever passed on to calls that take their own reference
  PyObject* final_flag = ctx->FINAL() ? Py_True : Py_None;

  if (!already_join_expr) {
    PyObject* wrapped = build_ast_node(
        "JoinExpr", "{s:N,s:O,s:N}", "table", table_node, "table_final", final_flag, "sample", sample_node
    );
    if (wrapped == NULL) {
      Py_DECREF(table_node);
      Py_DECREF(sample_node);
      throw PyInternalError();
    }
    return wrapped;
  }

  int status = PyObject_SetAttrString(table_node, "sample", sample_node);
  Py_DECREF(sample_node);
  if (status == -1) {
    Py_DECREF(table_node);
    throw PyInternalError();
  }
  status = PyObject_SetAttrString(table_node, "table_final", final_flag);
  if (status == -1) {
    Py_DECREF(table_node);
    throw PyInternalError();
  }
  return table_node;
}
// A parenthesized join expression yields whatever the wrapped joinExpr yields.
VISIT(JoinExprParens) {
  auto inner_ctx = ctx->joinExpr();
  return visit(inner_ctx);
}
// Visits `joinExpr CROSS JOIN joinExpr`.
// The right-hand join (joinExpr(1)) gets `join_type = "CROSS JOIN"`; the left-hand join (joinExpr(0)) is visited
// afterwards and RETURN_CHAINED_JOIN_EXPRS links both via `next_join`.
// Throws PyInternalError when a Python C-API call fails (the Python error is already set in that case).
VISIT(JoinExprCrossOp) {
  PyObject* join_type = PyUnicode_FromString("CROSS JOIN");
  if (!join_type) {
    throw PyInternalError();
  }
  PyObject* join2;
  try {
    join2 = visitAsPyObject(ctx->joinExpr(1));
  } catch (...) {
    Py_DECREF(join_type);
    throw;
  }
  int err_indicator = PyObject_SetAttrString(join2, "join_type", join_type);
  // Release our reference BEFORE inspecting the result, so that it isn't leaked on the failure path
  // (this matches the ordering used by JoinExprOp)
  Py_DECREF(join_type);
  if (err_indicator == -1) {
    Py_DECREF(join2);
    throw PyInternalError();
  }
  PyObject* join1;
  try {
    join1 = visitAsPyObject(ctx->joinExpr(0));
  } catch (...) {
    Py_DECREF(join2);
    throw;
  }
  RETURN_CHAINED_JOIN_EXPRS();
}
#undef RETURN_CHAINED_JOIN_EXPRS
// Produces the space-separated join operator text for an inner join: the optional ALL / ANY / ASOF modifiers
// (in that order) followed by "INNER", which is always appended.
VISIT(JoinOpInner) {
  vector<string> parts;
  if (ctx->ALL()) parts.push_back("ALL");
  if (ctx->ANY()) parts.push_back("ANY");
  if (ctx->ASOF()) parts.push_back("ASOF");
  parts.push_back("INNER");
  return boost::algorithm::join(parts, " ");
}
// Produces the space-separated join operator text for a left/right join. Present keywords are emitted in the
// fixed order LEFT, RIGHT, OUTER, SEMI, ALL, ANTI, ANY, ASOF, regardless of their order in the query text.
VISIT(JoinOpLeftRight) {
  vector<string> parts;
  if (ctx->LEFT()) parts.push_back("LEFT");
  if (ctx->RIGHT()) parts.push_back("RIGHT");
  if (ctx->OUTER()) parts.push_back("OUTER");
  if (ctx->SEMI()) parts.push_back("SEMI");
  if (ctx->ALL()) parts.push_back("ALL");
  if (ctx->ANTI()) parts.push_back("ANTI");
  if (ctx->ANY()) parts.push_back("ANY");
  if (ctx->ASOF()) parts.push_back("ASOF");
  return boost::algorithm::join(parts, " ");
}
// Produces the space-separated join operator text for a full join. Present keywords are emitted in the fixed
// order FULL, OUTER, ALL, ANY.
VISIT(JoinOpFull) {
  vector<string> parts;
  if (ctx->FULL()) parts.push_back("FULL");
  if (ctx->OUTER()) parts.push_back("OUTER");
  if (ctx->ALL()) parts.push_back("ALL");
  if (ctx->ANY()) parts.push_back("ANY");
  return boost::algorithm::join(parts, " ");
}
// The JoinOpCross rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(JoinOpCross)
// Builds a JoinConstraint node from the clause's expression list. Only a single expression is supported;
// more than one throws NotImplementedError. `constraint_type` is "USING" when the USING keyword is present,
// "ON" otherwise.
VISIT(JoinConstraintClause) {
  PyObject* expr_list = visitAsPyObject(ctx->columnExprList());
  const Py_ssize_t expr_count = PyList_Size(expr_list);
  if (expr_count == -1) {
    Py_DECREF(expr_list);
    throw PyInternalError();
  }
  if (expr_count > 1) {
    Py_DECREF(expr_list);
    throw NotImplementedError("Unsupported: JOIN ... ON with multiple expressions");
  }
  // Take our own reference to the first item before letting go of the list
  PyObject* first_item = PyList_GET_ITEM(expr_list, 0);
  PyObject* expr = Py_NewRef(first_item);
  Py_DECREF(expr_list);
  const char* constraint_type = ctx->USING() ? "USING" : "ON";
  RETURN_NEW_AST_NODE("JoinConstraint", "{s:N,s:s}", "expr", expr, "constraint_type", constraint_type);
}
// Builds a SampleExpr node: ratioExpr(0) becomes `sample_value`, and the optional ratioExpr(1) becomes
// `offset_value` (None when absent).
VISIT(SampleClause) {
  PyObject* sample_value = visitAsPyObject(ctx->ratioExpr(0));
  PyObject* offset_value;
  try {
    offset_value = visitAsPyObjectOrNone(ctx->ratioExpr(1));
  } catch (...) {
    Py_DECREF(sample_value);
    throw;
  }
  RETURN_NEW_AST_NODE("SampleExpr", "{s:N,s:N}", "sample_value", sample_value, "offset_value", offset_value);
}
// Visits every orderExpr child and collects the results into a Python list.
VISIT(OrderExprList) {
  auto order_expr_ctxs = ctx->orderExpr();
  return visitPyListOfObjects(order_expr_ctxs);
}
// Builds an OrderExpr node. The order is "DESC" when DESC or DESCENDING is present, "ASC" otherwise.
VISIT(OrderExpr) {
  const bool is_descending = ctx->DESC() || ctx->DESCENDING();
  PyObject* expr = visitAsPyObject(ctx->columnExpr());
  RETURN_NEW_AST_NODE("OrderExpr", "{s:N,s:s}", "expr", expr, "order", is_descending ? "DESC" : "ASC");
}
// Builds a RatioExpr node from one or two number literals (`left` and the optional `right` after a slash).
// A placeholder, when present, is returned as-is instead. Throws ParsingError on zero or more than two literals.
VISIT(RatioExpr) {
  if (auto placeholder_ctx = ctx->placeholder()) {
    return visitAsPyObject(placeholder_ctx);
  }

  auto literal_ctxs = ctx->numberLiteral();
  const size_t literal_count = literal_ctxs.size();
  if (literal_count > 2) {
    throw ParsingError("RatioExpr must have at most two number literals");
  }
  if (literal_count == 0) {
    throw ParsingError("RatioExpr must have at least one number literal");
  }

  // The denominator only exists when there is both a slash and a second literal
  const bool has_denominator = ctx->SLASH() && literal_count > 1;
  auto denominator_ctx = has_denominator ? literal_ctxs[1] : NULL;

  PyObject* left = visitAsPyObject(literal_ctxs[0]);
  PyObject* right;
  try {
    right = visitAsPyObjectOrNone(denominator_ctx);
  } catch (...) {
    Py_DECREF(left);
    throw;
  }
  RETURN_NEW_AST_NODE("RatioExpr", "{s:N,s:N}", "left", left, "right", right);
}
// The SettingExprList rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(SettingExprList)
// The SettingExpr rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(SettingExpr)
// Builds a WindowExpr node from the optional PARTITION BY, ORDER BY and frame clauses.
// The visited frame is either None (no frame clause), a single object (used as `frame_start`, with `frame_end`
// set to None), or a 2-tuple of (frame_start, frame_end). `frame_method` is "RANGE", "ROWS" or None.
// Throws ParsingError for a frame tuple of the wrong size, PyInternalError when a Python C-API call fails.
VISIT(WindowExpr) {
  auto frame_ctx = ctx->winFrameClause();
  PyObject* frame = visitAsPyObjectOrNone(frame_ctx);
  // PyTuple_Check always succeeds (it yields 0 or 1), so there is no error state to handle here
  int is_frame_a_tuple = PyTuple_Check(frame);
  if (is_frame_a_tuple) {
    Py_ssize_t frame_tuple_size = PyTuple_Size(frame);
    if (frame_tuple_size == -1) {
      Py_DECREF(frame);
      throw PyInternalError();
    }
    if (frame_tuple_size != 2) {
      Py_DECREF(frame);
      throw ParsingError("WindowExpr frame must be a tuple of size 2");
    }
  }
  // Take our own references to the bounds before releasing the frame object itself
  PyObject* frame_start = Py_NewRef(is_frame_a_tuple ? PyTuple_GET_ITEM(frame, 0) : frame);
  PyObject* frame_end = Py_NewRef(is_frame_a_tuple ? PyTuple_GET_ITEM(frame, 1) : Py_None);
  Py_DECREF(frame);
  PyObject* frame_method = frame_ctx && frame_ctx->RANGE() ? PyUnicode_FromString("RANGE")
                           : frame_ctx && frame_ctx->ROWS() ? PyUnicode_FromString("ROWS")
                                                            : Py_NewRef(Py_None);
  if (!frame_method) {
    Py_DECREF(frame_start);
    Py_DECREF(frame_end);
    throw PyInternalError();
  }
  PyObject* partition_by;
  try {
    partition_by = visitAsPyObjectOrNone(ctx->winPartitionByClause());
  } catch (...) {
    Py_DECREF(frame_start);
    Py_DECREF(frame_end);
    Py_DECREF(frame_method);
    throw;
  }
  PyObject* order_by;
  try {
    order_by = visitAsPyObjectOrNone(ctx->winOrderByClause());
  } catch (...) {
    Py_DECREF(frame_start);
    Py_DECREF(frame_end);
    Py_DECREF(frame_method);
    Py_DECREF(partition_by);
    throw;
  }
  RETURN_NEW_AST_NODE(
      "WindowExpr", "{s:N,s:N,s:N,s:N,s:N}", "partition_by", partition_by, "order_by", order_by, "frame_method",
      frame_method, "frame_start", frame_start, "frame_end", frame_end
  );
}
// PARTITION BY yields whatever its column expression list yields.
VISIT(WinPartitionByClause) {
  auto column_list_ctx = ctx->columnExprList();
  return visit(column_list_ctx);
}
// ORDER BY (inside a window) yields whatever its order expression list yields.
VISIT(WinOrderByClause) {
  auto order_list_ctx = ctx->orderExprList();
  return visit(order_list_ctx);
}
// The frame clause yields whatever its winFrameExtend child yields.
VISIT(WinFrameClause) {
  auto frame_extend_ctx = ctx->winFrameExtend();
  return visit(frame_extend_ctx);
}
// A frame with a single bound yields that bound directly.
VISIT(FrameStart) {
  auto bound_ctx = ctx->winFrameBound();
  return visit(bound_ctx);
}
// Visits both frame bounds and returns them as a Python 2-tuple of (winFrameBound(0), winFrameBound(1)).
// Throws PyInternalError if the tuple cannot be built (the Python error is already set in that case).
VISIT(FrameBetween) {
  PyObject* min = visitAsPyObject(ctx->winFrameBound(0));
  PyObject* max;
  try {
    max = visitAsPyObject(ctx->winFrameBound(1));
  } catch (...) {
    Py_DECREF(min);
    throw;
  }
  // `N` hands both references over to Py_BuildValue, so there's nothing to release here on failure
  PyObject* bounds = Py_BuildValue("NN", min, max);
  // Like every other builder in this visitor, fail loudly instead of returning a null PyObject*
  if (!bounds) throw PyInternalError();
  return bounds;
}
// Builds a WindowFrameExpr node. Without PRECEDING/FOLLOWING the frame type is "CURRENT ROW"; otherwise the
// node carries the direction and `frame_value`, which is the literal's `value` attribute or None when the
// bound has no number literal.
VISIT(WinFrameBound) {
  if (!ctx->PRECEDING() && !ctx->FOLLOWING()) {
    RETURN_NEW_AST_NODE("WindowFrameExpr", "{s:s}", "frame_type", "CURRENT ROW");
  }

  PyObject* frame_value;
  if (ctx->numberLiteral()) {
    // Unwrap the raw value out of the visited Constant node
    PyObject* constant = visitAsPyObject(ctx->numberLiteral());
    frame_value = PyObject_GetAttrString(constant, "value");
    Py_DECREF(constant);
    if (!frame_value) throw PyInternalError();
  } else {
    frame_value = Py_NewRef(Py_None);
  }
  const char* frame_type = ctx->PRECEDING() ? "PRECEDING" : "FOLLOWING";
  RETURN_NEW_AST_NODE("WindowFrameExpr", "{s:s,s:N}", "frame_type", frame_type, "frame_value", frame_value);
}
// A standalone expression yields whatever its column expression yields.
VISIT(Expr) {
  auto column_expr_ctx = ctx->columnExpr();
  return visit(column_expr_ctx);
}
// The ColumnTypeExprSimple rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(ColumnTypeExprSimple)
// The ColumnTypeExprNested rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(ColumnTypeExprNested)
// The ColumnTypeExprEnum rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(ColumnTypeExprEnum)
// The ColumnTypeExprComplex rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(ColumnTypeExprComplex)
// The ColumnTypeExprParam rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(ColumnTypeExprParam)
// Visits every columnExpr child and collects the results into a Python list.
VISIT(ColumnExprList) {
  auto column_expr_ctxs = ctx->columnExpr();
  return visitPyListOfObjects(column_expr_ctxs);
}
// Translates the ternary operator into a Call to `if` with the three operands as arguments, in source order.
VISIT(ColumnExprTernaryOp) {
  PyObject* operands[3] = {NULL, NULL, NULL};
  for (size_t position = 0; position < 3; position++) {
    try {
      operands[position] = visitAsPyObject(ctx->columnExpr(position));
    } catch (...) {
      // Release whatever was already visited before this operand failed
      for (size_t visited = 0; visited < position; visited++) {
        Py_DECREF(operands[visited]);
      }
      throw;
    }
  }
  RETURN_NEW_AST_NODE("Call", "{s:s, s:[NNN]}", "name", "if", "args", operands[0], operands[1], operands[2]);
}
// Builds an Alias node. The alias comes from an identifier or from a string literal; it is rejected with a
// SyntaxError when its lowercase form is a reserved keyword. The aliased expression is visited before that
// check, so errors inside the expression surface first.
VISIT(ColumnExprAlias) {
  string alias;
  if (auto identifier_ctx = ctx->identifier()) {
    alias = visitAsString(identifier_ctx);
  } else if (auto string_literal = ctx->STRING_LITERAL()) {
    alias = parse_string_literal_ctx(string_literal);
  } else {
    throw ParsingError("A ColumnExprAlias must have the alias in some form");
  }

  PyObject* expr = visitAsPyObject(ctx->columnExpr());

  const string lowercase_alias = boost::algorithm::to_lower_copy(alias);
  const bool is_reserved =
      find(RESERVED_KEYWORDS.begin(), RESERVED_KEYWORDS.end(), lowercase_alias) != RESERVED_KEYWORDS.end();
  if (is_reserved) {
    Py_DECREF(expr);
    throw SyntaxError("\"" + alias + "\" cannot be an alias or identifier, as it's a reserved keyword");
  }
  RETURN_NEW_AST_NODE("Alias", "{s:N,s:s#}", "expr", expr, "alias", alias.data(), alias.size());
}
// Translates unary negation into the arithmetic operation `0 - <expr>`.
VISIT(ColumnExprNegate) {
  PyObject* zero = build_ast_node("Constant", "{s:i}", "value", 0);
  if (!zero) throw PyInternalError();

  PyObject* subtract_op = get_ast_enum_member("ArithmeticOperationOp", "Sub");
  if (!subtract_op) {
    Py_DECREF(zero);
    throw PyInternalError();
  }

  PyObject* operand;
  try {
    operand = visitAsPyObject(ctx->columnExpr());
  } catch (...) {
    Py_DECREF(subtract_op);
    Py_DECREF(zero);
    throw;
  }
  RETURN_NEW_AST_NODE(
      "ArithmeticOperation", "{s:N,s:N,s:N}", "left", zero, "right", operand, "op", subtract_op
  );
}
// A subquery used as a column expression yields whatever its select set statement yields.
VISIT(ColumnExprSubquery) {
  auto select_ctx = ctx->selectSetStmt();
  return visit(select_ctx);
}
// Builds an Array node from the (possibly absent, hence possibly empty) list of element expressions.
VISIT(ColumnExprArray) {
  PyObject* exprs = visitAsPyObjectOrEmptyList(ctx->columnExprList());
  RETURN_NEW_AST_NODE("Array", "{s:N}", "exprs", exprs);
}
// Builds a Dict node from the (possibly absent, hence possibly empty) list of key-value pairs.
VISIT(ColumnExprDict) {
  PyObject* items = visitAsPyObjectOrEmptyList(ctx->kvPairList());
  RETURN_NEW_AST_NODE("Dict", "{s:N}", "items", items);
}
// The ColumnExprSubstring rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(ColumnExprSubstring)
// The ColumnExprCast rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(ColumnExprCast)
// Builds an ArithmeticOperation node for the slash, asterisk and percent operators (Div, Mult and Mod).
// The operator enum member is resolved before either operand is visited.
VISIT(ColumnExprPrecedence1) {
  const char* op_name;
  if (ctx->SLASH()) {
    op_name = "Div";
  } else if (ctx->ASTERISK()) {
    op_name = "Mult";
  } else if (ctx->PERCENT()) {
    op_name = "Mod";
  } else {
    throw ParsingError("Unsupported value of rule ColumnExprPrecedence1");
  }
  PyObject* op = get_ast_enum_member("ArithmeticOperationOp", op_name);
  if (!op) throw PyInternalError();

  PyObject* left;
  try {
    left = visitAsPyObject(ctx->columnExpr(0));
  } catch (...) {
    Py_DECREF(op);
    throw;
  }

  PyObject* right;
  try {
    right = visitAsPyObject(ctx->right);
  } catch (...) {
    Py_DECREF(op);
    Py_DECREF(left);
    throw;
  }
  RETURN_NEW_AST_NODE("ArithmeticOperation", "{s:N,s:N,s:N}", "left", left, "right", right, "op", op);
}
// Handles the PLUS, DASH and CONCAT binary operators.
// - PLUS and DASH become ArithmeticOperation nodes (Add and Sub respectively).
// - CONCAT becomes a Call to `concat`. Operands that are themselves `concat` calls (name compared
//   case-insensitively) are flattened: the left call's `args` list is reused and extended in place, and the
//   right call's args are appended to it rather than nesting the call.
// Throws ParsingError when none of the three tokens is present, PyInternalError when a Python C-API call fails.
VISIT(ColumnExprPrecedence2) {
  PyObject* left = visitAsPyObject(ctx->left);
  PyObject* right;
  try {
    right = visitAsPyObject(ctx->right);
  } catch (...) {
    Py_DECREF(left);
    throw;
  }
  if (ctx->PLUS()) {
    PyObject* op = get_ast_enum_member("ArithmeticOperationOp", "Add");
    if (!op) {
      Py_DECREF(left);
      Py_DECREF(right);
      throw PyInternalError();
    }
    RETURN_NEW_AST_NODE("ArithmeticOperation", "{s:N,s:N,s:N}", "left", left, "right", right, "op", op);
  } else if (ctx->DASH()) {
    PyObject* op = get_ast_enum_member("ArithmeticOperationOp", "Sub");
    if (!op) {
      Py_DECREF(left);
      Py_DECREF(right);
      throw PyInternalError();
    }
    RETURN_NEW_AST_NODE("ArithmeticOperation", "{s:N,s:N,s:N}", "left", left, "right", right, "op", op);
  } else if (ctx->CONCAT()) {
/* Defines is_VAR_a_concat_call: whether VAR is a Call node whose lowercased name equals "concat". */
/* Every failure path releases left, right AND concat_as_str before throwing. */
#define IS_NODE_A_CONCAT_CALL(VAR) /* This is complex because of all the error handling, hence a macro */ \
    int is_##VAR##_a_concat_call = false;                                                                  \
    int is_##VAR##_a_call = is_ast_node_instance(VAR, "Call");                                             \
    if (is_##VAR##_a_call == -1) {                                                                         \
      Py_DECREF(left);                                                                                     \
      Py_DECREF(right);                                                                                    \
      Py_DECREF(concat_as_str); /* Previously leaked on this path */                                       \
      throw PyInternalError();                                                                             \
    }                                                                                                      \
    if (is_##VAR##_a_call) {                                                                               \
      PyObject* VAR##_name = PyObject_GetAttrString(VAR, "name");                                          \
      if (!VAR##_name) {                                                                                   \
        Py_DECREF(left);                                                                                   \
        Py_DECREF(right);                                                                                  \
        Py_DECREF(concat_as_str);                                                                          \
        throw PyInternalError();                                                                           \
      }                                                                                                    \
      PyObject* VAR##_name_lower = PyObject_CallMethod(VAR##_name, "lower", NULL);                         \
      Py_DECREF(VAR##_name);                                                                               \
      if (!VAR##_name_lower) {                                                                             \
        Py_DECREF(left);                                                                                   \
        Py_DECREF(right);                                                                                  \
        Py_DECREF(concat_as_str);                                                                          \
        throw PyInternalError();                                                                           \
      }                                                                                                    \
      is_##VAR##_a_concat_call = PyObject_RichCompareBool(VAR##_name_lower, concat_as_str, Py_EQ);         \
      Py_DECREF(VAR##_name_lower);                                                                         \
      if (is_##VAR##_a_concat_call == -1) {                                                                \
        Py_DECREF(left);                                                                                   \
        Py_DECREF(right);                                                                                  \
        Py_DECREF(concat_as_str);                                                                          \
        throw PyInternalError();                                                                           \
      }                                                                                                    \
    }
    PyObject* concat_as_str = PyUnicode_FromString("concat");
    if (!concat_as_str) {
      Py_DECREF(left);
      Py_DECREF(right);
      throw PyInternalError();
    }
    IS_NODE_A_CONCAT_CALL(left);
    IS_NODE_A_CONCAT_CALL(right);
    Py_DECREF(concat_as_str);
#undef IS_NODE_A_CONCAT_CALL
    // Start from the left call's own args list when flattening, or from a fresh one-element list otherwise
    PyObject* args = is_left_a_concat_call ? PyObject_GetAttrString(left, "args") : Py_BuildValue("[O]", left);
    if (!args) {
      Py_DECREF(left);
      Py_DECREF(right);
      throw PyInternalError();
    }
    if (is_right_a_concat_call) {
      PyObject* right_args = PyObject_GetAttrString(right, "args");
      if (!right_args) {
        Py_DECREF(args);
        Py_DECREF(left);
        Py_DECREF(right);
        throw PyInternalError();
      }
      int err_indicator = X_PyList_Extend(args, right_args);
      Py_DECREF(right_args);
      if (err_indicator == -1) {
        Py_DECREF(args);
        Py_DECREF(left);
        Py_DECREF(right);
        throw PyInternalError();
      }
    } else {
      int err_indicator = PyList_Append(args, right);
      if (err_indicator == -1) {
        Py_DECREF(args);
        Py_DECREF(left);
        Py_DECREF(right);
        throw PyInternalError();
      }
    }
    // The args list holds its own references to the operands by now, so ours can go
    Py_DECREF(right);
    Py_DECREF(left);
    RETURN_NEW_AST_NODE("Call", "{s:s,s:N}", "name", "concat", "args", args);
  } else {
    Py_DECREF(right);
    Py_DECREF(left);
    throw ParsingError("Unsupported value of rule ColumnExprPrecedence2");
  }
}
// Builds a CompareOperation node. The comparison tokens are checked in a fixed order and mapped to a member
// of CompareOperationOp; NOT selects the negated member for LIKE, ILIKE and IN, and COHORT selects the cohort
// variants of IN. The operator is resolved before either operand is visited.
VISIT(ColumnExprPrecedence3) {
  const bool negated = ctx->NOT();
  const char* op_name;
  if (ctx->EQ_SINGLE() || ctx->EQ_DOUBLE()) {
    op_name = "Eq";
  } else if (ctx->NOT_EQ()) {
    op_name = "NotEq";
  } else if (ctx->LT()) {
    op_name = "Lt";
  } else if (ctx->LT_EQ()) {
    op_name = "LtEq";
  } else if (ctx->GT()) {
    op_name = "Gt";
  } else if (ctx->GT_EQ()) {
    op_name = "GtEq";
  } else if (ctx->LIKE()) {
    op_name = negated ? "NotLike" : "Like";
  } else if (ctx->ILIKE()) {
    op_name = negated ? "NotILike" : "ILike";
  } else if (ctx->REGEX_SINGLE() || ctx->REGEX_DOUBLE()) {
    op_name = "Regex";
  } else if (ctx->NOT_REGEX()) {
    op_name = "NotRegex";
  } else if (ctx->IREGEX_SINGLE() || ctx->IREGEX_DOUBLE()) {
    op_name = "IRegex";
  } else if (ctx->NOT_IREGEX()) {
    op_name = "NotIRegex";
  } else if (ctx->IN()) {
    if (ctx->COHORT()) {
      op_name = negated ? "NotInCohort" : "InCohort";
    } else {
      op_name = negated ? "NotIn" : "In";
    }
  } else {
    throw ParsingError("Unsupported value of rule ColumnExprPrecedence3");
  }

  PyObject* op = get_ast_enum_member("CompareOperationOp", op_name);
  if (!op) throw PyInternalError();

  PyObject* left;
  try {
    left = visitAsPyObject(ctx->left);
  } catch (...) {
    Py_DECREF(op);
    throw;
  }

  PyObject* right;
  try {
    right = visitAsPyObject(ctx->right);
  } catch (...) {
    Py_DECREF(op);
    Py_DECREF(left);
    throw;
  }
  RETURN_NEW_AST_NODE("CompareOperation", "{s:N,s:N,s:N}", "left", left, "right", right, "op", op);
}
// Translates `INTERVAL <expr> <unit>` into a Call to the matching toInterval<Unit> function with the
// expression as its only argument. Throws ParsingError for a unit that isn't handled here.
VISIT(ColumnExprInterval) {
  auto unit_ctx = ctx->interval();
  const char* function_name = unit_ctx->SECOND()    ? "toIntervalSecond"
                              : unit_ctx->MINUTE()  ? "toIntervalMinute"
                              : unit_ctx->HOUR()    ? "toIntervalHour"
                              : unit_ctx->DAY()     ? "toIntervalDay"
                              : unit_ctx->WEEK()    ? "toIntervalWeek"
                              : unit_ctx->MONTH()   ? "toIntervalMonth"
                              : unit_ctx->QUARTER() ? "toIntervalQuarter"
                              : unit_ctx->YEAR()    ? "toIntervalYear"
                                                    : NULL;
  if (!function_name) {
    throw ParsingError("Unsupported value of rule ColumnExprInterval");
  }
  PyObject* amount = visitAsPyObject(ctx->columnExpr());
  RETURN_NEW_AST_NODE("Call", "{s:s,s:[N]}", "name", function_name, "args", amount);
}
// Translates `<expr> IS [NOT] NULL` into a CompareOperation against a None constant, using NotEq when NOT is
// present and Eq otherwise.
VISIT(ColumnExprIsNull) {
  PyObject* none_constant = build_ast_node("Constant", "{s:O}", "value", Py_None);
  if (!none_constant) throw PyInternalError();

  const char* op_name = ctx->NOT() ? "NotEq" : "Eq";
  PyObject* op = get_ast_enum_member("CompareOperationOp", op_name);
  if (!op) {
    Py_DECREF(none_constant);
    throw PyInternalError();
  }

  PyObject* operand;
  try {
    operand = visitAsPyObject(ctx->columnExpr());
  } catch (...) {
    Py_DECREF(op);
    Py_DECREF(none_constant);
    throw;
  }
  RETURN_NEW_AST_NODE(
      "CompareOperation", "{s:N,s:N,s:N}", "left", operand, "right", none_constant, "op", op
  );
}
// Translates `TRIM(LEADING|TRAILING|BOTH <string> FROM <expr>)` into a Call to trimLeft / trimRight / trim
// with the expression and the string as arguments.
// Throws ParsingError when none of the three keywords is present, PyInternalError on a null string result.
VISIT(ColumnExprTrim) {
  const char* name;
  if (ctx->LEADING()) {
    name = "trimLeft";
  } else if (ctx->TRAILING()) {
    name = "trimRight";
  } else if (ctx->BOTH()) {
    name = "trim";
  } else {
    throw ParsingError("Unsupported value of rule ColumnExprTrim");
  }
  PyObject* expr = visitAsPyObject(ctx->columnExpr());
  PyObject* value;
  try {
    value = visitAsPyObject(ctx->string());
  } catch (...) {
    // Don't leak the already-visited expression when visiting the string fails
    Py_DECREF(expr);
    throw;
  }
  if (!value) {
    Py_DECREF(expr);
    throw PyInternalError();
  }
  RETURN_NEW_AST_NODE("Call", "{s:s,s:[NN]}", "name", name, "args", expr, value);
}
// Builds a Tuple node from the (possibly absent, hence possibly empty) list of element expressions.
VISIT(ColumnExprTuple) {
  PyObject* exprs = visitAsPyObjectOrEmptyList(ctx->columnExprList());
  RETURN_NEW_AST_NODE("Tuple", "{s:N}", "exprs", exprs);
}
// Builds an ArrayAccess node: columnExpr(0) is the accessed array, columnExpr(1) the property.
// Note that the property is visited BEFORE the array.
VISIT(ColumnExprArrayAccess) {
  PyObject* property = visitAsPyObject(ctx->columnExpr(1));
  PyObject* array;
  try {
    array = visitAsPyObject(ctx->columnExpr(0));
  } catch (...) {
    Py_DECREF(property);
    throw;
  }
  RETURN_NEW_AST_NODE("ArrayAccess", "{s:N,s:N}", "array", array, "property", property);
}
// Same as ColumnExprArrayAccess, except that the resulting ArrayAccess node is marked `nullish=True`.
// The property (columnExpr(1)) is visited BEFORE the array (columnExpr(0)).
VISIT(ColumnExprNullArrayAccess) {
  PyObject* property = visitAsPyObject(ctx->columnExpr(1));
  PyObject* array;
  try {
    array = visitAsPyObject(ctx->columnExpr(0));
  } catch (...) {
    Py_DECREF(property);
    throw;
  }
  RETURN_NEW_AST_NODE("ArrayAccess", "{s:N,s:N,s:O}", "array", array, "property", property, "nullish", Py_True);
}
// Builds an ArrayAccess node for `<expr>.<identifier>`: the identifier becomes a string Constant used as the
// property, and the expression is the accessed object.
VISIT(ColumnExprPropertyAccess) {
  string property_name = visitAsString(ctx->identifier());
  PyObject* property =
      build_ast_node("Constant", "{s:s#}", "value", property_name.data(), property_name.size());
  if (!property) throw PyInternalError();

  PyObject* target;
  try {
    target = visitAsPyObject(ctx->columnExpr());
  } catch (...) {
    Py_DECREF(property);
    throw;
  }
  RETURN_NEW_AST_NODE("ArrayAccess", "{s:N,s:N}", "array", target, "property", property);
}
// Same as ColumnExprPropertyAccess, except that the resulting ArrayAccess node is marked `nullish=True`.
VISIT(ColumnExprNullPropertyAccess) {
  string property_name = visitAsString(ctx->identifier());
  PyObject* property =
      build_ast_node("Constant", "{s:s#}", "value", property_name.data(), property_name.size());
  if (!property) throw PyInternalError();

  PyObject* target;
  try {
    target = visitAsPyObject(ctx->columnExpr());
  } catch (...) {
    Py_DECREF(property);
    throw;
  }
  RETURN_NEW_AST_NODE("ArrayAccess", "{s:N,s:N,s:O}", "array", target, "property", property, "nullish", Py_True);
}
// The ColumnExprBetween rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(ColumnExprBetween)
// A parenthesized column expression yields whatever the wrapped expression yields.
VISIT(ColumnExprParens) {
  auto inner_ctx = ctx->columnExpr();
  return visit(inner_ctx);
}
// The ColumnExprTimestamp rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(ColumnExprTimestamp)
// Builds an And node from two operands, flattening nested And nodes: when the left operand already is an And,
// its `exprs` list is reused and extended in place; when the right operand is an And, its exprs are appended
// individually instead of nesting the node.
// Throws PyInternalError when a Python C-API call fails (the Python error is already set in that case).
VISIT(ColumnExprAnd) {
  PyObject* left = visitAsPyObject(ctx->columnExpr(0));
  PyObject* right;
  try {
    right = visitAsPyObject(ctx->columnExpr(1));
  } catch (...) {
    Py_DECREF(left);
    throw;
  }
  int is_left_an_and = is_ast_node_instance(left, "And");
  if (is_left_an_and == -1) {
    Py_DECREF(left);
    Py_DECREF(right);
    throw PyInternalError();
  }
  PyObject* exprs = is_left_an_and ? PyObject_GetAttrString(left, "exprs") : Py_BuildValue("[O]", left);
  // Both ways of obtaining the list can fail; without this check the code below would dereference NULL
  if (!exprs) {
    Py_DECREF(left);
    Py_DECREF(right);
    throw PyInternalError();
  }
  int is_right_an_and = is_ast_node_instance(right, "And");
  if (is_right_an_and == -1) goto right_check_error;
  if (is_right_an_and) {
    PyObject* right_exprs = PyObject_GetAttrString(right, "exprs");
    if (!right_exprs) goto right_check_error;
    int err_indicator = X_PyList_Extend(exprs, right_exprs);
    Py_DECREF(right_exprs);
    if (err_indicator == -1) goto right_check_error;
  } else {
    int err_indicator = PyList_Append(exprs, right);
    if (err_indicator == -1) goto right_check_error;
  }
  goto right_check_success;
right_check_error:
  Py_DECREF(exprs);
  Py_DECREF(left);
  Py_DECREF(right);
  throw PyInternalError();
right_check_success:
  // The exprs list holds its own references to the operands by now, so ours can go
  Py_DECREF(right);
  Py_DECREF(left);
  RETURN_NEW_AST_NODE("And", "{s:N}", "exprs", exprs);
}
// Builds an Or node from two operands, flattening nested Or nodes: when the left operand already is an Or,
// its `exprs` list is reused and extended in place; when the right operand is an Or, its exprs are appended
// individually instead of nesting the node.
// Throws PyInternalError when a Python C-API call fails (the Python error is already set in that case).
VISIT(ColumnExprOr) {
  PyObject* left = visitAsPyObject(ctx->columnExpr(0));
  PyObject* right;
  try {
    right = visitAsPyObject(ctx->columnExpr(1));
  } catch (...) {
    Py_DECREF(left);
    throw;
  }
  int is_left_an_or = is_ast_node_instance(left, "Or");
  if (is_left_an_or == -1) {
    Py_DECREF(left);
    Py_DECREF(right);
    throw PyInternalError();
  }
  PyObject* exprs = is_left_an_or ? PyObject_GetAttrString(left, "exprs") : Py_BuildValue("[O]", left);
  // Both ways of obtaining the list can fail; without this check the code below would dereference NULL
  if (!exprs) {
    Py_DECREF(left);
    Py_DECREF(right);
    throw PyInternalError();
  }
  int is_right_an_or = is_ast_node_instance(right, "Or");
  if (is_right_an_or == -1) goto right_check_error;
  if (is_right_an_or) {
    PyObject* right_exprs = PyObject_GetAttrString(right, "exprs");
    if (!right_exprs) goto right_check_error;
    int err_indicator = X_PyList_Extend(exprs, right_exprs);
    // Release before checking the result, so that right_exprs isn't leaked when extending fails
    Py_DECREF(right_exprs);
    if (err_indicator == -1) goto right_check_error;
  } else {
    int err_indicator = PyList_Append(exprs, right);
    if (err_indicator == -1) goto right_check_error;
  }
  goto right_check_success;
right_check_error:
  Py_DECREF(exprs);
  Py_DECREF(left);
  Py_DECREF(right);
  throw PyInternalError();
right_check_success:
  // The exprs list holds its own references to the operands by now, so ours can go
  Py_DECREF(right);
  Py_DECREF(left);
  RETURN_NEW_AST_NODE("Or", "{s:N}", "exprs", exprs);
}
// Builds a TupleAccess node for `<expr>.<N>`, where the decimal literal N is parsed into a Python int.
VISIT(ColumnExprTupleAccess) {
  const string index_text = ctx->DECIMAL_LITERAL()->getText();
  PyObject* index = PyLong_FromString(index_text.c_str(), NULL, 10);
  if (!index) throw PyInternalError();

  PyObject* accessed_tuple;
  try {
    accessed_tuple = visitAsPyObject(ctx->columnExpr());
  } catch (...) {
    Py_DECREF(index);
    throw;
  }
  RETURN_NEW_AST_NODE("TupleAccess", "{s:N,s:N}", "tuple", accessed_tuple, "index", index);
}
// Same as ColumnExprTupleAccess, except that the resulting TupleAccess node is marked `nullish=True`.
VISIT(ColumnExprNullTupleAccess) {
  const string index_text = ctx->DECIMAL_LITERAL()->getText();
  PyObject* index = PyLong_FromString(index_text.c_str(), NULL, 10);
  if (!index) throw PyInternalError();

  PyObject* accessed_tuple;
  try {
    accessed_tuple = visitAsPyObject(ctx->columnExpr());
  } catch (...) {
    Py_DECREF(index);
    throw;
  }
  RETURN_NEW_AST_NODE(
      "TupleAccess", "{s:N,s:N,s:O}", "tuple", accessed_tuple, "index", index, "nullish", Py_True
  );
}
// Translates a CASE expression into a Call.
// - With a case operand (`CASE x WHEN ...`): a Call to `transform` with four args - the first column, an
//   Array of the odd-positioned middle columns, an Array of the even-positioned middle columns, and the last
//   column.
// - Without one: a Call to `if` (exactly three columns) or `multiIf` with all columns as args.
// Throws PyInternalError when a Python C-API call fails (the Python error is already set in that case).
VISIT(ColumnExprCase) {
  auto column_expr_ctx = ctx->columnExpr();
  size_t columns_size = column_expr_ctx.size();
  PyObject* columns = visitPyListOfObjects(column_expr_ctx);
  if (ctx->caseExpr) {
    PyObject *arg_0 = NULL, *arg_1 = NULL, *arg_2 = NULL, *arg_3 = NULL, *args = NULL;
    PyObject* temp_expr_lists[2] = {NULL, NULL};
    arg_0 = PyList_GET_ITEM(columns, 0);  // Borrowed from `columns`
    arg_1 = build_ast_node("Array", "{s:[]}", "exprs");
    if (!arg_1) goto error;
    arg_2 = build_ast_node("Array", "{s:[]}", "exprs");
    if (!arg_2) goto error;
    arg_3 = PyList_GET_ITEM(columns, columns_size - 1);  // Borrowed from `columns`
    // `O` (not `N`) for arg_1 and arg_2: we keep owning our references to them, so releasing them below is
    // correct on both the success and the error path. With `N` the error path released them a second time.
    args = Py_BuildValue("[OOOO]", arg_0, arg_1, arg_2, arg_3);
    if (!args) goto error;
    temp_expr_lists[0] = PyObject_GetAttrString(arg_1, "exprs");
    if (!temp_expr_lists[0]) goto error;
    temp_expr_lists[1] = PyObject_GetAttrString(arg_2, "exprs");
    if (!temp_expr_lists[1]) goto error;
    // Distribute the middle columns alternately between the two arrays
    for (size_t index = 1; index < columns_size - 1; index++) {
      PyObject* item = PyList_GetItem(columns, index);
      if (!item) goto error;
      int err_indicator = PyList_Append(temp_expr_lists[(index - 1) % 2], item);
      if (err_indicator == -1) goto error;
    }
    Py_DECREF(temp_expr_lists[1]);
    Py_DECREF(temp_expr_lists[0]);
    Py_DECREF(arg_2);
    Py_DECREF(arg_1);
    Py_DECREF(columns);
    goto success;
  error:
    Py_XDECREF(temp_expr_lists[1]);
    Py_XDECREF(temp_expr_lists[0]);
    Py_XDECREF(args);
    Py_XDECREF(arg_2);
    Py_XDECREF(arg_1);
    Py_XDECREF(columns);
    throw PyInternalError();
  success:
    RETURN_NEW_AST_NODE("Call", "{s:s,s:N}", "name", "transform", "args", args);
  } else {
    RETURN_NEW_AST_NODE("Call", "{s:s,s:N}", "name", columns_size == 3 ? "if" : "multiIf", "args", columns);
  }
}
// The ColumnExprDate rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(ColumnExprDate)
// Wraps the operand expression in a Not node.
VISIT(ColumnExprNot) {
  PyObject* operand = visitAsPyObject(ctx->columnExpr());
  RETURN_NEW_AST_NODE("Not", "{s:N}", "expr", operand);
}
// Builds a WindowFunction node that refers to a named window: identifier(0) is the function name and
// identifier(1) the window identifier. `exprs` and `args` default to empty lists when absent.
VISIT(ColumnExprWinFunctionTarget) {
  string function_name = visitAsString(ctx->identifier(0));
  string window_name = visitAsString(ctx->identifier(1));

  PyObject* exprs = visitAsPyObjectOrEmptyList(ctx->columnExprs);
  PyObject* args;
  try {
    args = visitAsPyObjectOrEmptyList(ctx->columnArgList);
  } catch (...) {
    Py_DECREF(exprs);
    throw;
  }
  RETURN_NEW_AST_NODE(
      "WindowFunction", "{s:s#,s:N,s:N,s:s#}", "name", function_name.data(), function_name.size(), "exprs",
      exprs, "args", args, "over_identifier", window_name.data(), window_name.size()
  );
}
// Builds a WindowFunction node with an inline window definition (`over_expr`, None when absent).
// `exprs` and `args` default to empty lists when absent.
VISIT(ColumnExprWinFunction) {
  string function_name = visitAsString(ctx->identifier());

  PyObject* exprs = visitAsPyObjectOrEmptyList(ctx->columnExprs);
  PyObject* args;
  try {
    args = visitAsPyObjectOrEmptyList(ctx->columnArgList);
  } catch (...) {
    Py_DECREF(exprs);
    throw;
  }
  PyObject* window;
  try {
    window = visitAsPyObjectOrNone(ctx->windowExpr());
  } catch (...) {
    Py_DECREF(exprs);
    Py_DECREF(args);
    throw;
  }
  RETURN_NEW_AST_NODE(
      "WindowFunction", "{s:s#,s:N,s:N,s:N}", "name", function_name.data(), function_name.size(), "exprs",
      exprs, "args", args, "over_expr", window
  );
}
// An identifier used as a column expression yields whatever its column identifier yields.
VISIT(ColumnExprIdentifier) {
  auto identifier_ctx = ctx->columnIdentifier();
  return visit(identifier_ctx);
}
// Builds a Call node for a function call, optionally parametric (`name(params)(args)`).
// `params` is None for a plain call; when a second left parenthesis is present it is at least an empty list.
// `args` defaults to an empty list, and `distinct` reflects the presence of DISTINCT.
VISIT(ColumnExprFunction) {
  string function_name = visitAsString(ctx->identifier());

  // Two pairs of parentheses, `()()`, mean that the first pair holds params - even if there are none
  PyObject* params = ctx->LPAREN(1) ? visitAsPyObjectOrEmptyList(ctx->columnExprs)
                                    : visitAsPyObjectOrNone(ctx->columnExprs);
  PyObject* args;
  try {
    args = visitAsPyObjectOrEmptyList(ctx->columnArgList);
  } catch (...) {
    Py_DECREF(params);
    throw;
  }
  PyObject* distinct = ctx->DISTINCT() ? Py_True : Py_False;
  RETURN_NEW_AST_NODE(
      "Call", "{s:s#,s:N,s:N,s:O}", "name", function_name.data(), function_name.size(), "params", params,
      "args", args, "distinct", distinct
  );
}
// Builds a Field node for `*`, or for `<table chain>.*` when a table identifier precedes the asterisk.
VISIT(ColumnExprAsterisk) {
  auto table_identifier_ctx = ctx->tableIdentifier();
  if (!table_identifier_ctx) {
    RETURN_NEW_AST_NODE("Field", "{s:[s]}", "chain", "*");
  }
  vector<string> chain = any_cast<vector<string>>(visit(table_identifier_ctx));
  chain.push_back("*");
  RETURN_NEW_AST_NODE("Field", "{s:N}", "chain", X_PyList_FromStrings(chain));
}
// A HogQLX tag used as a column expression yields whatever the tag element yields.
VISIT(ColumnExprTagElement) {
  auto tag_ctx = ctx->hogqlxTagElement();
  return visit(tag_ctx);
}
// Builds a Lambda node. The body is either a column expression or a block (the column expression wins when
// both are present), and `args` is the list of parameter identifiers.
// Throws ParsingError when the lambda has neither kind of body.
VISIT(ColumnLambdaExpr) {
  auto column_expr_ctx = ctx->columnExpr();
  auto block_ctx = ctx->block();
  if (!column_expr_ctx && !block_ctx) {
    throw ParsingError("ColumnLambdaExpr must have either a columnExpr or a block");
  }

  PyObject* body = column_expr_ctx ? visitAsPyObject(column_expr_ctx) : visitAsPyObject(block_ctx);
  PyObject* parameters;
  try {
    parameters = X_PyList_FromStrings(visitAsVectorOfStrings(ctx->identifier()));
  } catch (...) {
    Py_DECREF(body);
    throw;
  }
  RETURN_NEW_AST_NODE("Lambda", "{s:N,s:N}", "args", parameters, "expr", body);
}
// Builds a Python dict of CTEs, keyed by each visited CTE's `name` attribute. A later CTE with the same name
// replaces an earlier one.
VISIT(WithExprList) {
  PyObject* ctes = PyDict_New();
  if (!ctes) throw PyInternalError();

  for (auto with_expr_ctx : ctx->withExpr()) {
    PyObject* cte;
    try {
      cte = visitAsPyObject(with_expr_ctx);
    } catch (...) {
      Py_DECREF(ctes);
      throw;
    }
    PyObject* name = PyObject_GetAttrString(cte, "name");
    // The dict takes its own references on insertion, so ours are released whether or not it worked
    const bool was_stored = name && PyDict_SetItem(ctes, name, cte) != -1;
    Py_XDECREF(name);
    Py_DECREF(cte);
    if (!was_stored) {
      Py_DECREF(ctes);
      throw PyInternalError();
    }
  }
  return ctes;
}
// Builds a CTE node of type "subquery": the identifier is the CTE's name and the select set statement its
// expression.
VISIT(WithExprSubquery) {
  string cte_name = visitAsString(ctx->identifier());
  PyObject* subquery = visitAsPyObject(ctx->selectSetStmt());
  RETURN_NEW_AST_NODE(
      "CTE", "{s:s#,s:N,s:s}", "name", cte_name.data(), cte_name.size(), "expr", subquery, "cte_type",
      "subquery"
  );
}
// Builds a CTE node of type "column": the identifier is the CTE's name and the column expression its
// expression.
VISIT(WithExprColumn) {
  string cte_name = visitAsString(ctx->identifier());
  PyObject* column = visitAsPyObject(ctx->columnExpr());
  RETURN_NEW_AST_NODE(
      "CTE", "{s:s#,s:N,s:s}", "name", cte_name.data(), cte_name.size(), "expr", column, "cte_type", "column"
  );
}
// Builds the node for a column identifier:
// - a placeholder is returned as visited;
// - with no table part, the text `true` / `false` (case-insensitive) becomes a boolean Constant;
// - anything else becomes a Field whose chain is the table parts followed by the nested identifier parts.
VISIT(ColumnIdentifier) {
  if (auto placeholder_ctx = ctx->placeholder()) {
    return visitAsPyObject(placeholder_ctx);
  }

  vector<string> table_parts;
  if (auto table_identifier_ctx = ctx->tableIdentifier()) {
    table_parts = any_cast<vector<string>>(visit(table_identifier_ctx));
  }
  vector<string> nested_parts;
  if (auto nested_identifier_ctx = ctx->nestedIdentifier()) {
    nested_parts = any_cast<vector<string>>(visit(nested_identifier_ctx));
  }

  if (table_parts.empty() && !nested_parts.empty()) {
    const string lowercase_text = boost::algorithm::to_lower_copy(ctx->getText());
    if (lowercase_text == "true") {
      RETURN_NEW_AST_NODE("Constant", "{s:O}", "value", Py_True);
    }
    if (lowercase_text == "false") {
      RETURN_NEW_AST_NODE("Constant", "{s:O}", "value", Py_False);
    }
    RETURN_NEW_AST_NODE("Field", "{s:N}", "chain", X_PyList_FromStrings(nested_parts));
  }

  vector<string> chain(table_parts);
  chain.insert(chain.end(), nested_parts.begin(), nested_parts.end());
  RETURN_NEW_AST_NODE("Field", "{s:N}", "chain", X_PyList_FromStrings(chain));
}
// Returns the identifier parts of a nested identifier as a vector of strings.
VISIT(NestedIdentifier) {
  auto identifier_ctxs = ctx->identifier();
  return visitAsVectorOfStrings(identifier_ctxs);
}
// Builds a Field node whose chain is the visited table identifier's parts.
VISIT(TableExprIdentifier) {
  vector<string> table_chain = any_cast<vector<string>>(visit(ctx->tableIdentifier()));
  PyObject* py_chain = X_PyList_FromStrings(table_chain);
  RETURN_NEW_AST_NODE("Field", "{s:N}", "chain", py_chain);
}
// A subquery used as a table expression yields whatever its select set statement yields.
VISIT(TableExprSubquery) {
  auto select_ctx = ctx->selectSetStmt();
  return visit(select_ctx);
}
// A placeholder used as a table expression yields the visited placeholder object.
VISIT(TableExprPlaceholder) {
  auto placeholder_ctx = ctx->placeholder();
  return visitAsPyObject(placeholder_ctx);
}
// Applies an alias to a table expression. The alias comes from the `alias` rule when present, from the
// identifier otherwise. When the visited table already is a JoinExpr, the alias is set on it and it is
// returned; otherwise the table is wrapped in a new JoinExpr carrying the alias.
// Throws SyntaxError when the lowercase alias is a reserved keyword, PyInternalError when a Python C-API call
// fails (the Python error is already set in that case).
VISIT(TableExprAlias) {
  auto alias_ctx = ctx->alias();
  string alias = any_cast<string>(alias_ctx ? visit(alias_ctx) : visit(ctx->identifier()));
  if (find(RESERVED_KEYWORDS.begin(), RESERVED_KEYWORDS.end(), boost::algorithm::to_lower_copy(alias)) !=
      RESERVED_KEYWORDS.end()) {
    throw SyntaxError("ALIAS is a reserved keyword");
  }
  PyObject* py_alias = PyUnicode_FromStringAndSize(alias.data(), alias.size());
  if (!py_alias) throw PyInternalError();
  PyObject* table;
  try {
    table = visitAsPyObject(ctx->tableExpr());
  } catch (...) {
    Py_DECREF(py_alias);
    throw;
  }
  int is_table_a_join_expr = is_ast_node_instance(table, "JoinExpr");
  if (is_table_a_join_expr == -1) {
    // Both objects are still owned by us at this point, so both must be released (table used to be leaked)
    Py_DECREF(table);
    Py_DECREF(py_alias);
    throw PyInternalError();
  }
  if (is_table_a_join_expr) {
    int err_indicator = PyObject_SetAttrString(table, "alias", py_alias);
    Py_DECREF(py_alias);
    if (err_indicator == -1) {
      Py_DECREF(table);
      throw PyInternalError();
    }
    return table;
  }
  RETURN_NEW_AST_NODE("JoinExpr", "{s:N,s:N}", "table", table, "alias", py_alias);
}
// A table function used as a table expression yields whatever the table function expression yields.
VISIT(TableExprFunction) {
  auto function_ctx = ctx->tableFunctionExpr();
  return visit(function_ctx);
}
// A HogQLX tag used as a table expression yields whatever the tag element yields.
VISIT(TableExprTag) {
  auto tag_ctx = ctx->hogqlxTagElement();
  return visit(tag_ctx);
}
// Builds a JoinExpr for a table function call: `table` is a single-part Field with the function's name and
// `table_args` the visited argument list (an empty list when there are no arguments).
VISIT(TableFunctionExpr) {
  string function_name = visitAsString(ctx->identifier());

  PyObject* table_args;
  if (auto table_args_ctx = ctx->tableArgList()) {
    table_args = visitAsPyObject(table_args_ctx);
  } else {
    table_args = PyList_New(0);
  }
  if (!table_args) throw PyInternalError();

  PyObject* table = build_ast_node("Field", "{s:[s#]}", "chain", function_name.data(), function_name.size());
  if (!table) {
    Py_DECREF(table_args);
    throw PyInternalError();
  }
  RETURN_NEW_AST_NODE("JoinExpr", "{s:N,s:N}", "table", table, "table_args", table_args);
}
// Returns the table identifier as a vector of strings: [database, table] when a database identifier is
// present, [table] otherwise. The table name is visited before the database name.
VISIT(TableIdentifier) {
  string table_name = visitAsString(ctx->identifier());
  vector<string> parts;
  if (auto database_identifier_ctx = ctx->databaseIdentifier()) {
    parts.push_back(visitAsString(database_identifier_ctx));
  }
  parts.push_back(table_name);
  return parts;
}
// Visits every columnExpr argument and collects the results into a Python list.
VISIT(TableArgList) {
  auto argument_ctxs = ctx->columnExpr();
  return visitPyListOfObjects(argument_ctxs);
}
// A database identifier yields whatever its identifier yields.
VISIT(DatabaseIdentifier) {
  auto identifier_ctx = ctx->identifier();
  return visit(identifier_ctx);
}
// The FloatingLiteral rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(FloatingLiteral)
// Builds a Constant node from a number literal. The lowercased text is parsed as a Python float when it
// contains "." or "e", or equals "-inf", "inf" or "nan"; otherwise it is parsed as a base-10 Python int.
VISIT(NumberLiteral) {
  const string lowercase_text = boost::algorithm::to_lower_copy(ctx->getText());
  const bool is_float = lowercase_text.find(".") != string::npos || lowercase_text.find("e") != string::npos ||
                        lowercase_text == "-inf" || lowercase_text == "inf" || lowercase_text == "nan";

  PyObject* value;
  if (is_float) {
    PyObject* py_text = PyUnicode_FromStringAndSize(lowercase_text.data(), lowercase_text.size());
    if (!py_text) throw PyInternalError();
    value = PyFloat_FromString(py_text);
    Py_DECREF(py_text);
  } else {
    value = PyLong_FromString(lowercase_text.c_str(), NULL, 10);
  }
  if (!value) throw PyInternalError();
  RETURN_NEW_AST_NODE("Constant", "{s:N}", "value", value);
}
// Builds a Constant node for NULL (value None) or for a string literal (its parsed text). Any other literal
// is delegated to the child rules.
VISIT(Literal) {
  if (ctx->NULL_SQL()) {
    RETURN_NEW_AST_NODE("Constant", "{s:O}", "value", Py_None);
  } else if (auto string_terminal = ctx->STRING_LITERAL()) {
    string parsed_text = parse_string_literal_ctx(string_terminal);
    RETURN_NEW_AST_NODE("Constant", "{s:s#}", "value", parsed_text.data(), parsed_text.size());
  }
  return visitChildren(ctx);
}
// The Interval rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(Interval)
// The Keyword rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(Keyword)
// The KeywordForAlias rule has no visitor implementation: visiting it throws NotImplementedError.
VISIT_UNSUPPORTED(KeywordForAlias)
// Returns the alias as a string. Text wrapped in a matching pair of backticks or double quotes is run
// through parse_string_literal_text; anything else is returned verbatim.
VISIT(Alias) {
  string text = ctx->getText();
  if (text.size() >= 2) {
    const char quote = text.front();
    const bool is_quote_char = quote == '`' || quote == '"';
    if (is_quote_char && text.back() == quote) {
      return parse_string_literal_text(text);
    }
  }
  return text;
}
// Returns the identifier as a string. Text wrapped in matching backticks or double quotes is unquoted with
// parse_string_literal_text; anything else is returned verbatim.
VISIT(Identifier) {
  string raw = ctx->getText();
  if (raw.size() < 2) {
    return raw;
  }
  const char opener = raw.front();
  const bool is_quoted = (opener == '`' || opener == '"') && raw.back() == opener;
  if (is_quoted) {
    return parse_string_literal_text(raw);
  }
  return raw;
}
// Builds a `HogQLXAttribute` node. The value is, in order of precedence: the visited column expression,
// the visited string, or `Constant(value=True)` for a bare attribute name.
VISIT(HogqlxTagAttribute) {
  string attribute_name = visitAsString(ctx->identifier());

  if (auto expr_ctx = ctx->columnExpr()) {
    RETURN_NEW_AST_NODE(
        "HogQLXAttribute", "{s:s#,s:N}", "name", attribute_name.data(), attribute_name.size(), "value",
        visitAsPyObject(expr_ctx)
    );
  }

  PyObject* attribute_value;
  if (auto string_ctx = ctx->string()) {
    attribute_value = visitAsPyObject(string_ctx);
  } else {
    // No explicit value given: the attribute defaults to True.
    attribute_value = build_ast_node("Constant", "{s:O}", "value", Py_True);
  }
  if (!attribute_value) throw PyInternalError();
  RETURN_NEW_AST_NODE(
      "HogQLXAttribute", "{s:s#,s:N}", "name", attribute_name.data(), attribute_name.size(), "value",
      attribute_value
  );
}
// Builds a `HogQLXTag` node for a self-closing tag: `kind` is the tag identifier and `attributes` is the
// list of visited tag attributes.
VISIT(HogqlxTagElementClosed) {
  string tag_kind = visitAsString(ctx->identifier());
  PyObject* tag_attributes = visitPyListOfObjects(ctx->hogqlxTagAttribute());
  RETURN_NEW_AST_NODE(
      "HogQLXTag", "{s:s#,s:N}", "kind", tag_kind.data(), tag_kind.size(), "attributes", tag_attributes
  );
}
// Builds a `HogQLXTag` node for a tag with separate opening and closing parts.
//
// The explicit attributes are visited in order. If the tag has a child (a nested tag element or a column
// expression), that child is appended as an extra attribute named "source".
//
// Throws SyntaxError when the opening and closing names differ, or when a child is present together with
// an explicit "source" attribute. Throws PyInternalError when a Python C API call fails. The partially
// built attribute list is released on every error path.
VISIT(HogqlxTagElementNested) {
  string opening = visitAsString(ctx->identifier(0));
  string closing = visitAsString(ctx->identifier(1));
  if (opening != closing) {
    throw SyntaxError("Opening and closing HogQLX tags must match. Got " + opening + " and " + closing);
  }

  auto tag_element_ctx = ctx->hogqlxTagElement();
  auto column_expr_ctx = ctx->columnExpr();
  auto tag_attribute_ctx = ctx->hogqlxTagAttribute();
  const bool has_child = tag_element_ctx || column_expr_ctx;

  // Reserve one extra slot for the synthetic "source" attribute when there is a child.
  PyObject* attributes = PyList_New(tag_attribute_ctx.size() + (has_child ? 1 : 0));
  if (!attributes) throw PyInternalError();

  bool found_source = false;
  for (size_t i = 0; i < tag_attribute_ctx.size(); i++) {
    PyObject* object;
    try {
      object = visitAsPyObject(tag_attribute_ctx[i]);
    } catch (...) {
      Py_DECREF(attributes);
      throw;
    }
    // The list now owns `object`; releasing `attributes` below also releases it.
    PyList_SET_ITEM(attributes, i, object);

    PyObject* name = PyObject_GetAttrString(object, "name");
    if (!name) {
      Py_DECREF(attributes);
      throw PyInternalError();
    }
    PyObject* source_as_str = PyUnicode_FromString("source");
    if (!source_as_str) {
      Py_DECREF(name);
      Py_DECREF(attributes);
      throw PyInternalError();
    }
    int tentative_found_source = PyObject_RichCompareBool(name, source_as_str, Py_EQ);
    Py_DECREF(source_as_str);
    Py_DECREF(name);
    if (tentative_found_source == -1) {
      Py_DECREF(attributes);
      throw PyInternalError();
    }
    if (tentative_found_source) {
      found_source = true;
    }
  }

  if (has_child) {
    if (found_source) {
      Py_DECREF(attributes);
      throw SyntaxError("Nested HogQLX tags cannot have a source attribute");
    }
    // Visit the child before building the attribute so that an exception thrown while visiting cannot
    // leak `attributes`.
    PyObject* source_value;
    try {
      source_value = tag_element_ctx ? visitAsPyObject(tag_element_ctx) : visitAsPyObject(column_expr_ctx);
    } catch (...) {
      Py_DECREF(attributes);
      throw;
    }
    // Plain "s" is used for the literal name: "s#" would need a Py_ssize_t length, and an int literal
    // passed through varargs is not one.
    PyObject* source_attribute =
        build_ast_node("HogQLXAttribute", "{s:s,s:N}", "name", "source", "value", source_value);
    if (!source_attribute) {
      Py_DECREF(attributes);
      throw PyInternalError();
    }
    PyList_SET_ITEM(attributes, tag_attribute_ctx.size(), source_attribute);
  }

  RETURN_NEW_AST_NODE("HogQLXTag", "{s:s#,s:N}", "kind", opening.data(), opening.size(), "attributes", attributes);
}
// Wraps the visited column expression in a `Placeholder` node (field `expr`).
VISIT(Placeholder) {
  PyObject* inner_expr = visitAsPyObject(ctx->columnExpr());
  RETURN_NEW_AST_NODE("Placeholder", "{s:N}", "expr", inner_expr);
}
// Visiting this rule directly throws NotImplementedError("Unsupported rule: EnumValue").
VISIT_UNSUPPORTED(EnumValue)
// Builds `Call(name="ifNull", args=[first, second])` from the two operand expressions.
VISIT(ColumnExprNullish) {
  PyObject* primary = visitAsPyObject(ctx->columnExpr(0));
  PyObject* alternative = nullptr;
  try {
    alternative = visitAsPyObject(ctx->columnExpr(1));
  } catch (...) {
    // The first operand is already a live reference; release it before propagating.
    Py_DECREF(primary);
    throw;
  }
  RETURN_NEW_AST_NODE("Call", "{s:s, s:[NN]}", "name", "ifNull", "args", primary, alternative);
}
// Builds an `ExprCall` node: `expr` is the visited callee expression, `args` the visited argument list
// (an empty list when no arguments are given, per visitAsPyObjectOrEmptyList).
VISIT(ColumnExprCall) {
  PyObject* callee = visitAsPyObject(ctx->columnExpr());
  PyObject* call_args = nullptr;
  try {
    call_args = visitAsPyObjectOrEmptyList(ctx->columnExprList());
  } catch (...) {
    // The callee is already a live reference; release it before propagating.
    Py_DECREF(callee);
    throw;
  }
  RETURN_NEW_AST_NODE("ExprCall", "{s:N, s:N}", "expr", callee, "args", call_args);
}
// A template-string column expression is represented by whatever its template string visits to.
VISIT(ColumnExprTemplateString) {
  auto template_ctx = ctx->templateString();
  return visit(template_ctx);
}
// A plain string literal becomes a `Constant` holding the unquoted text; otherwise the rule is a template
// string and is delegated to its visitor.
VISIT(String) {
  auto literal_terminal = ctx->STRING_LITERAL();
  if (!literal_terminal) {
    return visit(ctx->templateString());
  }
  string unquoted = parse_string_literal_ctx(literal_terminal);
  RETURN_NEW_AST_NODE("Constant", "{s:s#}", "value", unquoted.data(), unquoted.size());
}
// Converts a template string into an AST node:
//   - no parts        -> Constant(value="")
//   - exactly one part -> that part's node, returned as-is
//   - several parts   -> Call(name="concat", args=[...parts])
VISIT(TemplateString) {
  auto string_contents = ctx->stringContents();
  if (string_contents.empty()) {
    RETURN_NEW_AST_NODE("Constant", "{s:s}", "value", "");
  }
  if (string_contents.size() == 1) {
    return visit(string_contents[0]);
  }
  PyObject* args = visitPyListOfObjects(string_contents);
  if (!args) throw PyInternalError();
  RETURN_NEW_AST_NODE("Call", "{s:s,s:N}", "name", "concat", "args", args);
}
// Converts a full template string into an AST node:
//   - no parts        -> Constant(value="")
//   - exactly one part -> that part's node, returned as-is
//   - several parts   -> Call(name="concat", args=[...parts])
VISIT(FullTemplateString) {
  auto string_contents_full = ctx->stringContentsFull();
  if (string_contents_full.empty()) {
    RETURN_NEW_AST_NODE("Constant", "{s:s}", "value", "");
  }
  if (string_contents_full.size() == 1) {
    return visit(string_contents_full[0]);
  }
  PyObject* args = visitPyListOfObjects(string_contents_full);
  if (!args) throw PyInternalError();
  RETURN_NEW_AST_NODE("Call", "{s:s,s:N}", "name", "concat", "args", args);
}
// Converts one part of a template string: literal text becomes a `Constant` (text processed by
// parse_string_text_ctx), an embedded column expression is visited directly, and a part with neither
// becomes Constant(value="").
VISIT(StringContents) {
  auto string_text = ctx->STRING_TEXT();
  if (string_text) {
    string text = parse_string_text_ctx(string_text, true);
    RETURN_NEW_AST_NODE("Constant", "{s:s#}", "value", text.data(), text.size());
  }
  auto column_expr = ctx->columnExpr();
  if (column_expr) {
    return visit(column_expr);
  }
  RETURN_NEW_AST_NODE("Constant", "{s:s}", "value", "");
}
// Converts one part of a full template string: literal text becomes a `Constant` (text processed by
// parse_string_text_ctx), an embedded column expression is visited directly, and a part with neither
// becomes Constant(value="").
VISIT(StringContentsFull) {
  auto full_string_text = ctx->FULL_STRING_TEXT();
  if (full_string_text) {
    string text = parse_string_text_ctx(full_string_text, false);
    RETURN_NEW_AST_NODE("Constant", "{s:s#}", "value", text.data(), text.size());
  }
  auto column_expr = ctx->columnExpr();
  if (column_expr) {
    return visit(column_expr);
  }
  RETURN_NEW_AST_NODE("Constant", "{s:s}", "value", "");
}
};
// ANTLR error listener that turns every reported syntax error into a thrown SyntaxError.
// The reported range starts at the offset of the offending line/column within `input` (0 when that line
// cannot be located) and ends at the end of `input`.
class HogQLErrorListener : public antlr4::BaseErrorListener {
 public:
  // The text being parsed; used to translate line/column pairs into offsets.
  string input;

  HogQLErrorListener(string input) : input(input) {}

  void syntaxError(
      antlr4::Recognizer* recognizer,
      antlr4::Token* offendingSymbol,
      size_t line,
      size_t charPositionInLine,
      const string& msg,
      exception_ptr e
  ) override {
    const size_t offset = getPosition(line, charPositionInLine);
    throw SyntaxError(msg, offset == string::npos ? 0 : offset, input.size());
  }

 private:
  // Translates (line, column) into an offset into `input` by skipping `line - 1` newlines.
  // Returns string::npos when `input` has fewer lines than requested.
  // NOTE(review): assumes `line` is 1-based, as the `line - 1` loop bound suggests — confirm.
  size_t getPosition(size_t line, size_t column) {
    size_t line_start = 0;
    for (size_t lines_to_skip = line - 1; lines_to_skip > 0; lines_to_skip--) {
      const size_t newline_at = input.find("\n", line_start);
      if (newline_at == string::npos) {
        return string::npos;
      }
      line_start = newline_at + 1;
    }
    return line_start + column;
  }
};
// MODULE STATE
// Returns the per-module state block of `module`, typed as parser_state.
parser_state* get_module_state(PyObject* module) {
  void* raw_state = PyModule_GetState(module);
  return static_cast<parser_state*>(raw_state);
}
// MODULE METHODS
// Generates `method_parse_<SNAKE_CASE>`, a module method taking (input: str, is_internal: bool = False).
// It lexes and parses `input` with the grammar rule `CAMEL_CASE`, then hands the resulting
// `PASCAL_CASE` parse tree to HogQLParseTreeConverter::visitAsPyObjectFinal and returns its result.
//
// Error paths visible here (each returns NULL after freeing the ANTLR objects):
//   - SyntaxError thrown while parsing  -> raised via HANDLE_HOGQL_ERROR
//   - antlr4::EmptyStackException       -> errors_module.SyntaxError("Unmatched curly bracket")
//   - any other exception while parsing -> errors_module.ParsingError
#define METHOD_PARSE_NODE(PASCAL_CASE, CAMEL_CASE, SNAKE_CASE) \
  static PyObject* method_parse_##SNAKE_CASE(PyObject* self, PyObject* args, PyObject* kwargs) { \
    parser_state* state = get_module_state(self); \
    const char* str; \
    int internal = 0; \
    static const char* kwlist[] = {"input", "is_internal", NULL}; \
    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|p", (char**)kwlist, &str, &internal)) { \
      return NULL; \
    } \
    /* strnlen caps the length handed to the input stream at 65536 bytes */ \
    auto input_stream = new antlr4::ANTLRInputStream(str, strnlen(str, 65536)); \
    auto lexer = new HogQLLexer(input_stream); \
    auto stream = new antlr4::CommonTokenStream(lexer); \
    auto parser = new HogQLParser(stream); \
    parser->removeErrorListeners(); \
    auto error_listener = new HogQLErrorListener(str); \
    parser->addErrorListener(error_listener); \
    HogQLParser::PASCAL_CASE##Context* parse_tree; \
    try { \
      parse_tree = parser->CAMEL_CASE(); \
    } catch HANDLE_HOGQL_ERROR(SyntaxError, delete error_listener; delete parser; delete stream; delete lexer; \
                               delete input_stream;) \
    catch (const antlr4::EmptyStackException& e) { \
      delete error_listener; delete parser; delete stream; delete lexer; delete input_stream; \
      PyObject* error_type = PyObject_GetAttrString(state->errors_module, "SyntaxError"); \
      if (error_type) { \
        PyErr_SetString(error_type, "Unmatched curly bracket"); \
        /* PyObject_GetAttrString returned a new reference and PyErr_SetString does not steal it */ \
        Py_DECREF(error_type); \
      } \
      return NULL; \
    } catch (...) { \
      delete error_listener; delete parser; delete stream; delete lexer; delete input_stream; \
      PyObject* error_type = PyObject_GetAttrString(state->errors_module, "ParsingError"); \
      if (error_type) { \
        PyErr_SetString(error_type, "Unexpected Antlr exception in C++ parser"); \
        /* PyObject_GetAttrString returned a new reference and PyErr_SetString does not steal it */ \
        Py_DECREF(error_type); \
      } \
      return NULL; \
    } \
    HogQLParseTreeConverter converter = HogQLParseTreeConverter(state, internal == 1); \
    PyObject* result_node = converter.visitAsPyObjectFinal(parse_tree); \
    delete error_listener; \
    delete parser; \
    delete stream; \
    delete lexer; \
    delete input_stream; \
    return result_node; \
  }
// Instantiate one `method_parse_*` entry point per supported grammar root:
// (parse tree context prefix, parser rule method, suffix of the generated C function name).
METHOD_PARSE_NODE(Expr, expr, expr)
METHOD_PARSE_NODE(OrderExpr, orderExpr, order_expr)
METHOD_PARSE_NODE(Select, select, select)
METHOD_PARSE_NODE(FullTemplateString, fullTemplateString, full_template_string)
METHOD_PARSE_NODE(Program, program, program)
// The generator macro is only needed for the instantiations above.
#undef METHOD_PARSE_NODE
// Module method `parse_string_literal_text(text: str) -> str`.
// Runs parse_string_literal_text on the argument and returns the result as a new Python str.
// A SyntaxError thrown by the helper is converted into a Python exception via HANDLE_HOGQL_ERROR,
// in which case NULL is returned.
static PyObject* method_parse_string_literal_text(PyObject* self, PyObject* args) {
  // `state` is referenced by name inside HANDLE_HOGQL_ERROR.
  parser_state* state = get_module_state(self);
  const char* quoted_text;
  if (!PyArg_ParseTuple(args, "s", &quoted_text)) {
    return NULL;
  }
  string unquoted;
  try {
    unquoted = parse_string_literal_text(quoted_text);
  } catch HANDLE_HOGQL_ERROR(SyntaxError, );
  return PyUnicode_FromStringAndSize(unquoted.data(), unquoted.size());
}
// MODULE SETUP
// Method table of the `hogql_parser` module.
// The `parse_*` entries accept positional and keyword arguments (they are generated by METHOD_PARSE_NODE
// with a kwargs parameter, hence the PyCFunction cast); `parse_string_literal_text` is positional-only.
// The table is terminated by an all-NULL sentinel entry.
static PyMethodDef parser_methods[] = {
    {.ml_name = "parse_expr",
     .ml_meth = (PyCFunction)method_parse_expr,
     .ml_flags = METH_VARARGS | METH_KEYWORDS,
     .ml_doc = "Parse the HogQL expression string into an AST"},
    {.ml_name = "parse_order_expr",
     .ml_meth = (PyCFunction)method_parse_order_expr,
     .ml_flags = METH_VARARGS | METH_KEYWORDS,
     .ml_doc = "Parse the ORDER BY clause string into an AST"},
    {.ml_name = "parse_select",
     .ml_meth = (PyCFunction)method_parse_select,
     .ml_flags = METH_VARARGS | METH_KEYWORDS,
     .ml_doc = "Parse the HogQL SELECT statement string into an AST"},
    {.ml_name = "parse_full_template_string",
     .ml_meth = (PyCFunction)method_parse_full_template_string,
     .ml_flags = METH_VARARGS | METH_KEYWORDS,
     .ml_doc = "Parse a Hog template string into an AST"},
    {.ml_name = "parse_program",
     .ml_meth = (PyCFunction)method_parse_program,
     .ml_flags = METH_VARARGS | METH_KEYWORDS,
     .ml_doc = "Parse a Hog program into an AST"},
    {.ml_name = "parse_string_literal_text",
     .ml_meth = method_parse_string_literal_text,
     .ml_flags = METH_VARARGS,
     .ml_doc = "Unquote the string (an identifier or a string literal)"},
    {NULL, NULL, 0, NULL}
};
// Py_mod_exec slot: imports the Python modules the parser depends on and stores them in the module state.
// Imports happen in the order ast, base, errors. Returns 0 on success, or -1 as soon as one import fails.
static int parser_modexec(PyObject* module) {
  parser_state* state = get_module_state(module);
  const struct {
    PyObject** slot;
    const char* dotted_name;
  } required_modules[] = {
      {&state->ast_module, "posthog.hogql.ast"},
      {&state->base_module, "posthog.hogql.base"},
      {&state->errors_module, "posthog.hogql.errors"},
  };
  for (const auto& required : required_modules) {
    *required.slot = PyImport_ImportModule(required.dotted_name);
    if (!*required.slot) {
      return -1;
    }
  }
  return 0;
}
// Slot table of the module definition: a single Py_mod_exec slot followed by the zero sentinel.
// If Python were written in C++, then Py_mod_exec would be typed better, but because it's in C, it expects
// a void pointer.
static PyModuleDef_Slot parser_slots[] = {
    {Py_mod_exec, (void*)parser_modexec},
    {0, nullptr}
};
// m_traverse hook: visits every Python module reference held in the module state.
// The parameters must keep the names `visit` and `arg` because Py_VISIT refers to them by name.
static int parser_traverse(PyObject* module, visitproc visit, void* arg) {
  parser_state* module_state = get_module_state(module);
  Py_VISIT(module_state->ast_module);
  Py_VISIT(module_state->base_module);
  Py_VISIT(module_state->errors_module);
  return 0;
}
// m_clear hook: drops every Python module reference held in the module state. Always returns 0.
static int parser_clear(PyObject* module) {
  parser_state* module_state = get_module_state(module);
  Py_CLEAR(module_state->ast_module);
  Py_CLEAR(module_state->base_module);
  Py_CLEAR(module_state->errors_module);
  return 0;
}
// Definition of the `hogql_parser` extension module: per-module state of type parser_state, the method
// table, the exec slot, and the traverse/clear hooks for the references kept in that state.
static struct PyModuleDef parser = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "hogql_parser",
.m_doc = "HogQL parsing",
// Size of the per-module state block returned by PyModule_GetState (see get_module_state).
.m_size = sizeof(parser_state),
.m_methods = parser_methods,
.m_slots = parser_slots,
.m_traverse = parser_traverse,
.m_clear = parser_clear,
};
// Module entry point looked up by the Python import system; returns the result of initializing the
// `parser` module definition.
PyMODINIT_FUNC PyInit_hogql_parser(void) {
  PyObject* module_def = PyModuleDef_Init(&parser);
  return module_def;
}