* The lexer, which includes the actual lexeme-producing logic, goes into the `lexer` directory.
* The wrappers, one per input mode (file, string, UTF-8, and readline), go into the `tokenizer` directory and include the logic for creating a lexer instance and managing the buffer for the different modes.

Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
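For orientation, here is a minimal sketch (assuming the declarations from `state.h`) of how an input-mode wrapper under the `tokenizer` directory is expected to drive the state helpers defined in this file. The function `example_tokenizer_setup` is hypothetical and only illustrates the lifecycle; `_PyTokenizer_tok_new()` and `_PyTokenizer_Free()` are the helpers defined below.

/* Hypothetical wrapper (illustration only, not the actual CPython code
   under Parser/tokenizer/): allocate the shared lexer state and rely on
   _PyTokenizer_Free() for teardown on every exit path. */
static struct tok_state *
example_tokenizer_setup(void)
{
    struct tok_state *tok = _PyTokenizer_tok_new();  /* defaults set below */
    if (tok == NULL) {
        return NULL;                                 /* allocation failed */
    }
    /* A real wrapper would now attach its input source (file pointer,
       string buffer, or readline object) and an underflow callback before
       handing `tok` to the lexer.  On any later error path it must call
       _PyTokenizer_Free(tok), which releases the buffers, the encoding
       string, and any buffered f-string expressions. */
    return tok;
}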
#include "Python.h"
|
|
#include "pycore_pystate.h"
|
|
#include "pycore_token.h"
|
|
#include "errcode.h"
|
|
|
|
#include "state.h"
|
|
|
|
/* Never change this */
|
|
#define TABSIZE 8
|
|
|
|
/* Create and initialize a new tok_state structure */
|
|
struct tok_state *
|
|
_PyTokenizer_tok_new(void)
|
|
{
|
|
struct tok_state *tok = (struct tok_state *)PyMem_Malloc(
|
|
sizeof(struct tok_state));
|
|
if (tok == NULL)
|
|
return NULL;
|
|
tok->buf = tok->cur = tok->inp = NULL;
|
|
tok->fp_interactive = 0;
|
|
tok->interactive_src_start = NULL;
|
|
tok->interactive_src_end = NULL;
|
|
tok->start = NULL;
|
|
tok->end = NULL;
|
|
tok->done = E_OK;
|
|
tok->fp = NULL;
|
|
tok->input = NULL;
|
|
tok->tabsize = TABSIZE;
|
|
tok->indent = 0;
|
|
tok->indstack[0] = 0;
|
|
tok->atbol = 1;
|
|
tok->pendin = 0;
|
|
tok->prompt = tok->nextprompt = NULL;
|
|
tok->lineno = 0;
|
|
tok->starting_col_offset = -1;
|
|
tok->col_offset = -1;
|
|
tok->level = 0;
|
|
tok->altindstack[0] = 0;
|
|
tok->decoding_state = STATE_INIT;
|
|
tok->decoding_erred = 0;
|
|
tok->enc = NULL;
|
|
tok->encoding = NULL;
|
|
tok->cont_line = 0;
|
|
tok->filename = NULL;
|
|
tok->decoding_readline = NULL;
|
|
tok->decoding_buffer = NULL;
|
|
tok->readline = NULL;
|
|
tok->type_comments = 0;
|
|
tok->interactive_underflow = IUNDERFLOW_NORMAL;
|
|
tok->underflow = NULL;
|
|
tok->str = NULL;
|
|
tok->report_warnings = 1;
|
|
tok->tok_extra_tokens = 0;
|
|
tok->comment_newline = 0;
|
|
tok->implicit_newline = 0;
|
|
tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0};
|
|
tok->tok_mode_stack_index = 0;
|
|
#ifdef Py_DEBUG
|
|
tok->debug = _Py_GetConfig()->parser_debug;
|
|
#endif
|
|
return tok;
|
|
}
|
|
|
|
static void
|
|
free_fstring_expressions(struct tok_state *tok)
|
|
{
|
|
int index;
|
|
tokenizer_mode *mode;
|
|
|
|
for (index = tok->tok_mode_stack_index; index >= 0; --index) {
|
|
mode = &(tok->tok_mode_stack[index]);
|
|
if (mode->last_expr_buffer != NULL) {
|
|
PyMem_Free(mode->last_expr_buffer);
|
|
mode->last_expr_buffer = NULL;
|
|
mode->last_expr_size = 0;
|
|
mode->last_expr_end = -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Free a tok_state structure */
|
|
void
|
|
_PyTokenizer_Free(struct tok_state *tok)
|
|
{
|
|
if (tok->encoding != NULL) {
|
|
PyMem_Free(tok->encoding);
|
|
}
|
|
Py_XDECREF(tok->decoding_readline);
|
|
Py_XDECREF(tok->decoding_buffer);
|
|
Py_XDECREF(tok->readline);
|
|
Py_XDECREF(tok->filename);
|
|
if ((tok->readline != NULL || tok->fp != NULL ) && tok->buf != NULL) {
|
|
PyMem_Free(tok->buf);
|
|
}
|
|
if (tok->input) {
|
|
PyMem_Free(tok->input);
|
|
}
|
|
if (tok->interactive_src_start != NULL) {
|
|
PyMem_Free(tok->interactive_src_start);
|
|
}
|
|
free_fstring_expressions(tok);
|
|
PyMem_Free(tok);
|
|
}
|
|
|
|
void
|
|
_PyToken_Free(struct token *token) {
|
|
Py_XDECREF(token->metadata);
|
|
}
|
|
|
|
void
|
|
_PyToken_Init(struct token *token) {
|
|
token->metadata = NULL;
|
|
}
|
|
|
|
int
|
|
_PyLexer_type_comment_token_setup(struct tok_state *tok, struct token *token, int type, int col_offset,
|
|
int end_col_offset, const char *start, const char *end)
|
|
{
|
|
token->level = tok->level;
|
|
token->lineno = token->end_lineno = tok->lineno;
|
|
token->col_offset = col_offset;
|
|
token->end_col_offset = end_col_offset;
|
|
token->start = start;
|
|
token->end = end;
|
|
return type;
|
|
}
|
|
|
|
int
|
|
_PyLexer_token_setup(struct tok_state *tok, struct token *token, int type, const char *start, const char *end)
|
|
{
|
|
assert((start == NULL && end == NULL) || (start != NULL && end != NULL));
|
|
token->level = tok->level;
|
|
if (ISSTRINGLIT(type)) {
|
|
token->lineno = tok->first_lineno;
|
|
}
|
|
else {
|
|
token->lineno = tok->lineno;
|
|
}
|
|
token->end_lineno = tok->lineno;
|
|
token->col_offset = token->end_col_offset = -1;
|
|
token->start = start;
|
|
token->end = end;
|
|
|
|
if (start != NULL && end != NULL) {
|
|
token->col_offset = tok->starting_col_offset;
|
|
token->end_col_offset = tok->col_offset;
|
|
}
|
|
return type;
|
|
}
|