mirror of
https://github.com/python/cpython.git
synced 2024-11-22 05:26:10 +01:00
01481f2dc1
* The lexer, which includes the actual lexeme-producing logic, goes into the `lexer` directory. * The wrappers, one wrapper per input mode (file, string, utf-8, and readline), go into the `tokenizer` directory and include logic for creating a lexer instance and managing the buffer for different modes. --------- Co-authored-by: Pablo Galindo <pablogsal@gmail.com> Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
135 lines
3.8 KiB
C
135 lines
3.8 KiB
C
#include "Python.h"
|
|
#include "errcode.h"
|
|
|
|
#include "helpers.h"
|
|
#include "../lexer/lexer.h"
|
|
#include "../lexer/state.h"
|
|
#include "../lexer/buffer.h"
|
|
|
|
static int
|
|
tok_readline_string(struct tok_state* tok) {
|
|
PyObject* line = NULL;
|
|
PyObject* raw_line = PyObject_CallNoArgs(tok->readline);
|
|
if (raw_line == NULL) {
|
|
if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
|
|
PyErr_Clear();
|
|
return 1;
|
|
}
|
|
_PyTokenizer_error_ret(tok);
|
|
goto error;
|
|
}
|
|
if(tok->encoding != NULL) {
|
|
if (!PyBytes_Check(raw_line)) {
|
|
PyErr_Format(PyExc_TypeError, "readline() returned a non-bytes object");
|
|
_PyTokenizer_error_ret(tok);
|
|
goto error;
|
|
}
|
|
line = PyUnicode_Decode(PyBytes_AS_STRING(raw_line), PyBytes_GET_SIZE(raw_line),
|
|
tok->encoding, "replace");
|
|
Py_CLEAR(raw_line);
|
|
if (line == NULL) {
|
|
_PyTokenizer_error_ret(tok);
|
|
goto error;
|
|
}
|
|
} else {
|
|
if(!PyUnicode_Check(raw_line)) {
|
|
PyErr_Format(PyExc_TypeError, "readline() returned a non-string object");
|
|
_PyTokenizer_error_ret(tok);
|
|
goto error;
|
|
}
|
|
line = raw_line;
|
|
raw_line = NULL;
|
|
}
|
|
Py_ssize_t buflen;
|
|
const char* buf = PyUnicode_AsUTF8AndSize(line, &buflen);
|
|
if (buf == NULL) {
|
|
_PyTokenizer_error_ret(tok);
|
|
goto error;
|
|
}
|
|
|
|
// Make room for the null terminator *and* potentially
|
|
// an extra newline character that we may need to artificially
|
|
// add.
|
|
size_t buffer_size = buflen + 2;
|
|
if (!_PyLexer_tok_reserve_buf(tok, buffer_size)) {
|
|
goto error;
|
|
}
|
|
memcpy(tok->inp, buf, buflen);
|
|
tok->inp += buflen;
|
|
*tok->inp = '\0';
|
|
|
|
tok->line_start = tok->cur;
|
|
Py_DECREF(line);
|
|
return 1;
|
|
error:
|
|
Py_XDECREF(raw_line);
|
|
Py_XDECREF(line);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
tok_underflow_readline(struct tok_state* tok) {
|
|
assert(tok->decoding_state == STATE_NORMAL);
|
|
assert(tok->fp == NULL && tok->input == NULL && tok->decoding_readline == NULL);
|
|
if (tok->start == NULL && !INSIDE_FSTRING(tok)) {
|
|
tok->cur = tok->inp = tok->buf;
|
|
}
|
|
if (!tok_readline_string(tok)) {
|
|
return 0;
|
|
}
|
|
if (tok->inp == tok->cur) {
|
|
tok->done = E_EOF;
|
|
return 0;
|
|
}
|
|
tok->implicit_newline = 0;
|
|
if (tok->inp[-1] != '\n') {
|
|
assert(tok->inp + 1 < tok->end);
|
|
/* Last line does not end in \n, fake one */
|
|
*tok->inp++ = '\n';
|
|
*tok->inp = '\0';
|
|
tok->implicit_newline = 1;
|
|
}
|
|
|
|
if (tok->tok_mode_stack_index && !_PyLexer_update_fstring_expr(tok, 0)) {
|
|
return 0;
|
|
}
|
|
|
|
ADVANCE_LINENO();
|
|
/* The default encoding is UTF-8, so make sure we don't have any
|
|
non-UTF-8 sequences in it. */
|
|
if (!tok->encoding && !_PyTokenizer_ensure_utf8(tok->cur, tok)) {
|
|
_PyTokenizer_error_ret(tok);
|
|
return 0;
|
|
}
|
|
assert(tok->done == E_OK);
|
|
return tok->done == E_OK;
|
|
}
|
|
|
|
struct tok_state *
|
|
_PyTokenizer_FromReadline(PyObject* readline, const char* enc,
|
|
int exec_input, int preserve_crlf)
|
|
{
|
|
struct tok_state *tok = _PyTokenizer_tok_new();
|
|
if (tok == NULL)
|
|
return NULL;
|
|
if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {
|
|
_PyTokenizer_Free(tok);
|
|
return NULL;
|
|
}
|
|
tok->cur = tok->inp = tok->buf;
|
|
tok->end = tok->buf + BUFSIZ;
|
|
tok->fp = NULL;
|
|
if (enc != NULL) {
|
|
tok->encoding = _PyTokenizer_new_string(enc, strlen(enc), tok);
|
|
if (!tok->encoding) {
|
|
_PyTokenizer_Free(tok);
|
|
return NULL;
|
|
}
|
|
}
|
|
tok->decoding_state = STATE_NORMAL;
|
|
tok->underflow = &tok_underflow_readline;
|
|
Py_INCREF(readline);
|
|
tok->readline = readline;
|
|
return tok;
|
|
}
|