cpython/Python/clinic/Python-tokenize.c.h

/*[clinic input]
preserve
[clinic start generated code]*/

#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
#  include "pycore_gc.h"          // PyGC_Head
#  include "pycore_runtime.h"     // _Py_ID()
#endif
#include "pycore_modsupport.h"    // _PyArg_UnpackKeywords()

/* Forward declaration of the hand-written implementation.  The generated
 * wrapper tokenizeriter_new() below calls it with already-converted
 * arguments: the `readline` object as received, `extra_tokens` as a C
 * truth value, and `encoding` as a C string, or NULL when no encoding
 * argument was supplied.  The definition is not part of this header. */
static PyObject *
tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,
                       int extra_tokens, const char *encoding);

/* Argument-parsing wrapper emitted by Argument Clinic (see the
 * "[clinic start generated code]" marker above).
 *
 * Unpacks `args`/`kwargs` through _PyArg_UnpackKeywords(), converts each
 * value to its C representation and forwards the result to
 * tokenizeriter_new_impl().
 *
 * Returns whatever tokenizeriter_new_impl() returns, or NULL when
 * unpacking or conversion fails.  On the embedded-NUL path a ValueError
 * is set explicitly; on the other failure paths the failing helper is
 * presumably responsible for setting the exception.
 *
 * NOTE: this is generated code; hand edits are expected to be replaced
 * the next time the generator is run. */
static PyObject *
tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
    PyObject *return_value = NULL;
    // Core (non-module) builds get a statically initialised keyword tuple.
    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)

    #define NUM_KEYWORDS 2
    /* Static object laid out as a GC header followed by a variable-size
     * object header and NUM_KEYWORDS item slots, typed as a tuple via
     * PyTuple_Type. */
    static struct {
        PyGC_Head _this_is_not_used;
        PyObject_VAR_HEAD
        PyObject *ob_item[NUM_KEYWORDS];
    } _kwtuple = {
        // No comma is written after the macro call here.
        // NOTE(review): presumably the macro expansion supplies it - confirm.
        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
        .ob_item = { &_Py_ID(extra_tokens), &_Py_ID(encoding), },
    };
    #undef NUM_KEYWORDS
    #define KWTUPLE (&_kwtuple.ob_base.ob_base)

    #else  // !Py_BUILD_CORE
    // No static tuple in this configuration; the parser receives NULL.
    // NOTE(review): presumably the tuple is then built at runtime - confirm.
    #  define KWTUPLE NULL
    #endif  // !Py_BUILD_CORE

    // NULL-terminated keyword list.  The empty first entry presumably marks
    // the positional-only `readline` slot - confirm against getargs.c.
    static const char * const _keywords[] = {"", "extra_tokens", "encoding", NULL};
    static _PyArg_Parser _parser = {
        .keywords = _keywords,
        .fname = "tokenizeriter",
        .kwtuple = KWTUPLE,
    };
    #undef KWTUPLE
    // One slot per entry in _keywords: readline, extra_tokens, encoding.
    PyObject *argsbuf[3];
    PyObject * const *fastargs;
    Py_ssize_t nargs = PyTuple_GET_SIZE(args);
    // Number of supplied arguments (positional + keyword) beyond the first
    // two; used below to decide whether `encoding` needs converting.
    Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 2;
    PyObject *readline;
    int extra_tokens;
    // Stays NULL unless an `encoding` argument is converted below.
    const char *encoding = NULL;

    // NOTE(review): the trailing 1, 1, 1 are presumably minpos / maxpos /
    // minkw - confirm against the _PyArg_UnpackKeywords() declaration.
    fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 1, 1, argsbuf);
    if (!fastargs) {
        goto exit;
    }
    readline = fastargs[0];
    // Truthiness conversion; a negative result is treated as an error.
    extra_tokens = PyObject_IsTrue(fastargs[1]);
    if (extra_tokens < 0) {
        goto exit;
    }
    // Nothing beyond the first two arguments: leave `encoding` as NULL.
    if (!noptargs) {
        goto skip_optional_kwonly;
    }
    // `encoding` must be a str object.
    if (!PyUnicode_Check(fastargs[2])) {
        _PyArg_BadArgument("tokenizeriter", "argument 'encoding'", "str", fastargs[2]);
        goto exit;
    }
    Py_ssize_t encoding_length;
    encoding = PyUnicode_AsUTF8AndSize(fastargs[2], &encoding_length);
    if (encoding == NULL) {
        goto exit;
    }
    // strlen() stops at the first NUL byte, so a mismatch with the reported
    // length means the string contains an embedded NUL; reject it.
    if (strlen(encoding) != (size_t)encoding_length) {
        PyErr_SetString(PyExc_ValueError, "embedded null character");
        goto exit;
    }
skip_optional_kwonly:
    return_value = tokenizeriter_new_impl(type, readline, extra_tokens, encoding);

    // Single exit point: return_value is still NULL on every failure path.
exit:
    return return_value;
}
/*[clinic end generated code: output=dcd6ec48f06a092e input=a9049054013a1b77]*/
Add tests for the C tokenizer and expose it as a private module (GH-27924) 2021-08-24 18:50:05 +02:00			`/*[clinic input]`
			`preserve`
			`[clinic start generated code]*/`

gh-90928: Statically Initialize the Keywords Tuple in Clinic-Generated Code (gh-95860) We only statically initialize for core code and builtin modules. Extension modules still create the tuple at runtime. We'll solve that part of interpreter isolation separately. This change includes generated code. The non-generated changes are in: * Tools/clinic/clinic.py * Python/getargs.c * Include/cpython/modsupport.h * Makefile.pre.in (re-generate global strings after running clinic) * very minor tweaks to Modules/_codecsmodule.c and Python/Python-tokenize.c All other changes are generated code (clinic, global strings). 2022-08-11 23:25:49 +02:00			`#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)`
gh-107603: Argument Clinic: Only include pycore_gc.h if needed (#108726) Argument Clinic now only includes pycore_gc.h if PyGC_Head is needed, and only includes pycore_runtime.h if _Py_ID() is needed. * Add 'condition' optional argument to Clinic.add_include(). * deprecate_keyword_use() includes pycore_runtime.h when using the _PyID() function. * Fix rendering of includes: comments start at the column 35. * Mark PC/clinic/_wmimodule.cpp.h and "Objects/stringlib/clinic/.h.h" header files as generated in .gitattributes. Effects: 42 header files generated by AC no longer include the internal C API, instead of 4 header files before. For example, Modules/clinic/_abc.c.h no longer includes the internal C API. * Fix _testclinic_depr.c.h: it now always includes pycore_runtime.h to get _Py_ID(). 2023-08-31 23:42:34 +02:00			`# include "pycore_gc.h" // PyGC_Head`
			`# include "pycore_runtime.h" // _Py_ID()`
gh-90928: Statically Initialize the Keywords Tuple in Clinic-Generated Code (gh-95860) We only statically initialize for core code and builtin modules. Extension modules still create the tuple at runtime. We'll solve that part of interpreter isolation separately. This change includes generated code. The non-generated changes are in: * Tools/clinic/clinic.py * Python/getargs.c * Include/cpython/modsupport.h * Makefile.pre.in (re-generate global strings after running clinic) * very minor tweaks to Modules/_codecsmodule.c and Python/Python-tokenize.c All other changes are generated code (clinic, global strings). 2022-08-11 23:25:49 +02:00			`#endif`
gh-110964: Remove private _PyArg functions (#110966) Move the following private functions and structures to pycore_modsupport.h internal C API: * _PyArg_BadArgument() * _PyArg_CheckPositional() * _PyArg_NoKeywords() * _PyArg_NoPositional() * _PyArg_ParseStack() * _PyArg_ParseStackAndKeywords() * _PyArg_Parser structure * _PyArg_UnpackKeywords() * _PyArg_UnpackKeywordsWithVararg() * _PyArg_UnpackStack() * _Py_ANY_VARARGS() Changes: * Python/getargs.h now includes pycore_modsupport.h to export functions. * clinic.py now adds pycore_modsupport.h when one of these functions is used. * Add pycore_modsupport.h includes when a C extension uses one of these functions. * Define Py_BUILD_CORE_MODULE in C extensions which now include directly or indirectly (via code generated by Argument Clinic) pycore_modsupport.h: * _csv * _curses_panel * _dbm * _gdbm * _multiprocessing.posixshmem * _sqlite.row * _statistics * grp * resource * syslog * _testcapi: bad_get() no longer uses METH_FASTCALL calling convention but METH_VARARGS. Replace _PyArg_UnpackStack() with PyArg_ParseTuple(). * _testcapi: add PYTESTCAPI_NEED_INTERNAL_API macro which is defined by _testcapi sub-modules which need the internal C API (pycore_modsupport.h): exceptions.c, float.c, vectorcall.c, watchers.c. * Remove Include/cpython/modsupport.h header file. Include/modsupport.h no longer includes the removed header file. * Fix mypy clinic.py 2023-10-17 14:30:31 +02:00			`#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()`
gh-90928: Statically Initialize the Keywords Tuple in Clinic-Generated Code (gh-95860) We only statically initialize for core code and builtin modules. Extension modules still create the tuple at runtime. We'll solve that part of interpreter isolation separately. This change includes generated code. The non-generated changes are in: * Tools/clinic/clinic.py * Python/getargs.c * Include/cpython/modsupport.h * Makefile.pre.in (re-generate global strings after running clinic) * very minor tweaks to Modules/_codecsmodule.c and Python/Python-tokenize.c All other changes are generated code (clinic, global strings). 2022-08-11 23:25:49 +02:00
Add tests for the C tokenizer and expose it as a private module (GH-27924) 2021-08-24 18:50:05 +02:00			`static PyObject *`
gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (#105070) 2023-05-30 23:43:34 +02:00			`tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,`
			`int extra_tokens, const char *encoding);`
Add tests for the C tokenizer and expose it as a private module (GH-27924) 2021-08-24 18:50:05 +02:00
			`static PyObject *`
			`tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)`
			`{`
			`PyObject *return_value = NULL;`
gh-90928: Improve static initialization of keywords tuple in AC (#95907) 2022-08-13 12:09:40 +02:00			`#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)`
gh-90928: Statically Initialize the Keywords Tuple in Clinic-Generated Code (gh-95860) We only statically initialize for core code and builtin modules. Extension modules still create the tuple at runtime. We'll solve that part of interpreter isolation separately. This change includes generated code. The non-generated changes are in: * Tools/clinic/clinic.py * Python/getargs.c * Include/cpython/modsupport.h * Makefile.pre.in (re-generate global strings after running clinic) * very minor tweaks to Modules/_codecsmodule.c and Python/Python-tokenize.c All other changes are generated code (clinic, global strings). 2022-08-11 23:25:49 +02:00
gh-102856: Python tokenizer implementation for PEP 701 (#104323) This commit replaces the Python implementation of the tokenize module with an implementation that reuses the real C tokenizer via a private extension module. The tokenize module now implements a compatibility layer that transforms tokens from the C tokenizer into Python tokenize tokens for backward compatibility. As the C tokenizer does not emit some tokens that the Python tokenizer provides (such as comments and non-semantic newlines), a new special mode has been added to the C tokenizer mode that currently is only used via the extension module that exposes it to the Python layer. This new mode forces the C tokenizer to emit these new extra tokens and add the appropriate metadata that is needed to match the old Python implementation. Co-authored-by: Pablo Galindo <pablogsal@gmail.com> 2023-05-21 02:03:02 +02:00			`#define NUM_KEYWORDS 2`
gh-90928: Statically Initialize the Keywords Tuple in Clinic-Generated Code (gh-95860) We only statically initialize for core code and builtin modules. Extension modules still create the tuple at runtime. We'll solve that part of interpreter isolation separately. This change includes generated code. The non-generated changes are in: * Tools/clinic/clinic.py * Python/getargs.c * Include/cpython/modsupport.h * Makefile.pre.in (re-generate global strings after running clinic) * very minor tweaks to Modules/_codecsmodule.c and Python/Python-tokenize.c All other changes are generated code (clinic, global strings). 2022-08-11 23:25:49 +02:00			`static struct {`
			`PyGC_Head _this_is_not_used;`
			`PyObject_VAR_HEAD`
			`PyObject *ob_item[NUM_KEYWORDS];`
			`} _kwtuple = {`
			`.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)`
gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (#105070) 2023-05-30 23:43:34 +02:00			`.ob_item = { &_Py_ID(extra_tokens), &_Py_ID(encoding), },`
gh-90928: Statically Initialize the Keywords Tuple in Clinic-Generated Code (gh-95860) We only statically initialize for core code and builtin modules. Extension modules still create the tuple at runtime. We'll solve that part of interpreter isolation separately. This change includes generated code. The non-generated changes are in: * Tools/clinic/clinic.py * Python/getargs.c * Include/cpython/modsupport.h * Makefile.pre.in (re-generate global strings after running clinic) * very minor tweaks to Modules/_codecsmodule.c and Python/Python-tokenize.c All other changes are generated code (clinic, global strings). 2022-08-11 23:25:49 +02:00			`};`
			`#undef NUM_KEYWORDS`
gh-90928: Improve static initialization of keywords tuple in AC (#95907) 2022-08-13 12:09:40 +02:00			`#define KWTUPLE (&_kwtuple.ob_base.ob_base)`

			`#else // !Py_BUILD_CORE`
			`# define KWTUPLE NULL`
			`#endif // !Py_BUILD_CORE`
gh-90928: Statically Initialize the Keywords Tuple in Clinic-Generated Code (gh-95860) We only statically initialize for core code and builtin modules. Extension modules still create the tuple at runtime. We'll solve that part of interpreter isolation separately. This change includes generated code. The non-generated changes are in: * Tools/clinic/clinic.py * Python/getargs.c * Include/cpython/modsupport.h * Makefile.pre.in (re-generate global strings after running clinic) * very minor tweaks to Modules/_codecsmodule.c and Python/Python-tokenize.c All other changes are generated code (clinic, global strings). 2022-08-11 23:25:49 +02:00
gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (#105070) 2023-05-30 23:43:34 +02:00			`static const char * const _keywords[] = {"", "extra_tokens", "encoding", NULL};`
gh-90928: Statically Initialize the Keywords Tuple in Clinic-Generated Code (gh-95860) We only statically initialize for core code and builtin modules. Extension modules still create the tuple at runtime. We'll solve that part of interpreter isolation separately. This change includes generated code. The non-generated changes are in: * Tools/clinic/clinic.py * Python/getargs.c * Include/cpython/modsupport.h * Makefile.pre.in (re-generate global strings after running clinic) * very minor tweaks to Modules/_codecsmodule.c and Python/Python-tokenize.c All other changes are generated code (clinic, global strings). 2022-08-11 23:25:49 +02:00			`static _PyArg_Parser _parser = {`
			`.keywords = _keywords,`
			`.fname = "tokenizeriter",`
			`.kwtuple = KWTUPLE,`
			`};`
			`#undef KWTUPLE`
gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (#105070) 2023-05-30 23:43:34 +02:00			`PyObject *argsbuf[3];`
Add tests for the C tokenizer and expose it as a private module (GH-27924) 2021-08-24 18:50:05 +02:00			`PyObject * const *fastargs;`
			`Py_ssize_t nargs = PyTuple_GET_SIZE(args);`
gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (#105070) 2023-05-30 23:43:34 +02:00			`Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 2;`
			`PyObject *readline;`
gh-102856: Python tokenizer implementation for PEP 701 (#104323) This commit replaces the Python implementation of the tokenize module with an implementation that reuses the real C tokenizer via a private extension module. The tokenize module now implements a compatibility layer that transforms tokens from the C tokenizer into Python tokenize tokens for backward compatibility. As the C tokenizer does not emit some tokens that the Python tokenizer provides (such as comments and non-semantic newlines), a new special mode has been added to the C tokenizer mode that currently is only used via the extension module that exposes it to the Python layer. This new mode forces the C tokenizer to emit these new extra tokens and add the appropriate metadata that is needed to match the old Python implementation. Co-authored-by: Pablo Galindo <pablogsal@gmail.com> 2023-05-21 02:03:02 +02:00			`int extra_tokens;`
gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (#105070) 2023-05-30 23:43:34 +02:00			`const char *encoding = NULL;`
Add tests for the C tokenizer and expose it as a private module (GH-27924) 2021-08-24 18:50:05 +02:00
gh-102856: Python tokenizer implementation for PEP 701 (#104323) This commit replaces the Python implementation of the tokenize module with an implementation that reuses the real C tokenizer via a private extension module. The tokenize module now implements a compatibility layer that transforms tokens from the C tokenizer into Python tokenize tokens for backward compatibility. As the C tokenizer does not emit some tokens that the Python tokenizer provides (such as comments and non-semantic newlines), a new special mode has been added to the C tokenizer mode that currently is only used via the extension module that exposes it to the Python layer. This new mode forces the C tokenizer to emit these new extra tokens and add the appropriate metadata that is needed to match the old Python implementation. Co-authored-by: Pablo Galindo <pablogsal@gmail.com> 2023-05-21 02:03:02 +02:00			`fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 1, 1, argsbuf);`
Add tests for the C tokenizer and expose it as a private module (GH-27924) 2021-08-24 18:50:05 +02:00			`if (!fastargs) {`
			`goto exit;`
			`}`
gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (#105070) 2023-05-30 23:43:34 +02:00			`readline = fastargs[0];`
			`extra_tokens = PyObject_IsTrue(fastargs[1]);`
			`if (extra_tokens < 0) {`
Add tests for the C tokenizer and expose it as a private module (GH-27924) 2021-08-24 18:50:05 +02:00			`goto exit;`
			`}`
gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (#105070) 2023-05-30 23:43:34 +02:00			`if (!noptargs) {`
			`goto skip_optional_kwonly;`
			`}`
			`if (!PyUnicode_Check(fastargs[2])) {`
			`_PyArg_BadArgument("tokenizeriter", "argument 'encoding'", "str", fastargs[2]);`
Add tests for the C tokenizer and expose it as a private module (GH-27924) 2021-08-24 18:50:05 +02:00			`goto exit;`
			`}`
gh-111089: Revert PyUnicode_AsUTF8() changes (#111833) * Revert "gh-111089: Use PyUnicode_AsUTF8() in Argument Clinic (#111585)" This reverts commit d9b606b3d04fc56fb0bcc479d7d6c14562edb5e2. * Revert "gh-111089: Use PyUnicode_AsUTF8() in getargs.c (#111620)" This reverts commit cde1071b2a72e8261ca66053ef61431b7f3a81fd. * Revert "gh-111089: PyUnicode_AsUTF8() now raises on embedded NUL (#111091)" This reverts commit d731579bfb9a497cfb0076cb6b221058a20088fe. * Revert "gh-111089: Add PyUnicode_AsUTF8() to the limited C API (#111121)" This reverts commit d8f32be5b6a736dc2fc9dca3f1bf176c82fc9b44. * Revert "gh-111089: Use PyUnicode_AsUTF8() in sqlite3 (#111122)" This reverts commit 37e4e20eaa8f27ada926d49e5971fecf0477ad26. 2023-11-07 23:36:13 +01:00			`Py_ssize_t encoding_length;`
			`encoding = PyUnicode_AsUTF8AndSize(fastargs[2], &encoding_length);`
gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (#105070) 2023-05-30 23:43:34 +02:00			`if (encoding == NULL) {`
Add tests for the C tokenizer and expose it as a private module (GH-27924) 2021-08-24 18:50:05 +02:00			`goto exit;`
			`}`
gh-111089: Revert PyUnicode_AsUTF8() changes (#111833) * Revert "gh-111089: Use PyUnicode_AsUTF8() in Argument Clinic (#111585)" This reverts commit d9b606b3d04fc56fb0bcc479d7d6c14562edb5e2. * Revert "gh-111089: Use PyUnicode_AsUTF8() in getargs.c (#111620)" This reverts commit cde1071b2a72e8261ca66053ef61431b7f3a81fd. * Revert "gh-111089: PyUnicode_AsUTF8() now raises on embedded NUL (#111091)" This reverts commit d731579bfb9a497cfb0076cb6b221058a20088fe. * Revert "gh-111089: Add PyUnicode_AsUTF8() to the limited C API (#111121)" This reverts commit d8f32be5b6a736dc2fc9dca3f1bf176c82fc9b44. * Revert "gh-111089: Use PyUnicode_AsUTF8() in sqlite3 (#111122)" This reverts commit 37e4e20eaa8f27ada926d49e5971fecf0477ad26. 2023-11-07 23:36:13 +01:00			`if (strlen(encoding) != (size_t)encoding_length) {`
			`PyErr_SetString(PyExc_ValueError, "embedded null character");`
			`goto exit;`
			`}`
gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (#105070) 2023-05-30 23:43:34 +02:00			`skip_optional_kwonly:`
			`return_value = tokenizeriter_new_impl(type, readline, extra_tokens, encoding);`
Add tests for the C tokenizer and expose it as a private module (GH-27924) 2021-08-24 18:50:05 +02:00
			`exit:`
			`return return_value;`
			`}`
gh-111089: Revert PyUnicode_AsUTF8() changes (#111833) * Revert "gh-111089: Use PyUnicode_AsUTF8() in Argument Clinic (#111585)" This reverts commit d9b606b3d04fc56fb0bcc479d7d6c14562edb5e2. * Revert "gh-111089: Use PyUnicode_AsUTF8() in getargs.c (#111620)" This reverts commit cde1071b2a72e8261ca66053ef61431b7f3a81fd. * Revert "gh-111089: PyUnicode_AsUTF8() now raises on embedded NUL (#111091)" This reverts commit d731579bfb9a497cfb0076cb6b221058a20088fe. * Revert "gh-111089: Add PyUnicode_AsUTF8() to the limited C API (#111121)" This reverts commit d8f32be5b6a736dc2fc9dca3f1bf176c82fc9b44. * Revert "gh-111089: Use PyUnicode_AsUTF8() in sqlite3 (#111122)" This reverts commit 37e4e20eaa8f27ada926d49e5971fecf0477ad26. 2023-11-07 23:36:13 +01:00			`/*[clinic end generated code: output=dcd6ec48f06a092e input=a9049054013a1b77]*/`