0
0
mirror of https://github.com/python/cpython.git synced 2024-11-30 18:51:15 +01:00
cpython/Objects/stringlib/repr.h
Victor Stinner 0518edc170
gh-119396: Optimize unicode_repr() (#119617)
Use stringlib to specialize unicode_repr() for each string kind
(UCS1, UCS2, UCS4).

Benchmark:

+-------------------------------------+---------+----------------------+
| Benchmark                           | ref     | change2              |
+=====================================+=========+======================+
| repr('abc')                         | 100 ns  | 103 ns: 1.02x slower |
+-------------------------------------+---------+----------------------+
| repr('a' * 100)                     | 369 ns  | 369 ns: 1.00x slower |
+-------------------------------------+---------+----------------------+
| repr(('a' + squote) * 100)          | 1.21 us | 946 ns: 1.27x faster |
+-------------------------------------+---------+----------------------+
| repr(('a' + nl) * 100)              | 1.23 us | 907 ns: 1.36x faster |
+-------------------------------------+---------+----------------------+
| repr(dquote + ('a' + squote) * 100) | 1.08 us | 858 ns: 1.25x faster |
+-------------------------------------+---------+----------------------+
| Geometric mean                      | (ref)   | 1.16x faster         |
+-------------------------------------+---------+----------------------+
2024-05-28 18:05:20 +02:00

96 lines
3.1 KiB
C

/* stringlib: repr() implementation */
/* Include-order check: stop the build with an explicit message when
   STRINGLIB_FASTSEARCH_H is not defined (presumably that macro is defined by
   "stringlib/fastsearch.h" itself -- confirm against that header). */
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif
/* Write the quoted, backslash-escaped form of `unicode` into `odata`.

   unicode: string object whose characters are read through
            PyUnicode_READ().
   quote:   character written before and after the content; occurrences of
            it inside the string are escaped with a backslash.
   odata:   output cursor.  No bounds checking is done here and no
            terminator is written after the closing quote, so the caller is
            responsible for providing a buffer of sufficient size.

   Escaping rules, applied in this order for each character:
     - quote and backslash           -> backslash + the character itself
     - TAB, LF, CR                   -> '\t', '\n', '\r'
     - printable ASCII (0x20..0x7E)  -> copied unchanged
     - non-ASCII for which Py_UNICODE_ISPRINTABLE() is true
                                     -> copied unchanged
     - everything else               -> '\xhh', '\uxxxx' or '\Uxxxxxxxx',
                                        depending on the code point's
                                        magnitude
*/
static void
STRINGLIB(repr)(PyObject *unicode, Py_UCS4 quote,
                STRINGLIB_CHAR *odata)
{
    const Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
    const int kind = PyUnicode_KIND(unicode);
    const void *src = PyUnicode_DATA(unicode);
    STRINGLIB_CHAR *out = odata;

    /* Opening quote */
    *out++ = quote;

    for (Py_ssize_t pos = 0; pos < length; pos++) {
        const Py_UCS4 ch = PyUnicode_READ(kind, src, pos);

        /* The quote character is tested first so that it always wins over
           the fixed escapes handled below. */
        if (ch == quote) {
            out[0] = '\\';
            out[1] = ch;
            out += 2;
            continue;
        }

        /* Two-character escapes: '\\', '\t', '\n', '\r'.
           `letter` stays 0 when the character has no such short form. */
        Py_UCS4 letter;
        switch (ch) {
        case '\\':
            letter = '\\';
            break;
        case '\t':
            letter = 't';
            break;
        case '\n':
            letter = 'n';
            break;
        case '\r':
            letter = 'r';
            break;
        default:
            letter = 0;
            break;
        }
        if (letter != 0) {
            out[0] = '\\';
            out[1] = letter;
            out += 2;
            continue;
        }

        /* Copied verbatim: printable ASCII, and any non-ASCII character
           that Py_UNICODE_ISPRINTABLE() accepts.  The lookup is only made
           for code points above 0x7F. */
        if ((ch >= ' ' && ch < 0x7F)
            || (ch > 0x7F && Py_UNICODE_ISPRINTABLE(ch)))
        {
            *out++ = ch;
            continue;
        }

        /* What remains needs a hexadecimal escape: ASCII control
           characters, DEL (0x7F) and non-printable non-ASCII characters. */
        *out++ = '\\';
        if (ch <= 0xff) {
            /* 8-bit value: '\xhh' */
            out[0] = 'x';
            out[1] = Py_hexdigits[(ch >> 4) & 0xF];
            out[2] = Py_hexdigits[ch & 0xF];
            out += 3;
        }
        else if (ch <= 0xffff) {
            /* 16-bit value: '\uxxxx' */
            out[0] = 'u';
            out[1] = Py_hexdigits[(ch >> 12) & 0xF];
            out[2] = Py_hexdigits[(ch >> 8) & 0xF];
            out[3] = Py_hexdigits[(ch >> 4) & 0xF];
            out[4] = Py_hexdigits[ch & 0xF];
            out += 5;
        }
        else {
            /* Anything larger: '\Uxxxxxxxx' (eight hex digits) */
            out[0] = 'U';
            out[1] = Py_hexdigits[(ch >> 28) & 0xF];
            out[2] = Py_hexdigits[(ch >> 24) & 0xF];
            out[3] = Py_hexdigits[(ch >> 20) & 0xF];
            out[4] = Py_hexdigits[(ch >> 16) & 0xF];
            out[5] = Py_hexdigits[(ch >> 12) & 0xF];
            out[6] = Py_hexdigits[(ch >> 8) & 0xF];
            out[7] = Py_hexdigits[(ch >> 4) & 0xF];
            out[8] = Py_hexdigits[ch & 0xF];
            out += 9;
        }
    }

    /* Closing quote */
    *out = quote;
}