mirror of
https://github.com/python/cpython.git
synced 2024-11-30 18:51:15 +01:00
0518edc170
Use stringlib to specialize unicode_repr() for each string kind (UCS1, UCS2, UCS4).

Benchmark:

+-------------------------------------+---------+----------------------+
| Benchmark                           | ref     | change2              |
+=====================================+=========+======================+
| repr('abc')                         | 100 ns  | 103 ns: 1.02x slower |
+-------------------------------------+---------+----------------------+
| repr('a' * 100)                     | 369 ns  | 369 ns: 1.00x slower |
+-------------------------------------+---------+----------------------+
| repr(('a' + squote) * 100)          | 1.21 us | 946 ns: 1.27x faster |
+-------------------------------------+---------+----------------------+
| repr(('a' + nl) * 100)              | 1.23 us | 907 ns: 1.36x faster |
+-------------------------------------+---------+----------------------+
| repr(dquote + ('a' + squote) * 100) | 1.08 us | 858 ns: 1.25x faster |
+-------------------------------------+---------+----------------------+
| Geometric mean                      | (ref)   | 1.16x faster         |
+-------------------------------------+---------+----------------------+
96 lines
3.1 KiB
C
96 lines
3.1 KiB
C
/* stringlib: repr() implementation */
|
|
|
|
#ifndef STRINGLIB_FASTSEARCH_H
|
|
#error must include "stringlib/fastsearch.h" before including this module
|
|
#endif
|
|
|
|
|
|
static void
|
|
STRINGLIB(repr)(PyObject *unicode, Py_UCS4 quote,
|
|
STRINGLIB_CHAR *odata)
|
|
{
|
|
Py_ssize_t isize = PyUnicode_GET_LENGTH(unicode);
|
|
const void *idata = PyUnicode_DATA(unicode);
|
|
int ikind = PyUnicode_KIND(unicode);
|
|
|
|
*odata++ = quote;
|
|
for (Py_ssize_t i = 0; i < isize; i++) {
|
|
Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
|
|
|
|
/* Escape quotes and backslashes */
|
|
if ((ch == quote) || (ch == '\\')) {
|
|
*odata++ = '\\';
|
|
*odata++ = ch;
|
|
continue;
|
|
}
|
|
|
|
/* Map special whitespace to '\t', \n', '\r' */
|
|
if (ch == '\t') {
|
|
*odata++ = '\\';
|
|
*odata++ = 't';
|
|
}
|
|
else if (ch == '\n') {
|
|
*odata++ = '\\';
|
|
*odata++ = 'n';
|
|
}
|
|
else if (ch == '\r') {
|
|
*odata++ = '\\';
|
|
*odata++ = 'r';
|
|
}
|
|
|
|
/* Map non-printable US ASCII to '\xhh' */
|
|
else if (ch < ' ' || ch == 0x7F) {
|
|
*odata++ = '\\';
|
|
*odata++ = 'x';
|
|
*odata++ = Py_hexdigits[(ch >> 4) & 0x000F];
|
|
*odata++ = Py_hexdigits[ch & 0x000F];
|
|
}
|
|
|
|
/* Copy ASCII characters as-is */
|
|
else if (ch < 0x7F) {
|
|
*odata++ = ch;
|
|
}
|
|
|
|
/* Non-ASCII characters */
|
|
else {
|
|
/* Map Unicode whitespace and control characters
|
|
(categories Z* and C* except ASCII space)
|
|
*/
|
|
if (!Py_UNICODE_ISPRINTABLE(ch)) {
|
|
*odata++ = '\\';
|
|
/* Map 8-bit characters to '\xhh' */
|
|
if (ch <= 0xff) {
|
|
*odata++ = 'x';
|
|
*odata++ = Py_hexdigits[(ch >> 4) & 0x000F];
|
|
*odata++ = Py_hexdigits[ch & 0x000F];
|
|
}
|
|
/* Map 16-bit characters to '\uxxxx' */
|
|
else if (ch <= 0xffff) {
|
|
*odata++ = 'u';
|
|
*odata++ = Py_hexdigits[(ch >> 12) & 0xF];
|
|
*odata++ = Py_hexdigits[(ch >> 8) & 0xF];
|
|
*odata++ = Py_hexdigits[(ch >> 4) & 0xF];
|
|
*odata++ = Py_hexdigits[ch & 0xF];
|
|
}
|
|
/* Map 21-bit characters to '\U00xxxxxx' */
|
|
else {
|
|
*odata++ = 'U';
|
|
*odata++ = Py_hexdigits[(ch >> 28) & 0xF];
|
|
*odata++ = Py_hexdigits[(ch >> 24) & 0xF];
|
|
*odata++ = Py_hexdigits[(ch >> 20) & 0xF];
|
|
*odata++ = Py_hexdigits[(ch >> 16) & 0xF];
|
|
*odata++ = Py_hexdigits[(ch >> 12) & 0xF];
|
|
*odata++ = Py_hexdigits[(ch >> 8) & 0xF];
|
|
*odata++ = Py_hexdigits[(ch >> 4) & 0xF];
|
|
*odata++ = Py_hexdigits[ch & 0xF];
|
|
}
|
|
}
|
|
/* Copy characters as-is */
|
|
else {
|
|
*odata++ = ch;
|
|
}
|
|
}
|
|
}
|
|
*odata = quote;
|
|
}
|