From 3d60dfbe1755e00ab20d0ee81281886be77ad5da Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 30 Aug 2024 14:57:33 +0200 Subject: [PATCH] gh-121645: Add PyBytes_Join() function (#121646) * Replace _PyBytes_Join() with PyBytes_Join(). * Keep _PyBytes_Join() as an alias to PyBytes_Join(). --- Doc/c-api/bytes.rst | 18 +++++++++ Doc/whatsnew/3.14.rst | 5 +++ Include/cpython/bytesobject.h | 7 ++-- Lib/test/test_capi/test_bytes.py | 40 +++++++++++++++++++ ...-07-12-13-40-59.gh-issue-121645.96QvD3.rst | 2 + Modules/_io/bufferedio.c | 4 +- Modules/_io/iobase.c | 2 +- Modules/_sre/sre.c | 4 +- Modules/_testcapi/bytes.c | 15 +++++++ Objects/bytesobject.c | 16 ++++++-- 10 files changed, 101 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-07-12-13-40-59.gh-issue-121645.96QvD3.rst diff --git a/Doc/c-api/bytes.rst b/Doc/c-api/bytes.rst index bca78a9c369..3d0501c8253 100644 --- a/Doc/c-api/bytes.rst +++ b/Doc/c-api/bytes.rst @@ -189,6 +189,24 @@ called with a non-bytes parameter. to *newpart* (i.e. decrements its reference count). +.. c:function:: PyObject* PyBytes_Join(PyObject *sep, PyObject *iterable) + + Similar to ``sep.join(iterable)`` in Python. + + *sep* must be a Python :class:`bytes` object. + (Note that :c:func:`PyUnicode_Join` accepts ``NULL`` separator and treats + it as a space, whereas :c:func:`PyBytes_Join` doesn't accept ``NULL`` + separator.) + + *iterable* must be an iterable object yielding objects that implement the + :ref:`buffer protocol <bufferobjects>`. + + On success, return a new :class:`bytes` object. + On error, set an exception and return ``NULL``. + + .. versionadded:: 3.14 + + .. c:function:: int _PyBytes_Resize(PyObject **bytes, Py_ssize_t newsize) Resize a bytes object. *newsize* will be the new length of the bytes object. 
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 44b373ac95d..975af420f9b 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -485,6 +485,11 @@ New Features (Contributed by Victor Stinner in :gh:`120389`.) +* Add :c:func:`PyBytes_Join(sep, iterable) <PyBytes_Join>` function, + similar to ``sep.join(iterable)`` in Python. + (Contributed by Victor Stinner in :gh:`121645`.) + + Porting to Python 3.14 ---------------------- diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 41537210b74..cf3f0387ecf 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -32,6 +32,7 @@ static inline Py_ssize_t PyBytes_GET_SIZE(PyObject *op) { } #define PyBytes_GET_SIZE(self) PyBytes_GET_SIZE(_PyObject_CAST(self)) -/* _PyBytes_Join(sep, x) is like sep.join(x). sep must be PyBytesObject*, - x must be an iterable object. */ -PyAPI_FUNC(PyObject*) _PyBytes_Join(PyObject *sep, PyObject *x); +PyAPI_FUNC(PyObject*) PyBytes_Join(PyObject *sep, PyObject *iterable); + +// Alias kept for backward compatibility +#define _PyBytes_Join PyBytes_Join diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index d5f047bcf18..5908d79e140 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -249,6 +249,46 @@ class CAPITest(unittest.TestCase): # CRASHES resize(NULL, 0, False) # CRASHES resize(NULL, 3, False) + def test_join(self): + """Test PyBytes_Join()""" + bytes_join = _testcapi.bytes_join + + self.assertEqual(bytes_join(b'', []), b'') + self.assertEqual(bytes_join(b'sep', []), b'') + + self.assertEqual(bytes_join(b'', [b'a', b'b', b'c']), b'abc') + self.assertEqual(bytes_join(b'-', [b'a', b'b', b'c']), b'a-b-c') + self.assertEqual(bytes_join(b' - ', [b'a', b'b', b'c']), b'a - b - c') + self.assertEqual(bytes_join(b'-', [bytearray(b'abc'), + memoryview(b'def')]), + b'abc-def') + + self.assertEqual(bytes_join(b'-', iter([b'a', b'b', b'c'])), b'a-b-c') + + # invalid 'sep' 
argument + with self.assertRaises(TypeError): + bytes_join(bytearray(b'sep'), []) + with self.assertRaises(TypeError): + bytes_join(memoryview(b'sep'), []) + with self.assertRaises(TypeError): + bytes_join('', []) # empty Unicode string + with self.assertRaises(TypeError): + bytes_join('unicode', []) + with self.assertRaises(TypeError): + bytes_join(123, []) + with self.assertRaises(SystemError): + self.assertEqual(bytes_join(NULL, [b'a', b'b', b'c']), b'abc') + + # invalid 'iterable' argument + with self.assertRaises(TypeError): + bytes_join(b'', [b'bytes', 'unicode']) + with self.assertRaises(TypeError): + bytes_join(b'', [b'bytes', 123]) + with self.assertRaises(TypeError): + bytes_join(b'', 123) + with self.assertRaises(SystemError): + bytes_join(b'', NULL) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C_API/2024-07-12-13-40-59.gh-issue-121645.96QvD3.rst b/Misc/NEWS.d/next/C_API/2024-07-12-13-40-59.gh-issue-121645.96QvD3.rst new file mode 100644 index 00000000000..1cca9b2230c --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-07-12-13-40-59.gh-issue-121645.96QvD3.rst @@ -0,0 +1,2 @@ +Add :c:func:`PyBytes_Join(sep, iterable) <PyBytes_Join>` function, similar to +``sep.join(iterable)`` in Python. Patch by Victor Stinner. 
diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index e45323c93a1..bc5fff54a62 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -1283,7 +1283,7 @@ found: Py_CLEAR(res); goto end; } - Py_XSETREF(res, _PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), chunks)); + Py_XSETREF(res, PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), chunks)); end: LEAVE_BUFFERED(self) @@ -1736,7 +1736,7 @@ _bufferedreader_read_all(buffered *self) goto cleanup; } else { - tmp = _PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), chunks); + tmp = PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), chunks); res = tmp; goto cleanup; } diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index 184e0b7d1aa..419e5516b5c 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -999,7 +999,7 @@ _io__RawIOBase_readall_impl(PyObject *self) return NULL; } } - result = _PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), chunks); + result = PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), chunks); Py_DECREF(chunks); return result; } diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index 01420d1a10b..32f91af8dcf 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -1287,7 +1287,7 @@ pattern_subx(_sremodulestate* module_state, } else { if (state.isbytes) - item = _PyBytes_Join(joiner, list); + item = PyBytes_Join(joiner, list); else item = PyUnicode_Join(joiner, list); Py_DECREF(joiner); @@ -2918,7 +2918,7 @@ expand_template(TemplateObject *self, MatchObject *match) } else { Py_SET_SIZE(list, count); - result = _PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), list); + result = PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), list); } cleanup: diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index 02294d8887a..33903de14ba 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -37,8 +37,23 @@ bytes_resize(PyObject *Py_UNUSED(module), PyObject *args) } +/* Test PyBytes_Join() */ 
+static PyObject * +bytes_join(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *sep, *iterable; + if (!PyArg_ParseTuple(args, "OO", &sep, &iterable)) { + return NULL; + } + NULLABLE(sep); + NULLABLE(iterable); + return PyBytes_Join(sep, iterable); +} + + static PyMethodDef test_methods[] = { {"bytes_resize", bytes_resize, METH_VARARGS}, + {"bytes_join", bytes_join, METH_VARARGS}, {NULL}, }; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index e88b199d89f..c467b242b4c 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1867,11 +1867,19 @@ bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes) } PyObject * -_PyBytes_Join(PyObject *sep, PyObject *x) +PyBytes_Join(PyObject *sep, PyObject *iterable) { - assert(sep != NULL && PyBytes_Check(sep)); - assert(x != NULL); - return bytes_join((PyBytesObject*)sep, x); + if (sep == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + if (!PyBytes_Check(sep)) { + PyErr_Format(PyExc_TypeError, + "sep: expected bytes, got %T", sep); + return NULL; + } + + return stringlib_bytes_join(sep, iterable); } /*[clinic input]