diff --git a/Doc/c-api/bytes.rst b/Doc/c-api/bytes.rst index bca78a9c369385a..1240e8e3e4a69ea 100644 --- a/Doc/c-api/bytes.rst +++ b/Doc/c-api/bytes.rst @@ -201,3 +201,52 @@ called with a non-bytes parameter. reallocation fails, the original bytes object at *\*bytes* is deallocated, *\*bytes* is set to ``NULL``, :exc:`MemoryError` is set, and ``-1`` is returned. + +PyBytesWriter +^^^^^^^^^^^^^ + +The :c:type:`PyBytesWriter` API can be used to create a Python :class:`bytes` +object. + +.. versionadded:: 3.14 + +.. c:type:: PyBytesWriter + + A bytes writer instance. + + The instance must be destroyed by :c:func:`PyBytesWriter_Finish` on + success, or :c:func:`PyBytesWriter_Discard` on error. + +.. c:function:: PyBytesWriter* PyBytesWriter_Create(Py_ssize_t size, char **str) + + Create a bytes writer instance. + Preallocate *size* bytes. + + On success, set *\*str* and return a new writer. + On error, set an exception and return ``NULL``. + +.. c:function:: PyObject* PyBytesWriter_Finish(PyBytesWriter *writer, char *str) + + Return the final Python :class:`bytes` object and destroy the writer + instance. + + On success, return a bytes object. + On error, set an exception and return ``NULL``. + +.. c:function:: void PyBytesWriter_Discard(PyBytesWriter *writer) + + Discard the internal bytes buffer and destroy the writer instance. + +.. c:function:: int PyBytesWriter_Prepare(PyBytesWriter *writer, char **str, Py_ssize_t size) + + Allocate *size* bytes to prepare writing *size* bytes into *writer*. + + On success, update *\*str* and return ``0``. + On error, set an exception and return ``-1``. + +.. c:function:: int PyBytesWriter_WriteBytes(PyBytesWriter *writer, char **str, const void *bytes, Py_ssize_t size) + + Write the bytes string *bytes* of *size* bytes into *writer*. + + On success, update *\*str* and return ``0``. + On error, set an exception and return ``-1``. 
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 246cf47df62e783..99180675df08724 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1521,6 +1521,7 @@ object. .. c:function:: PyUnicodeWriter* PyUnicodeWriter_Create(Py_ssize_t length) Create a Unicode writer instance. + Preallocate *length* characters. Set an exception and return ``NULL`` on error. diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index da9b45cd8e58b38..c60f8d80b456892 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -357,6 +357,17 @@ New Features (Contributed by Victor Stinner in :gh:`119182`.) +* Add a new :c:type:`PyBytesWriter` API to create a Python :class:`bytes` + object: + + * :c:func:`PyBytesWriter_Create`; + * :c:func:`PyBytesWriter_Finish`; + * :c:func:`PyBytesWriter_Discard`; + * :c:func:`PyBytesWriter_Prepare`; + * :c:func:`PyBytesWriter_WriteBytes`. + + (Contributed by Victor Stinner in :gh:`121710`.) + Porting to Python 3.14 ---------------------- diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index 816823716e9a6f5..c773a05c25fdfef 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -31,3 +31,26 @@ static inline Py_ssize_t PyBytes_GET_SIZE(PyObject *op) { return Py_SIZE(self); } #define PyBytes_GET_SIZE(self) PyBytes_GET_SIZE(_PyObject_CAST(self)) + + +/* --- PyBytesWriter ------------------------------------------------------ */ + +typedef struct PyBytesWriter PyBytesWriter; + +PyAPI_FUNC(PyBytesWriter*) PyBytesWriter_Create( + Py_ssize_t size, + char **str); +PyAPI_FUNC(PyObject *) PyBytesWriter_Finish( + PyBytesWriter *writer, + char *str); +PyAPI_FUNC(void) PyBytesWriter_Discard(PyBytesWriter *writer); + +PyAPI_FUNC(int) PyBytesWriter_Prepare( + PyBytesWriter *writer, + char **str, + Py_ssize_t size); +PyAPI_FUNC(int) PyBytesWriter_WriteBytes( + PyBytesWriter *writer, + char **str, + const void *bytes, + Py_ssize_t size); diff --git 
a/Misc/NEWS.d/next/C_API/2024-07-13-21-49-12.gh-issue-121710.j-9Vhk.rst b/Misc/NEWS.d/next/C_API/2024-07-13-21-49-12.gh-issue-121710.j-9Vhk.rst new file mode 100644 index 000000000000000..017614d732eec64 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-07-13-21-49-12.gh-issue-121710.j-9Vhk.rst @@ -0,0 +1,10 @@ +Add a new :c:type:`PyBytesWriter` API to create a Python :class:`bytes` +object: + +* :c:func:`PyBytesWriter_Create`; +* :c:func:`PyBytesWriter_Finish`; +* :c:func:`PyBytesWriter_Discard`; +* :c:func:`PyBytesWriter_Prepare`; +* :c:func:`PyBytesWriter_WriteBytes`. + +Patch by Victor Stinner. diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c index 02294d8887abb78..030715b3659ab24 100644 --- a/Modules/_testcapi/bytes.c +++ b/Modules/_testcapi/bytes.c @@ -37,8 +37,140 @@ bytes_resize(PyObject *Py_UNUSED(module), PyObject *args) } +static int +bytes_equal(PyObject *obj, const char *str) +{ + return (PyBytes_Size(obj) == (Py_ssize_t)strlen(str) + && strcmp(PyBytes_AsString(obj), str) == 0); +} + + +/* Test PyBytesWriter API */ +static PyObject * +test_byteswriter(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + char *str; + PyBytesWriter *writer = PyBytesWriter_Create(3, &str); + if (writer == NULL) { + return NULL; + } + + if (PyBytesWriter_WriteBytes(writer, &str, "abc", 3) < 0) { + goto error; + } + + // write empty string + if (PyBytesWriter_WriteBytes(writer, &str, "", 0) < 0) { + goto error; + } + + PyObject *obj = PyBytesWriter_Finish(writer, str); + if (obj == NULL) { + return NULL; + } + + assert(bytes_equal(obj, "abc")); + Py_DECREF(obj); + + Py_RETURN_NONE; + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + + +/* Test PyBytesWriter_Discard() */ +static PyObject * +test_byteswriter_discard(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + char *str; + PyBytesWriter *writer = PyBytesWriter_Create(3, &str); + if (writer == NULL) { + return NULL; + } + assert(PyBytesWriter_WriteBytes(writer, &str, "abc", 3) 
== 0); + + PyBytesWriter_Discard(writer); + Py_RETURN_NONE; +} + + +/* Test PyBytesWriter_WriteBytes() */ +static PyObject * +test_byteswriter_writebytes(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + char *str; + PyBytesWriter *writer = PyBytesWriter_Create(0, &str); + if (writer == NULL) { + return NULL; + } + + if (PyBytesWriter_WriteBytes(writer, &str, "abc", 3) < 0) { + goto error; + } + if (PyBytesWriter_WriteBytes(writer, &str, "def", 3) < 0) { + goto error; + } + + PyObject *obj = PyBytesWriter_Finish(writer, str); + if (obj == NULL) { + return NULL; + } + + assert(bytes_equal(obj, "abcdef")); + Py_DECREF(obj); + + Py_RETURN_NONE; + +error: + PyBytesWriter_Discard(writer); + return NULL; +} + + +/* Test PyBytesWriter_Prepare() */ +static PyObject * +test_byteswriter_prepare(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + char *str; + PyBytesWriter *writer = PyBytesWriter_Create(0, &str); + if (writer == NULL) { + return NULL; + } + + // test error on purpose (negative size) + assert(PyBytesWriter_Prepare(writer, &str, -3) < 0); + assert(PyErr_ExceptionMatches(PyExc_ValueError)); + PyErr_Clear(); + + if (PyBytesWriter_Prepare(writer, &str, 3) < 0) { + PyBytesWriter_Discard(writer); + return NULL; + } + + // Write "abc" + memcpy(str, "abc", 3); + str += 3; + + PyObject *obj = PyBytesWriter_Finish(writer, str); + if (obj == NULL) { + return NULL; + } + + assert(bytes_equal(obj, "abc")); + Py_DECREF(obj); + + Py_RETURN_NONE; +} + + static PyMethodDef test_methods[] = { {"bytes_resize", bytes_resize, METH_VARARGS}, + {"test_byteswriter", test_byteswriter, METH_NOARGS}, + {"test_byteswriter_discard", test_byteswriter_discard, METH_NOARGS}, + {"test_byteswriter_writebytes", test_byteswriter_writebytes, METH_NOARGS}, + {"test_byteswriter_prepare", test_byteswriter_prepare, METH_NOARGS}, {NULL}, }; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 459df6ceacf3a86..c472805d1c1038a 100644 --- a/Objects/bytesobject.c +++ 
b/Objects/bytesobject.c @@ -3379,12 +3379,45 @@ _PyBytesWriter_Init(_PyBytesWriter *writer) #endif } + +PyBytesWriter* PyBytesWriter_Create(Py_ssize_t size, char **pstr) +{ + _PyBytesWriter *writer = PyMem_Malloc(sizeof(_PyBytesWriter)); + if (writer == NULL) { + PyErr_NoMemory(); + return NULL; + } + _PyBytesWriter_Init(writer); + + char *str = _PyBytesWriter_Alloc(writer, size); + if (str == NULL) { + PyBytesWriter_Discard((PyBytesWriter*)writer); + return NULL; + } + + // Always enable overallocation + writer->overallocate = 1; + + *pstr = str; + return (PyBytesWriter*)writer; +} + + void _PyBytesWriter_Dealloc(_PyBytesWriter *writer) { Py_CLEAR(writer->buffer); } + +void +PyBytesWriter_Discard(PyBytesWriter *writer) +{ + _PyBytesWriter_Dealloc((_PyBytesWriter*)writer); + PyMem_Free(writer); +} + + Py_LOCAL_INLINE(char*) _PyBytesWriter_AsString(_PyBytesWriter *writer) { @@ -3449,6 +3482,27 @@ _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str) } #endif + +static int +PyBytesWriter_CheckPtr(PyBytesWriter *pub_writer, char *str) +{ + if (str == NULL) { + PyErr_SetString(PyExc_ValueError, "str is NULL"); + return -1; + } + + _PyBytesWriter *writer = (_PyBytesWriter*)pub_writer; + const char *start = _PyBytesWriter_AsString(writer); + const char *end = start + writer->allocated; + + if (str < start || end < str) { + PyErr_SetString(PyExc_ValueError, "str is out of bounds"); + return -1; + } + return 0; +} + + void* _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size) { @@ -3546,6 +3600,27 @@ _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size) return str; } + +int +PyBytesWriter_Prepare(PyBytesWriter *writer, char **str, Py_ssize_t size) +{ + if (PyBytesWriter_CheckPtr(writer, *str) < 0) { + return -1; + } + if (size < 0) { + PyErr_SetString(PyExc_ValueError, "size must be non-negative"); + return -1; + } + + char *str2 = _PyBytesWriter_Prepare((_PyBytesWriter*)writer, *str, size); + if (str2 == NULL) { + return 
-1; + } + *str = str2; + return 0; +} + + /* Allocate the buffer to write size bytes. Return the pointer to the beginning of buffer data. Raise an exception and return NULL on error. */ @@ -3623,6 +3698,21 @@ _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str) return result; } + +PyObject * +PyBytesWriter_Finish(PyBytesWriter *writer, char *str) +{ + if (PyBytesWriter_CheckPtr(writer, str) < 0) { + PyBytesWriter_Discard(writer); /* release the internal buffer too, not only the writer struct */ + return NULL; + } + + PyObject *res = _PyBytesWriter_Finish((_PyBytesWriter*)writer, str); + PyMem_Free(writer); + return res; +} + + void* _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr, const void *bytes, Py_ssize_t size) @@ -3640,6 +3730,25 @@ _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr, } +int +PyBytesWriter_WriteBytes(PyBytesWriter *writer, char **str, + const void *bytes, Py_ssize_t size) +{ + if (PyBytesWriter_CheckPtr(writer, *str) < 0) { + return -1; + } + + char *str2 = _PyBytesWriter_WriteBytes((_PyBytesWriter *)writer, *str, + bytes, size); + if (str2 == NULL) { + return -1; + } + + *str = str2; + return 0; +} + + void _PyBytes_Repeat(char* dest, Py_ssize_t len_dest, const char* src, Py_ssize_t len_src)