Skip to content

Commit

Permalink
pythongh-119182: Add PyUnicodeWriter C API
Browse files Browse the repository at this point in the history
Move the private _PyUnicodeWriter API to the internal C API.
  • Loading branch information
vstinner committed May 22, 2024
1 parent 9b422fc commit fd85a1b
Show file tree
Hide file tree
Showing 3 changed files with 227 additions and 114 deletions.
139 changes: 25 additions & 114 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,121 +444,32 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
Py_ssize_t size);


/* --- _PyUnicodeWriter API ----------------------------------------------- */

typedef struct {
PyObject *buffer;
void *data;
int kind;
Py_UCS4 maxchar;
Py_ssize_t size;
Py_ssize_t pos;

/* minimum number of allocated characters (default: 0) */
Py_ssize_t min_length;

/* minimum character (default: 127, ASCII) */
Py_UCS4 min_char;

/* If non-zero, overallocate the buffer (default: 0). */
unsigned char overallocate;

/* If readonly is 1, buffer is a shared string (cannot be modified)
and size is set to 0. */
unsigned char readonly;
} _PyUnicodeWriter ;

// Initialize a Unicode writer.
//
// By default, the minimum buffer size is 0 character and overallocation is
// disabled. Set min_length, min_char and overallocate attributes to control
// the allocation of the buffer.
PyAPI_FUNC(void)
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);

/* Prepare the buffer to write 'length' characters
with the specified maximum character.
Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
(((MAXCHAR) <= (WRITER)->maxchar \
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
? 0 \
: (((LENGTH) == 0) \
? 0 \
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))

/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar);

/* Prepare the buffer to have at least the kind KIND.
For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
support characters in range U+000-U+FFFF.
Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
((KIND) <= (WRITER)->kind \
? 0 \
: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))

/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
macro instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
int kind);

/* Append a Unicode character.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
Py_UCS4 ch
);

/* Append a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
PyObject *str /* Unicode string */
);

/* Append a substring of a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
PyObject *str, /* Unicode string */
/* --- PyUnicodeWriter API ------------------------------------------------ */

typedef struct PyUnicodeWriter PyUnicodeWriter;

PyAPI_FUNC(PyUnicodeWriter*) PyUnicodeWriter_Create(void);
PyAPI_FUNC(void) PyUnicodeWriter_Free(PyUnicodeWriter *writer);
PyAPI_FUNC(PyObject*) PyUnicodeWriter_Finish(PyUnicodeWriter *writer);
PyAPI_FUNC(void) PyUnicodeWriter_SetOverallocate(
PyUnicodeWriter *writer,
int overallocate);

PyAPI_FUNC(int) PyUnicodeWriter_WriteChar(
PyUnicodeWriter *writer,
Py_UCS4 ch);
PyAPI_FUNC(int) PyUnicodeWriter_WriteASCIIString(
PyUnicodeWriter *writer,
const char *ascii,
Py_ssize_t len);
PyAPI_FUNC(int) PyUnicodeWriter_WriteStr(
PyUnicodeWriter *writer,
PyObject *str);
PyAPI_FUNC(int) PyUnicodeWriter_WriteSubstring(
PyUnicodeWriter *writer,
PyObject *str,
Py_ssize_t start,
Py_ssize_t end
);

/* Append an ASCII-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
const char *str, /* ASCII-encoded byte string */
Py_ssize_t len /* number of bytes, or -1 if unknown */
);

/* Append a latin1-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
const char *str, /* latin1-encoded byte string */
Py_ssize_t len /* length in bytes */
);

/* Get the value of the writer as a Unicode string. Clear the
buffer of the writer. Raise an exception and return NULL
on error. */
PyAPI_FUNC(PyObject *)
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);

/* Deallocate memory of a writer (clear its internal buffer). */
PyAPI_FUNC(void)
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);

Py_ssize_t stop);

/* --- Manage the default encoding ---------------------------------------- */

Expand Down
118 changes: 118 additions & 0 deletions Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ extern "C" {
#include "pycore_identifier.h" // _Py_Identifier
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI

typedef struct _PyUnicodeWriter _PyUnicodeWriter;

/* --- Characters Type APIs ----------------------------------------------- */

extern int _PyUnicode_IsXidStart(Py_UCS4 ch);
Expand Down Expand Up @@ -319,6 +321,122 @@ extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
PyAPI_FUNC(const char *) _PyUnicode_AsUTF8NoNUL(PyObject *);


/* --- _PyUnicodeWriter API ----------------------------------------------- */

struct _PyUnicodeWriter {
PyObject *buffer;
void *data;
int kind;
Py_UCS4 maxchar;
Py_ssize_t size;
Py_ssize_t pos;

/* minimum number of allocated characters (default: 0) */
Py_ssize_t min_length;

/* minimum character (default: 127, ASCII) */
Py_UCS4 min_char;

/* If non-zero, overallocate the buffer (default: 0). */
unsigned char overallocate;

/* If readonly is 1, buffer is a shared string (cannot be modified)
and size is set to 0. */
unsigned char readonly;
};

// Initialize a Unicode writer.
//
// By default, the minimum buffer size is 0 character and overallocation is
// disabled. Set min_length, min_char and overallocate attributes to control
// the allocation of the buffer.
PyAPI_FUNC(void)
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);

/* Prepare the buffer to write 'length' characters
with the specified maximum character.
Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
(((MAXCHAR) <= (WRITER)->maxchar \
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
? 0 \
: (((LENGTH) == 0) \
? 0 \
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))

/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar);

/* Prepare the buffer to have at least the kind KIND.
For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
support characters in range U+000-U+FFFF.
Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
((KIND) <= (WRITER)->kind \
? 0 \
: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))

/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
macro instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
int kind);

/* Append a Unicode character.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
Py_UCS4 ch
);

/* Append a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
PyObject *str /* Unicode string */
);

/* Append a substring of a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
PyObject *str, /* Unicode string */
Py_ssize_t start,
Py_ssize_t end
);

/* Append an ASCII-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
const char *str, /* ASCII-encoded byte string */
Py_ssize_t len /* number of bytes, or -1 if unknown */
);

/* Append a latin1-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
const char *str, /* latin1-encoded byte string */
Py_ssize_t len /* length in bytes */
);

/* Get the value of the writer as a Unicode string. Clear the
buffer of the writer. Raise an exception and return NULL
on error. */
PyAPI_FUNC(PyObject *)
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);

/* Deallocate memory of a writer (clear its internal buffer). */
PyAPI_FUNC(void)
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);


#ifdef __cplusplus
}
#endif
Expand Down
Loading

0 comments on commit fd85a1b

Please sign in to comment.