Skip to content

Commit

Permalink
Issue python#3163: The struct module gets new format characters 'n' a…
Browse files Browse the repository at this point in the history
…nd 'N'

supporting C integer types `ssize_t` and `size_t`, respectively.
  • Loading branch information
pitrou committed Oct 6, 2011
1 parent 15a66cf commit 45d9c91
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 30 deletions.
21 changes: 17 additions & 4 deletions Doc/library/struct.rst
Original file line number Diff line number Diff line change
Expand Up @@ -187,17 +187,24 @@ platform-dependent.
| ``Q`` | :c:type:`unsigned long | integer | 8 | \(2), \(3) |
| | long` | | | |
+--------+--------------------------+--------------------+----------------+------------+
| ``f`` | :c:type:`float` | float | 4 | \(4) |
| ``n`` | :c:type:`ssize_t` | integer | | \(4) |
+--------+--------------------------+--------------------+----------------+------------+
| ``d`` | :c:type:`double` | float | 8 | \(4) |
| ``N`` | :c:type:`size_t` | integer | | \(4) |
+--------+--------------------------+--------------------+----------------+------------+
| ``f`` | :c:type:`float` | float | 4 | \(5) |
+--------+--------------------------+--------------------+----------------+------------+
| ``d`` | :c:type:`double` | float | 8 | \(5) |
+--------+--------------------------+--------------------+----------------+------------+
| ``s`` | :c:type:`char[]` | bytes | | |
+--------+--------------------------+--------------------+----------------+------------+
| ``p`` | :c:type:`char[]` | bytes | | |
+--------+--------------------------+--------------------+----------------+------------+
| ``P`` | :c:type:`void \*` | integer | | \(5) |
| ``P`` | :c:type:`void \*` | integer | | \(6) |
+--------+--------------------------+--------------------+----------------+------------+

.. versionchanged:: 3.3
Added support for the ``'n'`` and ``'N'`` formats.

Notes:

(1)
Expand All @@ -219,11 +226,17 @@ Notes:
Use of the :meth:`__index__` method for non-integers is new in 3.2.

(4)
The ``'n'`` and ``'N'`` conversion codes are only available for the native
size (selected as the default or with the ``'@'`` byte order character).
For the standard size, you can use whichever of the other integer formats
fits your application.

(5)
For the ``'f'`` and ``'d'`` conversion codes, the packed representation uses
the IEEE 754 binary32 (for ``'f'``) or binary64 (for ``'d'``) format,
regardless of the floating-point format used by the platform.

(5)
(6)
The ``'P'`` format character is only available for the native byte ordering
(selected as the default or with the ``'@'`` byte order character). The byte
order character ``'='`` chooses to use little- or big-endian ordering based
Expand Down
66 changes: 41 additions & 25 deletions Lib/test/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,19 @@
ISBIGENDIAN = sys.byteorder == "big"
IS32BIT = sys.maxsize == 0x7fffffff

integer_codes = 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q'
integer_codes = 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q', 'n', 'N'
byteorders = '', '@', '=', '<', '>', '!'

def iter_integer_formats(byteorders=byteorders):
    """Yield every usable ``(code, byteorder)`` pair of integer formats.

    Each code in ``integer_codes`` is combined with each entry of
    *byteorders*.  Two kinds of combination are left out:

    * native ``'q'``/``'Q'`` (byte order ``''`` or ``'@'``) when
      ``HAVE_LONG_LONG`` is false;
    * ``'n'``/``'N'`` with any byte order other than ``''`` or ``'@'``.
    """
    native_orders = ('', '@')
    for code in integer_codes:
        for byteorder in byteorders:
            if byteorder in native_orders:
                # Native long long needs platform support.
                if code in ('q', 'Q') and not HAVE_LONG_LONG:
                    continue
            elif code in ('n', 'N'):
                # ssize_t / size_t only exist in native mode.
                continue
            yield code, byteorder

# Native 'q' packing isn't available on systems that don't have the C
# long long type.
try:
Expand Down Expand Up @@ -141,14 +151,13 @@ def test_calcsize(self):
}

# standard integer sizes
for code in integer_codes:
for byteorder in '=', '<', '>', '!':
format = byteorder+code
size = struct.calcsize(format)
self.assertEqual(size, expected_size[code])
for code, byteorder in iter_integer_formats(('=', '<', '>', '!')):
format = byteorder+code
size = struct.calcsize(format)
self.assertEqual(size, expected_size[code])

# native integer sizes
native_pairs = 'bB', 'hH', 'iI', 'lL'
native_pairs = 'bB', 'hH', 'iI', 'lL', 'nN'
if HAVE_LONG_LONG:
native_pairs += 'qQ',
for format_pair in native_pairs:
Expand All @@ -166,9 +175,11 @@ def test_calcsize(self):
if HAVE_LONG_LONG:
self.assertLessEqual(8, struct.calcsize('q'))
self.assertLessEqual(struct.calcsize('l'), struct.calcsize('q'))
self.assertGreaterEqual(struct.calcsize('n'), struct.calcsize('i'))
self.assertGreaterEqual(struct.calcsize('n'), struct.calcsize('P'))

def test_integers(self):
# Integer tests (bBhHiIlLqQ).
# Integer tests (bBhHiIlLqQnN).
import binascii

class IntTester(unittest.TestCase):
Expand All @@ -182,11 +193,11 @@ def __init__(self, format):
self.byteorder)
self.bytesize = struct.calcsize(format)
self.bitsize = self.bytesize * 8
if self.code in tuple('bhilq'):
if self.code in tuple('bhilqn'):
self.signed = True
self.min_value = -(2**(self.bitsize-1))
self.max_value = 2**(self.bitsize-1) - 1
elif self.code in tuple('BHILQ'):
elif self.code in tuple('BHILQN'):
self.signed = False
self.min_value = 0
self.max_value = 2**self.bitsize - 1
Expand Down Expand Up @@ -316,14 +327,23 @@ def __int__(self):
struct.pack, self.format,
obj)

for code in integer_codes:
for byteorder in byteorders:
if (byteorder in ('', '@') and code in ('q', 'Q') and
not HAVE_LONG_LONG):
continue
for code, byteorder in iter_integer_formats():
format = byteorder+code
t = IntTester(format)
t.run()

def test_nN_code(self):
    """Check that 'n' and 'N' are rejected with an explicit non-native byte order.

    For each of the prefixes '=', '<', '>' and '!', calcsize(), pack() and
    unpack() must all raise struct.error with a "bad char in struct format"
    message.
    """
    # n and N don't exist in standard sizes
    def assertStructError(func, *args, **kwargs):
        # Call func and require a struct.error carrying the expected text.
        with self.assertRaises(struct.error) as cm:
            func(*args, **kwargs)
        self.assertIn("bad char in struct format", str(cm.exception))
    for code in 'nN':
        for byteorder in ('=', '<', '>', '!'):
            format = byteorder+code
            assertStructError(struct.calcsize, format)
            assertStructError(struct.pack, format, 0)
            assertStructError(struct.unpack, format, b"")

def test_p_code(self):
# Test p ("Pascal string") code.
Expand Down Expand Up @@ -377,14 +397,10 @@ def test_705836(self):
self.assertRaises(OverflowError, struct.pack, ">f", big)

def test_1530559(self):
    """Packing a float with an integer or 'P' format must raise struct.error."""
    # iter_integer_formats() is the single place that decides which
    # (code, byteorder) combinations exist, so rely on it instead of
    # repeating the skip logic here.
    for code, byteorder in iter_integer_formats():
        format = byteorder + code
        self.assertRaises(struct.error, struct.pack, format, 1.0)
        self.assertRaises(struct.error, struct.pack, format, 1.5)
    self.assertRaises(struct.error, struct.pack, 'P', 1.0)
    self.assertRaises(struct.error, struct.pack, 'P', 1.5)

Expand Down
3 changes: 3 additions & 0 deletions Misc/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,9 @@ Core and Builtins
Library
-------

- Issue #3163: The struct module gets new format characters 'n' and 'N'
supporting C integer types ``ssize_t`` and ``size_t``, respectively.

- Issue #13099: Fix sqlite3.Cursor.lastrowid under a Turkish locale.
Reported and diagnosed by Thomas Kluyver.

Expand Down
90 changes: 89 additions & 1 deletion Modules/_struct.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,15 @@ typedef struct { char c; long x; } st_long;
typedef struct { char c; float x; } st_float;
typedef struct { char c; double x; } st_double;
typedef struct { char c; void *x; } st_void_p;
typedef struct { char c; size_t x; } st_size_t;

#define SHORT_ALIGN (sizeof(st_short) - sizeof(short))
#define INT_ALIGN (sizeof(st_int) - sizeof(int))
#define LONG_ALIGN (sizeof(st_long) - sizeof(long))
#define FLOAT_ALIGN (sizeof(st_float) - sizeof(float))
#define DOUBLE_ALIGN (sizeof(st_double) - sizeof(double))
#define VOID_P_ALIGN (sizeof(st_void_p) - sizeof(void *))
#define SIZE_T_ALIGN (sizeof(st_size_t) - sizeof(size_t))

/* We can't support q and Q in native mode unless the compiler does;
in std mode, they're 8 bytes on all platforms. */
Expand Down Expand Up @@ -213,6 +215,52 @@ get_ulonglong(PyObject *v, unsigned PY_LONG_LONG *p)

#endif

/* Convert the Python object v to a C Py_ssize_t and store it in *p.

   v is first passed through get_pylong(); the object obtained from it is
   released before returning.  Returns 0 on success.  Returns -1 with an
   exception set on failure; an OverflowError from the conversion is
   replaced by StructError("argument out of range"). */

static int
get_ssize_t(PyObject *v, Py_ssize_t *p)
{
    PyObject *as_long = get_pylong(v);
    Py_ssize_t value;

    if (as_long == NULL)
        return -1;
    assert(PyLong_Check(as_long));
    value = PyLong_AsSsize_t(as_long);
    Py_DECREF(as_long);

    /* -1 is only an error indicator if an exception is actually pending. */
    if (value != (Py_ssize_t)-1 || !PyErr_Occurred()) {
        *p = value;
        return 0;
    }
    if (PyErr_ExceptionMatches(PyExc_OverflowError))
        PyErr_SetString(StructError,
                        "argument out of range");
    return -1;
}

/* Convert the Python object v to a C size_t and store it in *p.

   v is first passed through get_pylong(); the object obtained from it is
   released before returning.  Returns 0 on success.  Returns -1 with an
   exception set on failure; an OverflowError from the conversion is
   replaced by StructError("argument out of range"). */

static int
get_size_t(PyObject *v, size_t *p)
{
    PyObject *as_long = get_pylong(v);
    size_t value;

    if (as_long == NULL)
        return -1;
    assert(PyLong_Check(as_long));
    value = PyLong_AsSize_t(as_long);
    Py_DECREF(as_long);

    /* (size_t)-1 is only an error indicator if an exception is pending. */
    if (value != (size_t)-1 || !PyErr_Occurred()) {
        *p = value;
        return 0;
    }
    if (PyErr_ExceptionMatches(PyExc_OverflowError))
        PyErr_SetString(StructError,
                        "argument out of range");
    return -1;
}


#define RANGE_ERROR(x, f, flag, mask) return _range_error(f, flag)

Expand Down Expand Up @@ -369,6 +417,23 @@ nu_ulong(const char *p, const formatdef *f)
return PyLong_FromUnsignedLong(x);
}

/* Native unpack for 'n': copy sizeof(Py_ssize_t) bytes from p into a local
   (memcpy, so p need not be aligned) and return it as a Python int.
   The formatdef argument f is not used. */
static PyObject *
nu_ssize_t(const char *p, const formatdef *f)
{
    Py_ssize_t value;

    memcpy(&value, p, sizeof(value));
    return PyLong_FromSsize_t(value);
}

/* Native unpack for 'N': copy sizeof(size_t) bytes from p into a local
   (memcpy, so p need not be aligned) and return it as a Python int.
   The formatdef argument f is not used. */
static PyObject *
nu_size_t(const char *p, const formatdef *f)
{
    size_t value;

    memcpy(&value, p, sizeof(value));
    return PyLong_FromSize_t(value);
}


/* Native mode doesn't support q or Q unless the platform C supports
long long (or, on Windows, __int64). */

Expand Down Expand Up @@ -558,6 +623,26 @@ np_ulong(char *p, PyObject *v, const formatdef *f)
return 0;
}

/* Native pack for 'n': convert v with get_ssize_t() and copy the resulting
   Py_ssize_t into the buffer at p.  Returns 0 on success, -1 if the
   conversion failed (nothing is written in that case).
   The formatdef argument f is not used. */
static int
np_ssize_t(char *p, PyObject *v, const formatdef *f)
{
    Py_ssize_t value;

    if (get_ssize_t(v, &value) >= 0) {
        memcpy(p, &value, sizeof(value));
        return 0;
    }
    return -1;
}

/* Native pack for 'N': convert v with get_size_t() and copy the resulting
   size_t into the buffer at p.  Returns 0 on success, -1 if the
   conversion failed (nothing is written in that case).
   The formatdef argument f is not used. */
static int
np_size_t(char *p, PyObject *v, const formatdef *f)
{
    size_t value;

    if (get_size_t(v, &value) >= 0) {
        memcpy(p, &value, sizeof(value));
        return 0;
    }
    return -1;
}

#ifdef HAVE_LONG_LONG

static int
Expand Down Expand Up @@ -651,6 +736,8 @@ static formatdef native_table[] = {
{'I', sizeof(int), INT_ALIGN, nu_uint, np_uint},
{'l', sizeof(long), LONG_ALIGN, nu_long, np_long},
{'L', sizeof(long), LONG_ALIGN, nu_ulong, np_ulong},
{'n', sizeof(size_t), SIZE_T_ALIGN, nu_ssize_t, np_ssize_t},
{'N', sizeof(size_t), SIZE_T_ALIGN, nu_size_t, np_size_t},
#ifdef HAVE_LONG_LONG
{'q', sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_longlong, np_longlong},
{'Q', sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_ulonglong,np_ulonglong},
Expand Down Expand Up @@ -1951,7 +2038,8 @@ these can be preceded by a decimal repeat count:\n\
l:long; L:unsigned long; f:float; d:double.\n\
Special cases (preceding decimal count indicates length):\n\
s:string (array of char); p: pascal string (with count byte).\n\
Special case (only available in native format):\n\
Special cases (only available in native format):\n\
n:ssize_t; N:size_t;\n\
P:an integer type that is wide enough to hold a pointer.\n\
Special case (not in native mode unless 'long long' in platform C):\n\
q:long long; Q:unsigned long long\n\
Expand Down

0 comments on commit 45d9c91

Please sign in to comment.