Issue python#3163: The struct module gets new format characters 'n' and 'N' supporting C integer types `ssize_t` and `size_t`, respectively.
floum · Oct 6, 2011 · 45d9c91 · 45d9c91
1 parent 15a66cf
commit 45d9c91
Show file tree

Hide file tree

Showing 4 changed files with 150 additions and 30 deletions.
diff --git a/Doc/library/struct.rst b/Doc/library/struct.rst
@@ -187,17 +187,24 @@ platform-dependent.
 | ``Q``  | :c:type:`unsigned long   | integer            | 8              | \(2), \(3) |
 |        | long`                    |                    |                |            |
 +--------+--------------------------+--------------------+----------------+------------+
-| ``f``  | :c:type:`float`          | float              | 4              | \(4)       |
+| ``n``  | :c:type:`ssize_t`        | integer            |                | \(4)       |
 +--------+--------------------------+--------------------+----------------+------------+
-| ``d``  | :c:type:`double`         | float              | 8              | \(4)       |
+| ``N``  | :c:type:`size_t`         | integer            |                | \(4)       |
++--------+--------------------------+--------------------+----------------+------------+
+| ``f``  | :c:type:`float`          | float              | 4              | \(5)       |
++--------+--------------------------+--------------------+----------------+------------+
+| ``d``  | :c:type:`double`         | float              | 8              | \(5)       |
 +--------+--------------------------+--------------------+----------------+------------+
 | ``s``  | :c:type:`char[]`         | bytes              |                |            |
 +--------+--------------------------+--------------------+----------------+------------+
 | ``p``  | :c:type:`char[]`         | bytes              |                |            |
 +--------+--------------------------+--------------------+----------------+------------+
-| ``P``  | :c:type:`void \*`        | integer            |                | \(5)       |
+| ``P``  | :c:type:`void \*`        | integer            |                | \(6)       |
 +--------+--------------------------+--------------------+----------------+------------+
 
+.. versionchanged:: 3.3
+   Added support for the ``'n'`` and ``'N'`` formats.
+
 Notes:
 
 (1)
@@ -219,11 +226,17 @@ Notes:
       Use of the :meth:`__index__` method for non-integers is new in 3.2.
 
 (4)
+   The ``'n'`` and ``'N'`` conversion codes are only available for the native
+   size (selected as the default or with the ``'@'`` byte order character).
+   For the standard size, you can use whichever of the other integer formats
+   fits your application.
+
+(5)
    For the ``'f'`` and ``'d'`` conversion codes, the packed representation uses
    the IEEE 754 binary32 (for ``'f'``) or binary64 (for ``'d'``) format,
    regardless of the floating-point format used by the platform.
 
-(5)
+(6)
    The ``'P'`` format character is only available for the native byte ordering
    (selected as the default or with the ``'@'`` byte order character). The byte
    order character ``'='`` chooses to use little- or big-endian ordering based

diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py
@@ -8,9 +8,19 @@
 ISBIGENDIAN = sys.byteorder == "big"
 IS32BIT = sys.maxsize == 0x7fffffff
 
-integer_codes = 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q'
+integer_codes = 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q', 'n', 'N'
 byteorders = '', '@', '=', '<', '>', '!'
 
+def iter_integer_formats(byteorders=byteorders):  # generator of (code, byteorder) pairs over integer_codes x byteorders
+    for code in integer_codes:
+        for byteorder in byteorders:
+            if (byteorder in ('', '@') and code in ('q', 'Q') and
+                not HAVE_LONG_LONG):  # native 'q'/'Q' are skipped when HAVE_LONG_LONG is false
+                continue
+            if (byteorder not in ('', '@') and code in ('n', 'N')):  # 'n'/'N' are only yielded for the native prefixes '' and '@'
+                continue
+            yield code, byteorder  # note the order: code first, then byteorder
+
 # Native 'q' packing isn't available on systems that don't have the C
 # long long type.
 try:
@@ -141,14 +151,13 @@ def test_calcsize(self):
             }
 
         # standard integer sizes
-        for code in integer_codes:
-            for byteorder in '=', '<', '>', '!':
-                format = byteorder+code
-                size = struct.calcsize(format)
-                self.assertEqual(size, expected_size[code])
+        for code, byteorder in iter_integer_formats(('=', '<', '>', '!')):
+            format = byteorder+code
+            size = struct.calcsize(format)
+            self.assertEqual(size, expected_size[code])
 
         # native integer sizes
-        native_pairs = 'bB', 'hH', 'iI', 'lL'
+        native_pairs = 'bB', 'hH', 'iI', 'lL', 'nN'
         if HAVE_LONG_LONG:
             native_pairs += 'qQ',
         for format_pair in native_pairs:
@@ -166,9 +175,11 @@ def test_calcsize(self):
         if HAVE_LONG_LONG:
             self.assertLessEqual(8, struct.calcsize('q'))
             self.assertLessEqual(struct.calcsize('l'), struct.calcsize('q'))
+        self.assertGreaterEqual(struct.calcsize('n'), struct.calcsize('i'))
+        self.assertGreaterEqual(struct.calcsize('n'), struct.calcsize('P'))
 
     def test_integers(self):
-        # Integer tests (bBhHiIlLqQ).
+        # Integer tests (bBhHiIlLqQnN).
         import binascii
 
         class IntTester(unittest.TestCase):
@@ -182,11 +193,11 @@ def __init__(self, format):
                                      self.byteorder)
                 self.bytesize = struct.calcsize(format)
                 self.bitsize = self.bytesize * 8
-                if self.code in tuple('bhilq'):
+                if self.code in tuple('bhilqn'):
                     self.signed = True
                     self.min_value = -(2**(self.bitsize-1))
                     self.max_value = 2**(self.bitsize-1) - 1
-                elif self.code in tuple('BHILQ'):
+                elif self.code in tuple('BHILQN'):
                     self.signed = False
                     self.min_value = 0
                     self.max_value = 2**self.bitsize - 1
@@ -316,14 +327,23 @@ def __int__(self):
                                       struct.pack, self.format,
                                       obj)
 
-        for code in integer_codes:
-            for byteorder in byteorders:
-                if (byteorder in ('', '@') and code in ('q', 'Q') and
-                    not HAVE_LONG_LONG):
-                    continue
+        for code, byteorder in iter_integer_formats():
+            format = byteorder+code
+            t = IntTester(format)
+            t.run()
+
+    def test_nN_code(self):
+        # n and N don't exist in standard sizes, so the '=', '<', '>' and '!' prefixes must be rejected
+        def assertStructError(func, *args, **kwargs):  # call func and require struct.error carrying the "bad char" message
+            with self.assertRaises(struct.error) as cm:
+                func(*args, **kwargs)
+            self.assertIn("bad char in struct format", str(cm.exception))
+        for code in 'nN':
+            for byteorder in ('=', '<', '>', '!'):
                 format = byteorder+code
-                t = IntTester(format)
-                t.run()
+                assertStructError(struct.calcsize, format)  # size query is rejected
+                assertStructError(struct.pack, format, 0)  # packing is rejected
+                assertStructError(struct.unpack, format, b"")  # unpacking is rejected
 
     def test_p_code(self):
         # Test p ("Pascal string") code.
@@ -377,14 +397,10 @@ def test_705836(self):
         self.assertRaises(OverflowError, struct.pack, ">f", big)
 
     def test_1530559(self):
-        for byteorder in '', '@', '=', '<', '>', '!':
-            for code in integer_codes:
-                if (byteorder in ('', '@') and code in ('q', 'Q') and
-                    not HAVE_LONG_LONG):
-                    continue
-                format = byteorder + code
-                self.assertRaises(struct.error, struct.pack, format, 1.0)
-                self.assertRaises(struct.error, struct.pack, format, 1.5)
+        for code, byteorder in iter_integer_formats():
+            format = byteorder + code
+            self.assertRaises(struct.error, struct.pack, format, 1.0)
+            self.assertRaises(struct.error, struct.pack, format, 1.5)
         self.assertRaises(struct.error, struct.pack, 'P', 1.0)
         self.assertRaises(struct.error, struct.pack, 'P', 1.5)
 

diff --git a/Misc/NEWS b/Misc/NEWS
@@ -294,6 +294,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #3163: The struct module gets new format characters 'n' and 'N'
+  supporting C integer types ``ssize_t`` and ``size_t``, respectively.
+
 - Issue #13099: Fix sqlite3.Cursor.lastrowid under a Turkish locale.
   Reported and diagnosed by Thomas Kluyver.
 

diff --git a/Modules/_struct.c b/Modules/_struct.c
@@ -58,13 +58,15 @@ typedef struct { char c; long x; } st_long;
 typedef struct { char c; float x; } st_float;
 typedef struct { char c; double x; } st_double;
 typedef struct { char c; void *x; } st_void_p;
+typedef struct { char c; size_t x; } st_size_t;
 
 #define SHORT_ALIGN (sizeof(st_short) - sizeof(short))
 #define INT_ALIGN (sizeof(st_int) - sizeof(int))
 #define LONG_ALIGN (sizeof(st_long) - sizeof(long))
 #define FLOAT_ALIGN (sizeof(st_float) - sizeof(float))
 #define DOUBLE_ALIGN (sizeof(st_double) - sizeof(double))
 #define VOID_P_ALIGN (sizeof(st_void_p) - sizeof(void *))
+#define SIZE_T_ALIGN (sizeof(st_size_t) - sizeof(size_t))
 
 /* We can't support q and Q in native mode unless the compiler does;
    in std mode, they're 8 bytes on all platforms. */
@@ -213,6 +215,52 @@ get_ulonglong(PyObject *v, unsigned PY_LONG_LONG *p)
 
 #endif
 
+/* Same, but handling Py_ssize_t.
+   Converts v with get_pylong() followed by PyLong_AsSsize_t() and stores the
+   result in *p.  Returns 0 on success and -1 on failure; *p is written only
+   on success.  An OverflowError raised by the conversion is replaced by
+   StructError "argument out of range". */
+
+static int
+get_ssize_t(PyObject *v, Py_ssize_t *p)
+{
+    Py_ssize_t x;
+
+    v = get_pylong(v);  /* v now names the object returned by get_pylong() */
+    if (v == NULL)
+        return -1;
+    assert(PyLong_Check(v));
+    x = PyLong_AsSsize_t(v);
+    Py_DECREF(v);  /* release the get_pylong() result once it has been converted */
+    if (x == (Py_ssize_t)-1 && PyErr_Occurred()) {  /* -1 counts as failure only when an exception is pending */
+        if (PyErr_ExceptionMatches(PyExc_OverflowError))
+            PyErr_SetString(StructError,
+                            "argument out of range");
+        return -1;  /* any non-overflow exception is left in place */
+    }
+    *p = x;
+    return 0;
+}
+
+/* Same, but handling size_t.
+   Converts v with get_pylong() followed by PyLong_AsSize_t() and stores the
+   result in *p.  Returns 0 on success and -1 on failure; *p is written only
+   on success.  An OverflowError raised by the conversion is replaced by
+   StructError "argument out of range". */
+
+static int
+get_size_t(PyObject *v, size_t *p)
+{
+    size_t x;
+
+    v = get_pylong(v);  /* v now names the object returned by get_pylong() */
+    if (v == NULL)
+        return -1;
+    assert(PyLong_Check(v));
+    x = PyLong_AsSize_t(v);
+    Py_DECREF(v);  /* release the get_pylong() result once it has been converted */
+    if (x == (size_t)-1 && PyErr_Occurred()) {  /* (size_t)-1 counts as failure only when an exception is pending */
+        if (PyErr_ExceptionMatches(PyExc_OverflowError))
+            PyErr_SetString(StructError,
+                            "argument out of range");
+        return -1;  /* any non-overflow exception is left in place */
+    }
+    *p = x;
+    return 0;
+}
+
 
 #define RANGE_ERROR(x, f, flag, mask) return _range_error(f, flag)
 
@@ -369,6 +417,23 @@ nu_ulong(const char *p, const formatdef *f)
     return PyLong_FromUnsignedLong(x);
 }
 
+static PyObject *
+nu_ssize_t(const char *p, const formatdef *f)  /* unpacker registered for 'n' in native_table; f is unused */
+{
+    Py_ssize_t x;  /* NOTE(review): native_table sizes 'n' as sizeof(size_t); assumes it equals sizeof(Py_ssize_t) - confirm */
+    memcpy((char *)&x, p, sizeof x);  /* copy sizeof(Py_ssize_t) raw bytes out of the buffer at p */
+    return PyLong_FromSsize_t(x);  /* the PyLong_FromSsize_t() result is returned unchanged */
+}
+
+static PyObject *
+nu_size_t(const char *p, const formatdef *f)  /* unpacker registered for 'N' in native_table; f is unused */
+{
+    size_t x;
+    memcpy((char *)&x, p, sizeof x);  /* copy sizeof(size_t) raw bytes out of the buffer at p */
+    return PyLong_FromSize_t(x);  /* the PyLong_FromSize_t() result is returned unchanged */
+}
+
+
 /* Native mode doesn't support q or Q unless the platform C supports
    long long (or, on Windows, __int64). */
 
@@ -558,6 +623,26 @@ np_ulong(char *p, PyObject *v, const formatdef *f)
     return 0;
 }
 
+static int
+np_ssize_t(char *p, PyObject *v, const formatdef *f)  /* packer registered for 'n' in native_table; f is unused */
+{
+    Py_ssize_t x;
+    if (get_ssize_t(v, &x) < 0)  /* conversion failed: pass the -1 up without touching p */
+        return -1;
+    memcpy(p, (char *)&x, sizeof x);  /* write the value's sizeof(Py_ssize_t) raw bytes to p */
+    return 0;
+}
+
+static int
+np_size_t(char *p, PyObject *v, const formatdef *f)  /* packer registered for 'N' in native_table; f is unused */
+{
+    size_t x;
+    if (get_size_t(v, &x) < 0)  /* conversion failed: pass the -1 up without touching p */
+        return -1;
+    memcpy(p, (char *)&x, sizeof x);  /* write the value's sizeof(size_t) raw bytes to p */
+    return 0;
+}
+
 #ifdef HAVE_LONG_LONG
 
 static int
@@ -651,6 +736,8 @@ static formatdef native_table[] = {
     {'I',       sizeof(int),    INT_ALIGN,      nu_uint,        np_uint},
     {'l',       sizeof(long),   LONG_ALIGN,     nu_long,        np_long},
     {'L',       sizeof(long),   LONG_ALIGN,     nu_ulong,       np_ulong},
+    {'n',       sizeof(size_t), SIZE_T_ALIGN,   nu_ssize_t,     np_ssize_t},
+    {'N',       sizeof(size_t), SIZE_T_ALIGN,   nu_size_t,      np_size_t},
 #ifdef HAVE_LONG_LONG
     {'q',       sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_longlong, np_longlong},
     {'Q',       sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_ulonglong,np_ulonglong},
@@ -1951,7 +2038,8 @@ these can be preceded by a decimal repeat count:\n\
   l:long; L:unsigned long; f:float; d:double.\n\
 Special cases (preceding decimal count indicates length):\n\
   s:string (array of char); p: pascal string (with count byte).\n\
-Special case (only available in native format):\n\
+Special cases (only available in native format):\n\
+  n:ssize_t; N:size_t;\n\
   P:an integer type that is wide enough to hold a pointer.\n\
 Special case (not in native mode unless 'long long' in platform C):\n\
   q:long long; Q:unsigned long long\n\