Skip to content

Commit

Permalink
Issue python#13505: Make pickling of bytes object compatible with Python 2.
Browse files Browse the repository at this point in the history

Initial patch by sbt.
  • Loading branch information
avassalotti committed Dec 13, 2011
1 parent 7b7e39a commit 3bfc65a
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 56 deletions.
6 changes: 5 additions & 1 deletion Lib/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,11 @@ def save_float(self, obj, pack=struct.pack):

def save_bytes(self, obj, pack=struct.pack):
if self.proto < 3:
self.save_reduce(bytes, (list(obj),), obj=obj)
if len(obj) == 0:
self.save_reduce(bytes, (), obj=obj)
else:
self.save_reduce(codecs.encode,
(str(obj, 'latin1'), 'latin1'), obj=obj)
return
n = len(obj)
if n < 256:
Expand Down
70 changes: 31 additions & 39 deletions Lib/pickletools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2083,27 +2083,22 @@ def __init__(self, value):
29: ( MARK
30: d DICT (MARK at 29)
31: p PUT 2
34: c GLOBAL '__builtin__ bytes'
53: p PUT 3
56: ( MARK
57: ( MARK
58: l LIST (MARK at 57)
34: c GLOBAL '_codecs encode'
50: p PUT 3
53: ( MARK
54: V UNICODE 'abc'
59: p PUT 4
62: L LONG 97
67: a APPEND
68: L LONG 98
73: a APPEND
74: L LONG 99
79: a APPEND
80: t TUPLE (MARK at 56)
81: p PUT 5
84: R REDUCE
85: p PUT 6
88: V UNICODE 'def'
93: p PUT 7
96: s SETITEM
97: a APPEND
98: . STOP
62: V UNICODE 'latin1'
70: p PUT 5
73: t TUPLE (MARK at 53)
74: p PUT 6
77: R REDUCE
78: p PUT 7
81: V UNICODE 'def'
86: p PUT 8
89: s SETITEM
90: a APPEND
91: . STOP
highest protocol among opcodes = 0
Try again with a "binary" pickle.
Expand All @@ -2122,25 +2117,22 @@ def __init__(self, value):
14: q BINPUT 1
16: } EMPTY_DICT
17: q BINPUT 2
19: c GLOBAL '__builtin__ bytes'
38: q BINPUT 3
40: ( MARK
41: ] EMPTY_LIST
42: q BINPUT 4
44: ( MARK
45: K BININT1 97
47: K BININT1 98
49: K BININT1 99
51: e APPENDS (MARK at 44)
52: t TUPLE (MARK at 40)
53: q BINPUT 5
55: R REDUCE
56: q BINPUT 6
58: X BINUNICODE 'def'
66: q BINPUT 7
68: s SETITEM
69: e APPENDS (MARK at 3)
70: . STOP
19: c GLOBAL '_codecs encode'
35: q BINPUT 3
37: ( MARK
38: X BINUNICODE 'abc'
46: q BINPUT 4
48: X BINUNICODE 'latin1'
59: q BINPUT 5
61: t TUPLE (MARK at 37)
62: q BINPUT 6
64: R REDUCE
65: q BINPUT 7
67: X BINUNICODE 'def'
75: q BINPUT 8
77: s SETITEM
78: e APPENDS (MARK at 3)
79: . STOP
highest protocol among opcodes = 1
Exercise the INST/OBJ/BUILD family.
Expand Down
12 changes: 9 additions & 3 deletions Lib/test/pickletester.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,9 +636,15 @@ def test_unicode_high_plane(self):

def test_bytes(self):
    """Round-trip bytes objects through every pickle protocol.

    The samples cover the empty value, a short and a long value, and
    every possible byte value as both a one-byte and a two-byte string,
    so that byte values outside the ASCII range are checked as well.
    """
    samples = [b'', b'xyz', b'xyz'*100]
    samples.extend(bytes([i]) for i in range(256))
    samples.extend(bytes([i, i]) for i in range(256))
    for proto in protocols:
        for s in samples:
            # Pass the protocol explicitly: without it every iteration
            # of the outer loop pickles with the default protocol only.
            # NOTE(review): assumes self.dumps accepts the protocol as
            # its second argument, as pickle.dumps does -- confirm.
            p = self.dumps(s, proto)
            self.assertEqual(self.loads(p), s)

def test_ints(self):
import sys
Expand Down
3 changes: 3 additions & 0 deletions Misc/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ What's New in Python 3.2.3?
Core and Builtins
-----------------

- Issue #13505: Pickle bytes objects in a way that is compatible with
Python 2 when using protocols <= 2.

- Issue #11147: Fix an unused argument in _Py_ANNOTATE_MEMORY_ORDER. (Fix
given by Campbell Barton).

Expand Down
58 changes: 45 additions & 13 deletions Modules/_pickle.c
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ typedef struct UnpicklerObject {
char *errors; /* Name of errors handling scheme to use when
decoding strings. The default value is
"strict". */
Py_ssize_t *marks; /* Mark stack, used for unpickling container
Py_ssize_t *marks; /* Mark stack, used for unpickling container
objects. */
Py_ssize_t num_marks; /* Number of marks in the mark stack. */
Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Expand Down Expand Up @@ -1700,34 +1700,66 @@ save_bytes(PicklerObject *self, PyObject *obj)
if (self->proto < 3) {
/* Older pickle protocols do not have an opcode for pickling bytes
objects. Therefore, we need to fake the copy protocol (i.e.,
the __reduce__ method) to permit bytes object unpickling. */
the __reduce__ method) to permit bytes object unpickling.
Here we use a hack to be compatible with Python 2. Since in Python
2 'bytes' is just an alias for 'str' (which has different
parameters than the actual bytes object), we use codecs.encode
to create the appropriate 'str' object when unpickled using
Python 2 *and* the appropriate 'bytes' object when unpickled
using Python 3. Again this is a hack and we don't need to do this
with newer protocols. */
static PyObject *codecs_encode = NULL;
PyObject *reduce_value = NULL;
PyObject *bytelist = NULL;
int status;

bytelist = PySequence_List(obj);
if (bytelist == NULL)
return -1;
if (codecs_encode == NULL) {
PyObject *codecs_module = PyImport_ImportModule("codecs");
if (codecs_module == NULL) {
return -1;
}
codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
Py_DECREF(codecs_module);
if (codecs_encode == NULL) {
return -1;
}
}

reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
bytelist);
if (reduce_value == NULL) {
Py_DECREF(bytelist);
return -1;
if (PyBytes_GET_SIZE(obj) == 0) {
reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
}
else {
static PyObject *latin1 = NULL;
PyObject *unicode_str =
PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
PyBytes_GET_SIZE(obj),
"strict");
if (unicode_str == NULL)
return -1;
if (latin1 == NULL) {
latin1 = PyUnicode_InternFromString("latin1");
if (latin1 == NULL)
return -1;
}
reduce_value = Py_BuildValue("(O(OO))",
codecs_encode, unicode_str, latin1);
Py_DECREF(unicode_str);
}

if (reduce_value == NULL)
return -1;

/* save_reduce() will memoize the object automatically. */
status = save_reduce(self, reduce_value, obj);
Py_DECREF(reduce_value);
Py_DECREF(bytelist);
return status;
}
else {
Py_ssize_t size;
char header[5];
Py_ssize_t len;

size = PyBytes_Size(obj);
size = PyBytes_GET_SIZE(obj);
if (size < 0)
return -1;

Expand Down

0 comments on commit 3bfc65a

Please sign in to comment.