1 """Create portable serialized representations of Python objects.
3 See module cPickle for a (much) faster implementation.
4 See module copy_reg for a mechanism for registering custom picklers.
5 See module pickletools source for extensive comments.
15 dumps(object) -> string
17 loads(string) -> object
27 __version__
= "$Revision$" # Code version
30 from copy_reg
import dispatch_table
31 from copy_reg
import _extension_registry
, _inverted_registry
, _extension_cache
37 __all__
= ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
38 "Unpickler", "dump", "dumps", "load", "loads"]
40 # These are purely informational; no code uses these.
41 format_version
= "2.0" # File format version we write
42 compatible_formats
= ["1.0", # Original protocol 0
43 "1.1", # Protocol 0 with INST added
44 "1.2", # Original protocol 1
45 "1.3", # Protocol 1 with BINFLOAT added
47 ] # Old format versions we can read
49 # Keep in synch with cPickle. This is the highest protocol number we
53 # Why use struct.pack() for pickling but marshal.loads() for
54 # unpickling? struct.pack() is 40% faster than marshal.dumps(), but
55 # marshal.loads() is twice as fast as struct.unpack()!
56 mloads
= marshal
.loads
58 class PickleError(Exception):
59 """A common base class for the other pickling exceptions."""
62 class PicklingError(PickleError
):
63 """This exception is raised when an unpicklable object is passed to the
69 class UnpicklingError(PickleError
):
70 """This exception is raised when there is a problem unpickling an object,
71 such as a security violation.
73 Note that other exceptions may also be raised during unpickling, including
74 (but not necessarily limited to) AttributeError, EOFError, ImportError,
80 # An instance of _Stop is raised by Unpickler.load_stop() in response to
81 # the STOP opcode, passing the object that is the result of unpickling.
82 class _Stop(Exception):
83 def __init__(self
, value
):
86 # Jython has PyStringMap; it's a dict subclass with string keys
88 from org
.python
.core
import PyStringMap
92 # UnicodeType may or may not be exported (normally imported from types)
98 # Pickle opcodes. See pickletools.py for extensive docs. The listing
99 # here is in kind-of alphabetical order of 1-character pickle code.
100 # pickletools groups them by purpose.
102 MARK
= '(' # push special markobject on stack
103 STOP
= '.' # every pickle ends with STOP
104 POP
= '0' # discard topmost stack item
105 POP_MARK
= '1' # discard stack top through topmost markobject
106 DUP
= '2' # duplicate top stack item
107 FLOAT
= 'F' # push float object; decimal string argument
108 INT
= 'I' # push integer or bool; decimal string argument
109 BININT
= 'J' # push four-byte signed int
110 BININT1
= 'K' # push 1-byte unsigned int
111 LONG
= 'L' # push long; decimal string argument
112 BININT2
= 'M' # push 2-byte unsigned int
113 NONE
= 'N' # push None
114 PERSID
= 'P' # push persistent object; id is taken from string arg
115 BINPERSID
= 'Q' # " " " ; " " " " stack
116 REDUCE
= 'R' # apply callable to argtuple, both on stack
117 STRING
= 'S' # push string; NL-terminated string argument
118 BINSTRING
= 'T' # push string; counted binary string argument
119 SHORT_BINSTRING
= 'U' # " " ; " " " " < 256 bytes
120 UNICODE
= 'V' # push Unicode string; raw-unicode-escaped argument
121 BINUNICODE
= 'X' # " " " ; counted UTF-8 string argument
122 APPEND
= 'a' # append stack top to list below it
123 BUILD
= 'b' # call __setstate__ or __dict__.update()
124 GLOBAL
= 'c' # push self.find_class(modname, name); 2 string args
125 DICT
= 'd' # build a dict from stack items
126 EMPTY_DICT
= '}' # push empty dict
127 APPENDS
= 'e' # extend list on stack by topmost stack slice
128 GET
= 'g' # push item from memo on stack; index is string arg
129 BINGET
= 'h' # " " " " " " ; " " 1-byte arg
130 INST
= 'i' # build & push class instance
131 LONG_BINGET
= 'j' # push item from memo on stack; index is 4-byte arg
132 LIST
= 'l' # build list from topmost stack items
133 EMPTY_LIST
= ']' # push empty list
134 OBJ
= 'o' # build & push class instance
135 PUT
= 'p' # store stack top in memo; index is string arg
136 BINPUT
= 'q' # " " " " " ; " " 1-byte arg
137 LONG_BINPUT
= 'r' # " " " " " ; " " 4-byte arg
138 SETITEM
= 's' # add key+value pair to dict
139 TUPLE
= 't' # build tuple from topmost stack items
140 EMPTY_TUPLE
= ')' # push empty tuple
141 SETITEMS
= 'u' # modify dict by adding topmost key+value pairs
142 BINFLOAT
= 'G' # push float; arg is 8-byte float encoding
144 TRUE
= 'I01\n' # not an opcode; see INT docs in pickletools.py
145 FALSE
= 'I00\n' # not an opcode; see INT docs in pickletools.py
149 PROTO
= '\x80' # identify pickle protocol
150 NEWOBJ
= '\x81' # build object by applying cls.__new__ to argtuple
151 EXT1
= '\x82' # push object from extension registry; 1-byte index
152 EXT2
= '\x83' # ditto, but 2-byte index
153 EXT4
= '\x84' # ditto, but 4-byte index
154 TUPLE1
= '\x85' # build 1-tuple from stack top
155 TUPLE2
= '\x86' # build 2-tuple from two topmost stack items
156 TUPLE3
= '\x87' # build 3-tuple from three topmost stack items
157 NEWTRUE
= '\x88' # push True
158 NEWFALSE
= '\x89' # push False
159 LONG1
= '\x8a' # push long from < 256 bytes
160 LONG4
= '\x8b' # push really big long
162 _tuplesize2code
= [EMPTY_TUPLE
, TUPLE1
, TUPLE2
, TUPLE3
]
165 __all__
.extend([x
for x
in dir() if re
.match("[A-Z][A-Z0-9_]+$",x
)])
173 def __init__(self
, file, protocol
=None):
174 """This takes a file-like object for writing a pickle data stream.
176 The optional protocol argument tells the pickler to use the
177 given protocol; supported protocols are 0, 1, 2. The default
178 protocol is 0, to be backwards compatible. (Protocol 0 is the
179 only protocol that can be written to a file opened in text
180 mode and read back successfully. When using a protocol higher
181 than 0, make sure the file is opened in binary mode, both when
182 pickling and unpickling.)
184 Protocol 1 is more efficient than protocol 0; protocol 2 is
185 more efficient than protocol 1.
187 Specifying a negative protocol version selects the highest
188 protocol version supported. The higher the protocol used, the
189 more recent the version of Python needed to read the pickle
192 The file parameter must have a write() method that accepts a single
193 string argument. It can thus be an open file object, a StringIO
194 object, or any other custom object that meets this interface.
200 protocol
= HIGHEST_PROTOCOL
201 elif not 0 <= protocol
<= HIGHEST_PROTOCOL
:
202 raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL
)
203 self
.write
= file.write
205 self
.proto
= int(protocol
)
206 self
.bin
= protocol
>= 1
209 def clear_memo(self
):
210 """Clears the pickler's "memo".
212 The memo is the data structure that remembers which objects the
213 pickler has already seen, so that shared or recursive objects are
214 pickled by reference and not by value. This method is useful when
221 """Write a pickled representation of obj to the open file."""
223 self
.write(PROTO
+ chr(self
.proto
))
227 def memoize(self
, obj
):
228 """Store an object in the memo."""
230 # The Pickler memo is a dictionary mapping object ids to 2-tuples
231 # that contain the Unpickler memo key and the object being memoized.
232 # The memo key is written to the pickle and will become
233 # the key in the Unpickler's memo. The object is stored in the
234 # Pickler memo so that transient objects are kept alive during
237 # The use of the Unpickler memo length as the memo key is just a
238 # convention. The only requirement is that the memo values be unique.
239 # But there appears no advantage to any other scheme, and this
240 # scheme allows the Unpickler memo to be implemented as a plain (but
241 # growable) array, indexed by memo key.
244 assert id(obj
) not in self
.memo
245 memo_len
= len(self
.memo
)
246 self
.write(self
.put(memo_len
))
247 self
.memo
[id(obj
)] = memo_len
, obj
249 # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
250 def put(self
, i
, pack
=struct
.pack
):
253 return BINPUT
+ chr(i
)
255 return LONG_BINPUT
+ pack("<i", i
)
257 return PUT
+ repr(i
) + '\n'
259 # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
260 def get(self
, i
, pack
=struct
.pack
):
263 return BINGET
+ chr(i
)
265 return LONG_BINGET
+ pack("<i", i
)
267 return GET
+ repr(i
) + '\n'
270 # Check for persistent id (defined by a subclass)
271 pid
= self
.persistent_id(obj
)
277 x
= self
.memo
.get(id(obj
))
279 self
.write(self
.get(x
[0]))
282 # Check the type dispatch table
284 f
= self
.dispatch
.get(t
)
286 f(self
, obj
) # Call unbound method with explicit self
289 # Check for a class with a custom metaclass; treat as regular class
291 issc
= issubclass(t
, TypeType
)
292 except TypeError: # t is not a class (old Boost; see SF #502085)
295 self
.save_global(obj
)
298 # Check copy_reg.dispatch_table
299 reduce = dispatch_table
.get(t
)
303 # Check for a __reduce_ex__ method, fall back to __reduce__
304 reduce = getattr(obj
, "__reduce_ex__", None)
306 rv
= reduce(self
.proto
)
308 reduce = getattr(obj
, "__reduce__", None)
312 raise PicklingError("Can't pickle %r object: %r" %
315 # Check for string returned by reduce(), meaning "save as global"
316 if type(rv
) is StringType
:
317 self
.save_global(obj
, rv
)
320 # Assert that reduce() returned a tuple
321 if type(rv
) is not TupleType
:
322 raise PicklingError("%s must return string or tuple" % reduce)
324 # Assert that it returned an appropriately sized tuple
326 if not (2 <= l
<= 5):
327 raise PicklingError("Tuple returned by %s must have "
328 "two to five elements" % reduce)
330 # Save the reduce() output and finally memoize the object
331 self
.save_reduce(obj
=obj
, *rv
)
333 def persistent_id(self
, obj
):
334 # This exists so a subclass can override it
337 def save_pers(self
, pid
):
338 # Save a persistent id reference
341 self
.write(BINPERSID
)
343 self
.write(PERSID
+ str(pid
) + '\n')
345 def save_reduce(self
, func
, args
, state
=None,
346 listitems
=None, dictitems
=None, obj
=None):
347 # This API is called by some subclasses
349 # Assert that args is a tuple or None
350 if not isinstance(args
, TupleType
):
351 raise PicklingError("args from reduce() should be a tuple")
353 # Assert that func is callable
354 if not callable(func
):
355 raise PicklingError("func from reduce should be callable")
360 # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
361 if self
.proto
>= 2 and getattr(func
, "__name__", "") == "__newobj__":
362 # A __reduce__ implementation can direct protocol 2 to
363 # use the more efficient NEWOBJ opcode, while still
364 # allowing protocol 0 and 1 to work normally. For this to
365 # work, the function returned by __reduce__ should be
366 # called __newobj__, and its first argument should be a
367 # new-style class. The implementation for __newobj__
368 # should be as follows, although pickle has no way to
371 # def __newobj__(cls, *args):
372 # return cls.__new__(cls, *args)
374 # Protocols 0 and 1 will pickle a reference to __newobj__,
375 # while protocol 2 (and above) will pickle a reference to
376 # cls, the remaining args tuple, and the NEWOBJ code,
377 # which calls cls.__new__(cls, *args) at unpickling time
378 # (see load_newobj below). If __reduce__ returns a
379 # three-tuple, the state from the third tuple item will be
380 # pickled regardless of the protocol, calling __setstate__
381 # at unpickling time (see load_build below).
383 # Note that no standard __newobj__ implementation exists;
384 # you have to provide your own. This is to enforce
385 # compatibility with Python 2.2 (pickles written using
386 # protocol 0 or 1 in Python 2.3 should be unpicklable by
389 if not hasattr(cls
, "__new__"):
391 "args[0] from __newobj__ args has no __new__")
392 if obj
is not None and cls
is not obj
.__class
__:
394 "args[0] from __newobj__ args has the wrong class")
407 # More new special cases (that work with older protocols as
408 # well): when __reduce__ returns a tuple with 4 or 5 items,
409 # the 4th and 5th item should be iterators that provide list
410 # items and dict items (as (key, value) tuples), or None.
412 if listitems
is not None:
413 self
._batch
_appends
(listitems
)
415 if dictitems
is not None:
416 self
._batch
_setitems
(dictitems
)
418 if state
is not None:
422 # Methods below this point are dispatched through the dispatch table
426 def save_none(self
, obj
):
428 dispatch
[NoneType
] = save_none
430 def save_bool(self
, obj
):
432 self
.write(obj
and NEWTRUE
or NEWFALSE
)
434 self
.write(obj
and TRUE
or FALSE
)
435 dispatch
[bool] = save_bool
437 def save_int(self
, obj
, pack
=struct
.pack
):
439 # If the int is small enough to fit in a signed 4-byte 2's-comp
440 # format, we can store it more efficiently than the general
442 # First one- and two-byte unsigned ints:
445 self
.write(BININT1
+ chr(obj
))
448 self
.write("%c%c%c" % (BININT2
, obj
&0xff, obj
>>8))
450 # Next check for 4-byte signed ints:
451 high_bits
= obj
>> 31 # note that Python shift sign-extends
452 if high_bits
== 0 or high_bits
== -1:
453 # All high bits are copies of bit 2**31, so the value
454 # fits in a 4-byte signed int.
455 self
.write(BININT
+ pack("<i", obj
))
457 # Text pickle, or int too big to fit in signed 4-byte format.
458 self
.write(INT
+ repr(obj
) + '\n')
459 dispatch
[IntType
] = save_int
461 def save_long(self
, obj
, pack
=struct
.pack
):
463 bytes
= encode_long(obj
)
466 self
.write(LONG1
+ chr(n
) + bytes
)
468 self
.write(LONG4
+ pack("<i", n
) + bytes
)
470 self
.write(LONG
+ repr(obj
) + '\n')
471 dispatch
[LongType
] = save_long
473 def save_float(self
, obj
, pack
=struct
.pack
):
475 self
.write(BINFLOAT
+ pack('>d', obj
))
477 self
.write(FLOAT
+ repr(obj
) + '\n')
478 dispatch
[FloatType
] = save_float
480 def save_string(self
, obj
, pack
=struct
.pack
):
484 self
.write(SHORT_BINSTRING
+ chr(n
) + obj
)
486 self
.write(BINSTRING
+ pack("<i", n
) + obj
)
488 self
.write(STRING
+ repr(obj
) + '\n')
490 dispatch
[StringType
] = save_string
492 def save_unicode(self
, obj
, pack
=struct
.pack
):
494 encoding
= obj
.encode('utf-8')
496 self
.write(BINUNICODE
+ pack("<i", n
) + encoding
)
498 obj
= obj
.replace("\\", "\\u005c")
499 obj
= obj
.replace("\n", "\\u000a")
500 self
.write(UNICODE
+ obj
.encode('raw-unicode-escape') + '\n')
502 dispatch
[UnicodeType
] = save_unicode
504 if StringType
== UnicodeType
:
505 # This is true for Jython
506 def save_string(self
, obj
, pack
=struct
.pack
):
507 unicode = obj
.isunicode()
511 obj
= obj
.encode("utf-8")
513 if l
< 256 and not unicode:
514 self
.write(SHORT_BINSTRING
+ chr(l
) + obj
)
518 self
.write(BINUNICODE
+ s
+ obj
)
520 self
.write(BINSTRING
+ s
+ obj
)
523 obj
= obj
.replace("\\", "\\u005c")
524 obj
= obj
.replace("\n", "\\u000a")
525 obj
= obj
.encode('raw-unicode-escape')
526 self
.write(UNICODE
+ obj
+ '\n')
528 self
.write(STRING
+ repr(obj
) + '\n')
530 dispatch
[StringType
] = save_string
532 def save_tuple(self
, obj
):
546 if n
<= 3 and proto
>= 2:
549 # Subtle. Same as in the big comment below.
551 get
= self
.get(memo
[id(obj
)][0])
554 write(_tuplesize2code
[n
])
558 # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
559 # has more than 3 elements.
565 # Subtle. d was not in memo when we entered save_tuple(), so
566 # the process of saving the tuple's elements must have saved
567 # the tuple itself: the tuple is recursive. The proper action
568 # now is to throw away everything we put on the stack, and
569 # simply GET the tuple (it's already constructed). This check
570 # could have been done in the "for element" loop instead, but
571 # recursive tuples are a rare thing.
572 get
= self
.get(memo
[id(obj
)][0])
574 write(POP_MARK
+ get
)
575 else: # proto 0 -- POP_MARK not available
576 write(POP
* (n
+1) + get
)
583 dispatch
[TupleType
] = save_tuple
585 # save_empty_tuple() isn't used by anything in Python 2.3. However, I
586 # found a Pickler subclass in Zope3 that calls it, so it's not harmless
def save_empty_tuple(self, obj):
    """Write the EMPTY_TUPLE opcode; the obj argument is not inspected."""
    emit = self.write
    emit(EMPTY_TUPLE)
591 def save_list(self
, obj
):
596 else: # proto 0 -- can't use EMPTY_LIST
600 self
._batch
_appends
(iter(obj
))
602 dispatch
[ListType
] = save_list
604 # Keep in synch with cPickle's BATCHSIZE. Nothing will break if it gets
605 # out of synch, though.
608 def _batch_appends(self
, items
):
609 # Helper to batch up APPENDS sequences
619 r
= xrange(self
._BATCHSIZE
)
620 while items
is not None:
626 except StopIteration:
638 # else tmp is empty, and we're done
640 def save_dict(self
, obj
):
645 else: # proto 0 -- can't use EMPTY_DICT
649 self
._batch
_setitems
(obj
.iteritems())
651 dispatch
[DictionaryType
] = save_dict
652 if not PyStringMap
is None:
653 dispatch
[PyStringMap
] = save_dict
655 def _batch_setitems(self
, items
):
656 # Helper to batch up SETITEMS sequences; proto >= 1 only
667 r
= xrange(self
._BATCHSIZE
)
668 while items
is not None:
672 tmp
.append(items
.next())
673 except StopIteration:
688 # else tmp is empty, and we're done
690 def save_inst(self
, obj
):
697 if hasattr(obj
, '__getinitargs__'):
698 args
= obj
.__getinitargs
__()
699 len(args
) # XXX Assert it's a sequence
700 _keep_alive(args
, memo
)
714 write(INST
+ cls
.__module
__ + '\n' + cls
.__name
__ + '\n')
719 getstate
= obj
.__getstate
__
720 except AttributeError:
724 _keep_alive(stuff
, memo
)
728 dispatch
[InstanceType
] = save_inst
730 def save_global(self
, obj
, name
=None, pack
=struct
.pack
):
737 module
= getattr(obj
, "__module__", None)
739 module
= whichmodule(obj
, name
)
743 mod
= sys
.modules
[module
]
744 klass
= getattr(mod
, name
)
745 except (ImportError, KeyError, AttributeError):
747 "Can't pickle %r: it's not found as %s.%s" %
752 "Can't pickle %r: it's not the same object as %s.%s" %
756 code
= _extension_registry
.get((module
, name
))
760 write(EXT1
+ chr(code
))
762 write("%c%c%c" % (EXT2
, code
&0xff, code
>>8))
764 write(EXT4
+ pack("<i", code
))
767 write(GLOBAL
+ module
+ '\n' + name
+ '\n')
770 dispatch
[ClassType
] = save_global
771 dispatch
[FunctionType
] = save_global
772 dispatch
[BuiltinFunctionType
] = save_global
773 dispatch
[TypeType
] = save_global
777 def _keep_alive(x
, memo
):
778 """Keeps a reference to the object x in the memo.
780 Because we remember objects by their id, we have
781 to assure that possibly temporary objects are kept
782 alive by referencing them.
783 We store a reference at the id of the memo, which should
784 normally not be used unless someone tries to deepcopy
788 memo
[id(memo
)].append(x
)
790 # aha, this is the first one :-)
794 # A cache for whichmodule(), mapping a function object to the name of
795 # the module in which the function was found.
797 classmap
= {} # called classmap for backwards compatibility
799 def whichmodule(func
, funcname
):
800 """Figure out the module in which a function occurs.
802 Search sys.modules for the module.
804 Return a module name.
805 If the function cannot be found, return "__main__".
807 # Python functions should always get an __module__ from their globals.
808 mod
= getattr(func
, "__module__", None)
812 return classmap
[func
]
814 for name
, module
in sys
.modules
.items():
816 continue # skip dummy package entries
817 if name
!= '__main__' and getattr(module
, funcname
, None) is func
:
821 classmap
[func
] = name
825 # Unpickling machinery
829 def __init__(self
, file):
830 """This takes a file-like object for reading a pickle data stream.
832 The protocol version of the pickle is detected automatically, so no
833 proto argument is needed.
835 The file-like object must have two methods, a read() method that
836 takes an integer argument, and a readline() method that requires no
837 arguments. Both methods should return a string. Thus file-like
838 object can be a file object opened for reading, a StringIO object,
839 or any other custom object that meets this interface.
841 self
.readline
= file.readline
842 self
.read
= file.read
846 """Read a pickled object representation from the open file.
848 Return the reconstituted object hierarchy specified in the file.
850 self
.mark
= object() # any new unique object
852 self
.append
= self
.stack
.append
854 dispatch
= self
.dispatch
859 except _Stop
, stopinst
:
860 return stopinst
.value
862 # Return largest index k such that self.stack[k] is self.mark.
863 # If the stack doesn't contain a mark, eventually raises IndexError.
864 # This could be sped by maintaining another stack, of indices at which
865 # the mark appears. For that matter, the latter stack would suffice,
866 # and we wouldn't need to push mark objects on self.stack at all.
867 # Doing so is probably a good thing, though, since if the pickle is
868 # corrupt (or hostile) we may get a clue from finding self.mark embedded
869 # in unpickled objects.
874 while stack
[k
] is not mark
: k
= k
-1
881 dispatch
[''] = load_eof
def load_proto(self):
    """Handle the PROTO opcode: read and validate the protocol number.

    One byte is read from the stream and interpreted as the protocol
    number.  Raises ValueError if it is not in the range 0..2.
    """
    proto = ord(self.read(1))
    if not 0 <= proto <= 2:
        # Instantiate the exception explicitly rather than using the
        # legacy "raise Class, value" statement form.
        raise ValueError("unsupported pickle protocol: %d" % proto)
887 dispatch
[PROTO
] = load_proto
def load_persid(self):
    """Handle PERSID: resolve a persistent id given on its own line.

    The id is the next line of input with its last character (the
    line terminator) removed; the object returned by persistent_load()
    for that id is pushed.
    """
    line = self.readline()
    resolved = self.persistent_load(line[:-1])
    self.append(resolved)
892 dispatch
[PERSID
] = load_persid
def load_binpersid(self):
    """Handle BINPERSID: resolve a persistent id taken from the stack.

    The id is popped off the stack and replaced by whatever
    persistent_load() returns for it.
    """
    stack = self.stack
    self.append(self.persistent_load(stack.pop()))
897 dispatch
[BINPERSID
] = load_binpersid
901 dispatch
[NONE
] = load_none
903 def load_false(self
):
905 dispatch
[NEWFALSE
] = load_false
909 dispatch
[NEWTRUE
] = load_true
912 data
= self
.readline()
913 if data
== FALSE
[1:]:
915 elif data
== TRUE
[1:]:
923 dispatch
[INT
] = load_int
def load_binint(self):
    """Handle BININT: push an int decoded from a four-byte argument.

    The four bytes are prefixed with the 'i' type code and decoded
    with mloads.
    """
    payload = self.read(4)
    self.append(mloads('i' + payload))
927 dispatch
[BININT
] = load_binint
def load_binint1(self):
    """Handle BININT1: push the ordinal value of the next input byte."""
    byte = self.read(1)
    self.append(ord(byte))
931 dispatch
[BININT1
] = load_binint1
def load_binint2(self):
    """Handle BININT2: push an int decoded from a two-byte argument.

    The two bytes are padded with two zero bytes and prefixed with the
    'i' type code so that mloads can decode them.
    """
    payload = self.read(2)
    self.append(mloads('i' + payload + '\000\000'))
935 dispatch
[BININT2
] = load_binint2
938 self
.append(long(self
.readline()[:-1], 0))
939 dispatch
[LONG
] = load_long
941 def load_long1(self
):
942 n
= ord(self
.read(1))
944 self
.append(decode_long(bytes
))
945 dispatch
[LONG1
] = load_long1
947 def load_long4(self
):
948 n
= mloads('i' + self
.read(4))
950 self
.append(decode_long(bytes
))
951 dispatch
[LONG4
] = load_long4
def load_float(self):
    """Handle FLOAT: push float() of the next line, minus its last char."""
    text = self.readline()
    self.append(float(text[:-1]))
955 dispatch
[FLOAT
] = load_float
def load_binfloat(self, unpack=struct.unpack):
    """Handle BINFLOAT: push a float decoded from eight input bytes.

    The bytes are unpacked with the '>d' format; unpack defaults to
    struct.unpack and is bound as a default argument.
    """
    raw = self.read(8)
    fields = unpack('>d', raw)
    self.append(fields[0])
959 dispatch
[BINFLOAT
] = load_binfloat
961 def load_string(self
):
962 rep
= self
.readline()[:-1]
963 for q
in "\"'": # double or single quote
964 if rep
.startswith(q
):
965 if not rep
.endswith(q
):
966 raise ValueError, "insecure string pickle"
967 rep
= rep
[len(q
):-len(q
)]
970 raise ValueError, "insecure string pickle"
971 self
.append(rep
.decode("string-escape"))
972 dispatch
[STRING
] = load_string
def load_binstring(self):
    """Handle BINSTRING: push a string whose length is a 4-byte count.

    The count is decoded with mloads using the 'i' type prefix, so it
    can come out negative for corrupt or hostile input.

    Raises UnpicklingError if the decoded count is negative.
    """
    count = mloads('i' + self.read(4))
    if count < 0:
        # A negative size handed to a file-like read() usually means
        # "read everything that is left", which would silently swallow
        # the remainder of the stream instead of reporting bad data.
        raise UnpicklingError("BINSTRING pickle has negative byte count")
    self.append(self.read(count))
977 dispatch
[BINSTRING
] = load_binstring
def load_unicode(self):
    """Handle UNICODE: push the next line decoded as raw-unicode-escape.

    The last character of the line (the terminator) is dropped before
    decoding.
    """
    line = self.readline()
    text = unicode(line[:-1], 'raw-unicode-escape')
    self.append(text)
981 dispatch
[UNICODE
] = load_unicode
def load_binunicode(self):
    """Handle BINUNICODE: push a UTF-8 string with a 4-byte byte count.

    The count is decoded with mloads using the 'i' type prefix, so it
    can come out negative for corrupt or hostile input.

    Raises UnpicklingError if the decoded count is negative.
    """
    count = mloads('i' + self.read(4))
    if count < 0:
        # A negative size handed to a file-like read() usually means
        # "read everything that is left"; reject it rather than decode
        # the rest of the stream as one string.
        raise UnpicklingError("BINUNICODE pickle has negative byte count")
    self.append(unicode(self.read(count), 'utf-8'))
986 dispatch
[BINUNICODE
] = load_binunicode
def load_short_binstring(self):
    """Handle SHORT_BINSTRING: push a string with a one-byte length.

    The first byte read gives the length; that many further bytes are
    read and pushed.
    """
    count = ord(self.read(1))
    payload = self.read(count)
    self.append(payload)
991 dispatch
[SHORT_BINSTRING
] = load_short_binstring
993 def load_tuple(self
):
995 self
.stack
[k
:] = [tuple(self
.stack
[k
+1:])]
996 dispatch
[TUPLE
] = load_tuple
def load_empty_tuple(self):
    """Handle EMPTY_TUPLE: push an empty tuple onto the stack."""
    stack = self.stack
    stack.append(())
1000 dispatch
[EMPTY_TUPLE
] = load_empty_tuple
def load_tuple1(self):
    """Handle TUPLE1: replace the stack top with a 1-tuple holding it."""
    stack = self.stack
    top = stack[-1]
    stack[-1] = (top,)
1004 dispatch
[TUPLE1
] = load_tuple1
def load_tuple2(self):
    """Handle TUPLE2: replace the two topmost stack items by a 2-tuple."""
    stack = self.stack
    # Read both items before touching the stack so that a short stack
    # fails without modifying it.
    pair = (stack[-2], stack[-1])
    stack[-2:] = [pair]
1008 dispatch
[TUPLE2
] = load_tuple2
def load_tuple3(self):
    """Handle TUPLE3: replace the three topmost stack items by a 3-tuple."""
    stack = self.stack
    # Read all three items before touching the stack so that a short
    # stack fails without modifying it.
    triple = (stack[-3], stack[-2], stack[-1])
    stack[-3:] = [triple]
1012 dispatch
[TUPLE3
] = load_tuple3
def load_empty_list(self):
    """Handle EMPTY_LIST: push a new empty list onto the stack."""
    stack = self.stack
    stack.append([])
1016 dispatch
[EMPTY_LIST
] = load_empty_list
def load_empty_dictionary(self):
    """Handle EMPTY_DICT: push a new empty dict onto the stack."""
    stack = self.stack
    stack.append({})
1020 dispatch
[EMPTY_DICT
] = load_empty_dictionary
1022 def load_list(self
):
1024 self
.stack
[k
:] = [self
.stack
[k
+1:]]
1025 dispatch
[LIST
] = load_list
1027 def load_dict(self
):
1030 items
= self
.stack
[k
+1:]
1031 for i
in range(0, len(items
), 2):
1035 self
.stack
[k
:] = [d
]
1036 dispatch
[DICT
] = load_dict
1038 # INST and OBJ differ only in how they get a class object. It's not
1039 # only sensible to do the rest in a common routine, the two routines
1040 # previously diverged and grew different bugs.
1041 # klass is the class to instantiate, and k points to the topmost mark
1042 # object, following which are the arguments for klass.__init__.
1043 def _instantiate(self
, klass
, k
):
1044 args
= tuple(self
.stack
[k
+1:])
1048 type(klass
) is ClassType
and
1049 not hasattr(klass
, "__getinitargs__")):
1051 value
= _EmptyClass()
1052 value
.__class
__ = klass
1054 except RuntimeError:
1055 # In restricted execution, assignment to inst.__class__ is
1058 if not instantiated
:
1060 value
= klass(*args
)
1061 except TypeError, err
:
1062 raise TypeError, "in constructor for %s: %s" % (
1063 klass
.__name
__, str(err
)), sys
.exc_info()[2]
def load_inst(self):
    """Handle INST: look up a class by name and instantiate it.

    The module name and the class name are the next two input lines,
    each with its last character removed.  The class returned by
    find_class() is handed to _instantiate() together with the index
    returned by marker().
    """
    next_line = self.readline
    module = next_line()[:-1]
    name = next_line()[:-1]
    klass = self.find_class(module, name)
    mark_index = self.marker()
    self._instantiate(klass, mark_index)
1071 dispatch
[INST
] = load_inst
1074 # Stack is ... markobject classobject arg1 arg2 ...
1076 klass
= self
.stack
.pop(k
+1)
1077 self
._instantiate
(klass
, k
)
1078 dispatch
[OBJ
] = load_obj
def load_newobj(self):
    """Handle NEWOBJ: build an object via cls.__new__(cls, *args).

    The argument tuple is popped from the stack; the class beneath it
    is then replaced in place by the newly created object.
    """
    stack = self.stack
    ctor_args = stack.pop()
    klass = stack[-1]
    stack[-1] = klass.__new__(klass, *ctor_args)
1085 dispatch
[NEWOBJ
] = load_newobj
1087 def load_global(self
):
1088 module
= self
.readline()[:-1]
1089 name
= self
.readline()[:-1]
1090 klass
= self
.find_class(module
, name
)
1092 dispatch
[GLOBAL
] = load_global
def load_ext1(self):
    """Handle EXT1: pass a one-byte extension code to get_extension()."""
    self.get_extension(ord(self.read(1)))
1097 dispatch
[EXT1
] = load_ext1
def load_ext2(self):
    """Handle EXT2: pass a two-byte extension code to get_extension().

    The two bytes are padded with two zero bytes and prefixed with the
    'i' type code so that mloads can decode them.
    """
    payload = self.read(2)
    self.get_extension(mloads('i' + payload + '\000\000'))
1102 dispatch
[EXT2
] = load_ext2
def load_ext4(self):
    """Handle EXT4: pass a four-byte extension code to get_extension().

    The four bytes are prefixed with the 'i' type code and decoded
    with mloads.
    """
    payload = self.read(4)
    self.get_extension(mloads('i' + payload))
1107 dispatch
[EXT4
] = load_ext4
1109 def get_extension(self
, code
):
1111 obj
= _extension_cache
.get(code
, nil
)
1115 key
= _inverted_registry
.get(code
)
1117 raise ValueError("unregistered extension code %d" % code
)
1118 obj
= self
.find_class(*key
)
1119 _extension_cache
[code
] = obj
1122 def find_class(self
, module
, name
):
1123 # Subclasses may override this
1125 mod
= sys
.modules
[module
]
1126 klass
= getattr(mod
, name
)
1129 def load_reduce(self
):
1135 dispatch
[REDUCE
] = load_reduce
1139 dispatch
[POP
] = load_pop
1141 def load_pop_mark(self
):
1144 dispatch
[POP_MARK
] = load_pop_mark
1147 self
.append(self
.stack
[-1])
1148 dispatch
[DUP
] = load_dup
1151 self
.append(self
.memo
[self
.readline()[:-1]])
1152 dispatch
[GET
] = load_get
def load_binget(self):
    """Handle BINGET: push the memo entry named by a one-byte index.

    Memo keys are the repr() of the integer index.
    """
    key = repr(ord(self.read(1)))
    self.append(self.memo[key])
1157 dispatch
[BINGET
] = load_binget
def load_long_binget(self):
    """Handle LONG_BINGET: push the memo entry named by a 4-byte index.

    The index is decoded with mloads using the 'i' type prefix; memo
    keys are the repr() of the integer index.
    """
    index = mloads('i' + self.read(4))
    key = repr(index)
    self.append(self.memo[key])
1162 dispatch
[LONG_BINGET
] = load_long_binget
1165 self
.memo
[self
.readline()[:-1]] = self
.stack
[-1]
1166 dispatch
[PUT
] = load_put
def load_binput(self):
    """Handle BINPUT: store the stack top in the memo, one-byte index.

    Memo keys are the repr() of the integer index; the stack itself
    is left unchanged.
    """
    key = repr(ord(self.read(1)))
    top = self.stack[-1]
    self.memo[key] = top
1171 dispatch
[BINPUT
] = load_binput
def load_long_binput(self):
    """Handle LONG_BINPUT: store the stack top in the memo, 4-byte index.

    The index is decoded with mloads using the 'i' type prefix; memo
    keys are the repr() of the integer index.
    """
    index = mloads('i' + self.read(4))
    top = self.stack[-1]
    self.memo[repr(index)] = top
1176 dispatch
[LONG_BINPUT
] = load_long_binput
1178 def load_append(self
):
1183 dispatch
[APPEND
] = load_append
1185 def load_appends(self
):
1187 mark
= self
.marker()
1188 list = stack
[mark
- 1]
1189 list.extend(stack
[mark
+ 1:])
1191 dispatch
[APPENDS
] = load_appends
1193 def load_setitem(self
):
1199 dispatch
[SETITEM
] = load_setitem
1201 def load_setitems(self
):
1203 mark
= self
.marker()
1204 dict = stack
[mark
- 1]
1205 for i
in range(mark
+ 1, len(stack
), 2):
1206 dict[stack
[i
]] = stack
[i
+ 1]
1209 dispatch
[SETITEMS
] = load_setitems
1211 def load_build(self
):
1215 setstate
= getattr(inst
, "__setstate__", None)
1220 if isinstance(state
, tuple) and len(state
) == 2:
1221 state
, slotstate
= state
1224 inst
.__dict
__.update(state
)
1225 except RuntimeError:
1226 # XXX In restricted execution, the instance's __dict__
1227 # is not accessible. Use the old way of unpickling
1228 # the instance variables. This is a semantic
1229 # difference when unpickling in restricted
1230 # vs. unrestricted modes.
1231 # Note, however, that cPickle has never tried to do the
1232 # .update() business, and always uses
1233 # PyObject_SetItem(inst.__dict__, key, value) in a
1234 # loop over state.items().
1235 for k
, v
in state
.items():
1238 for k
, v
in slotstate
.items():
1240 dispatch
[BUILD
] = load_build
def load_mark(self):
    """Handle MARK: push this unpickler's mark object."""
    sentinel = self.mark
    self.append(sentinel)
1244 dispatch
[MARK
] = load_mark
1246 def load_stop(self
):
1247 value
= self
.stack
.pop()
1249 dispatch
[STOP
] = load_stop
1251 # Helper class for load_inst/load_obj
1256 # Encode/decode longs in linear time.
1258 import binascii
as _binascii
1261 r
"""Encode a long to a two's complement little-endian binary string.
1262 Note that 0L is a special case, returning an empty string, to save a
1263 byte in the LONG1 pickling context.
1267 >>> encode_long(255L)
1269 >>> encode_long(32767L)
1271 >>> encode_long(-256L)
1273 >>> encode_long(-32768L)
1275 >>> encode_long(-128L)
1277 >>> encode_long(127L)
1286 assert ashex
.startswith("0x")
1287 njunkchars
= 2 + ashex
.endswith('L')
1288 nibbles
= len(ashex
) - njunkchars
1290 # need an even # of nibbles for unhexlify
1291 ashex
= "0x0" + ashex
[2:]
1292 elif int(ashex
[2], 16) >= 8:
1293 # "looks negative", so need a byte of sign bits
1294 ashex
= "0x00" + ashex
[2:]
1296 # Build the 256's-complement: (1L << nbytes) + x. The trick is
1297 # to find the number of bytes in linear time (although that should
1298 # really be a constant-time task).
1300 assert ashex
.startswith("0x")
1301 njunkchars
= 2 + ashex
.endswith('L')
1302 nibbles
= len(ashex
) - njunkchars
1304 # Extend to a full byte.
1310 njunkchars
= 2 + ashex
.endswith('L')
1311 newnibbles
= len(ashex
) - njunkchars
1312 if newnibbles
< nibbles
:
1313 ashex
= "0x" + "0" * (nibbles
- newnibbles
) + ashex
[2:]
1314 if int(ashex
[2], 16) < 8:
1315 # "looks positive", so need a byte of sign bits
1316 ashex
= "0xff" + ashex
[2:]
1318 if ashex
.endswith('L'):
1322 assert len(ashex
) & 1 == 0, (x
, ashex
)
1323 binary
= _binascii
.unhexlify(ashex
)
1326 def decode_long(data
):
1327 r
"""Decode a long from a two's complement little-endian binary string.
1331 >>> decode_long("\xff\x00")
1333 >>> decode_long("\xff\x7f")
1335 >>> decode_long("\x00\xff")
1337 >>> decode_long("\x00\x80")
1339 >>> decode_long("\x80")
1341 >>> decode_long("\x7f")
1348 ashex
= _binascii
.hexlify(data
[::-1])
1349 n
= long(ashex
, 16) # quadratic time before Python 2.3; linear now
1350 if data
[-1] >= '\x80':
1351 n
-= 1L << (nbytes
* 8)
1357 from cStringIO
import StringIO
1359 from StringIO
import StringIO
def dump(obj, file, protocol=None):
    """Pickle obj to file with a Pickler created for the given protocol."""
    pickler = Pickler(file, protocol)
    pickler.dump(obj)
1364 def dumps(obj
, protocol
=None):
1366 Pickler(file, protocol
).dump(obj
)
1367 return file.getvalue()
1370 return Unpickler(file).load()
1373 file = StringIO(str)
1374 return Unpickler(file).load()
1380 return doctest
.testmod()
1382 if __name__
== "__main__":