# Issue #5768: Change to Unicode output logic and test case for same.
# [python.git] / Lib / pickle.py
# blob abed1ca4fab0ab4497212fbd7e199a1e79db9cfb
"""Create portable serialized representations of Python objects.

See module cPickle for a (much) faster implementation.
See module copy_reg for a mechanism for registering custom picklers.
See module pickletools source for extensive comments.

Classes:

    Pickler
    Unpickler

Functions:

    dump(object, file)
    dumps(object) -> string
    load(file) -> object
    loads(string) -> object

Misc variables:

    __version__
    format_version
    compatible_formats

"""
27 __version__ = "$Revision$" # Code version
29 from types import *
30 from copy_reg import dispatch_table
31 from copy_reg import _extension_registry, _inverted_registry, _extension_cache
32 import marshal
33 import sys
34 import struct
35 import re
37 __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
38 "Unpickler", "dump", "dumps", "load", "loads"]
40 # These are purely informational; no code uses these.
41 format_version = "2.0" # File format version we write
42 compatible_formats = ["1.0", # Original protocol 0
43 "1.1", # Protocol 0 with INST added
44 "1.2", # Original protocol 1
45 "1.3", # Protocol 1 with BINFLOAT added
46 "2.0", # Protocol 2
47 ] # Old format versions we can read
49 # Keep in synch with cPickle. This is the highest protocol number we
50 # know how to read.
51 HIGHEST_PROTOCOL = 2
53 # Why use struct.pack() for pickling but marshal.loads() for
54 # unpickling? struct.pack() is 40% faster than marshal.dumps(), but
55 # marshal.loads() is twice as fast as struct.unpack()!
56 mloads = marshal.loads
class PickleError(Exception):
    """Common ancestor of the other pickling exceptions."""
class PicklingError(PickleError):
    """Raised when an unpicklable object is passed to the dump() method."""
class UnpicklingError(PickleError):
    """Raised when there is a problem unpickling an object.

    A security violation is one example of such a problem.

    Unpickling may also raise other exceptions, including (but not
    necessarily limited to) AttributeError, EOFError, ImportError,
    and IndexError.
    """
80 # An instance of _Stop is raised by Unpickler.load_stop() in response to
81 # the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    """Control-flow exception that carries the finished unpickling result."""

    def __init__(self, value):
        # The unpickled object travels on the exception instance.
        self.value = value
86 # Jython has PyStringMap; it's a dict subclass with string keys
try:
    # PyStringMap is a Jython-provided dict subclass with string keys.
    from org.python.core import PyStringMap
except ImportError:
    # Not available on this interpreter; None marks it as absent.
    PyStringMap = None
92 # UnicodeType may or may not be exported (normally imported from types)
try:
    # Bare name lookup: raises NameError if the star-import from
    # types did not provide UnicodeType.
    UnicodeType
except NameError:
    # No unicode type available; None marks it as absent.
    UnicodeType = None
98 # Pickle opcodes. See pickletools.py for extensive docs. The listing
99 # here is in kind-of alphabetical order of 1-character pickle code.
100 # pickletools groups them by purpose.
102 MARK = '(' # push special markobject on stack
103 STOP = '.' # every pickle ends with STOP
104 POP = '0' # discard topmost stack item
105 POP_MARK = '1' # discard stack top through topmost markobject
106 DUP = '2' # duplicate top stack item
107 FLOAT = 'F' # push float object; decimal string argument
108 INT = 'I' # push integer or bool; decimal string argument
109 BININT = 'J' # push four-byte signed int
110 BININT1 = 'K' # push 1-byte unsigned int
111 LONG = 'L' # push long; decimal string argument
112 BININT2 = 'M' # push 2-byte unsigned int
113 NONE = 'N' # push None
114 PERSID = 'P' # push persistent object; id is taken from string arg
115 BINPERSID = 'Q' # " " " ; " " " " stack
116 REDUCE = 'R' # apply callable to argtuple, both on stack
117 STRING = 'S' # push string; NL-terminated string argument
118 BINSTRING = 'T' # push string; counted binary string argument
119 SHORT_BINSTRING = 'U' # " " ; " " " " < 256 bytes
120 UNICODE = 'V' # push Unicode string; raw-unicode-escaped'd argument
121 BINUNICODE = 'X' # " " " ; counted UTF-8 string argument
122 APPEND = 'a' # append stack top to list below it
123 BUILD = 'b' # call __setstate__ or __dict__.update()
124 GLOBAL = 'c' # push self.find_class(modname, name); 2 string args
125 DICT = 'd' # build a dict from stack items
126 EMPTY_DICT = '}' # push empty dict
127 APPENDS = 'e' # extend list on stack by topmost stack slice
128 GET = 'g' # push item from memo on stack; index is string arg
129 BINGET = 'h' # " " " " " " ; " " 1-byte arg
130 INST = 'i' # build & push class instance
131 LONG_BINGET = 'j' # push item from memo on stack; index is 4-byte arg
132 LIST = 'l' # build list from topmost stack items
133 EMPTY_LIST = ']' # push empty list
134 OBJ = 'o' # build & push class instance
135 PUT = 'p' # store stack top in memo; index is string arg
136 BINPUT = 'q' # " " " " " ; " " 1-byte arg
137 LONG_BINPUT = 'r' # " " " " " ; " " 4-byte arg
138 SETITEM = 's' # add key+value pair to dict
139 TUPLE = 't' # build tuple from topmost stack items
140 EMPTY_TUPLE = ')' # push empty tuple
141 SETITEMS = 'u' # modify dict by adding topmost key+value pairs
142 BINFLOAT = 'G' # push float; arg is 8-byte float encoding
144 TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py
145 FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py
147 # Protocol 2
149 PROTO = '\x80' # identify pickle protocol
150 NEWOBJ = '\x81' # build object by applying cls.__new__ to argtuple
151 EXT1 = '\x82' # push object from extension registry; 1-byte index
152 EXT2 = '\x83' # ditto, but 2-byte index
153 EXT4 = '\x84' # ditto, but 4-byte index
154 TUPLE1 = '\x85' # build 1-tuple from stack top
155 TUPLE2 = '\x86' # build 2-tuple from two topmost stack items
156 TUPLE3 = '\x87' # build 3-tuple from three topmost stack items
157 NEWTRUE = '\x88' # push True
158 NEWFALSE = '\x89' # push False
159 LONG1 = '\x8a' # push long from < 256 bytes
160 LONG4 = '\x8b' # push really big long
162 _tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
165 __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
166 del x
169 # Pickling machinery
171 class Pickler:
def __init__(self, file, protocol=None):
    """Prepare to write a pickle data stream to the file-like *file*.

    The optional protocol argument tells the pickler to use the
    given protocol; supported protocols are 0, 1, 2.  The default
    protocol is 0, to be backwards compatible.  (Protocol 0 is the
    only protocol that can be written to a file opened in text
    mode and read back successfully.  When using a protocol higher
    than 0, make sure the file is opened in binary mode, both when
    pickling and unpickling.)

    Protocol 1 is more efficient than protocol 0; protocol 2 is
    more efficient than protocol 1.

    Specifying a negative protocol version selects the highest
    protocol version supported.  The higher the protocol used, the
    more recent the version of Python needed to read the pickle
    produced.

    The file parameter must have a write() method that accepts a single
    string argument.  It can thus be an open file object, a StringIO
    object, or any other custom object that meets this interface.

    Raises ValueError when protocol is above HIGHEST_PROTOCOL.
    """
    if protocol is None:
        # Unspecified: fall back to the backwards-compatible default.
        protocol = 0
    elif protocol < 0:
        # Any negative value selects the newest protocol.
        protocol = HIGHEST_PROTOCOL
    elif not 0 <= protocol <= HIGHEST_PROTOCOL:
        raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)

    self.write = file.write
    self.memo = {}
    self.proto = int(protocol)
    # Protocols 1 and up use the binary opcodes.
    self.bin = protocol >= 1
    self.fast = 0
def clear_memo(self):
    """Empty the pickler's "memo" in place.

    The memo remembers which objects the pickler has already seen, so
    that shared or recursive objects are pickled by reference and not
    by value.  Clearing it is useful when re-using picklers.
    """
    memo = self.memo
    memo.clear()
def dump(self, obj):
    """Write a pickled representation of obj to the open file."""
    proto = self.proto
    if proto >= 2:
        # Protocol 2 streams start with a PROTO opcode and version byte.
        self.write(PROTO + chr(proto))
    self.save(obj)
    self.write(STOP)
def memoize(self, obj):
    """Store an object in the memo and emit the matching PUT opcode.

    The Pickler memo maps id(obj) to a 2-tuple (memo key, obj).  The
    memo key is written to the pickle and becomes the key in the
    Unpickler's memo; the object itself is stored so that transient
    objects stay alive during pickling.

    Using the current memo length as the key is only a convention: the
    keys merely have to be unique.  This scheme lets the Unpickler memo
    be a plain (growable) array indexed by memo key.

    Does nothing when self.fast is set.
    """
    if not self.fast:
        memo = self.memo
        ident = id(obj)
        assert ident not in memo
        index = len(memo)
        self.write(self.put(index))
        memo[ident] = (index, obj)
249 # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
def put(self, i, pack=struct.pack):
    """Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i."""
    if not self.bin:
        # Text protocol: decimal index terminated by a newline.
        return PUT + repr(i) + '\n'
    if i < 256:
        return BINPUT + chr(i)
    return LONG_BINPUT + pack("<i", i)
259 # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
def get(self, i, pack=struct.pack):
    """Return a GET (BINGET, LONG_BINGET) opcode string, with argument i."""
    if not self.bin:
        # Text protocol: decimal index terminated by a newline.
        return GET + repr(i) + '\n'
    if i < 256:
        return BINGET + chr(i)
    return LONG_BINGET + pack("<i", i)
def save(self, obj):
    """Pickle *obj* using the first strategy that applies.

    The checks run in this order: persistent id, memo hit, the type
    dispatch table, classes with a custom metaclass, copy_reg's
    dispatch_table, and finally the object's own __reduce_ex__ or
    __reduce__.

    Raises PicklingError when no strategy applies, or when a reduce
    callable returns something other than a string or a tuple of two
    to five elements.
    """
    # Check for persistent id (defined by a subclass).  Only None means
    # "no persistent id": falsy ids such as 0 or "" are valid and must
    # not fall through to ordinary pickling.
    pid = self.persistent_id(obj)
    if pid is not None:
        self.save_pers(pid)
        return

    # Check the memo
    x = self.memo.get(id(obj))
    if x:
        self.write(self.get(x[0]))
        return

    # Check the type dispatch table
    t = type(obj)
    f = self.dispatch.get(t)
    if f:
        f(self, obj) # Call unbound method with explicit self
        return

    # Check for a class with a custom metaclass; treat as regular class
    try:
        issc = issubclass(t, TypeType)
    except TypeError: # t is not a class (old Boost; see SF #502085)
        issc = 0
    if issc:
        self.save_global(obj)
        return

    # Check copy_reg.dispatch_table
    reduce = dispatch_table.get(t)
    if reduce:
        rv = reduce(obj)
    else:
        # Check for a __reduce_ex__ method, fall back to __reduce__
        reduce = getattr(obj, "__reduce_ex__", None)
        if reduce:
            rv = reduce(self.proto)
        else:
            reduce = getattr(obj, "__reduce__", None)
            if reduce:
                rv = reduce()
            else:
                raise PicklingError("Can't pickle %r object: %r" %
                                    (t.__name__, obj))

    # Check for string returned by reduce(), meaning "save as global"
    if type(rv) is StringType:
        self.save_global(obj, rv)
        return

    # Assert that reduce() returned a tuple
    if type(rv) is not TupleType:
        raise PicklingError("%s must return string or tuple" % reduce)

    # Assert that it returned an appropriately sized tuple
    l = len(rv)
    if not (2 <= l <= 5):
        raise PicklingError("Tuple returned by %s must have "
                            "two to five elements" % reduce)

    # Save the reduce() output and finally memoize the object
    self.save_reduce(obj=obj, *rv)
def persistent_id(self, obj):
    """Hook for subclasses to override; this default always returns None."""
    return None
def save_pers(self, pid):
    """Write a persistent id reference for *pid*."""
    if not self.bin:
        # Text protocol: the id is written inline as str(pid) plus newline.
        self.write(PERSID + str(pid) + '\n')
        return
    # Binary protocols: pickle the id itself, then emit BINPERSID.
    self.save(pid)
    self.write(BINPERSID)
def save_reduce(self, func, args, state=None,
                listitems=None, dictitems=None, obj=None):
    """Pickle the pieces of a reduce() result.  Also called by subclasses.

    func must be callable and args must be a tuple; otherwise
    PicklingError is raised.  When obj is given it is memoized after
    the construction opcodes are written.  listitems and dictitems,
    when not None, are handed to the batch helpers; state, when not
    None, is pickled and followed by BUILD.
    """
    # args has to be a tuple (None is rejected by this check too)
    if not isinstance(args, TupleType):
        raise PicklingError("args from reduce() should be a tuple")

    # func has to be callable
    if not hasattr(func, '__call__'):
        raise PicklingError("func from reduce should be callable")

    pickle_one = self.save
    emit = self.write

    # Protocol 2 special case: a function named __newobj__ selects NEWOBJ.
    #
    # A __reduce__ implementation can direct protocol 2 to use the more
    # efficient NEWOBJ opcode, while still allowing protocols 0 and 1 to
    # work normally.  For this to work, the function returned by
    # __reduce__ should be called __newobj__, and its first argument
    # should be a new-style class.  The implementation for __newobj__
    # should be as follows, although pickle has no way to verify this:
    #
    #     def __newobj__(cls, *args):
    #         return cls.__new__(cls, *args)
    #
    # Protocols 0 and 1 pickle a reference to __newobj__, while protocol
    # 2 (and above) pickles a reference to cls, the remaining args
    # tuple, and the NEWOBJ code, which calls cls.__new__(cls, *args) at
    # unpickling time (see load_newobj).  If __reduce__ returns a
    # three-tuple, the state from the third item is pickled regardless
    # of the protocol, calling __setstate__ at unpickling time (see
    # load_build).
    #
    # No standard __newobj__ implementation exists; you have to provide
    # your own.  This is to enforce compatibility with Python 2.2
    # (pickles written using protocol 0 or 1 in Python 2.3 should be
    # unpicklable by Python 2.2).
    use_newobj = (self.proto >= 2 and
                  getattr(func, "__name__", "") == "__newobj__")
    if not use_newobj:
        pickle_one(func)
        pickle_one(args)
        emit(REDUCE)
    else:
        cls = args[0]
        if not hasattr(cls, "__new__"):
            raise PicklingError(
                "args[0] from __newobj__ args has no __new__")
        if obj is not None and cls is not obj.__class__:
            raise PicklingError(
                "args[0] from __newobj__ args has the wrong class")
        pickle_one(cls)
        pickle_one(args[1:])
        emit(NEWOBJ)

    if obj is not None:
        self.memoize(obj)

    # More special cases (that work with older protocols as well): when
    # __reduce__ returns a tuple with 4 or 5 items, the 4th and 5th item
    # should be iterators that provide list items and dict items (as
    # (key, value) tuples), or None.
    if listitems is not None:
        self._batch_appends(listitems)

    if dictitems is not None:
        self._batch_setitems(dictitems)

    if state is not None:
        pickle_one(state)
        emit(BUILD)
422 # Methods below this point are dispatched through the dispatch table
424 dispatch = {}
def save_none(self, obj):
    """Write the NONE opcode; *obj* itself is not inspected."""
    emit = self.write
    emit(NONE)
428 dispatch[NoneType] = save_none
def save_bool(self, obj):
    """Write a bool: NEWTRUE/NEWFALSE for protocol 2+, else TRUE/FALSE."""
    if self.proto >= 2:
        if obj:
            code = NEWTRUE
        else:
            code = NEWFALSE
    else:
        if obj:
            code = TRUE
        else:
            code = FALSE
    self.write(code)
435 dispatch[bool] = save_bool
def save_int(self, obj, pack=struct.pack):
    """Write an int using the most compact opcode available.

    In binary mode, values that fit get BININT1 (one unsigned byte),
    BININT2 (two unsigned bytes) or BININT (four-byte signed).  Text
    mode, and ints too big for four signed bytes, use the decimal INT
    form.
    """
    emit = self.write
    if self.bin:
        # First one- and two-byte unsigned ints:
        if obj >= 0:
            if obj <= 0xff:
                emit(BININT1 + chr(obj))
                return
            if obj <= 0xffff:
                emit("%c%c%c" % (BININT2, obj&0xff, obj>>8))
                return
        # Next check for 4-byte signed ints.  Python's shift
        # sign-extends, so the value fits exactly when every bit above
        # bit 30 is a copy of the sign bit.
        high_bits = obj >> 31
        if high_bits in (0, -1):
            emit(BININT + pack("<i", obj))
            return
    # Text pickle, or int too big to fit in signed 4-byte format.
    emit(INT + repr(obj) + '\n')
459 dispatch[IntType] = save_int
def save_long(self, obj, pack=struct.pack):
    """Write a long: LONG1/LONG4 for protocol 2+, else the decimal LONG."""
    if self.proto < 2:
        self.write(LONG + repr(obj) + '\n')
        return
    encoded = encode_long(obj)
    size = len(encoded)
    if size < 256:
        # The byte count fits in a single byte.
        self.write(LONG1 + chr(size) + encoded)
    else:
        self.write(LONG4 + pack("<i", size) + encoded)
471 dispatch[LongType] = save_long
def save_float(self, obj, pack=struct.pack):
    """Write a float: packed big-endian double in binary mode, repr in text."""
    if not self.bin:
        self.write(FLOAT + repr(obj) + '\n')
        return
    self.write(BINFLOAT + pack('>d', obj))
478 dispatch[FloatType] = save_float
def save_string(self, obj, pack=struct.pack):
    """Write a string and memoize it.

    Binary mode uses SHORT_BINSTRING for lengths below 256 and
    BINSTRING otherwise; text mode writes repr(obj) after STRING.
    """
    if not self.bin:
        self.write(STRING + repr(obj) + '\n')
    else:
        size = len(obj)
        if size < 256:
            header = SHORT_BINSTRING + chr(size)
        else:
            header = BINSTRING + pack("<i", size)
        self.write(header + obj)
    self.memoize(obj)
490 dispatch[StringType] = save_string
def save_unicode(self, obj, pack=struct.pack):
    """Write a Unicode string and memoize the original object.

    Binary mode writes BINUNICODE followed by the byte count and the
    UTF-8 encoding.  Text mode writes UNICODE followed by the
    raw-unicode-escape encoding, after escaping backslashes and
    newlines, which that codec leaves alone.
    """
    if self.bin:
        encoded = obj.encode('utf-8')
        self.write(BINUNICODE + pack("<i", len(encoded)) + encoded)
    else:
        # Escape into a separate name: rebinding obj here would make
        # memoize() below record the temporary escaped copy instead of
        # the object being pickled.
        escaped = obj.replace("\\", "\\u005c")
        escaped = escaped.replace("\n", "\\u000a")
        self.write(UNICODE + escaped.encode('raw-unicode-escape') + '\n')
    self.memoize(obj)
502 dispatch[UnicodeType] = save_unicode
if StringType is UnicodeType:
    # This is true for Jython
    def save_string(self, obj, pack=struct.pack):
        """Write a string that may be Unicode, and memoize the original.

        obj.isunicode() selects between the Unicode opcodes
        (BINUNICODE / UNICODE) and the plain string opcodes
        (SHORT_BINSTRING / BINSTRING / STRING).
        """
        is_unicode = obj.isunicode()

        if self.bin:
            # Keep obj untouched so that memoize() below records the
            # object being pickled, not its encoded temporary.
            data = obj
            if is_unicode:
                data = obj.encode("utf-8")
            size = len(data)
            if size < 256 and not is_unicode:
                self.write(SHORT_BINSTRING + chr(size) + data)
            else:
                header = pack("<i", size)
                if is_unicode:
                    self.write(BINUNICODE + header + data)
                else:
                    self.write(BINSTRING + header + data)
        else:
            if is_unicode:
                # raw-unicode-escape leaves backslashes and newlines
                # alone, so escape them first.
                escaped = obj.replace("\\", "\\u005c")
                escaped = escaped.replace("\n", "\\u000a")
                escaped = escaped.encode('raw-unicode-escape')
                self.write(UNICODE + escaped + '\n')
            else:
                self.write(STRING + repr(obj) + '\n')
        self.memoize(obj)
    dispatch[StringType] = save_string
def save_tuple(self, obj):
    """Write a tuple, coping with tuples that turn out to be recursive.

    Empty tuples get EMPTY_TUPLE (or MARK + TUPLE under protocol 0).
    Protocol 2+ uses the TUPLE1..TUPLE3 opcodes for one to three
    elements; everything else is MARK, the elements, then TUPLE.
    """
    emit = self.write
    proto = self.proto
    size = len(obj)

    if size == 0:
        if proto:
            emit(EMPTY_TUPLE)
        else:
            emit(MARK + TUPLE)
        return

    pickle_one = self.save
    memo = self.memo
    ident = id(obj)

    if size <= 3 and proto >= 2:
        for element in obj:
            pickle_one(element)
        # Subtle.  Same reasoning as in the long comment further down.
        if ident in memo:
            emit(POP * size + self.get(memo[ident][0]))
        else:
            emit(_tuplesize2code[size])
            self.memoize(obj)
        return

    # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
    # has more than 3 elements.
    emit(MARK)
    for element in obj:
        pickle_one(element)

    if ident not in memo:
        # No recursion.
        self.write(TUPLE)
        self.memoize(obj)
        return

    # Subtle.  obj was not in memo when we entered save_tuple(), so
    # the process of saving the tuple's elements must have saved
    # the tuple itself:  the tuple is recursive.  The proper action
    # now is to throw away everything we put on the stack, and
    # simply GET the tuple (it's already constructed).  This check
    # could have been done in the "for element" loop instead, but
    # recursive tuples are a rare thing.
    fetch = self.get(memo[ident][0])
    if proto:
        emit(POP_MARK + fetch)
    else:   # proto 0 -- POP_MARK not available
        emit(POP * (size+1) + fetch)
583 dispatch[TupleType] = save_tuple
585 # save_empty_tuple() isn't used by anything in Python 2.3. However, I
586 # found a Pickler subclass in Zope3 that calls it, so it's not harmless
587 # to remove it.
def save_empty_tuple(self, obj):
    """Write the EMPTY_TUPLE opcode; *obj* itself is not inspected."""
    emit = self.write
    emit(EMPTY_TUPLE)
def save_list(self, obj):
    """Write an empty list, memoize it, then append its items in batches."""
    if self.bin:
        opener = EMPTY_LIST
    else:
        # proto 0 -- can't use EMPTY_LIST
        opener = MARK + LIST
    self.write(opener)

    # Memoize before the items so that self-references resolve.
    self.memoize(obj)
    self._batch_appends(iter(obj))
602 dispatch[ListType] = save_list
604 # Keep in synch with cPickle's BATCHSIZE. Nothing will break if it gets
605 # out of synch, though.
606 _BATCHSIZE = 1000
def _batch_appends(self, items):
    """Write the objects produced by *items* as list appends.

    Text mode writes one APPEND per item.  Binary mode pulls items via
    items.next() in groups of at most self._BATCHSIZE and writes each
    group as MARK ... APPENDS, or as a plain APPEND when the group
    holds a single item.
    """
    pickle_one = self.save
    emit = self.write

    if not self.bin:
        for item in items:
            pickle_one(item)
            emit(APPEND)
        return

    limit = self._BATCHSIZE
    exhausted = items is None
    while not exhausted:
        batch = []
        while len(batch) < limit:
            try:
                batch.append(items.next())
            except StopIteration:
                exhausted = True
                break
        count = len(batch)
        if count > 1:
            emit(MARK)
            for item in batch:
                pickle_one(item)
            emit(APPENDS)
        elif count:
            pickle_one(batch[0])
            emit(APPEND)
        # else the batch is empty, and we're done
def save_dict(self, obj):
    """Write an empty dict, memoize it, then set its items in batches."""
    if self.bin:
        opener = EMPTY_DICT
    else:
        # proto 0 -- can't use EMPTY_DICT
        opener = MARK + DICT
    self.write(opener)

    # Memoize before the items so that self-references resolve.
    self.memoize(obj)
    self._batch_setitems(obj.iteritems())
651 dispatch[DictionaryType] = save_dict
652 if not PyStringMap is None:
653 dispatch[PyStringMap] = save_dict
def _batch_setitems(self, items):
    """Write the (key, value) pairs produced by *items* as dict updates.

    Text mode writes one SETITEM per pair.  Binary mode pulls pairs via
    items.next() in groups of at most self._BATCHSIZE and writes each
    group as MARK ... SETITEMS, or as a plain SETITEM when the group
    holds a single pair.
    """
    pickle_one = self.save
    emit = self.write

    if not self.bin:
        for key, value in items:
            pickle_one(key)
            pickle_one(value)
            emit(SETITEM)
        return

    limit = self._BATCHSIZE
    exhausted = items is None
    while not exhausted:
        batch = []
        while len(batch) < limit:
            try:
                batch.append(items.next())
            except StopIteration:
                exhausted = True
                break
        count = len(batch)
        if count > 1:
            emit(MARK)
            for key, value in batch:
                pickle_one(key)
                pickle_one(value)
            emit(SETITEMS)
        elif count:
            key, value = batch[0]
            pickle_one(key)
            pickle_one(value)
            emit(SETITEM)
        # else the batch is empty, and we're done
def save_inst(self, obj):
    """Write an instance: constructor arguments, class, then its state.

    Arguments come from obj.__getinitargs__() when that exists.  The
    state is obj.__getstate__() when defined, otherwise obj.__dict__,
    and is applied with BUILD.
    """
    cls = obj.__class__

    memo = self.memo
    emit = self.write
    pickle_one = self.save
    binary = self.bin

    if not hasattr(obj, '__getinitargs__'):
        args = ()
    else:
        args = obj.__getinitargs__()
        len(args) # XXX Assert it's a sequence
        # args may be a temporary; keep it alive while pickling.
        _keep_alive(args, memo)

    emit(MARK)

    # Binary mode puts the class on the stack ahead of the arguments;
    # text mode names it inside the INST opcode after them.
    if binary:
        pickle_one(cls)
    for arg in args:
        pickle_one(arg)
    if binary:
        emit(OBJ)
    else:
        emit(INST + cls.__module__ + '\n' + cls.__name__ + '\n')

    self.memoize(obj)

    try:
        getstate = obj.__getstate__
    except AttributeError:
        stuff = obj.__dict__
    else:
        stuff = getstate()
        # The returned state may be a temporary; keep it alive too.
        _keep_alive(stuff, memo)
    pickle_one(stuff)
    emit(BUILD)
728 dispatch[InstanceType] = save_inst
def save_global(self, obj, name=None, pack=struct.pack):
    """Write *obj* as a reference to a module-level name.

    name defaults to obj.__name__; the module comes from
    obj.__module__ or, failing that, whichmodule().  Raises
    PicklingError when module.name cannot be looked up or does not
    refer to obj itself.  Under protocol 2+, a registered extension
    code is written instead of the GLOBAL opcode.
    """
    emit = self.write

    if name is None:
        name = obj.__name__

    module = getattr(obj, "__module__", None)
    if module is None:
        module = whichmodule(obj, name)

    try:
        __import__(module)
        found = getattr(sys.modules[module], name)
    except (ImportError, KeyError, AttributeError):
        raise PicklingError(
            "Can't pickle %r: it's not found as %s.%s" %
            (obj, module, name))
    if found is not obj:
        raise PicklingError(
            "Can't pickle %r: it's not the same object as %s.%s" %
            (obj, module, name))

    if self.proto >= 2:
        code = _extension_registry.get((module, name))
        if code:
            assert code > 0
            # Pick the narrowest EXT opcode that can hold the code.
            if code <= 0xff:
                emit(EXT1 + chr(code))
            elif code <= 0xffff:
                emit("%c%c%c" % (EXT2, code&0xff, code>>8))
            else:
                emit(EXT4 + pack("<i", code))
            return

    emit(GLOBAL + module + '\n' + name + '\n')
    self.memoize(obj)
770 dispatch[ClassType] = save_global
771 dispatch[FunctionType] = save_global
772 dispatch[BuiltinFunctionType] = save_global
773 dispatch[TypeType] = save_global
775 # Pickling helpers
def _keep_alive(x, memo):
    """Keep a reference to the object x in the memo.

    Objects are remembered by their id, so possibly temporary objects
    must be kept alive by referencing them.  The references are
    collected in a list stored under id(memo), a key that should
    normally not be used unless someone tries to deepcopy the memo
    itself...
    """
    slot = id(memo)
    try:
        kept = memo[slot]
    except KeyError:
        # aha, this is the first one :-)
        memo[slot] = [x]
    else:
        kept.append(x)
794 # A cache for whichmodule(), mapping a function object to the name of
795 # the module in which the function was found.
797 classmap = {} # called classmap for backwards compatibility
def whichmodule(func, funcname):
    """Figure out the module in which a function occurs.

    func.__module__ is used when it is set.  Otherwise sys.modules is
    searched for a module whose attribute funcname is func, and the
    answer is cached in classmap.  Return a module name; if the
    function cannot be found, return "__main__".
    """
    # Python functions should always get an __module__ from their globals.
    declared = getattr(func, "__module__", None)
    if declared is not None:
        return declared
    if func in classmap:
        return classmap[func]

    found = '__main__'
    for modname, module in sys.modules.items():
        if module is None:
            continue # skip dummy package entries
        if modname == '__main__':
            continue
        if getattr(module, funcname, None) is func:
            found = modname
            break
    classmap[func] = found
    return found
825 # Unpickling machinery
827 class Unpickler:
def __init__(self, file):
    """Prepare to read a pickle data stream from the file-like *file*.

    The protocol version of the pickle is detected automatically, so no
    proto argument is needed.

    The file-like object must have two methods, a read() method that
    takes an integer argument, and a readline() method that requires no
    arguments.  Both methods should return a string.  Thus file-like
    object can be a file object opened for reading, a StringIO object,
    or any other custom object that meets this interface.
    """
    # Bind the two reader methods once; the loaders call them directly.
    self.readline = file.readline
    self.read = file.read
    self.memo = {}
def load(self):
    """Read a pickled object representation from the open file.

    Return the reconstituted object hierarchy specified in the file.
    Opcodes are read one character at a time and dispatched until a
    handler raises _Stop, whose value is returned.
    """
    self.mark = object() # any new unique object
    self.stack = []
    self.append = self.stack.append
    read_one = self.read
    handlers = self.dispatch
    try:
        while True:
            opcode = read_one(1)
            handlers[opcode](self)
    except _Stop as stopinst:
        return stopinst.value
862 # Return largest index k such that self.stack[k] is self.mark.
863 # If the stack doesn't contain a mark, eventually raises IndexError.
864 # This could be sped by maintaining another stack, of indices at which
865 # the mark appears. For that matter, the latter stack would suffice,
866 # and we wouldn't need to push mark objects on self.stack at all.
867 # Doing so is probably a good thing, though, since if the pickle is
868 # corrupt (or hostile) we may get a clue from finding self.mark embedded
869 # in unpickled objects.
def marker(self):
    """Return the largest index k such that self.stack[k] is self.mark.

    If the stack doesn't contain a mark, the scan runs off the front of
    the list and raises IndexError.
    """
    stack = self.stack
    mark = self.mark
    index = len(stack)
    while True:
        index -= 1
        if stack[index] is mark:
            return index
877 dispatch = {}
def load_eof(self):
    """Handler for the empty opcode string: always raise EOFError."""
    raise EOFError
881 dispatch[''] = load_eof
def load_proto(self):
    """Read the protocol byte; raise ValueError unless it is 0, 1 or 2."""
    version = ord(self.read(1))
    if not 0 <= version <= 2:
        raise ValueError("unsupported pickle protocol: %d" % version)
887 dispatch[PROTO] = load_proto
def load_persid(self):
    """Read a persistent id line and push self.persistent_load(id)."""
    line = self.readline()
    pid = line[:-1]    # drop the trailing newline
    loaded = self.persistent_load(pid)
    self.append(loaded)
892 dispatch[PERSID] = load_persid
def load_binpersid(self):
    """Pop a persistent id off the stack and push self.persistent_load(id)."""
    pid = self.stack.pop()
    loaded = self.persistent_load(pid)
    self.append(loaded)
897 dispatch[BINPERSID] = load_binpersid
def load_none(self):
    """Push None."""
    push = self.append
    push(None)
901 dispatch[NONE] = load_none
def load_false(self):
    """Push False."""
    push = self.append
    push(False)
905 dispatch[NEWFALSE] = load_false
def load_true(self):
    """Push True."""
    push = self.append
    push(True)
909 dispatch[NEWTRUE] = load_true
def load_int(self):
    """Read a decimal line and push the bool, int or long it denotes.

    The argument text of the TRUE and FALSE pseudo-opcodes is mapped
    back to the bool objects; anything int() rejects is retried with
    long().
    """
    text = self.readline()
    # TRUE and FALSE begin with the INT opcode character; compare
    # against what follows it.
    bools = {FALSE[1:]: False, TRUE[1:]: True}
    if text in bools:
        value = bools[text]
    else:
        try:
            value = int(text)
        except ValueError:
            value = long(text)
    self.append(value)
923 dispatch[INT] = load_int
def load_binint(self):
    """Read four bytes and push the int that marshal decodes from them."""
    raw = self.read(4)
    # 'i' is the type code prefixed so that marshal.loads yields an int.
    self.append(mloads('i' + raw))
927 dispatch[BININT] = load_binint
def load_binint1(self):
    """Read one byte and push its ordinal value."""
    value = ord(self.read(1))
    self.append(value)
931 dispatch[BININT1] = load_binint1
def load_binint2(self):
    """Read two bytes and push the int that marshal decodes from them."""
    raw = self.read(2)
    # Pad with two zero bytes to make the four-byte form marshal expects.
    self.append(mloads('i' + raw + '\000\000'))
935 dispatch[BININT2] = load_binint2
def load_long(self):
    """Read a line and push it converted with long(text, 0)."""
    text = self.readline()[:-1]    # drop the trailing newline
    self.append(long(text, 0))
939 dispatch[LONG] = load_long
def load_long1(self):
    """Read a one-byte count, then that many bytes, and push the long."""
    size = ord(self.read(1))
    payload = self.read(size)
    self.append(decode_long(payload))
945 dispatch[LONG1] = load_long1
def load_long4(self):
    """Read a four-byte count, then that many bytes, and push the long."""
    size = mloads('i' + self.read(4))
    payload = self.read(size)
    self.append(decode_long(payload))
951 dispatch[LONG4] = load_long4
def load_float(self):
    """Read a line and push it converted with float()."""
    text = self.readline()[:-1]    # drop the trailing newline
    self.append(float(text))
955 dispatch[FLOAT] = load_float
def load_binfloat(self, unpack=struct.unpack):
    """Read eight bytes and push them decoded as a big-endian double."""
    raw = self.read(8)
    (value,) = unpack('>d', raw)
    self.append(value)
959 dispatch[BINFLOAT] = load_binfloat
def load_string(self):
    """Read a quoted string line and push the string it denotes.

    The line must start and end with the same quote character (single
    or double) and hold at least the two quote characters.  Anything
    else raises ValueError("insecure string pickle").
    """
    rep = self.readline()[:-1]
    for q in "\"'": # double or single quote
        if rep.startswith(q):
            # A line that is just one quote character starts and ends
            # with q, so the length check is needed to reject it.
            if len(rep) < 2 or not rep.endswith(q):
                raise ValueError("insecure string pickle")
            rep = rep[len(q):-len(q)]
            break
    else:
        raise ValueError("insecure string pickle")
    self.append(rep.decode("string-escape"))
972 dispatch[STRING] = load_string
def load_binstring(self):
    """Read a four-byte count, then push that many bytes as a string."""
    size = mloads('i' + self.read(4))
    self.append(self.read(size))
977 dispatch[BINSTRING] = load_binstring
def load_unicode(self):
    """Read a line and push it decoded as raw-unicode-escape."""
    text = self.readline()[:-1]    # drop the trailing newline
    self.append(unicode(text, 'raw-unicode-escape'))
981 dispatch[UNICODE] = load_unicode
def load_binunicode(self):
    """Read a four-byte count, then push that many bytes decoded as UTF-8."""
    size = mloads('i' + self.read(4))
    self.append(unicode(self.read(size), 'utf-8'))
986 dispatch[BINUNICODE] = load_binunicode
def load_short_binstring(self):
    """Read a one-byte count, then push that many bytes as a string."""
    size = ord(self.read(1))
    self.append(self.read(size))
991 dispatch[SHORT_BINSTRING] = load_short_binstring
def load_tuple(self):
    """Replace the topmost mark and the items above it with one tuple."""
    start = self.marker()
    built = tuple(self.stack[start+1:])
    self.stack[start:] = [built]
996 dispatch[TUPLE] = load_tuple
def load_empty_tuple(self):
    """Push an empty tuple."""
    stack = self.stack
    stack.append(())
1000 dispatch[EMPTY_TUPLE] = load_empty_tuple
def load_tuple1(self):
    """Replace the top stack item with a 1-tuple holding it."""
    stack = self.stack
    top = stack[-1]
    stack[-1] = (top,)
1004 dispatch[TUPLE1] = load_tuple1
def load_tuple2(self):
    """Replace the two topmost stack items with a 2-tuple of them."""
    stack = self.stack
    pair = (stack[-2], stack[-1])
    stack[-2:] = [pair]
1008 dispatch[TUPLE2] = load_tuple2
def load_tuple3(self):
    """Replace the three topmost stack items with a 3-tuple of them."""
    stack = self.stack
    triple = (stack[-3], stack[-2], stack[-1])
    stack[-3:] = [triple]
1012 dispatch[TUPLE3] = load_tuple3
def load_empty_list(self):
    """Push a new empty list."""
    stack = self.stack
    stack.append([])
1016 dispatch[EMPTY_LIST] = load_empty_list
def load_empty_dictionary(self):
    """Push a new empty dict."""
    stack = self.stack
    stack.append({})
1020 dispatch[EMPTY_DICT] = load_empty_dictionary
def load_list(self):
    """Replace the topmost mark and the items above it with one list."""
    start = self.marker()
    built = self.stack[start+1:]
    self.stack[start:] = [built]
1025 dispatch[LIST] = load_list
def load_dict(self):
    """Replace the topmost mark and the items above it with one dict.

    The items above the mark are read as alternating keys and values.
    """
    start = self.marker()
    flat = self.stack[start+1:]
    built = {}
    pos = 0
    while pos < len(flat):
        # The value is fetched first, so an unpaired trailing key
        # raises IndexError.
        built[flat[pos]] = flat[pos + 1]
        pos += 2
    self.stack[start:] = [built]
1036 dispatch[DICT] = load_dict
1038 # INST and OBJ differ only in how they get a class object. It's not
1039 # only sensible to do the rest in a common routine, the two routines
1040 # previously diverged and grew different bugs.
1041 # klass is the class to instantiate, and k points to the topmost mark
1042 # object, following which are the arguments for klass.__init__.
def _instantiate(self, klass, k):
    """Replace the stack from mark index *k* upward with a klass instance.

    The items above the mark are the arguments for klass.  A TypeError
    raised by the constructor call is re-raised with the class name
    added to its message.
    """
    args = tuple(self.stack[k+1:])
    del self.stack[k:]
    instantiated = 0
    # With no arguments, a ClassType class lacking __getinitargs__ is
    # built by re-classing an _EmptyClass instance instead of calling
    # klass.  (_EmptyClass is defined outside this view -- presumably a
    # trivial class; confirm.)
    if (not args and
            type(klass) is ClassType and
            not hasattr(klass, "__getinitargs__")):
        try:
            value = _EmptyClass()
            value.__class__ = klass
            instantiated = 1
        except RuntimeError:
            # In restricted execution, assignment to inst.__class__ is
            # prohibited
            pass
    if not instantiated:
        try:
            value = klass(*args)
        except TypeError, err:
            # Three-argument raise: new message, original traceback.
            raise TypeError, "in constructor for %s: %s" % (
                klass.__name__, str(err)), sys.exc_info()[2]
    self.append(value)
def load_inst(self):
    """Read module and class name lines, then instantiate from the mark."""
    module = self.readline()[:-1]
    name = self.readline()[:-1]
    klass = self.find_class(module, name)
    start = self.marker()
    self._instantiate(klass, start)
1071 dispatch[INST] = load_inst
def load_obj(self):
    """Instantiate the class that sits just above the topmost mark."""
    # Stack is ... markobject classobject arg1 arg2 ...
    start = self.marker()
    klass = self.stack.pop(start + 1)
    self._instantiate(klass, start)
1078 dispatch[OBJ] = load_obj
def load_newobj(self):
    """Pop an args tuple and replace the class below it with a new object.

    The object is created with cls.__new__(cls, *args).
    """
    stack = self.stack
    args = stack.pop()
    cls = stack[-1]
    stack[-1] = cls.__new__(cls, *args)
1085 dispatch[NEWOBJ] = load_newobj
def load_global(self):
    """Read module and name lines and push self.find_class(module, name)."""
    module = self.readline()[:-1]
    name = self.readline()[:-1]
    self.append(self.find_class(module, name))
1092 dispatch[GLOBAL] = load_global
def load_ext1(self):
    """Read a one-byte extension code and hand it to get_extension()."""
    code = ord(self.read(1))
    self.get_extension(code)
1097 dispatch[EXT1] = load_ext1
def load_ext2(self):
    """Handle EXT2: push the object for a two-byte extension code."""
    raw = self.read(2)
    # Pad with two zero bytes so marshal can decode the value as one of
    # its four-byte 'i' integers.
    ext_code = mloads('i' + raw + '\000\000')
    self.get_extension(ext_code)
dispatch[EXT2] = load_ext2
def load_ext4(self):
    """Handle EXT4: push the object for a four-byte extension code."""
    raw = self.read(4)
    # The four bytes are decoded as a marshal 'i' integer.
    ext_code = mloads('i' + raw)
    self.get_extension(ext_code)
dispatch[EXT4] = load_ext4
def get_extension(self, code):
    """Push the object registered under extension code `code`.

    The extension cache is consulted first.  On a miss, the code is
    looked up in the inverted registry, resolved through find_class and
    stored in the cache before being pushed.

    Raises ValueError when the code has no registry entry.
    """
    # A private sentinel, because any object may legitimately be cached.
    missing = []
    obj = _extension_cache.get(code, missing)
    if obj is missing:
        key = _inverted_registry.get(code)
        if not key:
            raise ValueError("unregistered extension code %d" % code)
        obj = self.find_class(*key)
        _extension_cache[code] = obj
    self.append(obj)
def find_class(self, module, name):
    """Import `module` and return its attribute called `name`.

    Subclasses may override this.
    """
    __import__(module)
    # __import__ returns the top-level package for dotted names, so the
    # module itself is fetched from sys.modules instead.
    return getattr(sys.modules[module], name)
def load_reduce(self):
    """Handle REDUCE: call a callable with an argument tuple.

    The argument tuple is popped from the top of the stack; the callable
    beneath it is replaced in place by the call's result.
    """
    stack = self.stack
    call_args = stack.pop()
    callee = stack[-1]
    stack[-1] = callee(*call_args)
dispatch[REDUCE] = load_reduce
def load_pop(self):
    """Handle POP: discard the top stack item."""
    stack = self.stack
    del stack[-1]
dispatch[POP] = load_pop
def load_pop_mark(self):
    """Handle POP_MARK: discard the topmost mark and everything above it."""
    mark = self.marker()
    del self.stack[mark:]
dispatch[POP_MARK] = load_pop_mark
def load_dup(self):
    """Handle DUP: push a second reference to the top stack item."""
    top = self.stack[-1]
    self.append(top)
dispatch[DUP] = load_dup
def load_get(self):
    """Handle GET: push the memo entry whose key is read as a text line.

    The last character of the line (the line terminator) is dropped; the
    remaining string is used as the memo key as-is.
    """
    memo_key = self.readline()[:-1]
    self.append(self.memo[memo_key])
dispatch[GET] = load_get
def load_binget(self):
    """Handle BINGET: push the memo entry selected by a one-byte index."""
    index = ord(self.read(1))
    # Memo keys are strings, so the index is converted with repr().
    self.append(self.memo[repr(index)])
dispatch[BINGET] = load_binget
def load_long_binget(self):
    """Handle LONG_BINGET: push the memo entry selected by a four-byte index."""
    raw = self.read(4)
    index = mloads('i' + raw)
    # Memo keys are strings, so the index is converted with repr().
    self.append(self.memo[repr(index)])
dispatch[LONG_BINGET] = load_long_binget
def load_put(self):
    """Handle PUT: memoize the top stack item under a key read as a text line.

    The stack is left unchanged.  The last character of the line (the
    line terminator) is dropped before it is used as the key.
    """
    # Fetch the stack top before consuming input, so an empty stack
    # fails without reading the key line.
    top = self.stack[-1]
    memo_key = self.readline()[:-1]
    self.memo[memo_key] = top
dispatch[PUT] = load_put
def load_binput(self):
    """Handle BINPUT: memoize the top stack item under a one-byte index.

    The stack is left unchanged; the key stored is repr() of the index.
    """
    index = ord(self.read(1))
    top = self.stack[-1]
    self.memo[repr(index)] = top
dispatch[BINPUT] = load_binput
def load_long_binput(self):
    """Handle LONG_BINPUT: memoize the top stack item under a four-byte index.

    The stack is left unchanged; the key stored is repr() of the index.
    """
    raw = self.read(4)
    index = mloads('i' + raw)
    top = self.stack[-1]
    self.memo[repr(index)] = top
dispatch[LONG_BINPUT] = load_long_binput
def load_append(self):
    """Handle APPEND: pop the top item and append it to the object below."""
    stack = self.stack
    item = stack.pop()
    target = stack[-1]
    target.append(item)
dispatch[APPEND] = load_append
def load_appends(self):
    """Handle APPENDS: extend the object below the mark with the items above it.

    The mark and all items above it are then removed from the stack.
    """
    stack = self.stack
    mark = self.marker()
    target = stack[mark - 1]
    new_items = stack[mark + 1:]
    target.extend(new_items)
    del stack[mark:]
dispatch[APPENDS] = load_appends
def load_setitem(self):
    """Handle SETITEM: pop a value, then a key, and store them in the object below."""
    stack = self.stack
    value = stack.pop()
    key = stack.pop()
    target = stack[-1]
    target[key] = value
dispatch[SETITEM] = load_setitem
def load_setitems(self):
    """Handle SETITEMS: store the key/value pairs above the mark.

    The items above the mark alternate key, value, key, value, ... and
    are stored into the object just below the mark.  The mark and the
    pairs are then removed from the stack.
    """
    stack = self.stack
    mark = self.marker()
    target = stack[mark - 1]
    for pos in range(mark + 1, len(stack), 2):
        value = stack[pos + 1]
        target[stack[pos]] = value

    del stack[mark:]
dispatch[SETITEMS] = load_setitems
def load_build(self):
    """Handle BUILD: apply a popped state to the object now on top of the stack.

    If the object has a truthy __setstate__ attribute, that is called
    with the state and nothing else happens.  Otherwise the state is
    either a single mapping, or a 2-tuple (dict state, slot state):
    the first is merged into the instance __dict__, the second is
    applied with setattr().
    """
    stack = self.stack
    state = stack.pop()
    target = stack[-1]

    custom_setstate = getattr(target, "__setstate__", None)
    if custom_setstate:
        custom_setstate(state)
        return

    if isinstance(state, tuple) and len(state) == 2:
        dict_state, slot_state = state
    else:
        dict_state, slot_state = state, None

    if dict_state:
        try:
            target.__dict__.update(dict_state)
        except RuntimeError:
            # XXX In restricted execution, the instance's __dict__
            # is not accessible.  Use the old way of unpickling
            # the instance variables.  This is a semantic
            # difference when unpickling in restricted
            # vs. unrestricted modes.
            # Note, however, that cPickle has never tried to do the
            # .update() business, and always uses
            # PyObject_SetItem(inst.__dict__, key, value) in a
            # loop over state.items().
            for attr, val in dict_state.items():
                setattr(target, attr, val)

    if slot_state:
        for attr, val in slot_state.items():
            setattr(target, attr, val)
dispatch[BUILD] = load_build
def load_mark(self):
    """Handle MARK: push the mark object (self.mark) onto the stack."""
    marker_object = self.mark
    self.append(marker_object)
dispatch[MARK] = load_mark
def load_stop(self):
    """Handle STOP: pop the finished object and raise _Stop carrying it."""
    result = self.stack.pop()
    raise _Stop(result)
dispatch[STOP] = load_stop
# Helper class for load_inst/load_obj.  It is deliberately a bare classic
# class: _instantiate creates an instance of it and then reassigns
# __class__ to the real class.

class _EmptyClass:
    """Attribute-free placeholder that is re-classed after creation."""
1256 # Encode/decode longs in linear time.
1258 import binascii as _binascii
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.

    Note that 0L is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    The result is the shortest byte string whose top bit still carries
    the sign of x.

    >>> encode_long(0L)
    ''
    >>> encode_long(255L)
    '\xff\x00'
    >>> encode_long(32767L)
    '\xff\x7f'
    >>> encode_long(-256L)
    '\x00\xff'
    >>> encode_long(-32768L)
    '\x00\x80'
    >>> encode_long(-128L)
    '\x80'
    >>> encode_long(127L)
    '\x7f'
    >>>
    """

    if x == 0:
        return ''

    def hex_digits(value):
        # hex() gives "0x..." and, for a Python 2 long, a trailing "L";
        # keep only the digits in between.
        text = hex(value)
        assert text.startswith("0x")
        if text.endswith('L'):
            return text[2:-1]
        return text[2:]

    if x > 0:
        digits = hex_digits(x)
        if len(digits) & 1:
            # need an even # of nibbles for unhexlify
            digits = "0" + digits
        elif int(digits[0], 16) >= 8:
            # "looks negative", so need a byte of sign bits
            digits = "00" + digits
    else:
        # Build the 256's-complement: (1 << nbits) + x.  The byte count is
        # taken from the hex form of the magnitude, which is linear time.
        nibbles = len(hex_digits(-x))
        if nibbles & 1:
            # Extend to a full byte.
            nibbles += 1
        x += 1 << (nibbles * 4)
        assert x > 0
        # Adding the power of two may have produced leading zero nibbles
        # that hex() does not print; put them back.
        digits = hex_digits(x).rjust(nibbles, "0")
        if int(digits[0], 16) < 8:
            # "looks positive", so need a byte of sign bits
            digits = "ff" + digits

    assert len(digits) & 1 == 0, (x, digits)
    binary = _binascii.unhexlify(digits)
    # unhexlify yields big-endian order; the pickle format is little-endian.
    return binary[::-1]
1326 def decode_long(data):
1327 r"""Decode a long from a two's complement little-endian binary string.
1329 >>> decode_long('')
1331 >>> decode_long("\xff\x00")
1332 255L
1333 >>> decode_long("\xff\x7f")
1334 32767L
1335 >>> decode_long("\x00\xff")
1336 -256L
1337 >>> decode_long("\x00\x80")
1338 -32768L
1339 >>> decode_long("\x80")
1340 -128L
1341 >>> decode_long("\x7f")
1342 127L
1345 nbytes = len(data)
1346 if nbytes == 0:
1347 return 0L
1348 ashex = _binascii.hexlify(data[::-1])
1349 n = long(ashex, 16) # quadratic time before Python 2.3; linear now
1350 if data[-1] >= '\x80':
1351 n -= 1L << (nbytes * 8)
1352 return n
1354 # Shorthands
1356 try:
1357 from cStringIO import StringIO
1358 except ImportError:
1359 from StringIO import StringIO
def dump(obj, file, protocol=None):
    """Pickle obj to file using a Pickler created for the given protocol."""
    pickler = Pickler(file, protocol)
    pickler.dump(obj)
def dumps(obj, protocol=None):
    """Return the pickle of obj as a string, using the given protocol."""
    buf = StringIO()
    pickler = Pickler(buf, protocol)
    pickler.dump(obj)
    return buf.getvalue()
def load(file):
    """Read one pickled object from file and return it."""
    unpickler = Unpickler(file)
    return unpickler.load()
def loads(str):
    """Read one pickled object from the string str and return it."""
    source = StringIO(str)
    unpickler = Unpickler(source)
    return unpickler.load()
1376 # Doctest
def _test():
    """Run doctest.testmod() with its defaults and return its result."""
    from doctest import testmod
    return testmod()

if __name__ == "__main__":
    _test()