"""Create portable serialized representations of Python objects.

See module copyreg for a mechanism for registering custom picklers.
See module pickletools source for extensive comments.

Classes:

    Pickler
    Unpickler

Functions:

    dump(object, file)
    dumps(object) -> string
    load(file) -> object
    loads(string) -> object

Misc variables:

    __version__
    format_version
    compatible_formats

"""
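
# A minimal usage sketch of the dump()/load() shorthands defined at the end
# of this module (illustrative only; "data.bin" is an arbitrary example path):
#
#     import pickle
#     with open("data.bin", "wb") as f:
#         pickle.dump({"spam": [1, 2, 3]}, f)
#     with open("data.bin", "rb") as f:
#         assert pickle.load(f) == {"spam": [1, 2, 3]}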

__version__ = "$Revision$"       # Code version

from types import FunctionType, BuiltinFunctionType
from copyreg import dispatch_table
from copyreg import _extension_registry, _inverted_registry, _extension_cache
import marshal
import sys
import struct
import re
import io
import codecs
import _compat_pickle

__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# These are purely informational; no code uses these.
format_version = "3.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      "3.0",            # Protocol 3
                      ]                 # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 3

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# We intentionally write a protocol that Python 2.x cannot read;
# there are too many issues with that.
DEFAULT_PROTOCOL = 3

# Why use struct.pack() for pickling but marshal.loads() for
# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
# marshal.loads() is twice as fast as struct.unpack()!
mloads = marshal.loads
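
# Illustrative sketch of that trick as used by the BININT loaders below
# (the b'i' prefix marks a 32-bit little-endian int in the marshal format):
#
#     mloads(b'i' + b'\x05\x00\x00\x00')   # == 5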

class PickleError(Exception):
    """A common base class for the other pickling exceptions."""
    pass

class PicklingError(PickleError):
    """This exception is raised when an unpicklable object is passed to the
    dump() method.

    """
    pass

class UnpicklingError(PickleError):
    """This exception is raised when there is a problem unpickling an object,
    such as a security violation.

    Note that other exceptions may also be raised during unpickling, including
    (but not necessarily limited to) AttributeError, EOFError, ImportError,
    and IndexError.

    """
    pass

# An instance of _Stop is raised by Unpickler.load_stop() in response to
# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    def __init__(self, value):
        self.value = value

# Jython has PyStringMap; it's a dict subclass with string keys
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None

# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes

__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])

# Pickling machinery

class _Pickler:

    def __init__(self, file, protocol=None, *, fix_imports=True):
        """This takes a binary file for writing a pickle data stream.

        The optional protocol argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2, 3.  The default
        protocol is 3, a backward-incompatible protocol designed for
        Python 3.0.

        Specifying a negative protocol version selects the highest
        protocol version supported.  The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The file argument must have a write() method that accepts a single
        bytes argument.  It can thus be a file object opened for binary
        writing, an io.BytesIO instance, or any other custom object that
        meets this interface.

        If fix_imports is True and protocol is less than 3, pickle will try to
        map the new Python 3.x names to the old module names used in Python
        2.x, so that the pickle data stream is readable with Python 2.x.
        """
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        if protocol < 0:
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        try:
            self.write = file.write
        except AttributeError:
            raise TypeError("file must have a 'write' attribute")
        self.memo = {}
        self.proto = int(protocol)
        self.bin = protocol >= 1
        self.fast = 0
        self.fix_imports = fix_imports and protocol < 3
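
    # Illustrative sketch of protocol selection (f is a hypothetical binary
    # file object, not part of this module):
    #
    #     Pickler(f)                       # uses DEFAULT_PROTOCOL (3)
    #     Pickler(f, protocol=-1)          # any negative value -> HIGHEST_PROTOCOL
    #     Pickler(f, 2, fix_imports=True)  # stream stays readable by Python 2.x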

    def clear_memo(self):
        """Clears the pickler's "memo".

        The memo is the data structure that remembers which objects the
        pickler has already seen, so that shared or recursive objects are
        pickled by reference and not by value.  This method is useful when
        re-using picklers.
        """
        self.memo.clear()
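
    # Illustrative sketch of re-using one pickler for several independent
    # objects (hypothetical names; the stream layout is up to the caller):
    #
    #     p = Pickler(open("many.bin", "wb"))
    #     for record in records:
    #         p.dump(record)
    #         p.clear_memo()   # forget cross-references between records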

    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        # Check whether Pickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Pickler.dump().
        if not hasattr(self, "write"):
            raise PicklingError("Pickler.__init__() was not called by "
                                "%s.__init__()" % (self.__class__.__name__,))
        if self.proto >= 2:
            self.write(PROTO + bytes([self.proto]))
        self.save(obj)
        self.write(STOP)

    def memoize(self, obj):
        """Store an object in the memo."""

        # The Pickler memo is a dictionary mapping object ids to 2-tuples
        # that contain the Unpickler memo key and the object being memoized.
        # The memo key is written to the pickle and will become
        # the key in the Unpickler's memo.  The object is stored in the
        # Pickler memo so that transient objects are kept alive during
        # pickling.

        # The use of the Unpickler memo length as the memo key is just a
        # convention.  The only requirement is that the memo values be unique.
        # But there appears no advantage to any other scheme, and this
        # scheme allows the Unpickler memo to be implemented as a plain (but
        # growable) array, indexed by memo key.
        if self.fast:
            return
        assert id(obj) not in self.memo
        memo_len = len(self.memo)
        self.write(self.put(memo_len))
        self.memo[id(obj)] = memo_len, obj

    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
    def put(self, i, pack=struct.pack):
        if self.bin:
            if i < 256:
                return BINPUT + bytes([i])
            else:
                return LONG_BINPUT + pack("<i", i)

        return PUT + repr(i).encode("ascii") + b'\n'

    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
    def get(self, i, pack=struct.pack):
        if self.bin:
            if i < 256:
                return BINGET + bytes([i])
            else:
                return LONG_BINGET + pack("<i", i)

        return GET + repr(i).encode("ascii") + b'\n'
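
    # Illustrative sketch of how the memo shows up in a stream (assuming this
    # pure-Python pickler; the C accelerator behaves the same way):
    #
    #     shared = [1, 2]
    #     data = dumps([shared, shared], protocol=1)
    #     # The first reference writes the list plus a BINPUT; the second
    #     # reference is just a BINGET of that memo slot, so both unpickled
    #     # elements end up being the same object again.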

    def save(self, obj, save_persistent_id=True):
        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid is not None and save_persistent_id:
            self.save_pers(pid)
            return

        # Check the memo
        x = self.memo.get(id(obj))
        if x:
            self.write(self.get(x[0]))
            return

        # Check the type dispatch table
        t = type(obj)
        f = self.dispatch.get(t)
        if f:
            f(self, obj)   # Call unbound method with explicit self
            return

        # Check for a class with a custom metaclass; treat as regular class
        try:
            issc = issubclass(t, type)
        except TypeError:   # t is not a class (old Boost; see SF #502085)
            issc = 0
        if issc:
            self.save_global(obj)
            return

        # Check copyreg.dispatch_table
        reduce = dispatch_table.get(t)
        if reduce:
            rv = reduce(obj)
        else:
            # Check for a __reduce_ex__ method, fall back to __reduce__
            reduce = getattr(obj, "__reduce_ex__", None)
            if reduce:
                rv = reduce(self.proto)
            else:
                reduce = getattr(obj, "__reduce__", None)
                if reduce:
                    rv = reduce()
                else:
                    raise PicklingError("Can't pickle %r object: %r" %
                                        (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, str):
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if not isinstance(rv, tuple):
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 5):
            raise PicklingError("Tuple returned by %s must have "
                                "two to five elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)

    def persistent_id(self, obj):
        # This exists so a subclass can override it
        return None

    def save_pers(self, pid):
        # Save a persistent id reference
        if self.bin:
            self.save(pid, save_persistent_id=False)
            self.write(BINPERSID)
        else:
            self.write(PERSID + str(pid).encode("ascii") + b'\n')

    def save_reduce(self, func, args, state=None,
                    listitems=None, dictitems=None, obj=None):
        # This API is called by some subclasses

        # Assert that args is a tuple
        if not isinstance(args, tuple):
            raise PicklingError("args from save_reduce() should be a tuple")

        # Assert that func is callable
        if not hasattr(func, '__call__'):
            raise PicklingError("func from save_reduce() should be callable")

        save = self.save
        write = self.write

        # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
        if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally.  For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # new-style class.  The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below).  If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own.  This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            save(state)
            write(BUILD)
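
    # Illustrative sketch of a user class whose __reduce__ ends up here as
    # func=(the class itself) and args=(constructor arguments); Point is a
    # hypothetical class, not part of this module:
    #
    #     class Point:
    #         def __init__(self, x, y):
    #             self.x, self.y = x, y
    #         def __reduce__(self):
    #             return (Point, (self.x, self.y))
    #
    #     # dumps(Point(1, 2)) pickles a reference to Point, the args tuple
    #     # (1, 2), and a REDUCE opcode; unpickling calls Point(1, 2) again.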

    # Methods below this point are dispatched through the dispatch table

    dispatch = {}

    def save_none(self, obj):
        self.write(NONE)
    dispatch[type(None)] = save_none

    def save_bool(self, obj):
        if self.proto >= 2:
            self.write(obj and NEWTRUE or NEWFALSE)
        else:
            self.write(obj and TRUE or FALSE)
    dispatch[bool] = save_bool

    def save_long(self, obj, pack=struct.pack):
        if self.bin:
            # If the int is small enough to fit in a signed 4-byte 2's-comp
            # format, we can store it more efficiently than the general
            # case.
            # First one- and two-byte unsigned ints:
            if obj >= 0:
                if obj <= 0xff:
                    self.write(BININT1 + bytes([obj]))
                    return
                if obj <= 0xffff:
                    self.write(BININT2 + bytes([obj&0xff, obj>>8]))
                    return
            # Next check for 4-byte signed ints:
            high_bits = obj >> 31  # note that Python shift sign-extends
            if high_bits == 0 or high_bits == -1:
                # All high bits are copies of bit 2**31, so the value
                # fits in a 4-byte signed int.
                self.write(BININT + pack("<i", obj))
                return
        if self.proto >= 2:
            encoded = encode_long(obj)
            n = len(encoded)
            if n < 256:
                self.write(LONG1 + bytes([n]) + encoded)
            else:
                self.write(LONG4 + pack("<i", n) + encoded)
            return
        self.write(LONG + repr(obj).encode("ascii") + b'L\n')
    dispatch[int] = save_long

    def save_float(self, obj, pack=struct.pack):
        if self.bin:
            self.write(BINFLOAT + pack('>d', obj))
        else:
            self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
    dispatch[float] = save_float

    def save_bytes(self, obj, pack=struct.pack):
        if self.proto < 3:
            self.save_reduce(bytes, (list(obj),), obj=obj)
            return
        n = len(obj)
        if n < 256:
            self.write(SHORT_BINBYTES + bytes([n]) + bytes(obj))
        else:
            self.write(BINBYTES + pack("<i", n) + bytes(obj))
        self.memoize(obj)
    dispatch[bytes] = save_bytes

    def save_str(self, obj, pack=struct.pack):
        if self.bin:
            encoded = obj.encode('utf-8', 'surrogatepass')
            n = len(encoded)
            self.write(BINUNICODE + pack("<i", n) + encoded)
        else:
            obj = obj.replace("\\", "\\u005c")
            obj = obj.replace("\n", "\\u000a")
            self.write(UNICODE + bytes(obj.encode('raw-unicode-escape')) +
                       b'\n')
        self.memoize(obj)
    dispatch[str] = save_str

    def save_tuple(self, obj):
        write = self.write
        proto = self.proto

        n = len(obj)
        if n == 0:
            if proto:
                write(EMPTY_TUPLE)
            else:
                write(MARK + TUPLE)
            return

        save = self.save
        memo = self.memo
        if n <= 3 and proto >= 2:
            for element in obj:
                save(element)
            # Subtle.  Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                write(POP * n + get)
            else:
                write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle.  d was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself:  the tuple is recursive.  The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed).  This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if proto:
                write(POP_MARK + get)
            else:   # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        self.write(TUPLE)
        self.memoize(obj)

    dispatch[tuple] = save_tuple

    def save_list(self, obj):
        write = self.write

        if self.bin:
            write(EMPTY_LIST)
        else:   # proto 0 -- can't use EMPTY_LIST
            write(MARK + LIST)

        self.memoize(obj)
        self._batch_appends(obj)

    dispatch[list] = save_list

    _BATCHSIZE = 1000

    def _batch_appends(self, items):
        # Helper to batch up APPENDS sequences
        save = self.save
        write = self.write

        if not self.bin:
            for x in items:
                save(x)
                write(APPEND)
            return

        items = iter(items)
        r = range(self._BATCHSIZE)
        while items is not None:
            tmp = []
            for i in r:
                try:
                    x = next(items)
                    tmp.append(x)
                except StopIteration:
                    items = None
                    break
            n = len(tmp)
            if n > 1:
                write(MARK)
                for x in tmp:
                    save(x)
                write(APPENDS)
            elif n:
                save(tmp[0])
                write(APPEND)
            # else tmp is empty, and we're done
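
    # Illustrative sketch of the batching above (assuming protocol >= 1):
    #
    #     dumps(list(range(2500)), protocol=2)
    #     # The 2500 items are emitted as three MARK ... APPENDS runs of at
    #     # most _BATCHSIZE (1000) items each, so the unpickler never needs
    #     # the whole list's contents on its stack at once.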

    def save_dict(self, obj):
        write = self.write

        if self.bin:
            write(EMPTY_DICT)
        else:   # proto 0 -- can't use EMPTY_DICT
            write(MARK + DICT)

        self.memoize(obj)
        self._batch_setitems(obj.items())

    dispatch[dict] = save_dict
    if PyStringMap is not None:
        dispatch[PyStringMap] = save_dict

    def _batch_setitems(self, items):
        # Helper to batch up SETITEMS sequences; proto >= 1 only
        save = self.save
        write = self.write

        if not self.bin:
            for k, v in items:
                save(k)
                save(v)
                write(SETITEM)
            return

        items = iter(items)
        r = range(self._BATCHSIZE)
        while items is not None:
            tmp = []
            for i in r:
                try:
                    tmp.append(next(items))
                except StopIteration:
                    items = None
                    break
            n = len(tmp)
            if n > 1:
                write(MARK)
                for k, v in tmp:
                    save(k)
                    save(v)
                write(SETITEMS)
            elif n:
                k, v = tmp[0]
                save(k)
                save(v)
                write(SETITEM)
            # else tmp is empty, and we're done

    def save_global(self, obj, name=None, pack=struct.pack):
        write = self.write
        memo = self.memo

        if name is None:
            name = obj.__name__

        module = getattr(obj, "__module__", None)
        if module is None:
            module = whichmodule(obj, name)

        try:
            __import__(module, level=0)
            mod = sys.modules[module]
            klass = getattr(mod, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module, name))
        else:
            if klass is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module, name))

        if self.proto >= 2:
            code = _extension_registry.get((module, name))
            if code:
                assert code > 0
                if code <= 0xff:
                    write(EXT1 + bytes([code]))
                elif code <= 0xffff:
                    write(EXT2 + bytes([code&0xff, code>>8]))
                else:
                    write(EXT4 + pack("<i", code))
                return
        # Non-ASCII identifiers are supported only with protocols >= 3.
        if self.proto >= 3:
            write(GLOBAL + bytes(module, "utf-8") + b'\n' +
                  bytes(name, "utf-8") + b'\n')
        else:
            if self.fix_imports:
                if (module, name) in _compat_pickle.REVERSE_NAME_MAPPING:
                    module, name = _compat_pickle.REVERSE_NAME_MAPPING[(module, name)]
                if module in _compat_pickle.REVERSE_IMPORT_MAPPING:
                    module = _compat_pickle.REVERSE_IMPORT_MAPPING[module]
            try:
                write(GLOBAL + bytes(module, "ascii") + b'\n' +
                      bytes(name, "ascii") + b'\n')
            except UnicodeEncodeError:
                raise PicklingError(
                    "can't pickle global identifier '%s.%s' using "
                    "pickle protocol %i" % (module, name, self.proto))

        self.memoize(obj)

    dispatch[FunctionType] = save_global
    dispatch[BuiltinFunctionType] = save_global
    dispatch[type] = save_global
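
    # Illustrative sketch: functions and classes are pickled by reference,
    # so anything that cannot be re-imported under its module-level name
    # fails here.
    #
    #     dumps(len)            # fine; found again as builtins.len
    #     dumps(lambda x: x)    # PicklingError: not found as <module>.<lambda>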

# Pickling helpers

def _keep_alive(x, memo):
    """Keeps a reference to the object x in the memo.

    Because we remember objects by their id, we have
    to assure that possibly temporary objects are kept
    alive by referencing them.
    We store a reference at the id of the memo, which should
    normally not be used unless someone tries to deepcopy
    the memo itself...
    """
    try:
        memo[id(memo)].append(x)
    except KeyError:
        # aha, this is the first one :-)
        memo[id(memo)]=[x]


# A cache for whichmodule(), mapping a function object to the name of
# the module in which the function was found.

classmap = {} # called classmap for backwards compatibility

def whichmodule(func, funcname):
    """Figure out the module in which a function occurs.

    Search sys.modules for the module.
    Cache in classmap.
    Return a module name.
    If the function cannot be found, return "__main__".
    """
    # Python functions should always get an __module__ from their globals.
    mod = getattr(func, "__module__", None)
    if mod is not None:
        return mod
    if func in classmap:
        return classmap[func]

    for name, module in list(sys.modules.items()):
        if module is None:
            continue # skip dummy package entries
        if name != '__main__' and getattr(module, funcname, None) is func:
            break
    else:
        name = '__main__'
    classmap[func] = name
    return name


# Unpickling machinery

class _Unpickler:

    def __init__(self, file, *, fix_imports=True,
                 encoding="ASCII", errors="strict"):
        """This takes a binary file for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so no
        proto argument is needed.

        The file-like object must have two methods, a read() method
        that takes an integer argument, and a readline() method that
        requires no arguments.  Both methods should return bytes.
        The file-like object can thus be a binary file object opened for
        reading, a BytesIO object, or any other custom object that
        meets this interface.

        Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
        which are used to control compatibility support for pickle streams
        generated by Python 2.x.  If *fix_imports* is True, pickle will try to
        map the old Python 2.x names to the new names used in Python 3.x.  The
        *encoding* and *errors* tell pickle how to decode 8-bit string
        instances pickled by Python 2.x; these default to 'ASCII' and
        'strict', respectively.
        """
        self.readline = file.readline
        self.read = file.read
        self.memo = {}
        self.encoding = encoding
        self.errors = errors
        self.proto = 0
        self.fix_imports = fix_imports
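
    # Illustrative sketch of the Python 2.x compatibility options (py2_data is
    # a hypothetical stream produced by Python 2.x with protocol <= 2):
    #
    #     obj = loads(py2_data, fix_imports=True, encoding="latin-1")
    #     # fix_imports rewrites old module names (e.g. __builtin__), and
    #     # 'latin-1' decodes 8-bit str instances without raising.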

    def load(self):
        """Read a pickled object representation from the open file.

        Return the reconstituted object hierarchy specified in the file.
        """
        # Check whether Unpickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Unpickler.load().
        if not hasattr(self, "read"):
            raise UnpicklingError("Unpickler.__init__() was not called by "
                                  "%s.__init__()" % (self.__class__.__name__,))
        self.mark = object()   # any new unique object
        self.stack = []
        self.append = self.stack.append
        read = self.read
        dispatch = self.dispatch
        try:
            while 1:
                key = read(1)
                if not key:
                    raise EOFError
                assert isinstance(key, bytes_types)
                dispatch[key[0]](self)
        except _Stop as stopinst:
            return stopinst.value

    # Return largest index k such that self.stack[k] is self.mark.
    # If the stack doesn't contain a mark, eventually raises IndexError.
    # This could be sped up by maintaining another stack, of indices at which
    # the mark appears.  For that matter, the latter stack would suffice,
    # and we wouldn't need to push mark objects on self.stack at all.
    # Doing so is probably a good thing, though, since if the pickle is
    # corrupt (or hostile) we may get a clue from finding self.mark embedded
    # in unpickled objects.
    def marker(self):
        stack = self.stack
        mark = self.mark
        k = len(stack)-1
        while stack[k] is not mark: k = k-1
        return k

    def persistent_load(self, pid):
        raise UnpicklingError("unsupported persistent id encountered")

    dispatch = {}

    def load_proto(self):
        proto = ord(self.read(1))
        if not 0 <= proto <= HIGHEST_PROTOCOL:
            raise ValueError("unsupported pickle protocol: %d" % proto)
        self.proto = proto
    dispatch[PROTO[0]] = load_proto

    def load_persid(self):
        pid = self.readline()[:-1].decode("ascii")
        self.append(self.persistent_load(pid))
    dispatch[PERSID[0]] = load_persid

    def load_binpersid(self):
        pid = self.stack.pop()
        self.append(self.persistent_load(pid))
    dispatch[BINPERSID[0]] = load_binpersid

    def load_none(self):
        self.append(None)
    dispatch[NONE[0]] = load_none

    def load_false(self):
        self.append(False)
    dispatch[NEWFALSE[0]] = load_false

    def load_true(self):
        self.append(True)
    dispatch[NEWTRUE[0]] = load_true

    def load_int(self):
        data = self.readline()
        if data == FALSE[1:]:
            val = False
        elif data == TRUE[1:]:
            val = True
        else:
            val = int(data, 0)
        self.append(val)
    dispatch[INT[0]] = load_int

    def load_binint(self):
        self.append(mloads(b'i' + self.read(4)))
    dispatch[BININT[0]] = load_binint

    def load_binint1(self):
        self.append(ord(self.read(1)))
    dispatch[BININT1[0]] = load_binint1

    def load_binint2(self):
        self.append(mloads(b'i' + self.read(2) + b'\000\000'))
    dispatch[BININT2[0]] = load_binint2

    def load_long(self):
        val = self.readline()[:-1].decode("ascii")
        if val and val[-1] == 'L':
            val = val[:-1]
        self.append(int(val, 0))
    dispatch[LONG[0]] = load_long

    def load_long1(self):
        n = ord(self.read(1))
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG1[0]] = load_long1

    def load_long4(self):
        n = mloads(b'i' + self.read(4))
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG4[0]] = load_long4

    def load_float(self):
        self.append(float(self.readline()[:-1]))
    dispatch[FLOAT[0]] = load_float

    def load_binfloat(self, unpack=struct.unpack):
        self.append(unpack('>d', self.read(8))[0])
    dispatch[BINFLOAT[0]] = load_binfloat

    def load_string(self):
        orig = self.readline()
        rep = orig[:-1]
        for q in (b'"', b"'"):   # double or single quote
            if rep.startswith(q):
                if not rep.endswith(q):
                    raise ValueError("insecure string pickle")
                rep = rep[len(q):-len(q)]
                break
        else:
            raise ValueError("insecure string pickle: %r" % orig)
        self.append(codecs.escape_decode(rep)[0]
                    .decode(self.encoding, self.errors))
    dispatch[STRING[0]] = load_string

    def load_binstring(self):
        len = mloads(b'i' + self.read(4))
        data = self.read(len)
        value = str(data, self.encoding, self.errors)
        self.append(value)
    dispatch[BINSTRING[0]] = load_binstring

    def load_binbytes(self):
        len = mloads(b'i' + self.read(4))
        self.append(self.read(len))
    dispatch[BINBYTES[0]] = load_binbytes

    def load_unicode(self):
        self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
    dispatch[UNICODE[0]] = load_unicode

    def load_binunicode(self):
        len = mloads(b'i' + self.read(4))
        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
    dispatch[BINUNICODE[0]] = load_binunicode

    def load_short_binstring(self):
        len = ord(self.read(1))
        data = bytes(self.read(len))
        value = str(data, self.encoding, self.errors)
        self.append(value)
    dispatch[SHORT_BINSTRING[0]] = load_short_binstring

    def load_short_binbytes(self):
        len = ord(self.read(1))
        self.append(bytes(self.read(len)))
    dispatch[SHORT_BINBYTES[0]] = load_short_binbytes

    def load_tuple(self):
        k = self.marker()
        self.stack[k:] = [tuple(self.stack[k+1:])]
    dispatch[TUPLE[0]] = load_tuple

    def load_empty_tuple(self):
        self.append(())
    dispatch[EMPTY_TUPLE[0]] = load_empty_tuple

    def load_tuple1(self):
        self.stack[-1] = (self.stack[-1],)
    dispatch[TUPLE1[0]] = load_tuple1

    def load_tuple2(self):
        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
    dispatch[TUPLE2[0]] = load_tuple2

    def load_tuple3(self):
        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
    dispatch[TUPLE3[0]] = load_tuple3

    def load_empty_list(self):
        self.append([])
    dispatch[EMPTY_LIST[0]] = load_empty_list

    def load_empty_dictionary(self):
        self.append({})
    dispatch[EMPTY_DICT[0]] = load_empty_dictionary

    def load_list(self):
        k = self.marker()
        self.stack[k:] = [self.stack[k+1:]]
    dispatch[LIST[0]] = load_list

    def load_dict(self):
        k = self.marker()
        d = {}
        items = self.stack[k+1:]
        for i in range(0, len(items), 2):
            key = items[i]
            value = items[i+1]
            d[key] = value
        self.stack[k:] = [d]
    dispatch[DICT[0]] = load_dict

    # INST and OBJ differ only in how they get a class object.  It's not
    # only sensible to do the rest in a common routine, the two routines
    # previously diverged and grew different bugs.
    # klass is the class to instantiate, and k points to the topmost mark
    # object, following which are the arguments for klass.__init__.
    def _instantiate(self, klass, k):
        args = tuple(self.stack[k+1:])
        del self.stack[k:]
        if (args or not isinstance(klass, type) or
            hasattr(klass, "__getinitargs__")):
            try:
                value = klass(*args)
            except TypeError as err:
                raise TypeError("in constructor for %s: %s" %
                                (klass.__name__, str(err)), sys.exc_info()[2])
        else:
            value = klass.__new__(klass)
        self.append(value)

    def load_inst(self):
        module = self.readline()[:-1].decode("ascii")
        name = self.readline()[:-1].decode("ascii")
        klass = self.find_class(module, name)
        self._instantiate(klass, self.marker())
    dispatch[INST[0]] = load_inst

    def load_obj(self):
        # Stack is ... markobject classobject arg1 arg2 ...
        k = self.marker()
        klass = self.stack.pop(k+1)
        self._instantiate(klass, k)
    dispatch[OBJ[0]] = load_obj

    def load_newobj(self):
        args = self.stack.pop()
        cls = self.stack[-1]
        obj = cls.__new__(cls, *args)
        self.stack[-1] = obj
    dispatch[NEWOBJ[0]] = load_newobj

    def load_global(self):
        module = self.readline()[:-1].decode("utf-8")
        name = self.readline()[:-1].decode("utf-8")
        klass = self.find_class(module, name)
        self.append(klass)
    dispatch[GLOBAL[0]] = load_global

    def load_ext1(self):
        code = ord(self.read(1))
        self.get_extension(code)
    dispatch[EXT1[0]] = load_ext1

    def load_ext2(self):
        code = mloads(b'i' + self.read(2) + b'\000\000')
        self.get_extension(code)
    dispatch[EXT2[0]] = load_ext2

    def load_ext4(self):
        code = mloads(b'i' + self.read(4))
        self.get_extension(code)
    dispatch[EXT4[0]] = load_ext4

    def get_extension(self, code):
        nil = []
        obj = _extension_cache.get(code, nil)
        if obj is not nil:
            self.append(obj)
            return
        key = _inverted_registry.get(code)
        if not key:
            raise ValueError("unregistered extension code %d" % code)
        obj = self.find_class(*key)
        _extension_cache[code] = obj
        self.append(obj)

    def find_class(self, module, name):
        # Subclasses may override this.
        if self.proto < 3 and self.fix_imports:
            if (module, name) in _compat_pickle.NAME_MAPPING:
                module, name = _compat_pickle.NAME_MAPPING[(module, name)]
            if module in _compat_pickle.IMPORT_MAPPING:
                module = _compat_pickle.IMPORT_MAPPING[module]
        __import__(module, level=0)
        mod = sys.modules[module]
        klass = getattr(mod, name)
        return klass
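
    # Illustrative sketch of overriding find_class() to restrict what a
    # stream may load (hypothetical subclass; useful for untrusted data):
    #
    #     class RestrictedUnpickler(Unpickler):
    #         def find_class(self, module, name):
    #             if module == "builtins" and name in {"range", "complex"}:
    #                 return super().find_class(module, name)
    #             raise UnpicklingError("global %s.%s is forbidden"
    #                                   % (module, name))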

    def load_reduce(self):
        stack = self.stack
        args = stack.pop()
        func = stack[-1]
        try:
            value = func(*args)
        except:
            print(sys.exc_info())
            print(func, args)
            raise
        stack[-1] = value
    dispatch[REDUCE[0]] = load_reduce

    def load_pop(self):
        del self.stack[-1]
    dispatch[POP[0]] = load_pop

    def load_pop_mark(self):
        k = self.marker()
        del self.stack[k:]
    dispatch[POP_MARK[0]] = load_pop_mark

    def load_dup(self):
        self.append(self.stack[-1])
    dispatch[DUP[0]] = load_dup

    def load_get(self):
        i = int(self.readline()[:-1])
        self.append(self.memo[i])
    dispatch[GET[0]] = load_get

    def load_binget(self):
        i = self.read(1)[0]
        self.append(self.memo[i])
    dispatch[BINGET[0]] = load_binget

    def load_long_binget(self):
        i = mloads(b'i' + self.read(4))
        self.append(self.memo[i])
    dispatch[LONG_BINGET[0]] = load_long_binget

    def load_put(self):
        i = int(self.readline()[:-1])
        self.memo[i] = self.stack[-1]
    dispatch[PUT[0]] = load_put

    def load_binput(self):
        i = self.read(1)[0]
        self.memo[i] = self.stack[-1]
    dispatch[BINPUT[0]] = load_binput

    def load_long_binput(self):
        i = mloads(b'i' + self.read(4))
        self.memo[i] = self.stack[-1]
    dispatch[LONG_BINPUT[0]] = load_long_binput

    def load_append(self):
        stack = self.stack
        value = stack.pop()
        list = stack[-1]
        list.append(value)
    dispatch[APPEND[0]] = load_append

    def load_appends(self):
        stack = self.stack
        mark = self.marker()
        list = stack[mark - 1]
        list.extend(stack[mark + 1:])
        del stack[mark:]
    dispatch[APPENDS[0]] = load_appends

    def load_setitem(self):
        stack = self.stack
        value = stack.pop()
        key = stack.pop()
        dict = stack[-1]
        dict[key] = value
    dispatch[SETITEM[0]] = load_setitem

    def load_setitems(self):
        stack = self.stack
        mark = self.marker()
        dict = stack[mark - 1]
        for i in range(mark + 1, len(stack), 2):
            dict[stack[i]] = stack[i + 1]

        del stack[mark:]
    dispatch[SETITEMS[0]] = load_setitems

    def load_build(self):
        stack = self.stack
        state = stack.pop()
        inst = stack[-1]
        setstate = getattr(inst, "__setstate__", None)
        if setstate:
            setstate(state)
            return
        slotstate = None
        if isinstance(state, tuple) and len(state) == 2:
            state, slotstate = state
        if state:
            inst_dict = inst.__dict__
            intern = sys.intern
            for k, v in state.items():
                if type(k) is str:
                    inst_dict[intern(k)] = v
                else:
                    inst_dict[k] = v
        if slotstate:
            for k, v in slotstate.items():
                setattr(inst, k, v)
    dispatch[BUILD[0]] = load_build

    def load_mark(self):
        self.append(self.mark)
    dispatch[MARK[0]] = load_mark

    def load_stop(self):
        value = self.stack.pop()
        raise _Stop(value)
    dispatch[STOP[0]] = load_stop

# Encode/decode longs in linear time.

import binascii as _binascii

def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0 is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    """
    if x == 0:
        return b''
    if x > 0:
        ashex = hex(x)
        assert ashex.startswith("0x")
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # need an even # of nibbles for unhexlify
            ashex = "0x0" + ashex[2:]
        elif int(ashex[2], 16) >= 8:
            # "looks negative", so need a byte of sign bits
            ashex = "0x00" + ashex[2:]
    else:
        # Build the 256's-complement:  (1L << nbytes) + x.  The trick is
        # to find the number of bytes in linear time (although that should
        # really be a constant-time task).
        ashex = hex(-x)
        assert ashex.startswith("0x")
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # Extend to a full byte.
            nibbles += 1
        nbits = nibbles * 4
        x += 1 << nbits
        assert x > 0
        ashex = hex(x)
        njunkchars = 2 + ashex.endswith('L')
        newnibbles = len(ashex) - njunkchars
        if newnibbles < nibbles:
            ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
        if int(ashex[2], 16) < 8:
            # "looks positive", so need a byte of sign bits
            ashex = "0xff" + ashex[2:]

    if ashex.endswith('L'):
        ashex = ashex[2:-1]
    else:
        ashex = ashex[2:]
    assert len(ashex) & 1 == 0, (x, ashex)
    binary = _binascii.unhexlify(ashex)
    return bytes(binary[::-1])

def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    nbytes = len(data)
    if nbytes == 0:
        return 0
    ashex = _binascii.hexlify(data[::-1])
    n = int(ashex, 16)   # quadratic time before Python 2.3; linear now
    if data[-1] >= 0x80:
        n -= 1 << (nbytes * 8)
    return n

# Use the faster _pickle if possible
try:
    from _pickle import *
except ImportError:
    Pickler, Unpickler = _Pickler, _Unpickler

# Shorthands

def dump(obj, file, protocol=None, *, fix_imports=True):
    Pickler(file, protocol, fix_imports=fix_imports).dump(obj)

def dumps(obj, protocol=None, *, fix_imports=True):
    f = io.BytesIO()
    Pickler(f, protocol, fix_imports=fix_imports).dump(obj)
    res = f.getvalue()
    assert isinstance(res, bytes_types)
    return res

def load(file, *, fix_imports=True, encoding="ASCII", errors="strict"):
    return Unpickler(file, fix_imports=fix_imports,
                     encoding=encoding, errors=errors).load()

def loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
    if isinstance(s, str):
        raise TypeError("Can't load pickle from unicode string")
    file = io.BytesIO(s)
    return Unpickler(file, fix_imports=fix_imports,
                     encoding=encoding, errors=errors).load()
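
# Illustrative round trip through the in-memory shorthands (protocol 2 keeps
# the stream readable by Python 2.x as well):
#
#     blob = dumps([1, "two", (3.0,)], protocol=2)
#     assert loads(blob) == [1, "two", (3.0,)]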

# Doctest
def _test():
    import doctest
    return doctest.testmod()

if __name__ == "__main__":
    _test()