# # Reimplementation of cPickle, mostly as a copy of pickle.py # from pickle import Pickler, dump, dumps, PickleError, PicklingError, UnpicklingError, _EmptyClass from pickle import __doc__, __version__, format_version, compatible_formats from types import * from copy_reg import dispatch_table from copy_reg import _extension_registry, _inverted_registry, _extension_cache import marshal, struct, sys try: from __pypy__ import builtinify except ImportError: builtinify = lambda f: f # These are purely informational; no code uses these. format_version = "2.0" # File format version we write compatible_formats = ["1.0", # Original protocol 0 "1.1", # Protocol 0 with INST added "1.2", # Original protocol 1 "1.3", # Protocol 1 with BINFLOAT added "2.0", # Protocol 2 ] # Old format versions we can read # Keep in synch with cPickle. This is the highest protocol number we # know how to read. HIGHEST_PROTOCOL = 2 BadPickleGet = KeyError UnpickleableError = PicklingError MARK = ord('(') # push special markobject on stack STOP = ord('.') # every pickle ends with STOP POP = ord('0') # discard topmost stack item POP_MARK = ord('1') # discard stack top through topmost markobject DUP = ord('2') # duplicate top stack item FLOAT = ord('F') # push float object; decimal string argument INT = ord('I') # push integer or bool; decimal string argument BININT = ord('J') # push four-byte signed int BININT1 = ord('K') # push 1-byte unsigned int LONG = ord('L') # push long; decimal string argument BININT2 = ord('M') # push 2-byte unsigned int NONE = ord('N') # push None PERSID = ord('P') # push persistent object; id is taken from string arg BINPERSID = ord('Q') # " " " ; " " " " stack REDUCE = ord('R') # apply callable to argtuple, both on stack STRING = ord('S') # push string; NL-terminated string argument BINSTRING = ord('T') # push string; counted binary string argument SHORT_BINSTRING = ord('U') # " " ; " " " " < 256 bytes UNICODE = ord('V') # push Unicode string; raw-unicode-escaped'd argument BINUNICODE = ord('X') # " " " ; counted UTF-8 string argument APPEND = ord('a') # append stack top to list below it BUILD = ord('b') # call __setstate__ or __dict__.update() GLOBAL = ord('c') # push self.find_class(modname, name); 2 string args DICT = ord('d') # build a dict from stack items EMPTY_DICT = ord('}') # push empty dict APPENDS = ord('e') # extend list on stack by topmost stack slice GET = ord('g') # push item from memo on stack; index is string arg BINGET = ord('h') # " " " " " " ; " " 1-byte arg INST = ord('i') # build & push class instance LONG_BINGET = ord('j') # push item from memo on stack; index is 4-byte arg LIST = ord('l') # build list from topmost stack items EMPTY_LIST = ord(']') # push empty list OBJ = ord('o') # build & push class instance PUT = ord('p') # store stack top in memo; index is string arg BINPUT = ord('q') # " " " " " ; " " 1-byte arg LONG_BINPUT = ord('r') # " " " " " ; " " 4-byte arg SETITEM = ord('s') # add key+value pair to dict TUPLE = ord('t') # build tuple from topmost stack items EMPTY_TUPLE = ord(')') # push empty tuple SETITEMS = ord('u') # modify dict by adding topmost key+value pairs BINFLOAT = ord('G') # push float; arg is 8-byte float encoding TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py # Protocol 2 PROTO = ord('\x80') # identify pickle protocol NEWOBJ = ord('\x81') # build object by applying cls.__new__ to argtuple EXT1 = ord('\x82') # push object from extension registry; 1-byte index EXT2 = ord('\x83') # ditto, but 2-byte index EXT4 = ord('\x84') # ditto, but 4-byte index TUPLE1 = ord('\x85') # build 1-tuple from stack top TUPLE2 = ord('\x86') # build 2-tuple from two topmost stack items TUPLE3 = ord('\x87') # build 3-tuple from three topmost stack items NEWTRUE = ord('\x88') # push True NEWFALSE = ord('\x89') # push False LONG1 = ord('\x8a') # push long from < 256 bytes LONG4 = ord('\x8b') # push really big long _tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3] # ____________________________________________________________ # XXX some temporary dark magic to produce pickled dumps that are # closer to the ones produced by cPickle in CPython from pickle import StringIO PythonPickler = Pickler class Pickler(PythonPickler): def __init__(self, *args, **kw): self.__f = None if len(args) == 1 and isinstance(args[0], int): self.__f = StringIO() PythonPickler.__init__(self, self.__f, args[0], **kw) else: PythonPickler.__init__(self, *args, **kw) def memoize(self, obj): self.memo[id(None)] = None # cPickle starts counting at one return PythonPickler.memoize(self, obj) def getvalue(self): return self.__f and self.__f.getvalue() @builtinify def dump(obj, file, protocol=None): if protocol > HIGHEST_PROTOCOL: # use cPickle error message, not pickle.py one raise ValueError("pickle protocol %d asked for; " "the highest available protocol is %d" % ( protocol, HIGHEST_PROTOCOL)) Pickler(file, protocol).dump(obj) @builtinify def dumps(obj, protocol=None): if protocol > HIGHEST_PROTOCOL: # use cPickle error message, not pickle.py one raise ValueError("pickle protocol %d asked for; " "the highest available protocol is %d" % ( protocol, HIGHEST_PROTOCOL)) file = StringIO() Pickler(file, protocol).dump(obj) return file.getvalue() # Why use struct.pack() for pickling but marshal.loads() for # unpickling? struct.pack() is 40% faster than marshal.dumps(), but # marshal.loads() is twice as fast as struct.unpack()! mloads = marshal.loads # Unpickling machinery class _Stack(list): def pop(self, index=-1): try: return list.pop(self, index) except IndexError: raise UnpicklingError("unpickling stack underflow") class Unpickler(object): def __init__(self, file): """This takes a file-like object for reading a pickle data stream. The protocol version of the pickle is detected automatically, so no proto argument is needed. The file-like object must have two methods, a read() method that takes an integer argument, and a readline() method that requires no arguments. Both methods should return a string. Thus file-like object can be a file object opened for reading, a StringIO object, or any other custom object that meets this interface. """ self.readline = file.readline self.read = file.read self.memo = {} def load(self): """Read a pickled object representation from the open file. Return the reconstituted object hierarchy specified in the file. """ self.mark = object() # any new unique object self.stack = _Stack() self.append = self.stack.append try: key = ord(self.read(1)) while key != STOP: try: meth = self.dispatch[key] except KeyError: raise UnpicklingError("invalid load key, %r." % chr(key)) meth(self) key = ord(self.read(1)) except TypeError: if self.read(1) == '': raise EOFError raise return self.stack.pop() # Return largest index k such that self.stack[k] is self.mark. # If the stack doesn't contain a mark, eventually raises IndexError. # This could be sped by maintaining another stack, of indices at which # the mark appears. For that matter, the latter stack would suffice, # and we wouldn't need to push mark objects on self.stack at all. # Doing so is probably a good thing, though, since if the pickle is # corrupt (or hostile) we may get a clue from finding self.mark embedded # in unpickled objects. def marker(self): k = len(self.stack)-1 while self.stack[k] is not self.mark: k -= 1 return k dispatch = {} def load_proto(self): proto = ord(self.read(1)) if not 0 <= proto <= 2: raise ValueError, "unsupported pickle protocol: %d" % proto dispatch[PROTO] = load_proto def load_persid(self): pid = self.readline()[:-1] self.append(self.persistent_load(pid)) dispatch[PERSID] = load_persid def load_binpersid(self): pid = self.stack.pop() self.append(self.persistent_load(pid)) dispatch[BINPERSID] = load_binpersid def load_none(self): self.append(None) dispatch[NONE] = load_none def load_false(self): self.append(False) dispatch[NEWFALSE] = load_false def load_true(self): self.append(True) dispatch[NEWTRUE] = load_true def load_int(self): data = self.readline() if data == FALSE[1:]: val = False elif data == TRUE[1:]: val = True else: val = int(data) self.append(val) dispatch[INT] = load_int def load_binint(self): self.append(mloads('i' + self.read(4))) dispatch[BININT] = load_binint def load_binint1(self): self.append(ord(self.read(1))) dispatch[BININT1] = load_binint1 def load_binint2(self): self.append(mloads('i' + self.read(2) + '\000\000')) dispatch[BININT2] = load_binint2 def load_long(self): self.append(long(self.readline()[:-1], 0)) dispatch[LONG] = load_long def load_long1(self): n = ord(self.read(1)) bytes = self.read(n) self.append(decode_long(bytes)) dispatch[LONG1] = load_long1 def load_long4(self): n = mloads('i' + self.read(4)) bytes = self.read(n) self.append(decode_long(bytes)) dispatch[LONG4] = load_long4 def load_float(self): self.append(float(self.readline()[:-1])) dispatch[FLOAT] = load_float def load_binfloat(self, unpack=struct.unpack): self.append(unpack('>d', self.read(8))[0]) dispatch[BINFLOAT] = load_binfloat def load_string(self): rep = self.readline() if len(rep) < 3: raise ValueError, "insecure string pickle" if rep[0] == "'" == rep[-2]: rep = rep[1:-2] elif rep[0] == '"' == rep[-2]: rep = rep[1:-2] else: raise ValueError, "insecure string pickle" self.append(rep.decode("string-escape")) dispatch[STRING] = load_string def load_binstring(self): L = mloads('i' + self.read(4)) self.append(self.read(L)) dispatch[BINSTRING] = load_binstring def load_unicode(self): self.append(unicode(self.readline()[:-1],'raw-unicode-escape')) dispatch[UNICODE] = load_unicode def load_binunicode(self): L = mloads('i' + self.read(4)) self.append(unicode(self.read(L),'utf-8')) dispatch[BINUNICODE] = load_binunicode def load_short_binstring(self): L = ord(self.read(1)) self.append(self.read(L)) dispatch[SHORT_BINSTRING] = load_short_binstring def load_tuple(self): k = self.marker() self.stack[k:] = [tuple(self.stack[k+1:])] dispatch[TUPLE] = load_tuple def load_empty_tuple(self): self.stack.append(()) dispatch[EMPTY_TUPLE] = load_empty_tuple def load_tuple1(self): self.stack[-1] = (self.stack[-1],) dispatch[TUPLE1] = load_tuple1 def load_tuple2(self): self.stack[-2:] = [(self.stack[-2], self.stack[-1])] dispatch[TUPLE2] = load_tuple2 def load_tuple3(self): self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])] dispatch[TUPLE3] = load_tuple3 def load_empty_list(self): self.stack.append([]) dispatch[EMPTY_LIST] = load_empty_list def load_empty_dictionary(self): self.stack.append({}) dispatch[EMPTY_DICT] = load_empty_dictionary def load_list(self): k = self.marker() self.stack[k:] = [self.stack[k+1:]] dispatch[LIST] = load_list def load_dict(self): k = self.marker() d = {} items = self.stack[k+1:] for i in range(0, len(items), 2): key = items[i] value = items[i+1] d[key] = value self.stack[k:] = [d] dispatch[DICT] = load_dict # INST and OBJ differ only in how they get a class object. It's not # only sensible to do the rest in a common routine, the two routines # previously diverged and grew different bugs. # klass is the class to instantiate, and k points to the topmost mark # object, following which are the arguments for klass.__init__. def _instantiate(self, klass, k): args = tuple(self.stack[k+1:]) del self.stack[k:] instantiated = 0 if (not args and type(klass) is ClassType and not hasattr(klass, "__getinitargs__")): try: value = _EmptyClass() value.__class__ = klass instantiated = 1 except RuntimeError: # In restricted execution, assignment to inst.__class__ is # prohibited pass if not instantiated: try: value = klass(*args) except TypeError, err: raise TypeError, "in constructor for %s: %s" % ( klass.__name__, str(err)), sys.exc_info()[2] self.append(value) def load_inst(self): module = self.readline()[:-1] name = self.readline()[:-1] klass = self.find_class(module, name) self._instantiate(klass, self.marker()) dispatch[INST] = load_inst def load_obj(self): # Stack is ... markobject classobject arg1 arg2 ... k = self.marker() klass = self.stack.pop(k+1) self._instantiate(klass, k) dispatch[OBJ] = load_obj def load_newobj(self): args = self.stack.pop() cls = self.stack[-1] obj = cls.__new__(cls, *args) self.stack[-1] = obj dispatch[NEWOBJ] = load_newobj def load_global(self): module = self.readline()[:-1] name = self.readline()[:-1] klass = self.find_class(module, name) self.append(klass) dispatch[GLOBAL] = load_global def load_ext1(self): code = ord(self.read(1)) self.get_extension(code) dispatch[EXT1] = load_ext1 def load_ext2(self): code = mloads('i' + self.read(2) + '\000\000') self.get_extension(code) dispatch[EXT2] = load_ext2 def load_ext4(self): code = mloads('i' + self.read(4)) self.get_extension(code) dispatch[EXT4] = load_ext4 def get_extension(self, code): nil = [] obj = _extension_cache.get(code, nil) if obj is not nil: self.append(obj) return key = _inverted_registry.get(code) if not key: raise ValueError("unregistered extension code %d" % code) obj = self.find_class(*key) _extension_cache[code] = obj self.append(obj) def find_class(self, module, name): if self.find_global is None: raise UnpicklingError( "Global and instance pickles are not supported.") return self.find_global(module, name) def find_global(self, module, name): # This can officially be patched directly in the Unpickler # instance, according to the docs __import__(module) mod = sys.modules[module] klass = getattr(mod, name) return klass def load_reduce(self): args = self.stack.pop() func = self.stack[-1] value = self.stack[-1](*args) self.stack[-1] = value dispatch[REDUCE] = load_reduce def load_pop(self): del self.stack[-1] dispatch[POP] = load_pop def load_pop_mark(self): k = self.marker() del self.stack[k:] dispatch[POP_MARK] = load_pop_mark def load_dup(self): self.append(self.stack[-1]) dispatch[DUP] = load_dup def load_get(self): self.append(self.memo[self.readline()[:-1]]) dispatch[GET] = load_get def load_binget(self): i = ord(self.read(1)) self.append(self.memo[repr(i)]) dispatch[BINGET] = load_binget def load_long_binget(self): i = mloads('i' + self.read(4)) self.append(self.memo[repr(i)]) dispatch[LONG_BINGET] = load_long_binget def load_put(self): self.memo[self.readline()[:-1]] = self.stack[-1] dispatch[PUT] = load_put def load_binput(self): i = ord(self.read(1)) self.memo[repr(i)] = self.stack[-1] dispatch[BINPUT] = load_binput def load_long_binput(self): i = mloads('i' + self.read(4)) self.memo[repr(i)] = self.stack[-1] dispatch[LONG_BINPUT] = load_long_binput def load_append(self): value = self.stack.pop() self.stack[-1].append(value) dispatch[APPEND] = load_append def load_appends(self): stack = self.stack mark = self.marker() lst = stack[mark - 1] lst.extend(stack[mark + 1:]) del stack[mark:] dispatch[APPENDS] = load_appends def load_setitem(self): stack = self.stack value = stack.pop() key = stack.pop() dict = stack[-1] dict[key] = value dispatch[SETITEM] = load_setitem def load_setitems(self): stack = self.stack mark = self.marker() dict = stack[mark - 1] for i in range(mark + 1, len(stack), 2): dict[stack[i]] = stack[i + 1] del stack[mark:] dispatch[SETITEMS] = load_setitems def load_build(self): stack = self.stack state = stack.pop() inst = stack[-1] setstate = getattr(inst, "__setstate__", None) if setstate: setstate(state) return slotstate = None if isinstance(state, tuple) and len(state) == 2: state, slotstate = state if state: try: d = inst.__dict__ try: for k, v in state.iteritems(): d[intern(k)] = v # keys in state don't have to be strings # don't blow up, but don't go out of our way except TypeError: d.update(state) except RuntimeError: # XXX In restricted execution, the instance's __dict__ # is not accessible. Use the old way of unpickling # the instance variables. This is a semantic # difference when unpickling in restricted # vs. unrestricted modes. # Note, however, that cPickle has never tried to do the # .update() business, and always uses # PyObject_SetItem(inst.__dict__, key, value) in a # loop over state.items(). for k, v in state.items(): setattr(inst, k, v) if slotstate: for k, v in slotstate.items(): setattr(inst, k, v) dispatch[BUILD] = load_build def load_mark(self): self.append(self.mark) dispatch[MARK] = load_mark #from pickle import decode_long def decode_long(data): r"""Decode a long from a two's complement little-endian binary string. This is overriden on PyPy by a RPython version that has linear complexity. >>> decode_long('') 0L >>> decode_long("\xff\x00") 255L >>> decode_long("\xff\x7f") 32767L >>> decode_long("\x00\xff") -256L >>> decode_long("\x00\x80") -32768L >>> decode_long("\x80") -128L >>> decode_long("\x7f") 127L """ nbytes = len(data) if nbytes == 0: return 0L ind = nbytes - 1 while ind and ord(data[ind]) == 0: ind -= 1 n = ord(data[ind]) while ind: n <<= 8 ind -= 1 if ord(data[ind]): n += ord(data[ind]) if ord(data[nbytes - 1]) >= 128: n -= 1L << (nbytes << 3) return n try: from __pypy__ import decode_long except ImportError: pass def load(f): return Unpickler(f).load() def loads(str): f = StringIO(str) return Unpickler(f).load()