python/google/appengine/datastore/sortable_pb_encoder.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2007 Google Inc.
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16 #
  17
  18
  19
  20
  21
  22 """An Encoder class for Protocol Buffers that preserves sorting characteristics.
  23
  24 This is used by datastore_sqlite_stub and datastore_types to match the ordering
  25 semantics of the production datastore. Broadly, there are four
  26 changes from regular PB encoding:
  27
  28  - Strings are escaped and null terminated instead of length-prefixed. The
  29    escaping replaces \x00 with \x01\x01 and \x01 with \x01\x02, thus preserving
  30    the ordering of the original string.
 - Variable length integers are encoded using a variable length encoding that
   preserves order. The first byte stores the value itself (shifted by a fixed
   offset) if it's between -119 and 119; otherwise it encodes the sign and the
   number of bytes that follow.
  34  - Numbers are stored big endian instead of little endian.
  35  - Negative doubles are entirely negated, while positive doubles have their sign
  36    bit flipped.
  37
  38 Warning:
  39   Due to the way nested Protocol Buffers are encoded, this encoder will NOT
  40   preserve sorting characteristics for embedded protocol buffers!
  41 """
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55 import array
  56 import struct
  57
  58 from google.net.proto import ProtocolBuffer
  59
  60
  61 _MAX_UNSIGNED_BYTE = 255
  62
  63
  64 _MAX_LONG_BYTES = 8
  65
  66
  67
  68
  69 _MAX_INLINE = (_MAX_UNSIGNED_BYTE - (2 * _MAX_LONG_BYTES)) / 2
  70 _MIN_INLINE = -_MAX_INLINE
  71 _OFFSET = 1 + 8
  72 _POS_OFFSET = _OFFSET + _MAX_INLINE * 2
  73
  74
  75 class Encoder(ProtocolBuffer.Encoder):
  76   """Encodes Protocol Buffers in a form that sorts nicely."""
  77
  78   def put16(self, value):
  79     if value < 0 or value >= (1<<16):
  80       raise ProtocolBuffer.ProtocolBufferEncodeError, 'u16 too big'
  81     self.buf.append((value >> 8) & 0xff)
  82     self.buf.append((value >> 0) & 0xff)
  83     return
  84
  85   def put32(self, value):
  86     if value < 0 or value >= (1L<<32):
  87       raise ProtocolBuffer.ProtocolBufferEncodeError, 'u32 too big'
  88     self.buf.append((value >> 24) & 0xff)
  89     self.buf.append((value >> 16) & 0xff)
  90     self.buf.append((value >> 8) & 0xff)
  91     self.buf.append((value >> 0) & 0xff)
  92     return
  93
  94   def put64(self, value):
  95     if value < 0 or value >= (1L<<64):
  96       raise ProtocolBuffer.ProtocolBufferEncodeError, 'u64 too big'
  97     self.buf.append((value >> 56) & 0xff)
  98     self.buf.append((value >> 48) & 0xff)
  99     self.buf.append((value >> 40) & 0xff)
 100     self.buf.append((value >> 32) & 0xff)
 101     self.buf.append((value >> 24) & 0xff)
 102     self.buf.append((value >> 16) & 0xff)
 103     self.buf.append((value >> 8) & 0xff)
 104     self.buf.append((value >> 0) & 0xff)
 105     return
 106
 107   def _PutVarInt(self, value):
 108     if value is None:
 109       self.buf.append(0)
 110       return
 111
 112     if value >= _MIN_INLINE and value <= _MAX_INLINE:
 113       value = _OFFSET + (value - _MIN_INLINE)
 114       self.buf.append(value & 0xff)
 115       return
 116
 117     negative = False
 118
 119     if value < 0:
 120       value = _MIN_INLINE - value
 121       negative = True
 122     else:
 123       value = value - _MAX_INLINE
 124
 125     len = 0
 126     w = value
 127     while w > 0:
 128       w >>= 8
 129       len += 1
 130
 131     if negative:
 132       head = _OFFSET - len
 133     else:
 134       head = _POS_OFFSET + len
 135     self.buf.append(head & 0xff)
 136
 137     for i in range(len - 1, -1, -1):
 138       b = value >> (i * 8)
 139       if negative:
 140         b = _MAX_UNSIGNED_BYTE - (b & 0xff)
 141       self.buf.append(b & 0xff)
 142
 143   def putVarInt32(self, value):
 144     if value >= 0x80000000 or value < -0x80000000:
 145       raise ProtocolBuffer.ProtocolBufferEncodeError, 'int32 too big'
 146     self._PutVarInt(value)
 147
 148   def putVarInt64(self, value):
 149     if value >= 0x8000000000000000 or value < -0x8000000000000000:
 150       raise ProtocolBuffer.ProtocolBufferEncodeError, 'int64 too big'
 151     self._PutVarInt(value)
 152
 153   def putVarUint64(self, value):
 154     if value < 0 or value >= 0x10000000000000000:
 155       raise ProtocolBuffer.ProtocolBufferEncodeError, 'uint64 too big'
 156     self._PutVarInt(value)
 157
 158   def _isFloatNegative(self, value, encoded):
 159     if value == 0:
 160       return encoded[0] == 128
 161     return value < 0
 162
 163   def putFloat(self, value):
 164     encoded = array.array('B')
 165     encoded.fromstring(struct.pack('>f', value))
 166     if self._isFloatNegative(value, encoded):
 167
 168
 169       encoded[0] ^= 0xFF
 170       encoded[1] ^= 0xFF
 171       encoded[2] ^= 0xFF
 172       encoded[3] ^= 0xFF
 173     else:
 174
 175       encoded[0] ^= 0x80
 176     self.buf.extend(encoded)
 177
 178   def putDouble(self, value):
 179     encoded = array.array('B')
 180     encoded.fromstring(struct.pack('>d', value))
 181     if self._isFloatNegative(value, encoded):
 182
 183
 184       encoded[0] ^= 0xFF
 185       encoded[1] ^= 0xFF
 186       encoded[2] ^= 0xFF
 187       encoded[3] ^= 0xFF
 188       encoded[4] ^= 0xFF
 189       encoded[5] ^= 0xFF
 190       encoded[6] ^= 0xFF
 191       encoded[7] ^= 0xFF
 192     else:
 193
 194       encoded[0] ^= 0x80
 195     self.buf.extend(encoded)
 196
 197   def putPrefixedString(self, value):
 198
 199
 200     self.buf.fromstring(
 201         value.replace('\x01', '\x01\x02').replace('\x00', '\x01\x01') + '\x00')
 202
 203
 204 class Decoder(ProtocolBuffer.Decoder):
 205   def __init__(self, buf, idx=0, limit=None):
 206     if not limit:
 207       limit = len(buf)
 208     ProtocolBuffer.Decoder.__init__(self, buf, idx, limit)
 209
 210   def get16(self):
 211     if self.idx + 2 > self.limit:
 212       raise ProtocolBuffer.ProtocolBufferDecodeError, 'truncated'
 213     c = self.buf[self.idx]
 214     d = self.buf[self.idx + 1]
 215     self.idx += 2
 216     return (c << 8) | d
 217
 218   def get32(self):
 219     if self.idx + 4 > self.limit:
 220       raise ProtocolBuffer.ProtocolBufferDecodeError, 'truncated'
 221     c = long(self.buf[self.idx])
 222     d = self.buf[self.idx + 1]
 223     e = self.buf[self.idx + 2]
 224     f = self.buf[self.idx + 3]
 225     self.idx += 4
 226     return (c << 24) | (d << 16) | (e << 8) | f
 227
 228   def get64(self):
 229     if self.idx + 8 > self.limit:
 230       raise ProtocolBuffer.ProtocolBufferDecodeError, 'truncated'
 231     c = long(self.buf[self.idx])
 232     d = long(self.buf[self.idx + 1])
 233     e = long(self.buf[self.idx + 2])
 234     f = long(self.buf[self.idx + 3])
 235     g = long(self.buf[self.idx + 4])
 236     h = self.buf[self.idx + 5]
 237     i = self.buf[self.idx + 6]
 238     j = self.buf[self.idx + 7]
 239     self.idx += 8
 240     return ((c << 56) | (d << 48) | (e << 40) | (f << 32) | (g << 24)
 241             | (h << 16) | (i << 8) | j)
 242
 243   def getVarInt64(self):
 244     b = self.get8()
 245     if b >= _OFFSET and b <= _POS_OFFSET:
 246       return b - _OFFSET + _MIN_INLINE
 247     if b == 0:
 248       return None
 249
 250     if b < _OFFSET:
 251       negative = True
 252       bytes = _OFFSET - b
 253     else:
 254       negative = False
 255       bytes = b - _POS_OFFSET
 256
 257     ret = 0
 258     for _ in range(bytes):
 259       b = self.get8()
 260       if negative:
 261         b = _MAX_UNSIGNED_BYTE - b
 262       ret = ret << 8 | b
 263
 264     if negative:
 265       return _MIN_INLINE - ret
 266     else:
 267       return ret + _MAX_INLINE
 268
 269   def getVarInt32(self):
 270     result = self.getVarInt64()
 271     if result >= 0x80000000L or result < -0x80000000L:
 272       raise ProtocolBuffer.ProtocolBufferDecodeError, 'corrupted'
 273     return result
 274
 275   def getVarUint64(self):
 276     result = self.getVarInt64()
 277     if result < 0:
 278       raise ProtocolBuffer.ProtocolBufferDecodeError, 'corrupted'
 279     return result
 280
 281   def getFloat(self):
 282     if self.idx + 4 > self.limit:
 283       raise ProtocolBuffer.ProtocolBufferDecodeError, 'truncated'
 284     a = self.buf[self.idx:self.idx+4]
 285     self.idx += 4
 286     if a[0] & 0x80:
 287
 288       a[0] ^= 0x80
 289     else:
 290
 291       a = [x ^ 0xFF for x in a]
 292     return struct.unpack('>f', array.array('B', a).tostring())[0]
 293
 294   def getDouble(self):
 295     if self.idx + 8 > self.limit:
 296       raise ProtocolBuffer.ProtocolBufferDecodeError, 'truncated'
 297     a = self.buf[self.idx:self.idx+8]
 298     self.idx += 8
 299     if a[0] & 0x80:
 300
 301       a[0] ^= 0x80
 302     else:
 303
 304       a = [x ^ 0xFF for x in a]
 305     return struct.unpack('>d', array.array('B', a).tostring())[0]
 306
 307   def getPrefixedString(self):
 308     end_idx = self.idx
 309     while self.buf[end_idx] != 0:
 310       end_idx += 1
 311
 312     data = array.array('B', self.buf[self.idx:end_idx]).tostring()
 313     self.idx = end_idx + 1
 314     return data.replace('\x01\x01', '\x00').replace('\x01\x02', '\x01')