App Engine Python SDK version 1.8.9
[gae.git] / python / google / appengine / datastore / sortable_pb_encoder.py
blob5294d455a4f6be025db9db998290e1118b472449
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
22 """An Encoder class for Protocol Buffers that preserves sorting characteristics.
24 This is used by datastore_sqlite_stub and datastore_types to match the ordering
25 semantics of the production datastore. Broadly, there are four
26 changes from regular PB encoding:
28 - Strings are escaped and null terminated instead of length-prefixed. The
29 escaping replaces \x00 with \x01\x01 and \x01 with \x01\x02, thus preserving
30 the ordering of the original string.
- Variable length integers are encoded using a variable length encoding that
  preserves order. The first byte stores the value itself (shifted by a fixed
  offset) if it's between -119 and 119; otherwise it encodes the sign and the
  number of bytes that follow.
34 - Numbers are stored big endian instead of little endian.
35 - Negative doubles are entirely negated, while positive doubles have their sign
36 bit flipped.
38 Warning:
39 Due to the way nested Protocol Buffers are encoded, this encoder will NOT
40 preserve sorting characteristics for embedded protocol buffers!
41 """
55 import array
56 import struct
58 from google.net.proto import ProtocolBuffer
# Largest value that fits in a single unsigned byte.
_MAX_UNSIGNED_BYTE = 255

# Maximum number of payload bytes that may follow the head byte of a
# variable-length integer (enough for a 64-bit value).
_MAX_LONG_BYTES = 8

# Largest magnitude stored directly in the head byte. One head value is needed
# per possible payload length for each sign, and the remainder of the byte
# range is split between negative and positive inline values. Floor division
# keeps the result an integer even when true division is in effect.
_MAX_INLINE = (_MAX_UNSIGNED_BYTE - (2 * _MAX_LONG_BYTES)) // 2
_MIN_INLINE = -_MAX_INLINE

# Head byte used for _MIN_INLINE. Head 0 marks None and heads 1..8 introduce
# negative values that need 8..1 payload bytes.
_OFFSET = 1 + _MAX_LONG_BYTES

# Head byte used for _MAX_INLINE. Heads above it introduce positive values
# that need 1..8 payload bytes.
_POS_OFFSET = _OFFSET + _MAX_INLINE * 2
class Encoder(ProtocolBuffer.Encoder):
  """Encodes Protocol Buffers in a form that sorts nicely.

  Fixed-width integers are written big endian, variable-length integers use
  an order-preserving head byte plus payload, floating point values have
  their bits adjusted so the bytes compare like the numbers, and strings are
  escaped and NUL terminated instead of length-prefixed.

  All output is appended to self.buf, which is provided by the base class.
  """

  def put16(self, value):
    """Appends value as an unsigned 16-bit big-endian integer.

    Raises:
      ProtocolBuffer.ProtocolBufferEncodeError: value is outside [0, 2**16).
    """
    if value < 0 or value >= (1 << 16):
      raise ProtocolBuffer.ProtocolBufferEncodeError('u16 too big')
    for shift in (8, 0):
      self.buf.append((value >> shift) & 0xff)

  def put32(self, value):
    """Appends value as an unsigned 32-bit big-endian integer.

    Raises:
      ProtocolBuffer.ProtocolBufferEncodeError: value is outside [0, 2**32).
    """
    if value < 0 or value >= (1 << 32):
      raise ProtocolBuffer.ProtocolBufferEncodeError('u32 too big')
    for shift in (24, 16, 8, 0):
      self.buf.append((value >> shift) & 0xff)

  def put64(self, value):
    """Appends value as an unsigned 64-bit big-endian integer.

    Raises:
      ProtocolBuffer.ProtocolBufferEncodeError: value is outside [0, 2**64).
    """
    if value < 0 or value >= (1 << 64):
      raise ProtocolBuffer.ProtocolBufferEncodeError('u64 too big')
    for shift in (56, 48, 40, 32, 24, 16, 8, 0):
      self.buf.append((value >> shift) & 0xff)

  def _PutVarInt(self, value):
    """Appends value using the order-preserving variable-length encoding.

    None is written as a single 0 byte. Values in [_MIN_INLINE, _MAX_INLINE]
    are written as one byte. Any other value is written as a head byte that
    encodes the sign and the payload length, followed by the big-endian
    payload. No range check is done here; the public putVar* methods do it.

    Args:
      value: an integer, or None.
    """
    if value is None:
      self.buf.append(0)
      return

    if _MIN_INLINE <= value <= _MAX_INLINE:
      self.buf.append((_OFFSET + (value - _MIN_INLINE)) & 0xff)
      return

    # Distance beyond the inline range; always positive from here on.
    negative = value < 0
    if negative:
      magnitude = _MIN_INLINE - value
    else:
      magnitude = value - _MAX_INLINE

    # Count the bytes needed to hold the magnitude.
    num_bytes = 0
    remaining = magnitude
    while remaining > 0:
      remaining >>= 8
      num_bytes += 1

    # Longer negative values get smaller heads and longer positive values get
    # larger heads, so the head byte alone orders values of different length.
    if negative:
      head = _OFFSET - num_bytes
    else:
      head = _POS_OFFSET + num_bytes
    self.buf.append(head & 0xff)

    for i in range(num_bytes - 1, -1, -1):
      b = magnitude >> (i * 8)
      if negative:
        # Complement each byte so that a larger magnitude sorts first.
        b = _MAX_UNSIGNED_BYTE - (b & 0xff)
      self.buf.append(b & 0xff)

  def putVarInt32(self, value):
    """Appends a signed 32-bit integer in variable-length form.

    Raises:
      ProtocolBuffer.ProtocolBufferEncodeError: value is outside
        [-2**31, 2**31).
    """
    if value >= 0x80000000 or value < -0x80000000:
      raise ProtocolBuffer.ProtocolBufferEncodeError('int32 too big')
    self._PutVarInt(value)

  def putVarInt64(self, value):
    """Appends a signed 64-bit integer in variable-length form.

    Raises:
      ProtocolBuffer.ProtocolBufferEncodeError: value is outside
        [-2**63, 2**63).
    """
    if value >= 0x8000000000000000 or value < -0x8000000000000000:
      raise ProtocolBuffer.ProtocolBufferEncodeError('int64 too big')
    self._PutVarInt(value)

  def putVarUint64(self, value):
    """Appends an unsigned 64-bit integer in variable-length form.

    Raises:
      ProtocolBuffer.ProtocolBufferEncodeError: value is outside [0, 2**64).
    """
    if value < 0 or value >= 0x10000000000000000:
      raise ProtocolBuffer.ProtocolBufferEncodeError('uint64 too big')
    self._PutVarInt(value)

  def _isFloatNegative(self, value, encoded):
    """Returns whether value should be treated as negative.

    Args:
      value: the number being encoded.
      encoded: the big-endian packed bytes of value.
    """
    if value == 0:
      # -0.0 == 0, so look at the packed form: only the sign bit is set in
      # the first byte of negative zero.
      return encoded[0] == 128
    return value < 0

  def _PutSortableFloat(self, fmt, value):
    """Packs value with struct format fmt and appends it in sortable form.

    Values treated as negative have every bit inverted; all other values
    only have their sign bit flipped.
    """
    encoded = array.array('B', struct.pack(fmt, value))
    if self._isFloatNegative(value, encoded):
      encoded = array.array('B', [b ^ 0xFF for b in encoded])
    else:
      encoded[0] ^= 0x80
    self.buf.extend(encoded)

  def putFloat(self, value):
    """Appends value as a 4-byte big-endian float in sortable form."""
    self._PutSortableFloat('>f', value)

  def putDouble(self, value):
    """Appends value as an 8-byte big-endian double in sortable form."""
    self._PutSortableFloat('>d', value)

  def putPrefixedString(self, value):
    """Appends value escaped and terminated by a \\x00 byte.

    \\x01 becomes \\x01\\x02 and \\x00 becomes \\x01\\x01, so the terminator
    is the only \\x00 byte written. \\x01 must be escaped first so that the
    \\x01 bytes introduced for \\x00 are not escaped again.
    """
    escaped = value.replace('\x01', '\x01\x02').replace('\x00', '\x01\x01')
    self.buf.fromstring(escaped + '\x00')
class Decoder(ProtocolBuffer.Decoder):
  """Decodes the sortable encoding: big-endian integers, order-preserving
  variable-length integers, sortable floats and NUL-terminated strings.

  Reads from self.buf starting at self.idx and never past self.limit.
  """

  def __init__(self, buf, idx=0, limit=None):
    """Initializes the decoder.

    Args:
      buf: the buffer to decode.
      idx: index of the first byte to read.
      limit: index one past the last readable byte; defaults to len(buf).
    """
    # Compare against None so that an explicit limit of 0 is honored.
    if limit is None:
      limit = len(buf)
    ProtocolBuffer.Decoder.__init__(self, buf, idx, limit)

  def get16(self):
    """Reads an unsigned 16-bit big-endian integer.

    Raises:
      ProtocolBuffer.ProtocolBufferDecodeError: fewer than 2 bytes remain.
    """
    if self.idx + 2 > self.limit:
      raise ProtocolBuffer.ProtocolBufferDecodeError('truncated')
    hi = self.buf[self.idx]
    lo = self.buf[self.idx + 1]
    self.idx += 2
    return (hi << 8) | lo

  def get32(self):
    """Reads an unsigned 32-bit big-endian integer.

    Raises:
      ProtocolBuffer.ProtocolBufferDecodeError: fewer than 4 bytes remain.
    """
    if self.idx + 4 > self.limit:
      raise ProtocolBuffer.ProtocolBufferDecodeError('truncated')
    # Start from a long so the result is always a long, as before.
    result = long(self.buf[self.idx])
    for offset in (1, 2, 3):
      result = (result << 8) | self.buf[self.idx + offset]
    self.idx += 4
    return result

  def get64(self):
    """Reads an unsigned 64-bit big-endian integer.

    Raises:
      ProtocolBuffer.ProtocolBufferDecodeError: fewer than 8 bytes remain.
    """
    if self.idx + 8 > self.limit:
      raise ProtocolBuffer.ProtocolBufferDecodeError('truncated')
    # Start from a long so the result is always a long, as before.
    result = long(self.buf[self.idx])
    for offset in range(1, 8):
      result = (result << 8) | self.buf[self.idx + offset]
    self.idx += 8
    return result

  def getVarInt64(self):
    """Reads an integer in the order-preserving variable-length encoding.

    Returns:
      The decoded integer, or None if the head byte is 0.
    """
    b = self.get8()
    if b >= _OFFSET and b <= _POS_OFFSET:
      # The value is stored inline in the head byte.
      return b - _OFFSET + _MIN_INLINE
    if b == 0:
      return None

    # Heads below the inline range introduce negative values, heads above it
    # positive ones; the distance from the range is the payload length.
    if b < _OFFSET:
      negative = True
      num_bytes = _OFFSET - b
    else:
      negative = False
      num_bytes = b - _POS_OFFSET

    ret = 0
    for _ in range(num_bytes):
      b = self.get8()
      if negative:
        # Payload bytes of negative values are stored complemented.
        b = _MAX_UNSIGNED_BYTE - b
      ret = ret << 8 | b

    if negative:
      return _MIN_INLINE - ret
    else:
      return ret + _MAX_INLINE

  def getVarInt32(self):
    """Reads a variable-length integer and checks that it fits in 32 bits.

    Raises:
      ProtocolBuffer.ProtocolBufferDecodeError: the value is outside
        [-2**31, 2**31).
    """
    result = self.getVarInt64()
    if result >= 0x80000000 or result < -0x80000000:
      raise ProtocolBuffer.ProtocolBufferDecodeError('corrupted')
    return result

  def getVarUint64(self):
    """Reads a variable-length integer and checks that it is not negative.

    Raises:
      ProtocolBuffer.ProtocolBufferDecodeError: the value is negative.
    """
    result = self.getVarInt64()
    if result < 0:
      raise ProtocolBuffer.ProtocolBufferDecodeError('corrupted')
    return result

  def _GetSortableFloat(self, fmt, size):
    """Reads size bytes and decodes them with struct format fmt.

    A set leading bit means only the sign bit was flipped when encoding;
    otherwise every bit was inverted. Either change is undone before
    unpacking.

    Raises:
      ProtocolBuffer.ProtocolBufferDecodeError: fewer than size bytes remain.
    """
    if self.idx + size > self.limit:
      raise ProtocolBuffer.ProtocolBufferDecodeError('truncated')
    raw = self.buf[self.idx:self.idx + size]
    self.idx += size
    if raw[0] & 0x80:
      raw[0] ^= 0x80
    else:
      raw = [x ^ 0xFF for x in raw]
    return struct.unpack(fmt, array.array('B', raw).tostring())[0]

  def getFloat(self):
    """Reads a 4-byte big-endian float stored in sortable form."""
    return self._GetSortableFloat('>f', 4)

  def getDouble(self):
    """Reads an 8-byte big-endian double stored in sortable form."""
    return self._GetSortableFloat('>d', 8)

  def getPrefixedString(self):
    """Reads an escaped string terminated by a 0 byte and unescapes it.

    Raises:
      ProtocolBuffer.ProtocolBufferDecodeError: no terminating 0 byte is
        found before self.limit.
    """
    end_idx = self.idx
    # Stop at the limit instead of running off the end of the buffer.
    while end_idx < self.limit and self.buf[end_idx] != 0:
      end_idx += 1
    if end_idx >= self.limit:
      raise ProtocolBuffer.ProtocolBufferDecodeError('truncated')

    data = array.array('B', self.buf[self.idx:end_idx]).tostring()
    self.idx = end_idx + 1  # Skip the terminator.
    return data.replace('\x01\x01', '\x00').replace('\x01\x02', '\x01')