#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Contains routines for printing protocol messages in text format."""

from collections import deque
import cStringIO
import re

from google.net.proto2.python.internal import type_checkers
from google.net.proto2.python.public import descriptor

__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge']

_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)


class ParseError(Exception):
  """Thrown in case of ASCII parsing error."""


def MessageToString(message, as_utf8=False, as_one_line=False,
                    pointy_brackets=False):
  out = cStringIO.StringIO()
  PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line,
               pointy_brackets=pointy_brackets)
  result = out.getvalue()
  out.close()
  if as_one_line:
    return result.rstrip()
  return result
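
# A minimal usage sketch for MessageToString, assuming a compiled message
# class SearchRequest generated from a hypothetical .proto file (the class
# and field names below are illustrative, not part of this module):
#
#   request = SearchRequest(query='protobuf', page_number=2)
#   print MessageToString(request)
#   #   query: "protobuf"
#   #   page_number: 2
#   print MessageToString(request, as_one_line=True)
#   #   query: "protobuf" page_number: 2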


def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                 pointy_brackets=False):
  for field, value in message.ListFields():
    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      for element in value:
        PrintField(field, element, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets)
    else:
      PrintField(field, value, out, indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets)


def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False):
  """Print a single field name/value pair.  For repeated fields, the value
  should be a single element."""

  out.write(' ' * indent)
  if field.is_extension:
    out.write('[')
    if (field.containing_type.GetOptions().message_set_wire_format and
        field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
        field.message_type == field.extension_scope and
        field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
      out.write(field.message_type.full_name)
    else:
      out.write(field.full_name)
    out.write(']')
  elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
    # Groups are printed under their capitalized message type name rather
    # than the lowercased field name.
    out.write(field.message_type.name)
  else:
    out.write(field.name)

  if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # A colon separates the name from a scalar value; nested messages start
    # directly with '{' or '<' instead.
    out.write(': ')

  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
                  pointy_brackets=pointy_brackets)
  if as_one_line:
    out.write(' ')
  else:
    out.write('\n')
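
# Illustrative single-field output produced by PrintField (all field and
# extension names here are hypothetical): a plain field prints as
# 'name: value', a group prints under its capitalized type name, and an
# extension name is wrapped in square brackets:
#
#   page_number: 2
#   MyGroup {
#     ...
#   }
#   [my.package.my_extension]: true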


def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
                    as_one_line=False, pointy_brackets=False):
  """Print a single field value (not including name).  For repeated fields,
  the value should be a single element."""

  if pointy_brackets:
    openb = '<'
    closeb = '>'
  else:
    openb = '{'
    closeb = '}'

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    if as_one_line:
      out.write(' %s ' % openb)
      PrintMessage(value, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets)
      out.write(closeb)
    else:
      out.write(' %s\n' % openb)
      PrintMessage(value, out, indent + 2, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets)
      out.write(' ' * indent + closeb)
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
    enum_value = field.enum_type.values_by_number.get(value, None)
    if enum_value is not None:
      out.write(enum_value.name)
    else:
      out.write(str(value))
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
    out.write('\"')
    if type(value) is unicode:
      out_value = value.encode('utf-8')
    else:
      out_value = value
    if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
      # Bytes fields are always escaped byte-by-byte, never emitted as UTF-8.
      out_as_utf8 = False
    else:
      out_as_utf8 = as_utf8
    out.write(_CEscape(out_value, out_as_utf8))
    out.write('\"')
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
    if value:
      out.write('true')
    else:
      out.write('false')
  else:
    out.write(str(value))
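
# Sketch of how PrintFieldValue renders the different cpp_types, on a
# hypothetical message.  With pointy_brackets=True nested messages use
# '<' / '>' instead of '{' / '}':
#
#   nested_message {          # pointy_brackets=False (default)
#     value: 1
#   }
#   nested_message <          # pointy_brackets=True
#     value: 1
#   >
#   color: RED                # enums print by name when the number is known
#   data: "\001\002"          # bytes values are C-escaped
#   enabled: true             # bools print as true/false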


def _ParseOrMerge(text, message, allow_multiple_scalars):
  """Converts an ASCII representation of a protocol message into a message.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: On ASCII parsing problems.
  """
  tokenizer = _Tokenizer(text)
  while not tokenizer.AtEnd():
    _MergeField(tokenizer, message, allow_multiple_scalars)


def Parse(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  _ParseOrMerge(text, message, False)


def Merge(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  _ParseOrMerge(text, message, True)
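
# Sketch of the difference between Parse() and Merge(), assuming a message
# class Foo with an optional int32 field named bar (names are illustrative):
#
#   msg = Foo()
#   Merge('bar: 1 bar: 2', msg)   # allowed; the last value wins, msg.bar == 2
#   Parse('bar: 1 bar: 2', msg)   # raises ParseError ("should not have
#                                 # multiple ... fields")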


def _MergeField(tokenizer, message, allow_multiple_scalars):
  """Merges a single protocol message field into a message.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
  """
  message_descriptor = message.DESCRIPTOR
  if tokenizer.TryConsume('['):
    # Extension field, written as "[qualified.extension.name]".
    name = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(name)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)

    field = message.Extensions._FindExtensionByName(name)

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" not registered.' % name)
    elif message_descriptor != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' % (
              name, message_descriptor.full_name))
    tokenizer.Consume(']')
  else:
    name = tokenizer.ConsumeIdentifier()
    field = message_descriptor.fields_by_name.get(name, None)

    # Groups are written with their capitalized type name, so also try the
    # lowercased name, but only accept the match if it really is a group
    # whose type name matches what was written.
    if not field:
      field = message_descriptor.fields_by_name.get(name.lower(), None)
      if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
        field = None

    if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
              message_descriptor.full_name, name))

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # The colon is optional before a nested message.
    tokenizer.TryConsume(':')

    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        sub_message = message.Extensions[field]
      else:
        sub_message = getattr(message, field.name)
      sub_message.SetInParent()

    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
      _MergeField(tokenizer, sub_message, allow_multiple_scalars)
  else:
    _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)

  # Fields may optionally be separated by a ',' or ';'.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
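
# Examples of the field syntax _MergeField accepts (message and extension
# names are hypothetical).  Nested messages may use either bracket style,
# the colon before a nested message is optional, and fields may be separated
# by an optional ',' or ';':
#
#   nested { value: 1 }
#   nested: < value: 1 >
#   [my.package.my_extension]: 42
#   MyGroup { value: 1 }   # groups are looked up by their capitalized name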


def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
  """Merges a single protocol message scalar field into a message.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
    RuntimeError: On runtime errors.
  """
  tokenizer.Consume(':')
  value = None

  if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
                    descriptor.FieldDescriptor.TYPE_SINT32,
                    descriptor.FieldDescriptor.TYPE_SFIXED32):
    value = tokenizer.ConsumeInt32()
  elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
                      descriptor.FieldDescriptor.TYPE_SINT64,
                      descriptor.FieldDescriptor.TYPE_SFIXED64):
    value = tokenizer.ConsumeInt64()
  elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
                      descriptor.FieldDescriptor.TYPE_FIXED32):
    value = tokenizer.ConsumeUint32()
  elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
                      descriptor.FieldDescriptor.TYPE_FIXED64):
    value = tokenizer.ConsumeUint64()
  elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
                      descriptor.FieldDescriptor.TYPE_DOUBLE):
    value = tokenizer.ConsumeFloat()
  elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
    value = tokenizer.ConsumeBool()
  elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
    value = tokenizer.ConsumeString()
  elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
    value = tokenizer.ConsumeByteString()
  elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
    value = tokenizer.ConsumeEnum(field)
  else:
    raise RuntimeError('Unknown field type %d' % field.type)

  if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
    if field.is_extension:
      message.Extensions[field].append(value)
    else:
      getattr(message, field.name).append(value)
  else:
    if field.is_extension:
      if not allow_multiple_scalars and message.HasExtension(field):
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" should not have multiple "%s" extensions.' %
            (message.DESCRIPTOR.full_name, field.full_name))
      else:
        message.Extensions[field] = value
    else:
      if not allow_multiple_scalars and message.HasField(field.name):
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" should not have multiple "%s" fields.' %
            (message.DESCRIPTOR.full_name, field.name))
      else:
        setattr(message, field.name, value)


class _Tokenizer(object):
  """Protocol buffer ASCII representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile(
      '[a-zA-Z_][0-9a-zA-Z_+-]*|'
      '[0-9+-][0-9a-zA-Z_.+-]*|'
      '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'
      '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')
  _IDENTIFIER = re.compile(r'\w+')

  def __init__(self, text_message):
    self._text_message = text_message

    self._position = 0
    self._line = -1
    self._column = 0
    self._token_start = None
    self.token = ''
    self._lines = deque(text_message.split('\n'))
    self._current_line = ''
    self._previous_line = 0
    self._previous_column = 0
    self._SkipWhitespace()
    self.NextToken()
  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    while len(self._current_line) <= self._column:
      if not self._lines:
        self._current_line = ''
        return
      self._line += 1
      self._column = 0
      self._current_line = self._lines.popleft()

  def _SkipWhitespace(self):
    while True:
      self._PopLine()
      match = self._WHITESPACE.match(self._current_line, self._column)
      if not match:
        break
      length = len(match.group(0))
      self._column += length
  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self._ParseError('Expected "%s".' % token)

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self._ParseError('Expected identifier.')
    self.NextToken()
    return result
  def ConsumeInt32(self):
    """Consumes a signed 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=False)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint32(self):
    """Consumes an unsigned 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=False)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeInt64(self):
    """Consumes a signed 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=True)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint64(self):
    """Consumes an unsigned 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=True)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result
  def ConsumeFloat(self):
    """Consumes a floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result
  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return unicode(the_bytes, 'utf-8')
    except UnicodeDecodeError, e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in ('\'', '"'):
      the_list.append(self._ConsumeSingleByteString())
    return ''.join(the_list)
  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python.  This
    method only consumes one token.
    """
    text = self.token
    if len(text) < 1 or text[0] not in ('\'', '"'):
      raise self._ParseError('Expected string.')

    if len(text) < 2 or text[-1] != text[0]:
      raise self._ParseError('String missing ending quote.')

    try:
      result = _CUnescape(text[1:-1])
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    try:
      result = ParseEnum(field, self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result
  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError('%d:%d : %s' % (
        self._previous_line + 1, self._previous_column + 1, message))

  def _ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('%d:%d : %s' % (
        self._line + 1, self._column + 1, message))

  def _StringParseError(self, e):
    return self._ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    self._previous_line = self._line
    self._previous_column = self._column

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._lines and len(self._current_line) <= self._column:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      self.token = self._current_line[self._column]
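
# A short walkthrough of the tokenizer on a hypothetical input, showing how
# it splits text into identifiers, punctuation and literals while skipping
# whitespace and '#' comments:
#
#   tokenizer = _Tokenizer('foo: 42  # comment\nbar: "baz"')
#   tokens = []
#   while not tokenizer.AtEnd():
#     tokens.append(tokenizer.token)
#     tokenizer.NextToken()
#   # tokens == ['foo', ':', '42', 'bar', ':', '"baz"']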


def _CEscape(text, as_utf8):
  def escape(c):
    o = ord(c)
    if o == 10: return r'\n'
    if o == 13: return r'\r'
    if o == 9: return r'\t'
    if o == 39: return r"\'"

    if o == 34: return r'\"'
    if o == 92: return r'\\'

    # Outside UTF-8 mode, escape anything that is not printable ASCII as an
    # octal sequence.
    if not as_utf8 and (o >= 127 or o < 32):
      return r'\%03o' % o
    return c
  return ''.join([escape(c) for c in text])
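
# Sketch of _CEscape on a small byte string (values chosen for illustration):
#
#   _CEscape('a\tb\n\x80', as_utf8=False)  # -> 'a\\tb\\n\\200'
#   _CEscape('a\tb\n\x80', as_utf8=True)   # -> 'a\\tb\\n\x80' (byte kept as-is)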


_CUNESCAPE_HEX = re.compile(r'(\\+)x([0-9a-fA-F])(?![0-9a-fA-F])')


def _CUnescape(text):
  def ReplaceHex(m):
    # Only rewrite the match when the number of leading backslashes is odd,
    # i.e. the '\x' is itself unescaped.  Single-digit hex escapes such as
    # '\x6' are padded to '\x06' so that Python's string_escape codec accepts
    # them.
    if len(m.group(1)) & 1:
      return m.group(1) + 'x0' + m.group(2)
    return m.group(0)

  result = _CUNESCAPE_HEX.sub(ReplaceHex, text)
  return result.decode('string_escape')
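
# Sketch of the hex-padding quirk handled by ReplaceHex above:
#
#   _CUnescape(r'\x6')    # -> '\x06'  (single hex digit gets padded)
#   _CUnescape(r'\\x6')   # -> '\\x6'  (the backslash itself was escaped)
#   _CUnescape(r'\x41B')  # -> 'AB'    (two hex digits need no padding)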


def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown iff the text is not a valid integer.
  """
  # Do the actual parsing; int(text, 0) also accepts hex and octal literals.
  try:
    result = int(text, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % text)

  # Pick the matching range checker: index 2*is_long + is_signed selects
  # uint32, int32, uint64 or int64 from _INTEGER_CHECKERS.
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(result)
  return result
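
# A few illustrative calls, showing the radix handling of int(text, 0) and the
# range checking (out-of-range values are rejected by the selected checker):
#
#   ParseInteger('0x10')                # -> 16  (hex accepted)
#   ParseInteger('-1', is_signed=True)  # -> -1
#   ParseInteger('-1')                  # raises ValueError (out of uint32 range)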


def ParseFloat(text):
  """Parse a floating point number.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  try:
    # First try Python-compatible float syntax.
    return float(text)
  except ValueError:
    # Check the alternative spellings for infinity and NaN.
    if _FLOAT_INFINITY.match(text):
      if text[0] == '-':
        return float('-inf')
      else:
        return float('inf')
    elif _FLOAT_NAN.match(text):
      return float('nan')
    else:
      # Assume C-style '1.0f' notation and strip the suffix.
      try:
        return float(text.rstrip('f'))
      except ValueError:
        raise ValueError('Couldn\'t parse float: %s' % text)
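
# The alternative spellings accepted beyond plain Python float syntax, for
# reference:
#
#   ParseFloat('1.5f')      # -> 1.5 (trailing 'f' suffix is stripped)
#   ParseFloat('-Infinity') # -> float('-inf')
#   ParseFloat('nan')       # -> float('nan')
#   ParseFloat('bogus')     # raises ValueError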


def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    Boolean value parsed.

  Raises:
    ValueError: If text is not a valid boolean.
  """
  if text in ('true', 't', '1'):
    return True
  elif text in ('false', 'f', '0'):
    return False
  else:
    raise ValueError('Expected "true" or "false".')
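
# Accepted boolean spellings, for reference (matching is case-sensitive):
#
#   ParseBool('true')   # -> True   (also 't' and '1')
#   ParseBool('false')  # -> False  (also 'f' and '0')
#   ParseBool('TRUE')   # raises ValueError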


def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not a number, so look the value up by name.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError(
          'Enum type "%s" has no value named %s.' % (
              enum_descriptor.full_name, value))
  else:
    # Numeric value, so look it up by number.
    enum_value = enum_descriptor.values_by_number.get(number, None)
    if enum_value is None:
      raise ValueError(
          'Enum type "%s" has no value with number %d.' % (
              enum_descriptor.full_name, number))
  return enum_value.number
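
# Sketch of ParseEnum resolution, assuming a hypothetical enum field
# color_field whose type defines RED = 0 and BLUE = 2:
#
#   ParseEnum(color_field, 'BLUE')   # -> 2 (looked up by name)
#   ParseEnum(color_field, '2')      # -> 2 (looked up by number)
#   ParseEnum(color_field, 'GREEN')  # raises ValueError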