3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
21 """Contains routines for printing protocol messages in text format."""
28 from google
.net
.proto2
.python
.internal
import type_checkers
29 from google
.net
.proto2
.python
.public
import descriptor
30 from google
.net
.proto2
.python
.public
import text_encoding
__all__ = [
    'MessageToString',
    'PrintMessage',
    'PrintField',
    'PrintFieldValue',
    'Merge',
]


# Range checkers for parsed integers. ParseInteger indexes this tuple with
# ``2 * int(is_long) + int(is_signed)``, so the order below is significant.
_INTEGER_CHECKERS = (
    type_checkers.Uint32ValueChecker(),
    type_checkers.Int32ValueChecker(),
    type_checkers.Uint64ValueChecker(),
    type_checkers.Int64ValueChecker(),
)

# Alternative spellings of the special float values. The patterns are used
# with ``.match()``, which only anchors at the start of the text, so they are
# explicitly anchored at the end as well; otherwise any token that merely
# starts with "inf" or "nan" (e.g. "information") would be accepted.
_FLOAT_INFINITY = re.compile(r'-?inf(?:inity)?f?$', re.IGNORECASE)
_FLOAT_NAN = re.compile(r'nanf?$', re.IGNORECASE)

# C++ types that are formatted with ``float_format`` when one is supplied.
_FLOAT_TYPES = frozenset([
    descriptor.FieldDescriptor.CPPTYPE_FLOAT,
    descriptor.FieldDescriptor.CPPTYPE_DOUBLE,
])
class Error(Exception):
  """Base class for every exception raised by the text_format module."""
class ParseError(Error):
  """Raised when the ASCII (text format) input cannot be parsed."""
def MessageToString(message, as_utf8=False, as_one_line=False,
                    pointy_brackets=False, use_index_order=False,
                    float_format=None):
  """Render a protocol buffer message as text format and return the string.

  Floating point values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using float_format='.15g'.

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  buf = cStringIO.StringIO()
  PrintMessage(
      message,
      buf,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format)
  text = buf.getvalue()
  buf.close()
  # In one-line mode every field is followed by a separator space; drop the
  # trailing one.
  return text.rstrip() if as_one_line else text
def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                 pointy_brackets=False, use_index_order=False,
                 float_format=None):
  """Write every set field of `message` to `out` in text format.

  Args:
    message: The protocol buffers message to print.
    out: A file-like object; only its write() method is used.
    indent: Number of spaces written before each field.
    as_utf8: Forwarded to PrintField.
    as_one_line: Forwarded to PrintField.
    pointy_brackets: Forwarded to PrintField.
    use_index_order: If True, sort the fields by descriptor index before
      printing; otherwise keep the order returned by message.ListFields().
    float_format: Forwarded to PrintField.
  """
  set_fields = message.ListFields()
  if use_index_order:
    set_fields.sort(key=lambda pair: pair[0].index)

  for field, value in set_fields:
    # A repeated field is printed as one name/value entry per element.
    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      elements = value
    else:
      elements = [value]
    for element in elements:
      PrintField(field, element, out, indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets,
                 use_index_order=use_index_order,
                 float_format=float_format)
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False, use_index_order=False, float_format=None):
  """Write one "name value" entry for `field` to `out`.

  For repeated fields, `value` should be a single element.

  Args:
    field: Descriptor of the field being printed.
    value: The value (or single repeated element) to print.
    out: A file-like object; only its write() method is used.
    indent: Number of spaces written before the field name.
    as_utf8: Forwarded to PrintFieldValue.
    as_one_line: If True, terminate the entry with a space instead of a
      newline.
    pointy_brackets: Forwarded to PrintFieldValue.
    use_index_order: Forwarded to PrintFieldValue.
    float_format: Forwarded to PrintFieldValue.
  """
  fd = descriptor.FieldDescriptor

  out.write(' ' * indent)
  if field.is_extension:
    # Extensions are written in square brackets. A message-set style
    # extension is identified by its message type name rather than by the
    # extension field name.
    is_message_set_item = (
        field.containing_type.GetOptions().message_set_wire_format and
        field.type == fd.TYPE_MESSAGE and
        field.message_type == field.extension_scope and
        field.label == fd.LABEL_OPTIONAL)
    out.write('[')
    if is_message_set_item:
      out.write(field.message_type.full_name)
    else:
      out.write(field.full_name)
    out.write(']')
  elif field.type == fd.TYPE_GROUP:
    # Groups are printed under their message type name.
    out.write(field.message_type.name)
  else:
    out.write(field.name)

  # Message-valued fields are followed directly by the opening bracket, so
  # only non-message fields get a colon.
  if field.cpp_type != fd.CPPTYPE_MESSAGE:
    out.write(': ')

  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
                  pointy_brackets=pointy_brackets,
                  use_index_order=use_index_order,
                  float_format=float_format)
  out.write(' ' if as_one_line else '\n')
def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
                    as_one_line=False, pointy_brackets=False,
                    use_index_order=False,
                    float_format=None):
  """Write a single field value (without its name) to `out`.

  For repeated fields, `value` should be a single element.

  Args:
    field: Descriptor of the field whose value is printed.
    value: The value (or single repeated element) to print.
    out: A file-like object; only its write() method is used.
    indent: Current indentation, in spaces; nested messages are printed two
      spaces deeper unless as_one_line is set.
    as_utf8: Passed to text_encoding.CEscape for string (non-bytes) fields.
    as_one_line: If True, nested messages are printed without newlines.
    pointy_brackets: If True, nested messages are wrapped in '<' '>' instead
      of '{' '}'.
    use_index_order: Forwarded to PrintMessage for nested messages.
    float_format: If not None, format specification applied to float and
      double values; otherwise str() is used.
  """
  fd = descriptor.FieldDescriptor
  openb, closeb = ('<', '>') if pointy_brackets else ('{', '}')
  cpp_type = field.cpp_type

  if cpp_type == fd.CPPTYPE_MESSAGE:
    if as_one_line:
      out.write(' %s ' % openb)
      PrintMessage(value, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   use_index_order=use_index_order,
                   float_format=float_format)
      out.write(closeb)
    else:
      out.write(' %s\n' % openb)
      PrintMessage(value, out, indent + 2, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   use_index_order=use_index_order,
                   float_format=float_format)
      out.write(' ' * indent + closeb)
  elif cpp_type == fd.CPPTYPE_ENUM:
    # Print the symbolic name when the number is known, else the raw number.
    known_value = field.enum_type.values_by_number.get(value, None)
    if known_value is None:
      out.write(str(value))
    else:
      out.write(known_value.name)
  elif cpp_type == fd.CPPTYPE_STRING:
    out.write('\"')
    out_value = value.encode('utf-8') if isinstance(value, unicode) else value
    # Bytes fields are never emitted as raw UTF-8, whatever as_utf8 says.
    out_as_utf8 = False if field.type == fd.TYPE_BYTES else as_utf8
    out.write(text_encoding.CEscape(out_value, out_as_utf8))
    out.write('\"')
  elif cpp_type == fd.CPPTYPE_BOOL:
    out.write('true' if value else 'false')
  elif cpp_type in _FLOAT_TYPES and float_format is not None:
    out.write('{1:{0}}'.format(float_format, value))
  else:
    out.write(str(value))
def _ParseOrMerge(lines, message, allow_multiple_scalars):
  """Read the text format in `lines` and merge every field into `message`.

  Args:
    lines: Lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: On ASCII parsing problems.
  """
  token_stream = _Tokenizer(lines)
  # Each _MergeField call consumes exactly one top-level field.
  while not token_stream.AtEnd():
    _MergeField(token_stream, message, allow_multiple_scalars)
def Parse(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Args:
    text: Message ASCII representation. Text strings are used as-is; any
      other input (e.g. a Python 3 bytes object) is decoded as UTF-8 first.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  # Decode only input that is not already text. `type(u'')` is `unicode` on
  # Python 2 and `str` on Python 3; excluding it avoids calling .decode() on a
  # Python 2 unicode object, which would implicitly ASCII-encode it first and
  # fail on any non-ASCII character.
  if not isinstance(text, (str, type(u''))):
    text = text.decode('utf-8')
  return ParseLines(text.split('\n'), message)
def Merge(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message ASCII representation. Text strings are used as-is; any
      other input (e.g. a Python 3 bytes object) is decoded as UTF-8 first,
      matching Parse().
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  # Same input normalisation as Parse(): without it, byte input on Python 3
  # fails in split('\n') with a TypeError. `type(u'')` is the text type on
  # both Python 2 (unicode) and Python 3 (str).
  if not isinstance(text, (str, type(u''))):
    text = text.decode('utf-8')
  return MergeLines(text.split('\n'), message)
def ParseLines(lines, message):
  """Parses text-format lines into `message`, rejecting duplicate scalars.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  _ParseOrMerge(lines, message, allow_multiple_scalars=False)
  return message
def MergeLines(lines, message):
  """Parses text-format lines into `message`, allowing duplicate scalars.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  _ParseOrMerge(lines, message, allow_multiple_scalars=True)
  return message
def _MergeField(tokenizer, message, allow_multiple_scalars):
  """Merges a single protocol message field into a message.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
  """
  fd = descriptor.FieldDescriptor
  message_descriptor = message.DESCRIPTOR

  if tokenizer.TryConsume('['):
    # Extension: a dotted, fully qualified name inside square brackets.
    name_parts = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name_parts.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(name_parts)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)
    # pylint: disable=protected-access
    field = message.Extensions._FindExtensionByName(name)
    # pylint: enable=protected-access
    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" not registered.' % name)
    elif message_descriptor != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' % (
              name, message_descriptor.full_name))
    tokenizer.Consume(']')
  else:
    name = tokenizer.ConsumeIdentifier()
    fields_by_name = message_descriptor.fields_by_name
    field = fields_by_name.get(name, None)

    # A group is written under its message type name, so retry the lookup
    # with the lower-cased identifier, but accept the hit only for groups.
    if not field:
      field = fields_by_name.get(name.lower(), None)
      if field and field.type != fd.TYPE_GROUP:
        field = None

    # Conversely, a group field must be spelled exactly as its type name.
    if (field and field.type == fd.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
              message_descriptor.full_name, name))

  if field.cpp_type == fd.CPPTYPE_MESSAGE:
    # The colon before a nested message is optional.
    tokenizer.TryConsume(':')

    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.is_extension:
      target = message.Extensions[field]
    else:
      target = getattr(message, field.name)

    if field.label == fd.LABEL_REPEATED:
      sub_message = target.add()
    else:
      sub_message = target
      sub_message.SetInParent()

    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
      _MergeField(tokenizer, sub_message, allow_multiple_scalars)
  else:
    _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)

  # Fields may optionally be followed by a single comma or semicolon.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
  """Merges a single protocol message scalar field into a message.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
    RuntimeError: On runtime errors.
  """
  fd = descriptor.FieldDescriptor
  tokenizer.Consume(':')
  field_type = field.type

  # Choose the tokenizer method matching the declared wire type.
  if field_type in (fd.TYPE_INT32, fd.TYPE_SINT32, fd.TYPE_SFIXED32):
    value = tokenizer.ConsumeInt32()
  elif field_type in (fd.TYPE_INT64, fd.TYPE_SINT64, fd.TYPE_SFIXED64):
    value = tokenizer.ConsumeInt64()
  elif field_type in (fd.TYPE_UINT32, fd.TYPE_FIXED32):
    value = tokenizer.ConsumeUint32()
  elif field_type in (fd.TYPE_UINT64, fd.TYPE_FIXED64):
    value = tokenizer.ConsumeUint64()
  elif field_type in (fd.TYPE_FLOAT, fd.TYPE_DOUBLE):
    value = tokenizer.ConsumeFloat()
  elif field_type == fd.TYPE_BOOL:
    value = tokenizer.ConsumeBool()
  elif field_type == fd.TYPE_STRING:
    value = tokenizer.ConsumeString()
  elif field_type == fd.TYPE_BYTES:
    value = tokenizer.ConsumeByteString()
  elif field_type == fd.TYPE_ENUM:
    value = tokenizer.ConsumeEnum(field)
  else:
    raise RuntimeError('Unknown field type %d' % field_type)

  is_repeated = field.label == fd.LABEL_REPEATED
  if field.is_extension:
    if is_repeated:
      message.Extensions[field].append(value)
    elif not allow_multiple_scalars and message.HasExtension(field):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" extensions.' %
          (message.DESCRIPTOR.full_name, field.full_name))
    else:
      message.Extensions[field] = value
  else:
    if is_repeated:
      getattr(message, field.name).append(value)
    elif not allow_multiple_scalars and message.HasField(field.name):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" fields.' %
          (message.DESCRIPTOR.full_name, field.name))
    else:
      setattr(message, field.name, value)
class _Tokenizer(object):
  """Protocol buffer ASCII representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.

  The current token is exposed as the `token` attribute; it is the empty
  string once the input is exhausted.
  """

  # Runs of whitespace and '#' comments (a comment extends to end of line).
  _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile(
      '[a-zA-Z_][0-9a-zA-Z_+-]*|'           # an identifier
      '[0-9+-][0-9a-zA-Z_.+-]*|'            # a number
      '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
      '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
  _IDENTIFIER = re.compile(r'\w+')

  def __init__(self, lines):
    """Initializes the tokenizer and reads the first token.

    Args:
      lines: An iterable of lines of text to tokenize.
    """
    self._position = 0
    self._line = -1
    self._column = 0
    self._token_start = None
    self.token = ''
    self._lines = iter(lines)
    self._current_line = ''
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._SkipWhitespace()
    self.NextToken()

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    """Advances to the next line while the current one is fully consumed."""
    while len(self._current_line) <= self._column:
      try:
        # next() works on both Python 2.6+ and Python 3 iterators.
        self._current_line = next(self._lines)
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    """Skips whitespace and comments, crossing line boundaries as needed."""
    while True:
      self._PopLine()
      match = self._WHITESPACE.match(self._current_line, self._column)
      if not match:
        break
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self._ParseError('Expected "%s".' % token)

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self._ParseError('Expected identifier.')
    self.NextToken()
    return result

  def ConsumeInt32(self):
    """Consumes a signed 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=False)
    except ValueError as e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint32(self):
    """Consumes an unsigned 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=False)
    except ValueError as e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeInt64(self):
    """Consumes a signed 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=True)
    except ValueError as e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint64(self):
    """Consumes an unsigned 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=True)
    except ValueError as e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeFloat(self):
    """Consumes a floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError as e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError as e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed, decoded from UTF-8.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      # bytes.decode exists on Python 2 and 3, unlike the `unicode` builtin.
      return the_bytes.decode('utf-8')
    except UnicodeDecodeError as e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Adjacent string literal tokens are concatenated.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in ('\'', '"'):
      the_list.append(self._ConsumeSingleByteString())
    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python.  This
    method only consumes one token.

    Returns:
      The result of text_encoding.CUnescape on the text between the quotes.

    Raises:
      ParseError: If the current token is not a properly quoted string or
        cannot be unescaped.
    """
    text = self.token
    if len(text) < 1 or text[0] not in ('\'', '"'):
      raise self._ParseError('Expected string.')

    if len(text) < 2 or text[-1] != text[0]:
      raise self._ParseError('String missing ending quote.')

    try:
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError as e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    """Consumes an enum value for the given enum field.

    Args:
      field: Enum field descriptor, passed through to ParseEnum.

    Returns:
      The value returned by ParseEnum for the current token.

    Raises:
      ParseError: If the current token is not a valid value of the enum.
    """
    try:
      result = ParseEnum(field, self.token)
    except ValueError as e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError('%d:%d : %s' % (
        self._previous_line + 1, self._previous_column + 1, message))

  def _ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('%d:%d : %s' % (
        self._line + 1, self._column + 1, message))

  def _StringParseError(self, e):
    """Creates and *returns* a ParseError describing a string decode error."""
    return self._ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    # Remember where the token being left started, for error reporting.
    self._previous_line = self._line
    self._previous_column = self._column

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if match:
      self.token = match.group(0)
    else:
      # Anything else (punctuation such as ':' or '{') is a one-char token.
      self.token = self._current_line[self._column]
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer and range-checks it.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # 32-bit values are built with int() and 64-bit values with long(). Base 0
  # lets the literal's own prefix select the radix.
  convert = long if is_long else int
  try:
    result = convert(text, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % text)

  # Range check; whatever the checker raises is left to the caller.
  _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)].CheckValue(result)
  return result
def ParseFloat(text):
  """Parse a floating point number.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  # First choice: plain Python float syntax.
  try:
    return float(text)
  except ValueError:
    pass

  # Then the alternative spellings of infinity and NaN.
  if _FLOAT_INFINITY.match(text):
    return float('-inf') if text[0] == '-' else float('inf')
  if _FLOAT_NAN.match(text):
    return float('nan')

  # Finally a C-style literal with a trailing 'f', e.g. '1.0f'.
  try:
    return float(text.rstrip('f'))
  except ValueError:
    raise ValueError('Couldn\'t parse float: %s' % text)
def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    True for 'true', 't' or '1'; False for 'false', 'f' or '0'.

  Raises:
    ValueError: If text is not a valid boolean.
  """
  if text in ('true', 't', '1'):
    return True
  if text in ('false', 'f', '0'):
    return False
  raise ValueError('Expected "true" or "false".')
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type

  # Text that parses as an integer literal is a number; anything else is
  # treated as an enum value name.
  try:
    number = int(value, 0)
  except ValueError:
    number = None

  if number is None:
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError(
          'Enum type "%s" has no value named %s.' % (
              enum_descriptor.full_name, value))
  else:
    enum_value = enum_descriptor.values_by_number.get(number, None)
    if enum_value is None:
      raise ValueError(
          'Enum type "%s" has no value with number %d.' % (
              enum_descriptor.full_name, number))
  return enum_value.number