3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
21 """Contains routines for printing protocol messages in text format."""
import cStringIO
import re

from google.net.proto2.python.internal import type_checkers
from google.net.proto2.python.public import descriptor
from google.net.proto2.python.public import text_encoding
# Names exported by `from <this module> import *`.
__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge']


# Integer range checkers. ParseInteger indexes this tuple with
# 2 * int(is_long) + int(is_signed), so the order (uint32, int32, uint64,
# int64) must not change.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Alternative spellings of infinity / NaN that ParseFloat accepts in addition
# to whatever float() itself understands (case-insensitive, optional
# trailing 'f').
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# C++ types for which PrintFieldValue honours the float_format argument.
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
class Error(Exception):
  """Top-level module error for text_format.

  Base class of the exceptions defined in this module (see ParseError).
  """
class ParseError(Error):
  """Thrown in case of ASCII parsing error.

  The tokenizer builds the message as 'line:column : description', with
  1-based line and column numbers.
  """
def MessageToString(message, as_utf8=False, as_one_line=False,
                    pointy_brackets=False, use_index_order=False,
                    float_format=None):
  """Render a protocol message as a text-format string.

  Floating point values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using float_format='.15g'.

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  buf = cStringIO.StringIO()
  PrintMessage(message, buf,
               as_utf8=as_utf8,
               as_one_line=as_one_line,
               pointy_brackets=pointy_brackets,
               use_index_order=use_index_order,
               float_format=float_format)
  text = buf.getvalue()
  buf.close()
  # Trailing whitespace is only stripped from the single-line rendering.
  return text.rstrip() if as_one_line else text
def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                 pointy_brackets=False, use_index_order=False,
                 float_format=None):
  """Write every field reported by message.ListFields() to `out`.

  Repeated fields are expanded so that each element is printed as its own
  name/value pair through PrintField.

  Args:
    message: The protocol buffers message to print.
    out: Object with a write() method that receives the text.
    indent: Number of spaces written before each field.
    as_utf8: Forwarded to PrintField.
    as_one_line: Forwarded to PrintField.
    pointy_brackets: Forwarded to PrintField.
    use_index_order: If True, sort the fields by their descriptor `index`
      before printing; otherwise keep the order ListFields() returned.
    float_format: Forwarded to PrintField.
  """
  set_fields = message.ListFields()
  if use_index_order:
    set_fields.sort(key=lambda pair: pair[0].index)

  repeated = descriptor.FieldDescriptor.LABEL_REPEATED
  for field, value in set_fields:
    # Treat a singular value as a one-element sequence so both cases share
    # the same printing call.
    elements = value if field.label == repeated else (value,)
    for element in elements:
      PrintField(field, element, out, indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets,
                 use_index_order=use_index_order,
                 float_format=float_format)
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False, use_index_order=False, float_format=None):
  """Print a single field name/value pair.

  For repeated fields, the value should be a single element.

  Args:
    field: Descriptor of the field being printed.
    value: The field value (one element for repeated fields).
    out: Object with a write() method that receives the text.
    indent: Number of spaces written before the field name.
    as_utf8: Forwarded to PrintFieldValue.
    as_one_line: If True, terminate the field with a space instead of a
      newline.
    pointy_brackets: Forwarded to PrintFieldValue.
    use_index_order: Forwarded to PrintFieldValue.
    float_format: Forwarded to PrintFieldValue.
  """
  fd = descriptor.FieldDescriptor

  out.write(' ' * indent)
  if field.is_extension:
    out.write('[')
    # An optional message extension declared inside its own message type,
    # extending a message-set container, is named after the message type.
    named_by_message_type = (
        field.containing_type.GetOptions().message_set_wire_format and
        field.type == fd.TYPE_MESSAGE and
        field.message_type == field.extension_scope and
        field.label == fd.LABEL_OPTIONAL)
    if named_by_message_type:
      out.write(field.message_type.full_name)
    else:
      out.write(field.full_name)
    out.write(']')
  elif field.type == fd.TYPE_GROUP:
    # Groups are printed under their message type name.
    out.write(field.message_type.name)
  else:
    out.write(field.name)

  # Message-typed fields are written without a colon before the value.
  if field.cpp_type != fd.CPPTYPE_MESSAGE:
    out.write(': ')

  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
                  pointy_brackets=pointy_brackets,
                  use_index_order=use_index_order,
                  float_format=float_format)
  out.write(' ' if as_one_line else '\n')
def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
                    as_one_line=False, pointy_brackets=False,
                    use_index_order=False,
                    float_format=None):
  """Print a single field value (not including name).

  For repeated fields, the value should be a single element.

  Args:
    field: Descriptor of the field whose value is printed.
    value: The value to print.
    out: Object with a write() method that receives the text.
    indent: Current indentation; nested messages are printed two spaces
      deeper unless as_one_line is set.
    as_utf8: Passed to text_encoding.CEscape for string fields; bytes fields
      always use False.
    as_one_line: If True, nested messages are written on the same line.
    pointy_brackets: If True, nested messages are wrapped in '<' '>' instead
      of '{' '}'.
    use_index_order: Forwarded to PrintMessage for nested messages.
    float_format: If not None, format specification applied to float and
      double values; otherwise str() is used.
  """
  fd = descriptor.FieldDescriptor
  openb, closeb = ('<', '>') if pointy_brackets else ('{', '}')

  if field.cpp_type == fd.CPPTYPE_MESSAGE:
    if as_one_line:
      out.write(' %s ' % openb)
      child_indent = indent
    else:
      out.write(' %s\n' % openb)
      child_indent = indent + 2
    PrintMessage(value, out, child_indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets,
                 use_index_order=use_index_order,
                 float_format=float_format)
    if as_one_line:
      out.write(closeb)
    else:
      out.write(' ' * indent + closeb)
  elif field.cpp_type == fd.CPPTYPE_ENUM:
    # Numbers without a matching enum value are printed as-is.
    known = field.enum_type.values_by_number.get(value, None)
    if known is None:
      out.write(str(value))
    else:
      out.write(known.name)
  elif field.cpp_type == fd.CPPTYPE_STRING:
    out.write('\"')
    raw = value.encode('utf-8') if isinstance(value, unicode) else value
    # Bytes fields are always escaped with as_utf8 disabled.
    escape_as_utf8 = False if field.type == fd.TYPE_BYTES else as_utf8
    out.write(text_encoding.CEscape(raw, escape_as_utf8))
    out.write('\"')
  elif field.cpp_type == fd.CPPTYPE_BOOL:
    out.write('true' if value else 'false')
  elif field.cpp_type in _FLOAT_TYPES and float_format is not None:
    out.write('{1:{0}}'.format(float_format, value))
  else:
    out.write(str(value))
def _ParseOrMerge(lines, message, allow_multiple_scalars):
  """Read text-format fields from `lines` into `message` until input ends.

  Args:
    lines: Lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: On ASCII parsing problems.
  """
  tokens = _Tokenizer(lines)
  while True:
    if tokens.AtEnd():
      break
    _MergeField(tokens, message, allow_multiple_scalars)
def Parse(text, message):
  """Parse the text-format representation `text` into `message`.

  Args:
    text: Message ASCII representation. Anything that is not a `str` is
      first decoded as UTF-8.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  if not isinstance(text, str):
    text = text.decode('utf-8')
  lines = text.split('\n')
  return ParseLines(lines, message)
def Merge(text, message):
  """Merge the text-format representation `text` into `message`.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  lines = text.split('\n')
  return MergeLines(lines, message)
def ParseLines(lines, message):
  """Parse text-format `lines` into `message`, rejecting duplicate scalars.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  _ParseOrMerge(lines, message, allow_multiple_scalars=False)
  return message
def MergeLines(lines, message):
  """Merge text-format `lines` into `message`, allowing duplicate scalars.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  _ParseOrMerge(lines, message, allow_multiple_scalars=True)
  return message
def _MergeField(tokenizer, message, allow_multiple_scalars):
  """Merges a single protocol message field into a message.

  Reads the field name (plain or bracketed extension name), resolves it to a
  field descriptor, then either recurses into a nested message body or hands
  the value to _MergeScalarField.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
  """
  fd = descriptor.FieldDescriptor
  msg_desc = message.DESCRIPTOR

  if tokenizer.TryConsume('['):
    # Bracketed, dot-separated name: an extension.
    parts = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      parts.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(parts)

    if not msg_desc.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          msg_desc.full_name)
    # pylint: disable=protected-access
    field = message.Extensions._FindExtensionByName(name)
    # pylint: enable=protected-access
    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" not registered.' % name)
    if msg_desc != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' % (
              name, msg_desc.full_name))
    tokenizer.Consume(']')
  else:
    name = tokenizer.ConsumeIdentifier()
    by_name = msg_desc.fields_by_name
    field = by_name.get(name, None)

    # PrintField writes groups under their message type name, so retry the
    # lookup with the lower-cased name and accept it only for a group.
    if not field:
      field = by_name.get(name.lower(), None)
      if field and field.type != fd.TYPE_GROUP:
        field = None

    # A group must be spelled exactly like its message type name.
    if (field and field.type == fd.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
              msg_desc.full_name, name))

  if field.cpp_type == fd.CPPTYPE_MESSAGE:
    # The colon before a nested message is optional.
    tokenizer.TryConsume(':')

    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.is_extension:
      target = message.Extensions[field]
    else:
      target = getattr(message, field.name)
    if field.label == fd.LABEL_REPEATED:
      sub_message = target.add()
    else:
      sub_message = target
      sub_message.SetInParent()

    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
      _MergeField(tokenizer, sub_message, allow_multiple_scalars)
  else:
    _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)

  # Fields may optionally be followed by a single ',' or ';' separator.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
  """Merges a single protocol message scalar field into a message.

  Consumes the ':' separator and one value of the kind selected by
  field.type, then appends it (repeated fields) or assigns it (singular
  fields) on `message`.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
    RuntimeError: On runtime errors.
  """
  fd = descriptor.FieldDescriptor
  tokenizer.Consume(':')

  if field.type in (fd.TYPE_INT32, fd.TYPE_SINT32, fd.TYPE_SFIXED32):
    parsed = tokenizer.ConsumeInt32()
  elif field.type in (fd.TYPE_INT64, fd.TYPE_SINT64, fd.TYPE_SFIXED64):
    parsed = tokenizer.ConsumeInt64()
  elif field.type in (fd.TYPE_UINT32, fd.TYPE_FIXED32):
    parsed = tokenizer.ConsumeUint32()
  elif field.type in (fd.TYPE_UINT64, fd.TYPE_FIXED64):
    parsed = tokenizer.ConsumeUint64()
  elif field.type in (fd.TYPE_FLOAT, fd.TYPE_DOUBLE):
    parsed = tokenizer.ConsumeFloat()
  elif field.type == fd.TYPE_BOOL:
    parsed = tokenizer.ConsumeBool()
  elif field.type == fd.TYPE_STRING:
    parsed = tokenizer.ConsumeString()
  elif field.type == fd.TYPE_BYTES:
    parsed = tokenizer.ConsumeByteString()
  elif field.type == fd.TYPE_ENUM:
    parsed = tokenizer.ConsumeEnum(field)
  else:
    raise RuntimeError('Unknown field type %d' % field.type)

  # Repeated fields simply accumulate values.
  if field.label == fd.LABEL_REPEATED:
    if field.is_extension:
      message.Extensions[field].append(parsed)
    else:
      getattr(message, field.name).append(parsed)
    return

  # Singular fields: a second value is an error unless merging is allowed.
  if field.is_extension:
    if not allow_multiple_scalars and message.HasExtension(field):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" extensions.' %
          (message.DESCRIPTOR.full_name, field.full_name))
    message.Extensions[field] = parsed
    return

  if not allow_multiple_scalars and message.HasField(field.name):
    raise tokenizer.ParseErrorPreviousToken(
        'Message type "%s" should not have multiple "%s" fields.' %
        (message.DESCRIPTOR.full_name, field.name))
  setattr(message, field.name, parsed)
451 class _Tokenizer(object):
452 """Protocol buffer ASCII representation tokenizer.
454 This class handles the lower level string parsing by splitting it into
457 It was directly ported from the Java protocol buffer API.
460 _WHITESPACE
= re
.compile('(\\s|(#.*$))+', re
.MULTILINE
)
462 '[a-zA-Z_][0-9a-zA-Z_+-]*|'
463 '[0-9+-][0-9a-zA-Z_.+-]*|'
464 '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'
465 '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')
466 _IDENTIFIER
= re
.compile(r
'\w+')
468 def __init__(self
, lines
):
472 self
._token
_start
= None
474 self
._lines
= iter(lines
)
475 self
._current
_line
= ''
476 self
._previous
_line
= 0
477 self
._previous
_column
= 0
478 self
._more
_lines
= True
479 self
._SkipWhitespace
()
483 """Checks the end of the text was reached.
486 True iff the end was reached.
488 return not self
.token
491 while len(self
._current
_line
) <= self
._column
:
493 self
._current
_line
= self
._lines
.next()
494 except StopIteration:
495 self
._current
_line
= ''
496 self
._more
_lines
= False
502 def _SkipWhitespace(self
):
505 match
= self
._WHITESPACE
.match(self
._current
_line
, self
._column
)
508 length
= len(match
.group(0))
509 self
._column
+= length
511 def TryConsume(self
, token
):
512 """Tries to consume a given piece of text.
515 token: Text to consume.
518 True iff the text was consumed.
520 if self
.token
== token
:
525 def Consume(self
, token
):
526 """Consumes a piece of text.
529 token: Text to consume.
532 ParseError: If the text couldn't be consumed.
534 if not self
.TryConsume(token
):
535 raise self
._ParseError
('Expected "%s".' % token
)
537 def ConsumeIdentifier(self
):
538 """Consumes protocol message field identifier.
544 ParseError: If an identifier couldn't be consumed.
547 if not self
._IDENTIFIER
.match(result
):
548 raise self
._ParseError
('Expected identifier.')
552 def ConsumeInt32(self
):
553 """Consumes a signed 32bit integer number.
559 ParseError: If a signed 32bit integer couldn't be consumed.
562 result
= ParseInteger(self
.token
, is_signed
=True, is_long
=False)
563 except ValueError, e
:
564 raise self
._ParseError
(str(e
))
568 def ConsumeUint32(self
):
569 """Consumes an unsigned 32bit integer number.
575 ParseError: If an unsigned 32bit integer couldn't be consumed.
578 result
= ParseInteger(self
.token
, is_signed
=False, is_long
=False)
579 except ValueError, e
:
580 raise self
._ParseError
(str(e
))
584 def ConsumeInt64(self
):
585 """Consumes a signed 64bit integer number.
591 ParseError: If a signed 64bit integer couldn't be consumed.
594 result
= ParseInteger(self
.token
, is_signed
=True, is_long
=True)
595 except ValueError, e
:
596 raise self
._ParseError
(str(e
))
600 def ConsumeUint64(self
):
601 """Consumes an unsigned 64bit integer number.
607 ParseError: If an unsigned 64bit integer couldn't be consumed.
610 result
= ParseInteger(self
.token
, is_signed
=False, is_long
=True)
611 except ValueError, e
:
612 raise self
._ParseError
(str(e
))
616 def ConsumeFloat(self
):
617 """Consumes an floating point number.
623 ParseError: If a floating point number couldn't be consumed.
626 result
= ParseFloat(self
.token
)
627 except ValueError, e
:
628 raise self
._ParseError
(str(e
))
632 def ConsumeBool(self
):
633 """Consumes a boolean value.
639 ParseError: If a boolean value couldn't be consumed.
642 result
= ParseBool(self
.token
)
643 except ValueError, e
:
644 raise self
._ParseError
(str(e
))
648 def ConsumeString(self
):
649 """Consumes a string value.
655 ParseError: If a string value couldn't be consumed.
657 the_bytes
= self
.ConsumeByteString()
659 return unicode(the_bytes
, 'utf-8')
660 except UnicodeDecodeError, e
:
661 raise self
._StringParseError
(e
)
663 def ConsumeByteString(self
):
664 """Consumes a byte array value.
667 The array parsed (as a string).
670 ParseError: If a byte array value couldn't be consumed.
672 the_list
= [self
._ConsumeSingleByteString
()]
673 while self
.token
and self
.token
[0] in ('\'', '"'):
674 the_list
.append(self
._ConsumeSingleByteString
())
675 return ''.encode('latin1').join(the_list
)
678 def _ConsumeSingleByteString(self
):
679 """Consume one token of a string literal.
681 String literals (whether bytes or text) can come in multiple adjacent
682 tokens which are automatically concatenated, like in C or Python. This
683 method only consumes one token.
686 if len(text
) < 1 or text
[0] not in ('\'', '"'):
687 raise self
._ParseError
('Expected string.')
689 if len(text
) < 2 or text
[-1] != text
[0]:
690 raise self
._ParseError
('String missing ending quote.')
693 result
= text_encoding
.CUnescape(text
[1:-1])
694 except ValueError, e
:
695 raise self
._ParseError
(str(e
))
699 def ConsumeEnum(self
, field
):
701 result
= ParseEnum(field
, self
.token
)
702 except ValueError, e
:
703 raise self
._ParseError
(str(e
))
707 def ParseErrorPreviousToken(self
, message
):
708 """Creates and *returns* a ParseError for the previously read token.
711 message: A message to set for the exception.
714 A ParseError instance.
716 return ParseError('%d:%d : %s' % (
717 self
._previous
_line
+ 1, self
._previous
_column
+ 1, message
))
719 def _ParseError(self
, message
):
720 """Creates and *returns* a ParseError for the current token."""
721 return ParseError('%d:%d : %s' % (
722 self
._line
+ 1, self
._column
+ 1, message
))
724 def _StringParseError(self
, e
):
725 return self
._ParseError
('Couldn\'t parse string: ' + str(e
))
728 """Reads the next meaningful token."""
729 self
._previous
_line
= self
._line
730 self
._previous
_column
= self
._column
732 self
._column
+= len(self
.token
)
733 self
._SkipWhitespace
()
735 if not self
._more
_lines
:
739 match
= self
._TOKEN
.match(self
._current
_line
, self
._column
)
741 token
= match
.group(0)
744 self
.token
= self
._current
_line
[self
._column
]
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  The text is converted with base 0, so the usual Python prefixes (such as
  '0x') select the radix.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  try:
    # 64-bit values are built with long() and 32-bit values with int().
    convert = long if is_long else int
    number = convert(text, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % text)

  # Range check; whatever the checker raises propagates to the caller.
  checker_index = 2 * int(is_long) + int(is_signed)
  _INTEGER_CHECKERS[checker_index].CheckValue(number)
  return number
def ParseFloat(text):
  """Parse a floating point number.

  Besides everything float() accepts, this understands the spellings matched
  by _FLOAT_INFINITY and _FLOAT_NAN and numbers with trailing 'f'
  characters (e.g. '1.0f').

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  try:
    # Assume Python compatible syntax.
    return float(text)
  except ValueError:
    pass

  # Check alternative spellings. Pattern.match() only anchors at the start,
  # so require the match to cover the whole text; otherwise junk such as
  # 'infoo' or 'nanxyz' would be silently accepted.
  inf_match = _FLOAT_INFINITY.match(text)
  if inf_match and inf_match.end() == len(text):
    if text[0] == '-':
      return float('-inf')
    return float('inf')

  nan_match = _FLOAT_NAN.match(text)
  if nan_match and nan_match.end() == len(text):
    return float('nan')

  # Assume '1.0f' format.
  try:
    return float(text.rstrip('f'))
  except ValueError:
    raise ValueError('Couldn\'t parse float: %s' % text)
def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    True for 'true', 't' or '1'; False for 'false', 'f' or '0'.

  Raises:
    ValueError: If text is not a valid boolean.
  """
  if text in ('true', 't', '1'):
    return True
  if text in ('false', 'f', '0'):
    return False
  raise ValueError('Expected "true" or "false".')
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_type = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    number = None

  if number is None:
    # Not numeric: look the text up as an enum value name.
    match = enum_type.values_by_name.get(value, None)
    if match is None:
      raise ValueError(
          'Enum type "%s" has no value named %s.' % (
              enum_type.full_name, value))
  else:
    # Numeric: the number must belong to a declared enum value.
    match = enum_type.values_by_number.get(number, None)
    if match is None:
      raise ValueError(
          'Enum type "%s" has no value with number %d.' % (
              enum_type.full_name, number))
  return match.number