3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
19 """Contains routines for printing protocol messages in text format."""
import cStringIO
import re

from collections import deque

from google.net.proto2.python.internal import type_checkers
from google.net.proto2.python.public import descriptor
# Public API of this module.
__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge']

# Range checkers for the four integer flavours, indexed by
# 2 * int(is_long) + int(is_signed).
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Accepts protobuf-style spellings such as "inf", "-Infinity", "inff".
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
# Accepts "nan" / "nanf" in any case.
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
class ParseError(Exception):
  """Raised when protocol message ASCII text cannot be parsed."""
def MessageToString(message, as_utf8=False, as_one_line=False,
                    pointy_brackets=False):
  """Returns the ASCII text-format representation of message.

  Args:
    message: The protocol message to convert.
    as_utf8: Print utf-8 strings unescaped (passed through to PrintMessage).
    as_one_line: Print the whole message on a single line.
    pointy_brackets: Use <> instead of {} for nested messages.

  Returns:
    The text-format string.  In one-line mode the trailing separator
    whitespace is stripped.
  """
  out = cStringIO.StringIO()
  PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line,
               pointy_brackets=pointy_brackets)
  result = out.getvalue()
  # One-line output ends each field with a separator space; strip it only in
  # that mode so multi-line output keeps its trailing newline.
  if as_one_line:
    return result.rstrip()
  return result
def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                 pointy_brackets=False):
  """Prints every set field of message to out, one field per entry.

  Args:
    message: The protocol message to print.
    out: A file-like object to write to.
    indent: Number of spaces of leading indentation.
    as_utf8: Print utf-8 strings unescaped.
    as_one_line: Print everything on a single line.
    pointy_brackets: Use <> instead of {} for nested messages.
  """
  for field, value in message.ListFields():
    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      # Repeated fields: print one entry per element.
      for element in value:
        PrintField(field, element, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets)
    else:
      PrintField(field, value, out, indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets)
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False):
  """Print a single field name/value pair. For repeated fields, the value
  should be a single element."""

  out.write(' ' * indent)
  if field.is_extension:
    # Extensions are printed bracketed by their full name.
    out.write('[')
    if (field.containing_type.GetOptions().message_set_wire_format and
        field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
        field.message_type == field.extension_scope and
        field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
      # MessageSet-style extensions print the contained type's full name.
      out.write(field.message_type.full_name)
    else:
      out.write(field.full_name)
    out.write(']')
  elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
    # For groups, use the capitalized name (the group's type name).
    out.write(field.message_type.name)
  else:
    out.write(field.name)

  if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # Scalar fields are printed as "name: value"; the colon is omitted for
    # message fields, which print as "name { ... }".
    out.write(': ')

  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
                  pointy_brackets=pointy_brackets)
  if as_one_line:
    out.write(' ')
  else:
    out.write('\n')
def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
                    as_one_line=False, pointy_brackets=False):
  """Print a single field value (not including name). For repeated fields,
  the value should be a single element."""

  # Delimiters for nested messages.
  if pointy_brackets:
    openb = '<'
    closeb = '>'
  else:
    openb = '{'
    closeb = '}'

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    if as_one_line:
      out.write(' %s ' % openb)
      PrintMessage(value, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets)
      out.write(closeb)
    else:
      out.write(' %s\n' % openb)
      PrintMessage(value, out, indent + 2, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets)
      out.write(' ' * indent + closeb)
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
    enum_value = field.enum_type.values_by_number.get(value, None)
    if enum_value is not None:
      out.write(enum_value.name)
    else:
      # Unknown enum number: print it numerically.
      out.write(str(value))
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
    out.write('\"')
    if type(value) is unicode:
      out_value = value.encode('utf-8')
    else:
      out_value = value
    if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
      # Escape non-UTF8 bytes unconditionally in TYPE_BYTES fields.
      out_as_utf8 = False
    else:
      out_as_utf8 = as_utf8
    out.write(_CEscape(out_value, out_as_utf8))
    out.write('\"')
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
    if value:
      out.write('true')
    else:
      out.write('false')
  else:
    out.write(str(value))
def _ParseOrMerge(text, message, allow_multiple_scalars):
  """Converts an ASCII representation of a protocol message into a message.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: On ASCII parsing problems.
  """
  tok = _Tokenizer(text)
  while not tok.AtEnd():
    _MergeField(tok, message, allow_multiple_scalars)
def Parse(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Repeated values for non-repeated fields are rejected.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  _ParseOrMerge(text, message, allow_multiple_scalars=False)
def Merge(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one seen.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  _ParseOrMerge(text, message, allow_multiple_scalars=True)
def _MergeField(tokenizer, message, allow_multiple_scalars):
  """Merges a single protocol message field into a message.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
  """
  message_descriptor = message.DESCRIPTOR
  if tokenizer.TryConsume('['):
    # Extension: "[qualified.extension.name]".
    name = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(name)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)
    field = message.Extensions._FindExtensionByName(name)
    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" not registered.' % name)
    elif message_descriptor != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' % (
              name, message_descriptor.full_name))
    tokenizer.Consume(']')
  else:
    name = tokenizer.ConsumeIdentifier()
    field = message_descriptor.fields_by_name.get(name, None)

    # Group names are expected to be capitalized as they appear in the
    # .proto file, which actually matches their type names, not their field
    # names.
    if not field:
      field = message_descriptor.fields_by_name.get(name.lower(), None)
      if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
        field = None

    if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
              message_descriptor.full_name, name))

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # The colon is optional before a nested message.
    tokenizer.TryConsume(':')

    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        sub_message = message.Extensions[field]
      else:
        sub_message = getattr(message, field.name)
      sub_message.SetInParent()

    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
      _MergeField(tokenizer, sub_message, allow_multiple_scalars)
  else:
    _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)

  # Fields may optionally be separated by commas or semicolons.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
  """Merges a single protocol message scalar field into a message.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
    RuntimeError: On runtime errors.
  """
  tokenizer.Consume(':')
  value = None

  # Dispatch on the declared wire type of the field.
  if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
                    descriptor.FieldDescriptor.TYPE_SINT32,
                    descriptor.FieldDescriptor.TYPE_SFIXED32):
    value = tokenizer.ConsumeInt32()
  elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
                      descriptor.FieldDescriptor.TYPE_SINT64,
                      descriptor.FieldDescriptor.TYPE_SFIXED64):
    value = tokenizer.ConsumeInt64()
  elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
                      descriptor.FieldDescriptor.TYPE_FIXED32):
    value = tokenizer.ConsumeUint32()
  elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
                      descriptor.FieldDescriptor.TYPE_FIXED64):
    value = tokenizer.ConsumeUint64()
  elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
                      descriptor.FieldDescriptor.TYPE_DOUBLE):
    value = tokenizer.ConsumeFloat()
  elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
    value = tokenizer.ConsumeBool()
  elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
    value = tokenizer.ConsumeString()
  elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
    value = tokenizer.ConsumeByteString()
  elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
    value = tokenizer.ConsumeEnum(field)
  else:
    raise RuntimeError('Unknown field type %d' % field.type)

  if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
    if field.is_extension:
      message.Extensions[field].append(value)
    else:
      getattr(message, field.name).append(value)
  else:
    if field.is_extension:
      if not allow_multiple_scalars and message.HasExtension(field):
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" should not have multiple "%s" extensions.' %
            (message.DESCRIPTOR.full_name, field.full_name))
      else:
        message.Extensions[field] = value
    else:
      if not allow_multiple_scalars and message.HasField(field.name):
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" should not have multiple "%s" fields.' %
            (message.DESCRIPTOR.full_name, field.name))
      else:
        setattr(message, field.name, value)
362 class _Tokenizer(object):
363 """Protocol buffer ASCII representation tokenizer.
365 This class handles the lower level string parsing by splitting it into
368 It was directly ported from the Java protocol buffer API.
371 _WHITESPACE
= re
.compile('(\\s|(#.*$))+', re
.MULTILINE
)
373 '[a-zA-Z_][0-9a-zA-Z_+-]*|'
374 '[0-9+-][0-9a-zA-Z_.+-]*|'
375 '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'
376 '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')
377 _IDENTIFIER
= re
.compile(r
'\w+')
379 def __init__(self
, text_message
):
380 self
._text
_message
= text_message
385 self
._token
_start
= None
387 self
._lines
= deque(text_message
.split('\n'))
388 self
._current
_line
= ''
389 self
._previous
_line
= 0
390 self
._previous
_column
= 0
391 self
._SkipWhitespace
()
395 """Checks the end of the text was reached.
398 True iff the end was reached.
400 return not self
.token
403 while len(self
._current
_line
) <= self
._column
:
405 self
._current
_line
= ''
409 self
._current
_line
= self
._lines
.popleft()
411 def _SkipWhitespace(self
):
414 match
= self
._WHITESPACE
.match(self
._current
_line
, self
._column
)
417 length
= len(match
.group(0))
418 self
._column
+= length
420 def TryConsume(self
, token
):
421 """Tries to consume a given piece of text.
424 token: Text to consume.
427 True iff the text was consumed.
429 if self
.token
== token
:
434 def Consume(self
, token
):
435 """Consumes a piece of text.
438 token: Text to consume.
441 ParseError: If the text couldn't be consumed.
443 if not self
.TryConsume(token
):
444 raise self
._ParseError
('Expected "%s".' % token
)
446 def ConsumeIdentifier(self
):
447 """Consumes protocol message field identifier.
453 ParseError: If an identifier couldn't be consumed.
456 if not self
._IDENTIFIER
.match(result
):
457 raise self
._ParseError
('Expected identifier.')
461 def ConsumeInt32(self
):
462 """Consumes a signed 32bit integer number.
468 ParseError: If a signed 32bit integer couldn't be consumed.
471 result
= ParseInteger(self
.token
, is_signed
=True, is_long
=False)
472 except ValueError, e
:
473 raise self
._ParseError
(str(e
))
477 def ConsumeUint32(self
):
478 """Consumes an unsigned 32bit integer number.
484 ParseError: If an unsigned 32bit integer couldn't be consumed.
487 result
= ParseInteger(self
.token
, is_signed
=False, is_long
=False)
488 except ValueError, e
:
489 raise self
._ParseError
(str(e
))
493 def ConsumeInt64(self
):
494 """Consumes a signed 64bit integer number.
500 ParseError: If a signed 64bit integer couldn't be consumed.
503 result
= ParseInteger(self
.token
, is_signed
=True, is_long
=True)
504 except ValueError, e
:
505 raise self
._ParseError
(str(e
))
509 def ConsumeUint64(self
):
510 """Consumes an unsigned 64bit integer number.
516 ParseError: If an unsigned 64bit integer couldn't be consumed.
519 result
= ParseInteger(self
.token
, is_signed
=False, is_long
=True)
520 except ValueError, e
:
521 raise self
._ParseError
(str(e
))
525 def ConsumeFloat(self
):
526 """Consumes an floating point number.
532 ParseError: If a floating point number couldn't be consumed.
535 result
= ParseFloat(self
.token
)
536 except ValueError, e
:
537 raise self
._ParseError
(str(e
))
541 def ConsumeBool(self
):
542 """Consumes a boolean value.
548 ParseError: If a boolean value couldn't be consumed.
551 result
= ParseBool(self
.token
)
552 except ValueError, e
:
553 raise self
._ParseError
(str(e
))
557 def ConsumeString(self
):
558 """Consumes a string value.
564 ParseError: If a string value couldn't be consumed.
566 the_bytes
= self
.ConsumeByteString()
568 return unicode(the_bytes
, 'utf-8')
569 except UnicodeDecodeError, e
:
570 raise self
._StringParseError
(e
)
572 def ConsumeByteString(self
):
573 """Consumes a byte array value.
576 The array parsed (as a string).
579 ParseError: If a byte array value couldn't be consumed.
581 the_list
= [self
._ConsumeSingleByteString
()]
582 while self
.token
and self
.token
[0] in ('\'', '"'):
583 the_list
.append(self
._ConsumeSingleByteString
())
584 return ''.join(the_list
)
586 def _ConsumeSingleByteString(self
):
587 """Consume one token of a string literal.
589 String literals (whether bytes or text) can come in multiple adjacent
590 tokens which are automatically concatenated, like in C or Python. This
591 method only consumes one token.
594 if len(text
) < 1 or text
[0] not in ('\'', '"'):
595 raise self
._ParseError
('Expected string.')
597 if len(text
) < 2 or text
[-1] != text
[0]:
598 raise self
._ParseError
('String missing ending quote.')
601 result
= _CUnescape(text
[1:-1])
602 except ValueError, e
:
603 raise self
._ParseError
(str(e
))
607 def ConsumeEnum(self
, field
):
609 result
= ParseEnum(field
, self
.token
)
610 except ValueError, e
:
611 raise self
._ParseError
(str(e
))
615 def ParseErrorPreviousToken(self
, message
):
616 """Creates and *returns* a ParseError for the previously read token.
619 message: A message to set for the exception.
622 A ParseError instance.
624 return ParseError('%d:%d : %s' % (
625 self
._previous
_line
+ 1, self
._previous
_column
+ 1, message
))
627 def _ParseError(self
, message
):
628 """Creates and *returns* a ParseError for the current token."""
629 return ParseError('%d:%d : %s' % (
630 self
._line
+ 1, self
._column
+ 1, message
))
632 def _StringParseError(self
, e
):
633 return self
._ParseError
('Couldn\'t parse string: ' + str(e
))
636 """Reads the next meaningful token."""
637 self
._previous
_line
= self
._line
638 self
._previous
_column
= self
._column
640 self
._column
+= len(self
.token
)
641 self
._SkipWhitespace
()
643 if not self
._lines
and len(self
._current
_line
) <= self
._column
:
647 match
= self
._TOKEN
.match(self
._current
_line
, self
._column
)
649 token
= match
.group(0)
652 self
.token
= self
._current
_line
[self
._column
]
660 def _CEscape(text
, as_utf8
):
663 if o
== 10: return r
'\n'
664 if o
== 13: return r
'\r'
665 if o
== 9: return r
'\t'
666 if o
== 39: return r
"\'"
668 if o
== 34: return r
'\"'
669 if o
== 92: return r
'\\'
672 if not as_utf8
and (o
>= 127 or o
< 32):
675 return ''.join([escape(c
) for c
in text
])
# Matches a hex escape with exactly one hex digit (e.g. "\x3") preceded by a
# run of backslashes: group(1) is the backslash run, group(2) the hex digit.
_CUNESCAPE_HEX = re.compile(r'(\\+)x([0-9a-fA-F])(?![0-9a-fA-F])')
def _CUnescape(text):
  """Unescapes a C-escaped string (inverse of _CEscape).

  Args:
    text: The escaped string, without surrounding quotes.

  Returns:
    The raw unescaped string.
  """
  def ReplaceHex(m):
    # Pad single-digit hex escapes (e.g. '\x3' -> '\x03') only when the
    # backslash run is odd-length, i.e. the 'x' is actually escaped.
    if len(m.group(1)) & 1:
      return m.group(1) + 'x0' + m.group(2)
    return m.group(0)

  # 'string_escape' cannot handle single-digit hex escapes like '\xf',
  # so normalize them first.
  result = _CUNESCAPE_HEX.sub(ReplaceHex, text)
  return result.decode('string_escape')
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  try:
    # Base 0 lets int() accept decimal, hex (0x...) and octal (0...) forms.
    result = int(text, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % text)

  # Range-check against the matching (signedness, width) checker.
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(result)
  return result
def ParseFloat(text):
  """Parse a floating point number.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  try:
    # Assume Python-compatible syntax first.
    return float(text)
  except ValueError:
    # Check protobuf-style alternative spellings.
    if _FLOAT_INFINITY.match(text):
      if text[0] == '-':
        return float('-inf')
      else:
        return float('inf')
    elif _FLOAT_NAN.match(text):
      return float('nan')
    else:
      # Assume C-style '1.0f' format with a trailing type suffix.
      try:
        return float(text.rstrip('f'))
      except ValueError:
        raise ValueError('Couldn\'t parse float: %s' % text)
def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    Boolean values parsed

  Raises:
    ValueError: If text is not a valid boolean.
  """
  if text in ('true', 't', '1'):
    return True
  elif text in ('false', 'f', '0'):
    return False
  else:
    raise ValueError('Expected "true" or "false".')
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value to parse.

  Returns:
    The enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not a number: look the value up by name.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError(
          'Enum type "%s" has no value named %s.' % (
              enum_descriptor.full_name, value))
  else:
    # Numeric value: look it up by number.
    enum_value = enum_descriptor.values_by_number.get(number, None)
    if enum_value is None:
      raise ValueError(
          'Enum type "%s" has no value with number %d.' % (
              enum_descriptor.full_name, number))
  return enum_value.number