python/google/net/proto2/python/public/text_format.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2007 Google Inc.
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16 #
  17
  18
  19
  20
  21 """Contains routines for printing protocol messages in text format."""
  22
  23
  24 import cStringIO
  25 import re
  26
  27 from google.net.proto2.python.internal import type_checkers
  28 from google.net.proto2.python.public import descriptor
  29 from google.net.proto2.python.public import text_encoding
  30
# Names exported by "from ... import *" on this module.
__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge']


# Value checkers used by ParseInteger(), which indexes this tuple with
# 2 * int(is_long) + int(is_signed):
#   0: unsigned 32-bit, 1: signed 32-bit, 2: unsigned 64-bit, 3: signed 64-bit.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Case-insensitive spellings of infinity and NaN (with an optional trailing
# 'f') that ParseFloat() tries once float() has rejected the text.  Neither
# pattern is anchored at the end, and .match() only anchors the start.
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# C++ field types to which the float_format printing option is applied.
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
  43
  44
class Error(Exception):
  """Top-level module error for text_format.

  Base class for the exceptions defined in this module.
  """
  47
  48
class ParseError(Error):
  """Raised when an ASCII (text format) representation cannot be parsed.

  The tokenizer in this module builds the message as
  '<line>:<column> : <description>', with 1-based line and column numbers.
  """
  51
  52
  53 def MessageToString(message, as_utf8=False, as_one_line=False,
  54                     pointy_brackets=False, use_index_order=False,
  55                     float_format=None):
  56   """Convert protobuf message to text format.
  57
  58   Floating point values can be formatted compactly with 15 digits of
  59   precision (which is the most that IEEE 754 "double" can guarantee)
  60   using float_format='.15g'.
  61
  62   Args:
  63     message: The protocol buffers message.
  64     as_utf8: Produce text output in UTF8 format.
  65     as_one_line: Don't introduce newlines between fields.
  66     pointy_brackets: If True, use angle brackets instead of curly braces for
  67       nesting.
  68     use_index_order: If True, print fields of a proto message using the order
  69       defined in source code instead of the field number. By default, use the
  70       field number order.
  71     float_format: If set, use this to specify floating point number formatting
  72       (per the "Format Specification Mini-Language"); otherwise, str() is used.
  73
  74   Returns:
  75     A string of the text formatted protocol buffer message.
  76   """
  77   out = cStringIO.StringIO()
  78   PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line,
  79                pointy_brackets=pointy_brackets,
  80                use_index_order=use_index_order,
  81                float_format=float_format)
  82   result = out.getvalue()
  83   out.close()
  84   if as_one_line:
  85     return result.rstrip()
  86   return result
  87
  88
  89 def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
  90                  pointy_brackets=False, use_index_order=False,
  91                  float_format=None):
  92   fields = message.ListFields()
  93   if use_index_order:
  94     fields.sort(key=lambda x: x[0].index)
  95   for field, value in fields:
  96     if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
  97       for element in value:
  98         PrintField(field, element, out, indent, as_utf8, as_one_line,
  99                    pointy_brackets=pointy_brackets,
 100                    use_index_order=use_index_order,
 101                    float_format=float_format)
 102     else:
 103       PrintField(field, value, out, indent, as_utf8, as_one_line,
 104                  pointy_brackets=pointy_brackets,
 105                  use_index_order=use_index_order,
 106                  float_format=float_format)
 107
 108
 109 def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
 110                pointy_brackets=False, use_index_order=False, float_format=None):
 111   """Print a single field name/value pair.  For repeated fields, the value
 112   should be a single element."""
 113
 114   out.write(' ' * indent)
 115   if field.is_extension:
 116     out.write('[')
 117     if (field.containing_type.GetOptions().message_set_wire_format and
 118         field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
 119         field.message_type == field.extension_scope and
 120         field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
 121       out.write(field.message_type.full_name)
 122     else:
 123       out.write(field.full_name)
 124     out.write(']')
 125   elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
 126
 127     out.write(field.message_type.name)
 128   else:
 129     out.write(field.name)
 130
 131   if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
 132
 133
 134     out.write(': ')
 135
 136   PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
 137                   pointy_brackets=pointy_brackets,
 138                   use_index_order=use_index_order,
 139                   float_format=float_format)
 140   if as_one_line:
 141     out.write(' ')
 142   else:
 143     out.write('\n')
 144
 145
 146 def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
 147                     as_one_line=False, pointy_brackets=False,
 148                     use_index_order=False,
 149                     float_format=None):
 150   """Print a single field value (not including name).  For repeated fields,
 151   the value should be a single element."""
 152
 153   if pointy_brackets:
 154     openb = '<'
 155     closeb = '>'
 156   else:
 157     openb = '{'
 158     closeb = '}'
 159
 160   if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
 161     if as_one_line:
 162       out.write(' %s ' % openb)
 163       PrintMessage(value, out, indent, as_utf8, as_one_line,
 164                    pointy_brackets=pointy_brackets,
 165                    use_index_order=use_index_order,
 166                    float_format=float_format)
 167       out.write(closeb)
 168     else:
 169       out.write(' %s\n' % openb)
 170       PrintMessage(value, out, indent + 2, as_utf8, as_one_line,
 171                    pointy_brackets=pointy_brackets,
 172                    use_index_order=use_index_order,
 173                    float_format=float_format)
 174       out.write(' ' * indent + closeb)
 175   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
 176     enum_value = field.enum_type.values_by_number.get(value, None)
 177     if enum_value is not None:
 178       out.write(enum_value.name)
 179     else:
 180       out.write(str(value))
 181   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
 182     out.write('\"')
 183     if isinstance(value, unicode):
 184       out_value = value.encode('utf-8')
 185     else:
 186       out_value = value
 187     if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
 188
 189       out_as_utf8 = False
 190     else:
 191       out_as_utf8 = as_utf8
 192     out.write(text_encoding.CEscape(out_value, out_as_utf8))
 193     out.write('\"')
 194   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
 195     if value:
 196       out.write('true')
 197     else:
 198       out.write('false')
 199   elif field.cpp_type in _FLOAT_TYPES and float_format is not None:
 200     out.write('{1:{0}}'.format(float_format, value))
 201   else:
 202     out.write(str(value))
 203
 204
 205 def _ParseOrMerge(lines, message, allow_multiple_scalars):
 206   """Converts an ASCII representation of a protocol message into a message.
 207
 208   Args:
 209     lines: Lines of a message's ASCII representation.
 210     message: A protocol buffer message to merge into.
 211     allow_multiple_scalars: Determines if repeated values for a non-repeated
 212       field are permitted, e.g., the string "foo: 1 foo: 2" for a
 213       required/optional field named "foo".
 214
 215   Raises:
 216     ParseError: On ASCII parsing problems.
 217   """
 218   tokenizer = _Tokenizer(lines)
 219   while not tokenizer.AtEnd():
 220     _MergeField(tokenizer, message, allow_multiple_scalars)
 221
 222
 223 def Parse(text, message):
 224   """Parses an ASCII representation of a protocol message into a message.
 225
 226   Args:
 227     text: Message ASCII representation.
 228     message: A protocol buffer message to merge into.
 229
 230   Returns:
 231     The same message passed as argument.
 232
 233   Raises:
 234     ParseError: On ASCII parsing problems.
 235   """
 236   if not isinstance(text, str): text = text.decode('utf-8')
 237   return ParseLines(text.split('\n'), message)
 238
 239
 240 def Merge(text, message):
 241   """Parses an ASCII representation of a protocol message into a message.
 242
 243   Like Parse(), but allows repeated values for a non-repeated field, and uses
 244   the last one.
 245
 246   Args:
 247     text: Message ASCII representation.
 248     message: A protocol buffer message to merge into.
 249
 250   Returns:
 251     The same message passed as argument.
 252
 253   Raises:
 254     ParseError: On ASCII parsing problems.
 255   """
 256   return MergeLines(text.split('\n'), message)
 257
 258
 259 def ParseLines(lines, message):
 260   """Parses an ASCII representation of a protocol message into a message.
 261
 262   Args:
 263     lines: An iterable of lines of a message's ASCII representation.
 264     message: A protocol buffer message to merge into.
 265
 266   Returns:
 267     The same message passed as argument.
 268
 269   Raises:
 270     ParseError: On ASCII parsing problems.
 271   """
 272   _ParseOrMerge(lines, message, False)
 273   return message
 274
 275
 276 def MergeLines(lines, message):
 277   """Parses an ASCII representation of a protocol message into a message.
 278
 279   Args:
 280     lines: An iterable of lines of a message's ASCII representation.
 281     message: A protocol buffer message to merge into.
 282
 283   Returns:
 284     The same message passed as argument.
 285
 286   Raises:
 287     ParseError: On ASCII parsing problems.
 288   """
 289   _ParseOrMerge(lines, message, True)
 290   return message
 291
 292
 293 def _MergeField(tokenizer, message, allow_multiple_scalars):
 294   """Merges a single protocol message field into a message.
 295
 296   Args:
 297     tokenizer: A tokenizer to parse the field name and values.
 298     message: A protocol message to record the data.
 299     allow_multiple_scalars: Determines if repeated values for a non-repeated
 300       field are permitted, e.g., the string "foo: 1 foo: 2" for a
 301       required/optional field named "foo".
 302
 303   Raises:
 304     ParseError: In case of ASCII parsing problems.
 305   """
 306   message_descriptor = message.DESCRIPTOR
 307   if tokenizer.TryConsume('['):
 308     name = [tokenizer.ConsumeIdentifier()]
 309     while tokenizer.TryConsume('.'):
 310       name.append(tokenizer.ConsumeIdentifier())
 311     name = '.'.join(name)
 312
 313     if not message_descriptor.is_extendable:
 314       raise tokenizer.ParseErrorPreviousToken(
 315           'Message type "%s" does not have extensions.' %
 316           message_descriptor.full_name)
 317
 318     field = message.Extensions._FindExtensionByName(name)
 319
 320     if not field:
 321       raise tokenizer.ParseErrorPreviousToken(
 322           'Extension "%s" not registered.' % name)
 323     elif message_descriptor != field.containing_type:
 324       raise tokenizer.ParseErrorPreviousToken(
 325           'Extension "%s" does not extend message type "%s".' % (
 326               name, message_descriptor.full_name))
 327     tokenizer.Consume(']')
 328   else:
 329     name = tokenizer.ConsumeIdentifier()
 330     field = message_descriptor.fields_by_name.get(name, None)
 331
 332
 333
 334
 335     if not field:
 336       field = message_descriptor.fields_by_name.get(name.lower(), None)
 337       if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
 338         field = None
 339
 340     if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
 341         field.message_type.name != name):
 342       field = None
 343
 344     if not field:
 345       raise tokenizer.ParseErrorPreviousToken(
 346           'Message type "%s" has no field named "%s".' % (
 347               message_descriptor.full_name, name))
 348
 349   if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
 350     tokenizer.TryConsume(':')
 351
 352     if tokenizer.TryConsume('<'):
 353       end_token = '>'
 354     else:
 355       tokenizer.Consume('{')
 356       end_token = '}'
 357
 358     if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
 359       if field.is_extension:
 360         sub_message = message.Extensions[field].add()
 361       else:
 362         sub_message = getattr(message, field.name).add()
 363     else:
 364       if field.is_extension:
 365         sub_message = message.Extensions[field]
 366       else:
 367         sub_message = getattr(message, field.name)
 368       sub_message.SetInParent()
 369
 370     while not tokenizer.TryConsume(end_token):
 371       if tokenizer.AtEnd():
 372         raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
 373       _MergeField(tokenizer, sub_message, allow_multiple_scalars)
 374   else:
 375     _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)
 376
 377
 378
 379   if not tokenizer.TryConsume(','):
 380     tokenizer.TryConsume(';')
 381
 382
 383 def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
 384   """Merges a single protocol message scalar field into a message.
 385
 386   Args:
 387     tokenizer: A tokenizer to parse the field value.
 388     message: A protocol message to record the data.
 389     field: The descriptor of the field to be merged.
 390     allow_multiple_scalars: Determines if repeated values for a non-repeated
 391       field are permitted, e.g., the string "foo: 1 foo: 2" for a
 392       required/optional field named "foo".
 393
 394   Raises:
 395     ParseError: In case of ASCII parsing problems.
 396     RuntimeError: On runtime errors.
 397   """
 398   tokenizer.Consume(':')
 399   value = None
 400
 401   if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
 402                     descriptor.FieldDescriptor.TYPE_SINT32,
 403                     descriptor.FieldDescriptor.TYPE_SFIXED32):
 404     value = tokenizer.ConsumeInt32()
 405   elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
 406                       descriptor.FieldDescriptor.TYPE_SINT64,
 407                       descriptor.FieldDescriptor.TYPE_SFIXED64):
 408     value = tokenizer.ConsumeInt64()
 409   elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
 410                       descriptor.FieldDescriptor.TYPE_FIXED32):
 411     value = tokenizer.ConsumeUint32()
 412   elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
 413                       descriptor.FieldDescriptor.TYPE_FIXED64):
 414     value = tokenizer.ConsumeUint64()
 415   elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
 416                       descriptor.FieldDescriptor.TYPE_DOUBLE):
 417     value = tokenizer.ConsumeFloat()
 418   elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
 419     value = tokenizer.ConsumeBool()
 420   elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
 421     value = tokenizer.ConsumeString()
 422   elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
 423     value = tokenizer.ConsumeByteString()
 424   elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
 425     value = tokenizer.ConsumeEnum(field)
 426   else:
 427     raise RuntimeError('Unknown field type %d' % field.type)
 428
 429   if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
 430     if field.is_extension:
 431       message.Extensions[field].append(value)
 432     else:
 433       getattr(message, field.name).append(value)
 434   else:
 435     if field.is_extension:
 436       if not allow_multiple_scalars and message.HasExtension(field):
 437         raise tokenizer.ParseErrorPreviousToken(
 438             'Message type "%s" should not have multiple "%s" extensions.' %
 439             (message.DESCRIPTOR.full_name, field.full_name))
 440       else:
 441         message.Extensions[field] = value
 442     else:
 443       if not allow_multiple_scalars and message.HasField(field.name):
 444         raise tokenizer.ParseErrorPreviousToken(
 445             'Message type "%s" should not have multiple "%s" fields.' %
 446             (message.DESCRIPTOR.full_name, field.name))
 447       else:
 448         setattr(message, field.name, value)
 449
 450
 451 class _Tokenizer(object):
 452   """Protocol buffer ASCII representation tokenizer.
 453
 454   This class handles the lower level string parsing by splitting it into
 455   meaningful tokens.
 456
 457   It was directly ported from the Java protocol buffer API.
 458   """
 459
 460   _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
 461   _TOKEN = re.compile(
 462       '[a-zA-Z_][0-9a-zA-Z_+-]*|'
 463       '[0-9+-][0-9a-zA-Z_.+-]*|'
 464       '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'
 465       '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')
 466   _IDENTIFIER = re.compile(r'\w+')
 467
 468   def __init__(self, lines):
 469     self._position = 0
 470     self._line = -1
 471     self._column = 0
 472     self._token_start = None
 473     self.token = ''
 474     self._lines = iter(lines)
 475     self._current_line = ''
 476     self._previous_line = 0
 477     self._previous_column = 0
 478     self._more_lines = True
 479     self._SkipWhitespace()
 480     self.NextToken()
 481
 482   def AtEnd(self):
 483     """Checks the end of the text was reached.
 484
 485     Returns:
 486       True iff the end was reached.
 487     """
 488     return not self.token
 489
 490   def _PopLine(self):
 491     while len(self._current_line) <= self._column:
 492       try:
 493         self._current_line = self._lines.next()
 494       except StopIteration:
 495         self._current_line = ''
 496         self._more_lines = False
 497         return
 498       else:
 499         self._line += 1
 500         self._column = 0
 501
 502   def _SkipWhitespace(self):
 503     while True:
 504       self._PopLine()
 505       match = self._WHITESPACE.match(self._current_line, self._column)
 506       if not match:
 507         break
 508       length = len(match.group(0))
 509       self._column += length
 510
 511   def TryConsume(self, token):
 512     """Tries to consume a given piece of text.
 513
 514     Args:
 515       token: Text to consume.
 516
 517     Returns:
 518       True iff the text was consumed.
 519     """
 520     if self.token == token:
 521       self.NextToken()
 522       return True
 523     return False
 524
 525   def Consume(self, token):
 526     """Consumes a piece of text.
 527
 528     Args:
 529       token: Text to consume.
 530
 531     Raises:
 532       ParseError: If the text couldn't be consumed.
 533     """
 534     if not self.TryConsume(token):
 535       raise self._ParseError('Expected "%s".' % token)
 536
 537   def ConsumeIdentifier(self):
 538     """Consumes protocol message field identifier.
 539
 540     Returns:
 541       Identifier string.
 542
 543     Raises:
 544       ParseError: If an identifier couldn't be consumed.
 545     """
 546     result = self.token
 547     if not self._IDENTIFIER.match(result):
 548       raise self._ParseError('Expected identifier.')
 549     self.NextToken()
 550     return result
 551
 552   def ConsumeInt32(self):
 553     """Consumes a signed 32bit integer number.
 554
 555     Returns:
 556       The integer parsed.
 557
 558     Raises:
 559       ParseError: If a signed 32bit integer couldn't be consumed.
 560     """
 561     try:
 562       result = ParseInteger(self.token, is_signed=True, is_long=False)
 563     except ValueError, e:
 564       raise self._ParseError(str(e))
 565     self.NextToken()
 566     return result
 567
 568   def ConsumeUint32(self):
 569     """Consumes an unsigned 32bit integer number.
 570
 571     Returns:
 572       The integer parsed.
 573
 574     Raises:
 575       ParseError: If an unsigned 32bit integer couldn't be consumed.
 576     """
 577     try:
 578       result = ParseInteger(self.token, is_signed=False, is_long=False)
 579     except ValueError, e:
 580       raise self._ParseError(str(e))
 581     self.NextToken()
 582     return result
 583
 584   def ConsumeInt64(self):
 585     """Consumes a signed 64bit integer number.
 586
 587     Returns:
 588       The integer parsed.
 589
 590     Raises:
 591       ParseError: If a signed 64bit integer couldn't be consumed.
 592     """
 593     try:
 594       result = ParseInteger(self.token, is_signed=True, is_long=True)
 595     except ValueError, e:
 596       raise self._ParseError(str(e))
 597     self.NextToken()
 598     return result
 599
 600   def ConsumeUint64(self):
 601     """Consumes an unsigned 64bit integer number.
 602
 603     Returns:
 604       The integer parsed.
 605
 606     Raises:
 607       ParseError: If an unsigned 64bit integer couldn't be consumed.
 608     """
 609     try:
 610       result = ParseInteger(self.token, is_signed=False, is_long=True)
 611     except ValueError, e:
 612       raise self._ParseError(str(e))
 613     self.NextToken()
 614     return result
 615
 616   def ConsumeFloat(self):
 617     """Consumes an floating point number.
 618
 619     Returns:
 620       The number parsed.
 621
 622     Raises:
 623       ParseError: If a floating point number couldn't be consumed.
 624     """
 625     try:
 626       result = ParseFloat(self.token)
 627     except ValueError, e:
 628       raise self._ParseError(str(e))
 629     self.NextToken()
 630     return result
 631
 632   def ConsumeBool(self):
 633     """Consumes a boolean value.
 634
 635     Returns:
 636       The bool parsed.
 637
 638     Raises:
 639       ParseError: If a boolean value couldn't be consumed.
 640     """
 641     try:
 642       result = ParseBool(self.token)
 643     except ValueError, e:
 644       raise self._ParseError(str(e))
 645     self.NextToken()
 646     return result
 647
 648   def ConsumeString(self):
 649     """Consumes a string value.
 650
 651     Returns:
 652       The string parsed.
 653
 654     Raises:
 655       ParseError: If a string value couldn't be consumed.
 656     """
 657     the_bytes = self.ConsumeByteString()
 658     try:
 659       return unicode(the_bytes, 'utf-8')
 660     except UnicodeDecodeError, e:
 661       raise self._StringParseError(e)
 662
 663   def ConsumeByteString(self):
 664     """Consumes a byte array value.
 665
 666     Returns:
 667       The array parsed (as a string).
 668
 669     Raises:
 670       ParseError: If a byte array value couldn't be consumed.
 671     """
 672     the_list = [self._ConsumeSingleByteString()]
 673     while self.token and self.token[0] in ('\'', '"'):
 674       the_list.append(self._ConsumeSingleByteString())
 675     return ''.encode('latin1').join(the_list)
 676
 677
 678   def _ConsumeSingleByteString(self):
 679     """Consume one token of a string literal.
 680
 681     String literals (whether bytes or text) can come in multiple adjacent
 682     tokens which are automatically concatenated, like in C or Python.  This
 683     method only consumes one token.
 684     """
 685     text = self.token
 686     if len(text) < 1 or text[0] not in ('\'', '"'):
 687       raise self._ParseError('Expected string.')
 688
 689     if len(text) < 2 or text[-1] != text[0]:
 690       raise self._ParseError('String missing ending quote.')
 691
 692     try:
 693       result = text_encoding.CUnescape(text[1:-1])
 694     except ValueError, e:
 695       raise self._ParseError(str(e))
 696     self.NextToken()
 697     return result
 698
 699   def ConsumeEnum(self, field):
 700     try:
 701       result = ParseEnum(field, self.token)
 702     except ValueError, e:
 703       raise self._ParseError(str(e))
 704     self.NextToken()
 705     return result
 706
 707   def ParseErrorPreviousToken(self, message):
 708     """Creates and *returns* a ParseError for the previously read token.
 709
 710     Args:
 711       message: A message to set for the exception.
 712
 713     Returns:
 714       A ParseError instance.
 715     """
 716     return ParseError('%d:%d : %s' % (
 717         self._previous_line + 1, self._previous_column + 1, message))
 718
 719   def _ParseError(self, message):
 720     """Creates and *returns* a ParseError for the current token."""
 721     return ParseError('%d:%d : %s' % (
 722         self._line + 1, self._column + 1, message))
 723
 724   def _StringParseError(self, e):
 725     return self._ParseError('Couldn\'t parse string: ' + str(e))
 726
 727   def NextToken(self):
 728     """Reads the next meaningful token."""
 729     self._previous_line = self._line
 730     self._previous_column = self._column
 731
 732     self._column += len(self.token)
 733     self._SkipWhitespace()
 734
 735     if not self._more_lines:
 736       self.token = ''
 737       return
 738
 739     match = self._TOKEN.match(self._current_line, self._column)
 740     if match:
 741       token = match.group(0)
 742       self.token = token
 743     else:
 744       self.token = self._current_line[self._column]
 745
 746
 747 def ParseInteger(text, is_signed=False, is_long=False):
 748   """Parses an integer.
 749
 750   Args:
 751     text: The text to parse.
 752     is_signed: True if a signed integer must be parsed.
 753     is_long: True if a long integer must be parsed.
 754
 755   Returns:
 756     The integer value.
 757
 758   Raises:
 759     ValueError: Thrown Iff the text is not a valid integer.
 760   """
 761
 762   try:
 763
 764
 765
 766     if is_long:
 767       result = long(text, 0)
 768     else:
 769       result = int(text, 0)
 770   except ValueError:
 771     raise ValueError('Couldn\'t parse integer: %s' % text)
 772
 773
 774   checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
 775   checker.CheckValue(result)
 776   return result
 777
 778
 779 def ParseFloat(text):
 780   """Parse a floating point number.
 781
 782   Args:
 783     text: Text to parse.
 784
 785   Returns:
 786     The number parsed.
 787
 788   Raises:
 789     ValueError: If a floating point number couldn't be parsed.
 790   """
 791   try:
 792
 793     return float(text)
 794   except ValueError:
 795
 796     if _FLOAT_INFINITY.match(text):
 797       if text[0] == '-':
 798         return float('-inf')
 799       else:
 800         return float('inf')
 801     elif _FLOAT_NAN.match(text):
 802       return float('nan')
 803     else:
 804
 805       try:
 806         return float(text.rstrip('f'))
 807       except ValueError:
 808         raise ValueError('Couldn\'t parse float: %s' % text)
 809
 810
 811 def ParseBool(text):
 812   """Parse a boolean value.
 813
 814   Args:
 815     text: Text to parse.
 816
 817   Returns:
 818     Boolean values parsed
 819
 820   Raises:
 821     ValueError: If text is not a valid boolean.
 822   """
 823   if text in ('true', 't', '1'):
 824     return True
 825   elif text in ('false', 'f', '0'):
 826     return False
 827   else:
 828     raise ValueError('Expected "true" or "false".')
 829
 830
 831 def ParseEnum(field, value):
 832   """Parse an enum value.
 833
 834   The value can be specified by a number (the enum value), or by
 835   a string literal (the enum name).
 836
 837   Args:
 838     field: Enum field descriptor.
 839     value: String value.
 840
 841   Returns:
 842     Enum value number.
 843
 844   Raises:
 845     ValueError: If the enum value could not be parsed.
 846   """
 847   enum_descriptor = field.enum_type
 848   try:
 849     number = int(value, 0)
 850   except ValueError:
 851
 852     enum_value = enum_descriptor.values_by_name.get(value, None)
 853     if enum_value is None:
 854       raise ValueError(
 855           'Enum type "%s" has no value named %s.' % (
 856               enum_descriptor.full_name, value))
 857   else:
 858
 859     enum_value = enum_descriptor.values_by_number.get(number, None)
 860     if enum_value is None:
 861       raise ValueError(
 862           'Enum type "%s" has no value with number %d.' % (
 863               enum_descriptor.full_name, number))
 864   return enum_value.number