python/google/net/proto2/python/public/text_format.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2007 Google Inc.
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16 #
  17
  18
  19
  20
  21 """Contains routines for printing protocol messages in text format."""
  22
  23
  24
  25 import cStringIO
  26 import re
  27
  28 from google.net.proto2.python.internal import type_checkers
  29 from google.net.proto2.python.public import descriptor
  30 from google.net.proto2.python.public import text_encoding
  31
  32 __all__ = ['MessageToString', 'PrintMessage', 'PrintField',
  33            'PrintFieldValue', 'Merge']
  34
  35
# Range checkers for parsed integers.  ParseInteger() picks one with the index
# 2 * int(is_long) + int(is_signed), so the order here (uint32, int32, uint64,
# int64) must not change.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Case-insensitive "inf"/"infinity" with an optional leading '-' and an
# optional trailing 'f'; consulted by ParseFloat() once float() has rejected
# the text.
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
# Case-insensitive "nan" with an optional trailing 'f'; consulted by
# ParseFloat() in the same situation.
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# C++ field types that PrintFieldValue() renders with float_format when one
# is supplied.
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
  44
  45
class Error(Exception):
  """Base class for the exceptions defined by the text_format module."""
  48
  49
class ParseError(Error):
  """Raised when text-format (ASCII) input cannot be parsed.

  The tokenizer in this module builds instances whose message has the form
  'line:column : description', with 1-based line and column numbers.
  """
  52
  53
  54 def MessageToString(message, as_utf8=False, as_one_line=False,
  55                     pointy_brackets=False, use_index_order=False,
  56                     float_format=None):
  57   """Convert protobuf message to text format.
  58
  59   Floating point values can be formatted compactly with 15 digits of
  60   precision (which is the most that IEEE 754 "double" can guarantee)
  61   using float_format='.15g'.
  62
  63   Args:
  64     message: The protocol buffers message.
  65     as_utf8: Produce text output in UTF8 format.
  66     as_one_line: Don't introduce newlines between fields.
  67     pointy_brackets: If True, use angle brackets instead of curly braces for
  68       nesting.
  69     use_index_order: If True, print fields of a proto message using the order
  70       defined in source code instead of the field number. By default, use the
  71       field number order.
  72     float_format: If set, use this to specify floating point number formatting
  73       (per the "Format Specification Mini-Language"); otherwise, str() is used.
  74
  75   Returns:
  76     A string of the text formatted protocol buffer message.
  77   """
  78   out = cStringIO.StringIO()
  79   PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line,
  80                pointy_brackets=pointy_brackets,
  81                use_index_order=use_index_order,
  82                float_format=float_format)
  83   result = out.getvalue()
  84   out.close()
  85   if as_one_line:
  86     return result.rstrip()
  87   return result
  88
  89
  90 def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
  91                  pointy_brackets=False, use_index_order=False,
  92                  float_format=None):
  93   fields = message.ListFields()
  94   if use_index_order:
  95     fields.sort(key=lambda x: x[0].index)
  96   for field, value in fields:
  97     if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
  98       for element in value:
  99         PrintField(field, element, out, indent, as_utf8, as_one_line,
 100                    pointy_brackets=pointy_brackets,
 101                    use_index_order=use_index_order,
 102                    float_format=float_format)
 103     else:
 104       PrintField(field, value, out, indent, as_utf8, as_one_line,
 105                  pointy_brackets=pointy_brackets,
 106                  use_index_order=use_index_order,
 107                  float_format=float_format)
 108
 109
 110 def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
 111                pointy_brackets=False, use_index_order=False, float_format=None):
 112   """Print a single field name/value pair.  For repeated fields, the value
 113   should be a single element."""
 114
 115   out.write(' ' * indent)
 116   if field.is_extension:
 117     out.write('[')
 118     if (field.containing_type.GetOptions().message_set_wire_format and
 119         field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
 120         field.message_type == field.extension_scope and
 121         field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
 122       out.write(field.message_type.full_name)
 123     else:
 124       out.write(field.full_name)
 125     out.write(']')
 126   elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
 127
 128     out.write(field.message_type.name)
 129   else:
 130     out.write(field.name)
 131
 132   if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
 133
 134
 135     out.write(': ')
 136
 137   PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
 138                   pointy_brackets=pointy_brackets,
 139                   use_index_order=use_index_order,
 140                   float_format=float_format)
 141   if as_one_line:
 142     out.write(' ')
 143   else:
 144     out.write('\n')
 145
 146
 147 def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
 148                     as_one_line=False, pointy_brackets=False,
 149                     use_index_order=False,
 150                     float_format=None):
 151   """Print a single field value (not including name).  For repeated fields,
 152   the value should be a single element."""
 153
 154   if pointy_brackets:
 155     openb = '<'
 156     closeb = '>'
 157   else:
 158     openb = '{'
 159     closeb = '}'
 160
 161   if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
 162     if as_one_line:
 163       out.write(' %s ' % openb)
 164       PrintMessage(value, out, indent, as_utf8, as_one_line,
 165                    pointy_brackets=pointy_brackets,
 166                    use_index_order=use_index_order,
 167                    float_format=float_format)
 168       out.write(closeb)
 169     else:
 170       out.write(' %s\n' % openb)
 171       PrintMessage(value, out, indent + 2, as_utf8, as_one_line,
 172                    pointy_brackets=pointy_brackets,
 173                    use_index_order=use_index_order,
 174                    float_format=float_format)
 175       out.write(' ' * indent + closeb)
 176   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
 177     enum_value = field.enum_type.values_by_number.get(value, None)
 178     if enum_value is not None:
 179       out.write(enum_value.name)
 180     else:
 181       out.write(str(value))
 182   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
 183     out.write('\"')
 184     if isinstance(value, unicode):
 185       out_value = value.encode('utf-8')
 186     else:
 187       out_value = value
 188     if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
 189
 190       out_as_utf8 = False
 191     else:
 192       out_as_utf8 = as_utf8
 193     out.write(text_encoding.CEscape(out_value, out_as_utf8))
 194     out.write('\"')
 195   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
 196     if value:
 197       out.write('true')
 198     else:
 199       out.write('false')
 200   elif field.cpp_type in _FLOAT_TYPES and float_format is not None:
 201     out.write('{1:{0}}'.format(float_format, value))
 202   else:
 203     out.write(str(value))
 204
 205
 206 def _ParseOrMerge(lines, message, allow_multiple_scalars):
 207   """Converts an ASCII representation of a protocol message into a message.
 208
 209   Args:
 210     lines: Lines of a message's ASCII representation.
 211     message: A protocol buffer message to merge into.
 212     allow_multiple_scalars: Determines if repeated values for a non-repeated
 213       field are permitted, e.g., the string "foo: 1 foo: 2" for a
 214       required/optional field named "foo".
 215
 216   Raises:
 217     ParseError: On ASCII parsing problems.
 218   """
 219   tokenizer = _Tokenizer(lines)
 220   while not tokenizer.AtEnd():
 221     _MergeField(tokenizer, message, allow_multiple_scalars)
 222
 223
 224 def Parse(text, message):
 225   """Parses an ASCII representation of a protocol message into a message.
 226
 227   Args:
 228     text: Message ASCII representation.
 229     message: A protocol buffer message to merge into.
 230
 231   Returns:
 232     The same message passed as argument.
 233
 234   Raises:
 235     ParseError: On ASCII parsing problems.
 236   """
 237   if not isinstance(text, str): text = text.decode('utf-8')
 238   return ParseLines(text.split('\n'), message)
 239
 240
 241 def Merge(text, message):
 242   """Parses an ASCII representation of a protocol message into a message.
 243
 244   Like Parse(), but allows repeated values for a non-repeated field, and uses
 245   the last one.
 246
 247   Args:
 248     text: Message ASCII representation.
 249     message: A protocol buffer message to merge into.
 250
 251   Returns:
 252     The same message passed as argument.
 253
 254   Raises:
 255     ParseError: On ASCII parsing problems.
 256   """
 257   return MergeLines(text.split('\n'), message)
 258
 259
 260 def ParseLines(lines, message):
 261   """Parses an ASCII representation of a protocol message into a message.
 262
 263   Args:
 264     lines: An iterable of lines of a message's ASCII representation.
 265     message: A protocol buffer message to merge into.
 266
 267   Returns:
 268     The same message passed as argument.
 269
 270   Raises:
 271     ParseError: On ASCII parsing problems.
 272   """
 273   _ParseOrMerge(lines, message, False)
 274   return message
 275
 276
 277 def MergeLines(lines, message):
 278   """Parses an ASCII representation of a protocol message into a message.
 279
 280   Args:
 281     lines: An iterable of lines of a message's ASCII representation.
 282     message: A protocol buffer message to merge into.
 283
 284   Returns:
 285     The same message passed as argument.
 286
 287   Raises:
 288     ParseError: On ASCII parsing problems.
 289   """
 290   _ParseOrMerge(lines, message, True)
 291   return message
 292
 293
 294 def _MergeField(tokenizer, message, allow_multiple_scalars):
 295   """Merges a single protocol message field into a message.
 296
 297   Args:
 298     tokenizer: A tokenizer to parse the field name and values.
 299     message: A protocol message to record the data.
 300     allow_multiple_scalars: Determines if repeated values for a non-repeated
 301       field are permitted, e.g., the string "foo: 1 foo: 2" for a
 302       required/optional field named "foo".
 303
 304   Raises:
 305     ParseError: In case of ASCII parsing problems.
 306   """
 307   message_descriptor = message.DESCRIPTOR
 308   if tokenizer.TryConsume('['):
 309     name = [tokenizer.ConsumeIdentifier()]
 310     while tokenizer.TryConsume('.'):
 311       name.append(tokenizer.ConsumeIdentifier())
 312     name = '.'.join(name)
 313
 314     if not message_descriptor.is_extendable:
 315       raise tokenizer.ParseErrorPreviousToken(
 316           'Message type "%s" does not have extensions.' %
 317           message_descriptor.full_name)
 318
 319     field = message.Extensions._FindExtensionByName(name)
 320
 321     if not field:
 322       raise tokenizer.ParseErrorPreviousToken(
 323           'Extension "%s" not registered.' % name)
 324     elif message_descriptor != field.containing_type:
 325       raise tokenizer.ParseErrorPreviousToken(
 326           'Extension "%s" does not extend message type "%s".' % (
 327               name, message_descriptor.full_name))
 328     tokenizer.Consume(']')
 329   else:
 330     name = tokenizer.ConsumeIdentifier()
 331     field = message_descriptor.fields_by_name.get(name, None)
 332
 333
 334
 335
 336     if not field:
 337       field = message_descriptor.fields_by_name.get(name.lower(), None)
 338       if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
 339         field = None
 340
 341     if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
 342         field.message_type.name != name):
 343       field = None
 344
 345     if not field:
 346       raise tokenizer.ParseErrorPreviousToken(
 347           'Message type "%s" has no field named "%s".' % (
 348               message_descriptor.full_name, name))
 349
 350   if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
 351     tokenizer.TryConsume(':')
 352
 353     if tokenizer.TryConsume('<'):
 354       end_token = '>'
 355     else:
 356       tokenizer.Consume('{')
 357       end_token = '}'
 358
 359     if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
 360       if field.is_extension:
 361         sub_message = message.Extensions[field].add()
 362       else:
 363         sub_message = getattr(message, field.name).add()
 364     else:
 365       if field.is_extension:
 366         sub_message = message.Extensions[field]
 367       else:
 368         sub_message = getattr(message, field.name)
 369       sub_message.SetInParent()
 370
 371     while not tokenizer.TryConsume(end_token):
 372       if tokenizer.AtEnd():
 373         raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
 374       _MergeField(tokenizer, sub_message, allow_multiple_scalars)
 375   else:
 376     _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)
 377
 378
 379
 380   if not tokenizer.TryConsume(','):
 381     tokenizer.TryConsume(';')
 382
 383
 384 def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
 385   """Merges a single protocol message scalar field into a message.
 386
 387   Args:
 388     tokenizer: A tokenizer to parse the field value.
 389     message: A protocol message to record the data.
 390     field: The descriptor of the field to be merged.
 391     allow_multiple_scalars: Determines if repeated values for a non-repeated
 392       field are permitted, e.g., the string "foo: 1 foo: 2" for a
 393       required/optional field named "foo".
 394
 395   Raises:
 396     ParseError: In case of ASCII parsing problems.
 397     RuntimeError: On runtime errors.
 398   """
 399   tokenizer.Consume(':')
 400   value = None
 401
 402   if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
 403                     descriptor.FieldDescriptor.TYPE_SINT32,
 404                     descriptor.FieldDescriptor.TYPE_SFIXED32):
 405     value = tokenizer.ConsumeInt32()
 406   elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
 407                       descriptor.FieldDescriptor.TYPE_SINT64,
 408                       descriptor.FieldDescriptor.TYPE_SFIXED64):
 409     value = tokenizer.ConsumeInt64()
 410   elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
 411                       descriptor.FieldDescriptor.TYPE_FIXED32):
 412     value = tokenizer.ConsumeUint32()
 413   elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
 414                       descriptor.FieldDescriptor.TYPE_FIXED64):
 415     value = tokenizer.ConsumeUint64()
 416   elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
 417                       descriptor.FieldDescriptor.TYPE_DOUBLE):
 418     value = tokenizer.ConsumeFloat()
 419   elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
 420     value = tokenizer.ConsumeBool()
 421   elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
 422     value = tokenizer.ConsumeString()
 423   elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
 424     value = tokenizer.ConsumeByteString()
 425   elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
 426     value = tokenizer.ConsumeEnum(field)
 427   else:
 428     raise RuntimeError('Unknown field type %d' % field.type)
 429
 430   if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
 431     if field.is_extension:
 432       message.Extensions[field].append(value)
 433     else:
 434       getattr(message, field.name).append(value)
 435   else:
 436     if field.is_extension:
 437       if not allow_multiple_scalars and message.HasExtension(field):
 438         raise tokenizer.ParseErrorPreviousToken(
 439             'Message type "%s" should not have multiple "%s" extensions.' %
 440             (message.DESCRIPTOR.full_name, field.full_name))
 441       else:
 442         message.Extensions[field] = value
 443     else:
 444       if not allow_multiple_scalars and message.HasField(field.name):
 445         raise tokenizer.ParseErrorPreviousToken(
 446             'Message type "%s" should not have multiple "%s" fields.' %
 447             (message.DESCRIPTOR.full_name, field.name))
 448       else:
 449         setattr(message, field.name, value)
 450
 451
 452 class _Tokenizer(object):
 453   """Protocol buffer ASCII representation tokenizer.
 454
 455   This class handles the lower level string parsing by splitting it into
 456   meaningful tokens.
 457
 458   It was directly ported from the Java protocol buffer API.
 459   """
 460
 461   _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
 462   _TOKEN = re.compile(
 463       '[a-zA-Z_][0-9a-zA-Z_+-]*|'
 464       '[0-9+-][0-9a-zA-Z_.+-]*|'
 465       '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'
 466       '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')
 467   _IDENTIFIER = re.compile(r'\w+')
 468
 469   def __init__(self, lines):
 470     self._position = 0
 471     self._line = -1
 472     self._column = 0
 473     self._token_start = None
 474     self.token = ''
 475     self._lines = iter(lines)
 476     self._current_line = ''
 477     self._previous_line = 0
 478     self._previous_column = 0
 479     self._more_lines = True
 480     self._SkipWhitespace()
 481     self.NextToken()
 482
 483   def AtEnd(self):
 484     """Checks the end of the text was reached.
 485
 486     Returns:
 487       True iff the end was reached.
 488     """
 489     return not self.token
 490
 491   def _PopLine(self):
 492     while len(self._current_line) <= self._column:
 493       try:
 494         self._current_line = self._lines.next()
 495       except StopIteration:
 496         self._current_line = ''
 497         self._more_lines = False
 498         return
 499       else:
 500         self._line += 1
 501         self._column = 0
 502
 503   def _SkipWhitespace(self):
 504     while True:
 505       self._PopLine()
 506       match = self._WHITESPACE.match(self._current_line, self._column)
 507       if not match:
 508         break
 509       length = len(match.group(0))
 510       self._column += length
 511
 512   def TryConsume(self, token):
 513     """Tries to consume a given piece of text.
 514
 515     Args:
 516       token: Text to consume.
 517
 518     Returns:
 519       True iff the text was consumed.
 520     """
 521     if self.token == token:
 522       self.NextToken()
 523       return True
 524     return False
 525
 526   def Consume(self, token):
 527     """Consumes a piece of text.
 528
 529     Args:
 530       token: Text to consume.
 531
 532     Raises:
 533       ParseError: If the text couldn't be consumed.
 534     """
 535     if not self.TryConsume(token):
 536       raise self._ParseError('Expected "%s".' % token)
 537
 538   def ConsumeIdentifier(self):
 539     """Consumes protocol message field identifier.
 540
 541     Returns:
 542       Identifier string.
 543
 544     Raises:
 545       ParseError: If an identifier couldn't be consumed.
 546     """
 547     result = self.token
 548     if not self._IDENTIFIER.match(result):
 549       raise self._ParseError('Expected identifier.')
 550     self.NextToken()
 551     return result
 552
 553   def ConsumeInt32(self):
 554     """Consumes a signed 32bit integer number.
 555
 556     Returns:
 557       The integer parsed.
 558
 559     Raises:
 560       ParseError: If a signed 32bit integer couldn't be consumed.
 561     """
 562     try:
 563       result = ParseInteger(self.token, is_signed=True, is_long=False)
 564     except ValueError, e:
 565       raise self._ParseError(str(e))
 566     self.NextToken()
 567     return result
 568
 569   def ConsumeUint32(self):
 570     """Consumes an unsigned 32bit integer number.
 571
 572     Returns:
 573       The integer parsed.
 574
 575     Raises:
 576       ParseError: If an unsigned 32bit integer couldn't be consumed.
 577     """
 578     try:
 579       result = ParseInteger(self.token, is_signed=False, is_long=False)
 580     except ValueError, e:
 581       raise self._ParseError(str(e))
 582     self.NextToken()
 583     return result
 584
 585   def ConsumeInt64(self):
 586     """Consumes a signed 64bit integer number.
 587
 588     Returns:
 589       The integer parsed.
 590
 591     Raises:
 592       ParseError: If a signed 64bit integer couldn't be consumed.
 593     """
 594     try:
 595       result = ParseInteger(self.token, is_signed=True, is_long=True)
 596     except ValueError, e:
 597       raise self._ParseError(str(e))
 598     self.NextToken()
 599     return result
 600
 601   def ConsumeUint64(self):
 602     """Consumes an unsigned 64bit integer number.
 603
 604     Returns:
 605       The integer parsed.
 606
 607     Raises:
 608       ParseError: If an unsigned 64bit integer couldn't be consumed.
 609     """
 610     try:
 611       result = ParseInteger(self.token, is_signed=False, is_long=True)
 612     except ValueError, e:
 613       raise self._ParseError(str(e))
 614     self.NextToken()
 615     return result
 616
 617   def ConsumeFloat(self):
 618     """Consumes an floating point number.
 619
 620     Returns:
 621       The number parsed.
 622
 623     Raises:
 624       ParseError: If a floating point number couldn't be consumed.
 625     """
 626     try:
 627       result = ParseFloat(self.token)
 628     except ValueError, e:
 629       raise self._ParseError(str(e))
 630     self.NextToken()
 631     return result
 632
 633   def ConsumeBool(self):
 634     """Consumes a boolean value.
 635
 636     Returns:
 637       The bool parsed.
 638
 639     Raises:
 640       ParseError: If a boolean value couldn't be consumed.
 641     """
 642     try:
 643       result = ParseBool(self.token)
 644     except ValueError, e:
 645       raise self._ParseError(str(e))
 646     self.NextToken()
 647     return result
 648
 649   def ConsumeString(self):
 650     """Consumes a string value.
 651
 652     Returns:
 653       The string parsed.
 654
 655     Raises:
 656       ParseError: If a string value couldn't be consumed.
 657     """
 658     the_bytes = self.ConsumeByteString()
 659     try:
 660       return unicode(the_bytes, 'utf-8')
 661     except UnicodeDecodeError, e:
 662       raise self._StringParseError(e)
 663
 664   def ConsumeByteString(self):
 665     """Consumes a byte array value.
 666
 667     Returns:
 668       The array parsed (as a string).
 669
 670     Raises:
 671       ParseError: If a byte array value couldn't be consumed.
 672     """
 673     the_list = [self._ConsumeSingleByteString()]
 674     while self.token and self.token[0] in ('\'', '"'):
 675       the_list.append(self._ConsumeSingleByteString())
 676     return ''.encode('latin1').join(the_list)
 677
 678
 679   def _ConsumeSingleByteString(self):
 680     """Consume one token of a string literal.
 681
 682     String literals (whether bytes or text) can come in multiple adjacent
 683     tokens which are automatically concatenated, like in C or Python.  This
 684     method only consumes one token.
 685     """
 686     text = self.token
 687     if len(text) < 1 or text[0] not in ('\'', '"'):
 688       raise self._ParseError('Expected string.')
 689
 690     if len(text) < 2 or text[-1] != text[0]:
 691       raise self._ParseError('String missing ending quote.')
 692
 693     try:
 694       result = text_encoding.CUnescape(text[1:-1])
 695     except ValueError, e:
 696       raise self._ParseError(str(e))
 697     self.NextToken()
 698     return result
 699
 700   def ConsumeEnum(self, field):
 701     try:
 702       result = ParseEnum(field, self.token)
 703     except ValueError, e:
 704       raise self._ParseError(str(e))
 705     self.NextToken()
 706     return result
 707
 708   def ParseErrorPreviousToken(self, message):
 709     """Creates and *returns* a ParseError for the previously read token.
 710
 711     Args:
 712       message: A message to set for the exception.
 713
 714     Returns:
 715       A ParseError instance.
 716     """
 717     return ParseError('%d:%d : %s' % (
 718         self._previous_line + 1, self._previous_column + 1, message))
 719
 720   def _ParseError(self, message):
 721     """Creates and *returns* a ParseError for the current token."""
 722     return ParseError('%d:%d : %s' % (
 723         self._line + 1, self._column + 1, message))
 724
 725   def _StringParseError(self, e):
 726     return self._ParseError('Couldn\'t parse string: ' + str(e))
 727
 728   def NextToken(self):
 729     """Reads the next meaningful token."""
 730     self._previous_line = self._line
 731     self._previous_column = self._column
 732
 733     self._column += len(self.token)
 734     self._SkipWhitespace()
 735
 736     if not self._more_lines:
 737       self.token = ''
 738       return
 739
 740     match = self._TOKEN.match(self._current_line, self._column)
 741     if match:
 742       token = match.group(0)
 743       self.token = token
 744     else:
 745       self.token = self._current_line[self._column]
 746
 747
 748 def ParseInteger(text, is_signed=False, is_long=False):
 749   """Parses an integer.
 750
 751   Args:
 752     text: The text to parse.
 753     is_signed: True if a signed integer must be parsed.
 754     is_long: True if a long integer must be parsed.
 755
 756   Returns:
 757     The integer value.
 758
 759   Raises:
 760     ValueError: Thrown Iff the text is not a valid integer.
 761   """
 762
 763   try:
 764
 765
 766
 767     if is_long:
 768       result = long(text, 0)
 769     else:
 770       result = int(text, 0)
 771   except ValueError:
 772     raise ValueError('Couldn\'t parse integer: %s' % text)
 773
 774
 775   checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
 776   checker.CheckValue(result)
 777   return result
 778
 779
 780 def ParseFloat(text):
 781   """Parse a floating point number.
 782
 783   Args:
 784     text: Text to parse.
 785
 786   Returns:
 787     The number parsed.
 788
 789   Raises:
 790     ValueError: If a floating point number couldn't be parsed.
 791   """
 792   try:
 793
 794     return float(text)
 795   except ValueError:
 796
 797     if _FLOAT_INFINITY.match(text):
 798       if text[0] == '-':
 799         return float('-inf')
 800       else:
 801         return float('inf')
 802     elif _FLOAT_NAN.match(text):
 803       return float('nan')
 804     else:
 805
 806       try:
 807         return float(text.rstrip('f'))
 808       except ValueError:
 809         raise ValueError('Couldn\'t parse float: %s' % text)
 810
 811
 812 def ParseBool(text):
 813   """Parse a boolean value.
 814
 815   Args:
 816     text: Text to parse.
 817
 818   Returns:
 819     Boolean values parsed
 820
 821   Raises:
 822     ValueError: If text is not a valid boolean.
 823   """
 824   if text in ('true', 't', '1'):
 825     return True
 826   elif text in ('false', 'f', '0'):
 827     return False
 828   else:
 829     raise ValueError('Expected "true" or "false".')
 830
 831
 832 def ParseEnum(field, value):
 833   """Parse an enum value.
 834
 835   The value can be specified by a number (the enum value), or by
 836   a string literal (the enum name).
 837
 838   Args:
 839     field: Enum field descriptor.
 840     value: String value.
 841
 842   Returns:
 843     Enum value number.
 844
 845   Raises:
 846     ValueError: If the enum value could not be parsed.
 847   """
 848   enum_descriptor = field.enum_type
 849   try:
 850     number = int(value, 0)
 851   except ValueError:
 852
 853     enum_value = enum_descriptor.values_by_name.get(value, None)
 854     if enum_value is None:
 855       raise ValueError(
 856           'Enum type "%s" has no value named %s.' % (
 857               enum_descriptor.full_name, value))
 858   else:
 859
 860     enum_value = enum_descriptor.values_by_number.get(number, None)
 861     if enum_value is None:
 862       raise ValueError(
 863           'Enum type "%s" has no value with number %d.' % (
 864               enum_descriptor.full_name, number))
 865   return enum_value.number