1 # Copyright (C) 2003-2007, 2009, 2010 Nominum, Inc.
3 # Permission to use, copy, modify, and distribute this software and its
4 # documentation for any purpose with or without fee is hereby granted,
5 # provided that the above copyright notice and this permission notice
6 # appear in all copies.
8 # THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
9 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
11 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
14 # OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 """Tokenize DNS master file format"""
34 _QUOTING_DELIMITERS
= { '"' : True }
class UngetBufferFull(dns.exception.DNSException):
    """Raised when an attempt is made to unget a token or a character
    while the corresponding one-item unget buffer is already full."""
    pass
50 """A DNS master file format token.
52 @ivar ttype: The token type
54 @ivar value: The token value
56 @ivar has_escape: Does the token value contain escapes?
57 @type has_escape: bool
def __init__(self, ttype, value='', has_escape=False):
    """Initialize a token instance.

    @param ttype: The token type
    @type ttype: int
    @param value: The token value
    @type value: string
    @param has_escape: Does the token value contain escapes?
    @type has_escape: bool
    """
    # All three attributes are read by the predicate, comparison and
    # unescape methods, so each one has to be stored on the instance.
    self.ttype = ttype
    self.value = value
    self.has_escape = has_escape
75 return self
.ttype
== EOF
78 return self
.ttype
== EOL
def is_whitespace(self):
    """Tell whether this token's type is WHITESPACE.

    @rtype: bool
    """
    return self.ttype == WHITESPACE
def is_identifier(self):
    """Tell whether this token's type is IDENTIFIER.

    @rtype: bool
    """
    return self.ttype == IDENTIFIER
def is_quoted_string(self):
    """Tell whether this token's type is QUOTED_STRING.

    @rtype: bool
    """
    return self.ttype == QUOTED_STRING
90 return self
.ttype
== COMMENT
def is_delimiter(self):
    """Tell whether this token's type is DELIMITER.

    @rtype: bool
    """
    return self.ttype == DELIMITER
def is_eol_or_eof(self):
    """Tell whether this token ends a line, i.e. its type is EOL or EOF.

    @rtype: bool
    """
    return self.ttype in (EOL, EOF)
def __eq__(self, other):
    """Compare this token with another object for equality.

    Two tokens are equal when both their type and their value match;
    the has_escape flag takes no part in the comparison.

    @param other: the object to compare against
    @rtype: bool
    """
    if not isinstance(other, Token):
        # Something that is not a Token never compares equal to one.
        return False
    return (self.ttype == other.ttype and
            self.value == other.value)
def __ne__(self, other):
    """Compare this token with another object for inequality.

    This is the exact negation of the equality test: tokens differ when
    either their type or their value differs.

    @param other: the object to compare against
    @rtype: bool
    """
    if not isinstance(other, Token):
        # Something that is not a Token always differs from one.
        return True
    return (self.ttype != other.ttype or
            self.value != other.value)
111 return '%d "%s"' % (self
.ttype
, self
.value
)
114 if not self
.has_escape
:
124 raise dns
.exception
.UnexpectedEnd
129 raise dns
.exception
.UnexpectedEnd
133 raise dns
.exception
.UnexpectedEnd
136 if not (c2
.isdigit() and c3
.isdigit()):
137 raise dns
.exception
.SyntaxError
138 c
= chr(int(c
) * 100 + int(c2
) * 10 + int(c3
))
140 return Token(self
.ttype
, unescaped
)
142 # compatibility for old-style tuple tokens
148 return iter((self
.ttype
, self
.value
))
150 def __getitem__(self
, i
):
158 class Tokenizer(object):
159 """A DNS master file format tokenizer.
161 A token is a (type, value) tuple, where I{type} is an int, and
162 I{value} is a string. The valid types are EOF, EOL, WHITESPACE,
163 IDENTIFIER, QUOTED_STRING, COMMENT, and DELIMITER.
165 @ivar file: The file to tokenize
167 @ivar ungotten_char: The most recently ungotten character, or None.
168 @type ungotten_char: string
169 @ivar ungotten_token: The most recently ungotten token, or None.
170 @type ungotten_token: (int, string) token tuple
171 @ivar multiline: The current multiline level. This value is increased
172 by one every time a '(' delimiter is read, and decreased by one every time
173 a ')' delimiter is read.
175 @ivar quoting: This variable is true if the tokenizer is currently
176 reading a quoted string.
178 @ivar eof: This variable is true if the tokenizer has encountered EOF.
180 @ivar delimiters: The current delimiter dictionary.
181 @type delimiters: dict
182 @ivar line_number: The current line number
183 @type line_number: int
184 @ivar filename: A filename that will be returned by the L{where} method.
185 @type filename: string
188 def __init__(self
, f
=sys
.stdin
, filename
=None):
189 """Initialize a tokenizer instance.
191 @param f: The file to tokenize. The default is sys.stdin.
192 This parameter may also be a string, in which case the tokenizer
193 will take its input from the contents of the string.
194 @type f: file or string
195 @param filename: the name of the filename that the L{where} method
197 @type filename: string
200 if isinstance(f
, str):
201 f
= cStringIO
.StringIO(f
)
203 filename
= '<string>'
211 self
.ungotten_char
= None
212 self
.ungotten_token
= None
216 self
.delimiters
= _DELIMITERS
218 self
.filename
= filename
221 """Read a character from input.
225 if self
.ungotten_char
is None:
229 c
= self
.file.read(1)
233 self
.line_number
+= 1
235 c
= self
.ungotten_char
236 self
.ungotten_char
= None
240 """Return the current location in the input.
242 @rtype: (string, int) tuple. The first item is the filename of
243 the input, the second is the current line number.
246 return (self
.filename
, self
.line_number
)
248 def _unget_char(self
, c
):
249 """Unget a character.
251 The unget buffer for characters is only one character large; it is
252 an error to try to unget a character when the unget buffer is not
255 @param c: the character to unget
257 @raises UngetBufferFull: there is already an ungotten char
260 if not self
.ungotten_char
is None:
261 raise UngetBufferFull
262 self
.ungotten_char
= c
264 def skip_whitespace(self
):
265 """Consume input until a non-whitespace character is encountered.
267 The non-whitespace character is then ungotten, and the number of
268 whitespace characters consumed is returned.
270 If the tokenizer is in multiline mode, then newlines are whitespace.
278 if c
!= ' ' and c
!= '\t':
279 if (c
!= '\n') or not self
.multiline
:
284 def get(self
, want_leading
= False, want_comment
= False):
285 """Get the next token.
287 @param want_leading: If True, return a WHITESPACE token if the
288 first character read is whitespace. The default is False.
289 @type want_leading: bool
290 @param want_comment: If True, return a COMMENT token if the
291 first token read is a comment. The default is False.
292 @type want_comment: bool
294 @raises dns.exception.UnexpectedEnd: input ended prematurely
295 @raises dns.exception.SyntaxError: input was badly formed
298 if not self
.ungotten_token
is None:
299 token
= self
.ungotten_token
300 self
.ungotten_token
= None
301 if token
.is_whitespace():
304 elif token
.is_comment():
309 skipped
= self
.skip_whitespace()
310 if want_leading
and skipped
> 0:
311 return Token(WHITESPACE
, ' ')
317 if c
== '' or c
in self
.delimiters
:
318 if c
== '' and self
.quoting
:
319 raise dns
.exception
.UnexpectedEnd
320 if token
== '' and ttype
!= QUOTED_STRING
:
323 self
.skip_whitespace()
326 if not self
.multiline
> 0:
327 raise dns
.exception
.SyntaxError
329 self
.skip_whitespace()
334 self
.delimiters
= _QUOTING_DELIMITERS
335 ttype
= QUOTED_STRING
339 self
.delimiters
= _DELIMITERS
340 self
.skip_whitespace()
343 return Token(EOL
, '\n')
347 if c
== '\n' or c
== '':
352 return Token(COMMENT
, token
)
355 raise dns
.exception
.SyntaxError('unbalanced parentheses')
358 self
.skip_whitespace()
362 return Token(EOL
, '\n')
364 # This code exists in case we ever want a
365 # delimiter to be returned. It never produces
376 raise dns
.exception
.UnexpectedEnd
378 c2
= self
._get
_char
()
380 raise dns
.exception
.UnexpectedEnd
381 c3
= self
._get
_char
()
383 raise dns
.exception
.UnexpectedEnd
384 if not (c2
.isdigit() and c3
.isdigit()):
385 raise dns
.exception
.SyntaxError
386 c
= chr(int(c
) * 100 + int(c2
) * 10 + int(c3
))
388 raise dns
.exception
.SyntaxError('newline in quoted string')
391 # It's an escape. Put it and the next character into
392 # the token; it will be checked later for goodness.
397 if c
== '' or c
== '\n':
398 raise dns
.exception
.UnexpectedEnd
400 if token
== '' and ttype
!= QUOTED_STRING
:
402 raise dns
.exception
.SyntaxError('unbalanced parentheses')
404 return Token(ttype
, token
, has_escape
)
406 def unget(self
, token
):
409 The unget buffer for tokens is only one token large; it is
410 an error to try to unget a token when the unget buffer is not
413 @param token: the token to unget
414 @type token: Token object
415 @raises UngetBufferFull: there is already an ungotten token
418 if not self
.ungotten_token
is None:
419 raise UngetBufferFull
420 self
.ungotten_token
= token
423 """Return the next item in an iteration.
424 @rtype: (int, string)
438 """Read the next token and interpret it as an integer.
440 @raises dns.exception.SyntaxError:
444 token
= self
.get().unescape()
445 if not token
.is_identifier():
446 raise dns
.exception
.SyntaxError('expecting an identifier')
447 if not token
.value
.isdigit():
448 raise dns
.exception
.SyntaxError('expecting an integer')
449 return int(token
.value
)
452 """Read the next token and interpret it as an 8-bit unsigned
455 @raises dns.exception.SyntaxError:
459 value
= self
.get_int()
460 if value
< 0 or value
> 255:
461 raise dns
.exception
.SyntaxError('%d is not an unsigned 8-bit integer' % value
)
464 def get_uint16(self
):
465 """Read the next token and interpret it as a 16-bit unsigned
468 @raises dns.exception.SyntaxError:
472 value
= self
.get_int()
473 if value
< 0 or value
> 65535:
474 raise dns
.exception
.SyntaxError('%d is not an unsigned 16-bit integer' % value
)
477 def get_uint32(self
):
478 """Read the next token and interpret it as a 32-bit unsigned
481 @raises dns.exception.SyntaxError:
485 token
= self
.get().unescape()
486 if not token
.is_identifier():
487 raise dns
.exception
.SyntaxError('expecting an identifier')
488 if not token
.value
.isdigit():
489 raise dns
.exception
.SyntaxError('expecting an integer')
490 value
= long(token
.value
)
491 if value
< 0 or value
> 4294967296L:
492 raise dns
.exception
.SyntaxError('%d is not an unsigned 32-bit integer' % value
)
495 def get_string(self
, origin
=None):
496 """Read the next token and interpret it as a string.
498 @raises dns.exception.SyntaxError:
502 token
= self
.get().unescape()
503 if not (token
.is_identifier() or token
.is_quoted_string()):
504 raise dns
.exception
.SyntaxError('expecting a string')
507 def get_identifier(self
, origin
=None):
508 """Read the next token and raise an exception if it is not an identifier.
510 @raises dns.exception.SyntaxError:
514 token
= self
.get().unescape()
515 if not token
.is_identifier():
516 raise dns
.exception
.SyntaxError('expecting an identifier')
def get_name(self, origin=None):
    """Read the next token and interpret it as a DNS name.

    @param origin: passed through to dns.name.from_text() as its second
    argument
    @raises dns.exception.SyntaxError: the token is not an identifier
    @rtype: dns.name.Name object"""
    # NOTE(review): the token is passed on without unescape(); presumably
    # dns.name.from_text() interprets escape sequences itself -- confirm.
    token = self.get()
    if not token.is_identifier():
        raise dns.exception.SyntaxError('expecting an identifier')
    return dns.name.from_text(token.value, origin)
531 """Read the next token and raise an exception if it isn't EOL or
534 @raises dns.exception.SyntaxError:
539 if not token
.is_eol_or_eof():
540 raise dns
.exception
.SyntaxError('expected EOL or EOF, got %d "%s"' % (token
.ttype
, token
.value
))
544 token
= self
.get().unescape()
545 if not token
.is_identifier():
546 raise dns
.exception
.SyntaxError('expecting an identifier')
547 return dns
.ttl
.from_text(token
.value
)