1 require 'voodoo/validator'
5 # The parser reads Voodoo[http://inglorion.net/documents/designs/voodoo/]
6 # source code and turns it into Ruby[http://www.ruby-lang.org/] objects.
8 # The public interface to Parser consists of the methods #new and
13 # require 'voodoo/parser'
15 # File.open('example.voo') do |infile|
16 # parser = Voodoo::Parser.new infile
18 # while (element = parser.parse_top_level)
19 # puts element.inspect
23 # Creates a parser using the specified object as input.
24 # The input object must support a method +getc+, which must
25 # return the next character of the input, or +nil+ to indicate
26 # the end of the input has been reached.
29 @input_name = input.respond_to?(:path) ? input.path : nil
30 @start_line = @line = 1
31 @start_column = @column = 0
# Error describing a problem found while parsing.
# It is constructed from a message, the name of the input, the line and
# column at which the offending text started, and the text itself; readers
# with the same names are declared below.
36 class ParseError < StandardError
# message:: description of the problem
# input_name:: name of the input being parsed
# start_line:: line recorded as the start of the offending text
# start_column:: column recorded as the start of the offending text
# text:: the input text the error refers to
37 def initialize message, input_name, start_line, start_column, text
39 @input_name = input_name
40 @start_line = start_line
41 @start_column = start_column
# Read-only accessors for the values describing the error
45 attr_reader :message, :input_name, :start_line, :start_column, :text
48 # Parses a top-level element.
49 # Returns an array containing the parts of the element.
50 # Each element of the array is a Symbol, a String, or an
53 # For a label, returns:
54 # [:label, label_name]
56 # For a function definition, returns:
57 # [:function, [formala, formalb, ...], statementa, statementb, ...]
59 # For a conditional, returns:
60 # [condition, expression, [truea, trueb, ...], [falsea, falseb, ...]]
63 # Skip whitespace, comments, and empty lines
64 skip_to_next_top_level
67 parse_top_level_nonvalidating
71 # Parses a body for a function or a conditional
77 kind_text = 'function definition'
85 statement = parse_top_level_nonvalidating
88 parse_error "End of input while inside #{kind_text}", nil
89 elsif statement[0] == :end
92 elsif kind == :conditional && statement[0] == :else
93 # Done parsing body, but there is another one coming up
97 # Should be a normal statement. Validate it, then add it to body
98 if statement[0] == :function
99 parse_error "Function definitions are only allowed at top-level"
102 Validator.validate_statement statement
104 rescue Validator::ValidationError => e
105 parse_error e.message
110 # Got some kind of error. Still try to parse the rest of the body.
111 # Save the error if it was the first one.
125 # Parses an escape sequence.
126 # This method should be called while the lookahead is the escape
127 # character (backslash). It decodes the escape sequence and returns
128 # the result as a string.
134 parse_error "Unexpected end of input in escape sequence", nil
143 # \r is carriage return
147 # \xXX is byte with hex value XX
149 @column = @column + 2
152 result = [code].pack('H2')
154 # \<newline> is line continuation character
156 # Skip indentation of next line
157 while lookahead =~ /\s/
162 # Default to just passing on next character
170 # This method should be called while the lookahead is the first
171 # character of the number.
175 while lookahead =~ /\d/
183 # This method should be called while the lookahead is the opening
194 result << parse_escape
204 # This method should be called while the lookahead is the first
205 # character of the symbol name.
216 # Colon parsed as last character of the symbol name
232 # Consumes the current lookahead character.
233 # The character is appended to @text.
240 @lookahead = @input.getc
241 @lookahead = :eof if @lookahead == nil
242 @column = @column.succ unless @lookahead == :eof
247 # Tests if a symbol is a label
249 symbol.to_s[-1] == ?:
252 # Tests if a symbol is a conditional starter
# Returns true when +symbol+ is one of :ifeq, :ifge, :ifgt, :ifle, :iflt
# or :ifne, and false otherwise.
253 def is_conditional? symbol
254 [:ifeq, :ifge, :ifgt, :ifle, :iflt, :ifne].member? symbol
257 # Returns the current lookahead character,
258 # or +:eof+ when the end of the input has been reached.
261 @lookahead = @input.getc
262 @column = @column.succ
272 # Parses a conditional statement
# condition:: the conditional starter that opened the statement (e.g. :ifeq)
# operands:: the remaining words of the line that opened the statement
# Returns [condition, operands, consequent, alternative].
273 def parse_conditional1 condition, operands
274 # Parse first clause and condition for next clause
275 consequent, next_condition = split_if_clause parse_body(:conditional)
276 if next_condition == nil
279 elsif next_condition == :else
280 # Next clause is else without if
281 alternative = parse_body :conditional
283 # Next clause is else with if
# The nested conditional is parsed by a recursive call; the array literal
# opened here makes the alternative a list containing that statement.
284 alternative = [parse_conditional1(next_condition[0],
# Same layout as described for conditionals returned by the parser
287 [condition, operands, consequent, alternative]
290 # Raises a ParseError at the current input position
# message:: description of the problem
# text:: input text to attach to the error; defaults to @text
291 def parse_error message, text = @text
292 # Create the error object
293 error = ParseError.new(message, @input_name, @start_line,
296 # Set a backtrace to the calling method
297 error.set_backtrace caller
299 # If we are not at a new line, skip until the next line
# The loop also stops once the lookahead is :eof (end of input).
300 while @column != 1 && lookahead != :eof
308 # Parses a top-level directive without validating it
# The words of one input line are collected, then the result depends on
# the first word:
# - :function -> [:function, formals] followed by the parsed body
# - a conditional starter (see is_conditional?) -> result of parse_conditional1
# - :block -> the body is parsed with parse_body :block
# - nothing to parse -> nil
# - anything else -> the statement or data declaration itself
# A first word ending in a colon is turned into [:label, name].
309 def parse_top_level_nonvalidating
310 # Skip whitespace, comments, and empty lines
311 skip_to_next_top_level
316 word = try_parse_token
318 # Word is nil; that means we did not get a token
326 # Exit the loop, but only if the line wasn't empty
327 break unless words.empty?
330 while lookahead != :eof && lookahead != "\n"
335 parse_error "Unexpected character (#{lookahead}) in input"
339 if words.empty? && word.kind_of?(::Symbol) && word.to_s[-1] == ?:
340 # First word is a label
# The trailing colon is dropped to obtain the label name
341 words = [:label, word.to_s[0..-2].to_sym]
344 # Add word to statement
349 # We have a line of input. Conditionals and function declarations
350 # must be handled specially, because they consist of more than one
353 # Nothing to parse; return nil
355 elsif words[0] == :function
356 # Function declaration. Parse function body
357 body = parse_body :function
# The words after :function become the list of formals; the body
# statements are appended after that list
358 [:function, words[1..-1]] + body
359 elsif is_conditional?(words[0])
# First word is the condition, the remaining words are its operands
360 parse_conditional1 words[0], words[1..-1]
361 elsif words[0] == :block
362 body = parse_body :block
365 # Statement or data declaration; simply return it
370 # Skips whitespace, newlines, and comments before a top-level directive
371 def skip_to_next_top_level
# Runs until the lookahead is a newline or the end of input (:eof)
382 while lookahead != :eof && lookahead != "\n"
391 # Consumes characters until a character other than space or tab is
394 while lookahead == " " || lookahead == "\t"
399 # Splits a parsed if-clause into two parts:
400 # 1. The list of statements making up the clause proper
401 # 2. The condition for the next clause:
402 # - If there is no next clause, nil
403 # - If the next clause is introduced by else without a condition, :else
404 # - If the next clause is introduced by else iflt x y, [:iflt, [:x, :y]]
405 # - And so on for other if.. instances
# Returns a two-element array: [statements, next_condition].
406 def split_if_clause clause
# NOTE(review): +last+ is assigned on a line not visible here; presumably
# it is the final statement of +clause+ -- confirm.
# The check only matches a non-empty indexable value whose first element is :else
408 if last.respond_to?(:[]) && last.length > 0 && last[0] == :else
# Remove the trailing else statement from the clause proper
409 clause = clause[0..-2]
# else followed by a condition: the word after :else is the condition,
# the words after that are its operands
412 [clause, [last[1], last[2..-1]]]
423 # Tries to parse a symbol, number, string, or at-expression. If
424 # such a token starts at the current position, it is parsed and returned.
425 # Else, nil is returned.
429 # Digit; parse number
432 # Letter, underscore, or backslash; parse symbol
433 # Note: \w matches digits, too, so keep this case after \d
436 # Double quote; parse string
439 # Parse at-expression.
440 # '@' must be followed by a number or symbol.
448 parse_error "Invalid character (#{lookahead}) " +
449 "in at-expression; expecting number or symbol"
453 # No valid starter for a token, return nil
458 # Evaluate block and check that the result is a valid top-level
#
# The check is delegated to Validator.validate_top_level. A
# Validator::ValidationError is rescued and reported through parse_error
# using the validator's message.
460 def validate_top_level &block
465 Validator.validate_top_level result
468 rescue Validator::ValidationError => e
469 parse_error e.message
474 # Evaluate block, keeping track of @start_line, @start_column
475 # at the beginning of the block, and @text during the evaluation
#
# The start position in effect before the call is saved first and put
# back afterwards.
477 def with_position &block
# Remember the enclosing start position so it can be restored below
479 old_line = @start_line
480 old_column = @start_column
483 # Evaluate block with new values
# The block's start column is the current column
486 @start_column = @column
# Restore the enclosing start position
491 @start_line = old_line
492 @start_column = old_column
# Put the saved text in front of the text gathered during the block.
# NOTE(review): old_text is assigned on a line not visible here;
# presumably it holds the value of @text from before the block -- confirm.
493 @text = old_text + @text