1 require 'voodoo/validator'
5 # The parser reads Voodoo[http://inglorion.net/documents/designs/voodoo/]
6 # source code and turns it into Ruby[http://www.ruby-lang.org/] objects.
8 # The public interface to Parser consists of the methods #new and
13 # require 'voodoo/parser'
15 # File.open('example.voo') do |infile|
16 # parser = Voodoo::Parser.new infile
18 # while (element = parser.parse_top_level)
19 # puts element.inspect
23 NUMBER_STARTER = /\d|-/
25 SYMBOL_STARTER = /[[:alpha:]]|_|\\/
27 # Creates a parser using the specified object as input.
28 # The input object must support a method +getc+, which must
29 # return the next character of the input, or +nil+ to indicate that
30 # the end of the input has been reached.
33 @input_name = input.respond_to?(:path) ? input.path : nil
34 @start_line = @line = 1
35 @start_column = @column = 0
40 # Base class for errors reported from the parser.
41 # This provides methods to get the name of the input being processed,
42 # as well as the start_line, start_column, and text of the code
43 # that triggered the error.
44 class Error < Voodoo::Error
45 def initialize message, input_name, start_line, start_column, text
47 @input_name = input_name
48 @start_line = start_line
49 @start_column = start_column
53 attr_reader :input_name, :start_line, :start_column, :text
56 # Class for parse errors.
57 # A ParseError indicates an error in the code being parsed.
58 # For other errors that the parser may raise, see ParserInternalError.
59 class ParseError < Parser::Error
60 def initialize message, input_name, start_line, start_column, text
61 super message, input_name, start_line, start_column, text
65 # Class for parser internal errors.
66 # A ParserInternalError indicates an error in the parser that is not
67 # flagged as an error in the code being parsed. Possible causes
68 # include I/O errors while reading code, as well as bugs in the
71 # The +cause+ attribute indicates the initial cause for the error.
72 # The other attributes of ParserInternalError are inherited from
73 # Parser::Error and indicate the input that was being
74 # processed when the error occurred.
75 class ParserInternalError < Parser::Error
76 def initialize cause, input_name, start_line, start_column, text
77 super cause.message, input_name, start_line, start_column, text
84 # Class wrapping multiple Parser::Errors.
85 class MultipleErrors < Parser::Error
88 super(nil, errors[0].input_name, errors[0].start_line,
89 errors[0].start_column, nil)
96 msg = "Multiple errors:\n\n"
97 @errors.each do |error|
98 msg << error.input_name << ":" if error.input_name
99 msg << "#{error.start_line}: " << error.message
101 msg << "\n\n #{error.text.gsub("\n", "\n ")}"
112 texts = @errors.map {|error| error.text}
113 @text = texts.join "\n"
119 # Parses a top-level element.
120 # Returns an array containing the parts of the element.
122 # Some examples (Voodoo code, Ruby return values in comments):
125 # # [:section, :functions]
128 # # [:call, :foo, :x, 12]
131 # # [:set, :x, :add, :x, 42]
134 # # [:"set-byte", [:"@", :x], 1, 10]
141 # # [:ifeq, [:x, :y], [[:set, :z, :equal]], [[:set, :z, :"not-equal"]]]
150 # # [:function, [:x, :y], [:let, :z, :add, :x, :y], [:return, :z]]
155 # Skip whitespace, comments, and empty lines
156 skip_to_next_top_level
158 validate_top_level do
159 parse_top_level_nonvalidating
164 # Parses statements up to "end X". _kind_ should indicate the type
165 # of body being parsed: :block, :conditional, :function, or :group.
166 # Returns an array of statements.
173 kind_text = 'function definition'
175 kind_text = kind.to_s
177 # Groups are allowed to contain top-level statements.
178 # All other kinds aren't.
179 top_level = kind == :group
184 statement = parse_top_level_nonvalidating
187 parse_error "End of input while inside #{kind_text}", nil
189 elsif statement[0] == :end
192 elsif kind == :conditional && statement[0] == :else
193 # Done parsing body, but there is another one coming up
197 # Should be a normal statement. Validate it, then add it to body
200 Validator.validate_top_level statement
202 Validator.validate_statement statement
205 rescue Validator::ValidationError => e
206 magic_word = statement[0]
208 Validator::TOP_LEVELS.member?(magic_word) &&
209 !Validator::STATEMENTS.member?(magic_word)
210 parse_error "#{magic_word} is only allowed at top-level"
212 parse_error e.message
218 # Got some kind of error. Still try to parse the rest of the body.
223 # Raise error if we had just one.
224 # If we had more than one, raise a MultipleErrors instance.
225 if errors.length == 1
227 elsif errors.length > 1
228 raise MultipleErrors.new errors
235 # Parses an escape sequence.
236 # This method should be called while the lookahead is the escape
237 # character (backslash). It decodes the escape sequence and returns
238 # the result as a string.
245 parse_error "Unexpected end of input in escape sequence", nil
254 # \r is carriage return
258 # \xXX is byte with hex value XX
260 @column = @column + 2
263 result = [code].pack('H2')
265 # \<newline> is line continuation character
267 # Skip indentation of next line
268 while lookahead =~ /\s/
273 # Default to just passing on next character
282 # This method should be called while the lookahead is the first
283 # character of the number.
288 while lookahead =~ /\d/
297 # This method should be called while the lookahead is the opening
309 result << parse_escape
320 # This method should be called while the lookahead is the first
321 # character of the symbol name.
326 # Continues parsing a symbol.
327 # +name+ the part of the symbol that has already been parsed.
328 def parse_symbol1 name
330 while lookahead != :eof
338 # Colon parsed as last character of the symbol name
355 # Consumes the current lookahead character.
356 # The character is appended to @text.
368 # Tests if a symbol is a label (i.e. its name ends with a colon).
370 symbol.to_s[-1] == ?:
373 # Tests if a symbol is a conditional starter.
374 def is_conditional? symbol
375 [:ifeq, :ifge, :ifgt, :ifle, :iflt, :ifne].member? symbol
378 # Returns the current lookahead character,
379 # or +:eof+ when the end of the input has been reached.
382 @lookahead = @input.getc
386 @lookahead = @lookahead.chr
387 @column = @column.succ
393 # Parses a conditional statement.
394 def parse_conditional1 condition, operands
395 # Parse first clause and condition for next clause
396 consequent, next_condition = split_if_clause parse_body(:conditional)
397 if next_condition == nil
400 elsif next_condition == :else
401 # Next clause is else without if
402 alternative = parse_body :conditional
404 # Next clause is else with if
405 alternative = [parse_conditional1(next_condition[0],
408 [condition, operands, consequent, alternative]
411 # Raises a ParseError at the current input position.
412 def parse_error message, text = @text
413 # Create the error object
414 error = ParseError.new(message, @input_name, @start_line,
417 # Set a backtrace to the calling method
418 error.set_backtrace caller
420 # If we are not at a new line, skip until the next line
421 while @column > 1 && lookahead != :eof
429 # Parses a top-level incantation without validating it.
430 def parse_top_level_nonvalidating
431 # Skip whitespace, comments, and empty lines
432 skip_to_next_top_level
438 word = try_parse_token
440 # Word is nil; that means we did not get a token
448 # Exit the loop, but only if the line wasn't empty
449 break unless words.empty?
452 while lookahead != :eof && lookahead != "\n"
456 parse_error "Unexpected character (#{lookahead}) in input"
459 # Word is not nil - we got a token
460 if words.empty? && word.kind_of?(::Symbol) && word.to_s[-1] == ?:
461 # First word is a label
462 words = [:label, word.to_s[0..-2].to_sym]
465 # Add word to statement
471 # We have a line of input. Conditionals and function declarations
472 # must be handled specially, because they consist of more than one
475 # Nothing to parse; return nil
477 elsif words[0] == :function
478 # Function declaration. Parse function body
479 body = parse_body :function
480 [:function, words[1..-1]] + body
481 elsif is_conditional?(words[0])
482 parse_conditional1 words[0], words[1..-1]
483 elsif words[0] == :block || words[0] == :group
484 body = parse_body words[0]
487 # Statement or data declaration; simply return it
492 # Skips whitespace, newlines, and comments before a top-level incantation.
493 def skip_to_next_top_level
504 while lookahead != :eof && lookahead != "\n"
513 # Consumes characters until a character other than space or tab is
516 while lookahead == " " || lookahead == "\t"
521 # Splits a parsed if-clause into two parts:
522 # 1. The list of statements making up the clause proper
523 # 2. The condition for the next clause:
524 # - If there is no next clause, nil
525 # - If the next clause is introduced by else without a condition, :else
526 # - If the next clause is introduced by else iflt x y, [:iflt, [:x, :y]]
527 # - And so on for other if.. instances
528 def split_if_clause clause
530 if last.respond_to?(:[]) && last.length > 0 && last[0] == :else
531 clause = clause[0..-2]
534 [clause, [last[1], last[2..-1]]]
545 # Tries to parse a symbol, number, string, or at-expression. If
546 # such a token starts at the current position, it is parsed and returned.
547 # Else, nil is returned.
553 # Digit; parse number
556 # Check if this is the line continuation escape or some other escape.
557 decoded = parse_escape
559 # Line continuation. Now that we've parsed that, try again.
562 # Some other escape. That means it's a symbol.
563 parse_symbol1 decoded
566 # Letter, underscore, or backslash; parse symbol
567 # Note: SYMBOL_STARTER matches backslashes, too, so
568 # keep it after the case that handles escape sequences.
571 # Double quote; parse string
574 # Parse at-expression.
575 # '@' must be followed by a number or symbol.
583 parse_error "Invalid character (#{lookahead}) " +
584 "in at-expression; expecting number or symbol"
589 # Must be followed by a symbol.
590 if lookahead !~ SYMBOL_STARTER
591 parse_error "'%' must be followed by a symbol"
595 # No valid starter for a token, return nil
600 # Evaluates _block_ and checks that the result is a valid top-level
602 def validate_top_level &block
607 Validator.validate_top_level result
610 rescue Validator::ValidationError => e
611 parse_error e.message
616 # Evaluates block, keeping track of @start_line, @start_column
617 # at the beginning of the block, and @text during the evaluation
619 def with_position &block
621 old_line = @start_line
622 old_column = @start_column
625 # Evaluate block with new values
628 @start_column = @column
635 @start_line = old_line
636 @start_column = old_column
637 @text = old_text + @text
641 # Ensures that any exceptions that escape from block are instances of
643 def wrap_exceptions &block
647 # Already an instance of Parser::Error; pass it through.
650 # Some other error; wrap in ParserInternalError.
651 wrapped = ParserInternalError.new(e, @input_name, @line,
653 wrapped.set_backtrace e.backtrace