1 require 'voodoo/validator'
5 # The parser reads Voodoo[http://inglorion.net/documents/designs/voodoo/]
6 # source code and turns it into Ruby[http://www.ruby-lang.org/] objects.
8 # The public interface to Parser consists of the methods #new and
13 # require 'voodoo/parser'
15 # File.open('example.voo') do |infile|
16 # parser = Voodoo::Parser.new infile
18 # while (element = parser.parse_top_level)
19 # puts element.inspect
23 NUMBER_STARTER = /\d|-/
25 SYMBOL_STARTER = /[[:alpha:]]|\\/
27 # Creates a parser using the specified object as input.
28 # The input object must support a method +getc+, which must
29 # return the next character of the input, or +nil+ to indicate
30 # the end of the input has been reached.
33 @input_name = input.respond_to?(:path) ? input.path : nil
34 @start_line = @line = 1
35 @start_column = @column = 0
40 # Base class for errors reported from the parser.
41 # This provides methods to get the name of the input being processed,
42 # as well as the start_line, start_column, and text of the code
43 # that triggered the error.
44 class Error < StandardError
45 def initialize message, input_name, start_line, start_column, text
47 @input_name = input_name
48 @start_line = start_line
49 @start_column = start_column
53 attr_reader :input_name, :start_line, :start_column, :text
56 # Class for parse errors.
57 # A ParseError indicates an error in the code being parsed.
58 # For other errors that the parser may raise, see ParserInternalError.
59 class ParseError < Parser::Error
60 def initialize message, input_name, start_line, start_column, text
61 super message, input_name, start_line, start_column, text
65 # Class for parser internal errors.
66 # A ParserInternalError indicates an error in the parser that is not
67 # flagged as an error in the code being parsed. Possible causes
68 # include I/O errors while reading code, as well as bugs in the
71 # The +cause+ attribute indicates the initial cause for the error.
72 # The other attributes of ParserInternalError are inherited from
73 # Parser::Error and indicate the input that was being
74 # processed when the error occurred.
75 class ParserInternalError < Parser::Error
76 def initialize cause, input_name, start_line, start_column, text
77 super cause.message, input_name, start_line, start_column, text
84 # Class wrapping multiple Parser::Errors.
85 class MultipleErrors < Parser::Error
88 super(nil, errors[0].input_name, errors[0].start_line,
89 errors[0].start_column, nil)
96 msg = "Multiple errors:\n\n"
97 @errors.each do |error|
98 msg << error.input_name << ":" if error.input_name
99 msg << "#{error.start_line}: " << error.message
101 msg << "\n\n #{error.text.gsub("\n", "\n ")}"
112 texts = @errors.map {|error| error.text}
113 @text = texts.join "\n"
119 # Parses a top-level element.
120 # Returns an array containing the parts of the element.
122 # Some examples (Voodoo code, Ruby return values in comments):
125 # # [:section, :functions]
128 # # [:call, :foo, :x, 12]
131 # # [:set, :x, :add, :x, 42]
134 # # [:"set-byte", [:"@", :x], 1, 10]
141 # # [:ifeq, [:x, :y], [[:set, :z, :equal]], [[:set, :z, :"not-equal"]]]
150 # # [:function, [:x, :y], [:let, :z, :add, :x, :y], [:return, :z]]
155 # Skip whitespace, comments, and empty lines
156 skip_to_next_top_level
158 validate_top_level do
159 parse_top_level_nonvalidating
164 # Parses statements up to "end X". _kind_ should indicate the type
165 # of body being parsed: :block, :conditional, :function, or :group.
166 # Returns an array of statements.
173 kind_text = 'function definition'
175 kind_text = kind.to_s
177 # Groups are allowed to contain top-level statements.
178 # All other kinds aren't.
179 top_level = kind == :group
184 statement = parse_top_level_nonvalidating
187 parse_error "End of input while inside #{kind_text}", nil
189 elsif statement[0] == :end
192 elsif kind == :conditional && statement[0] == :else
193 # Done parsing body, but there is another one coming up
197 # Should be a normal statement. Validate it, then add it to body
200 Validator.validate_top_level statement
202 Validator.validate_statement statement
205 rescue Validator::ValidationError => e
206 magic_word = statement[0]
208 Validator::TOP_LEVELS.member?(magic_word) &&
209 !Validator::STATEMENTS.member?(magic_word)
210 parse_error "#{magic_word} is only allowed at top-level"
212 parse_error e.message
218 # Got some kind of error. Still try to parse the rest of the body.
223 # Raise error if we had just one.
224 # If we had more than one, raise a MultipleErrors instance.
225 if errors.length == 1
227 elsif errors.length > 1
228 raise MultipleErrors.new errors
235 # Parses an escape sequence.
236 # This method should be called while the lookahead is the escape
237 # character (backslash). It decodes the escape sequence and returns
238 # the result as a string.
245 parse_error "Unexpected end of input in escape sequence", nil
254 # \r is carriage return
258 # \xXX is byte with hex value XX
260 @column = @column + 2
263 result = [code].pack('H2')
265 # \<newline> is line continuation character
267 # Skip indentation of next line
268 while lookahead =~ /\s/
273 # Default to just passing on next character
282 # This method should be called while the lookahead is the first
283 # character of the number.
288 while lookahead =~ /\d/
297 # This method should be called while the lookahead is the opening
309 result << parse_escape
320 # This method should be called while the lookahead is the first
321 # character of the symbol name.
325 while lookahead != :eof
333 # Colon parsed as last character of the symbol name
350 # Consumes the current lookahead character.
351 # The character is appended to @text.
363 # Tests if a symbol is a label
365 symbol.to_s[-1] == ?:
368 # Tests if a symbol is a conditional starter.
369 def is_conditional? symbol
370 [:ifeq, :ifge, :ifgt, :ifle, :iflt, :ifne].member? symbol
373 # Returns the current lookahead character,
374 # or +:eof+ when the end of the input has been reached.
377 @lookahead = @input.getc
381 @lookahead = @lookahead.chr
382 @column = @column.succ
388 # Parses a conditional statement.
389 def parse_conditional1 condition, operands
390 # Parse first clause and condition for next clause
391 consequent, next_condition = split_if_clause parse_body(:conditional)
392 if next_condition == nil
395 elsif next_condition == :else
396 # Next clause is else without if
397 alternative = parse_body :conditional
399 # Next clause is else with if
400 alternative = [parse_conditional1(next_condition[0],
403 [condition, operands, consequent, alternative]
406 # Raises a ParseError at the current input position.
407 def parse_error message, text = @text
408 # Create the error object
409 error = ParseError.new(message, @input_name, @start_line,
412 # Set a backtrace to the calling method
413 error.set_backtrace caller
415 # If we are not at a new line, skip until the next line
416 while @column > 1 && lookahead != :eof
424 # Parses a top-level incantation without validating it.
425 def parse_top_level_nonvalidating
426 # Skip whitespace, comments, and empty lines
427 skip_to_next_top_level
433 word = try_parse_token
435 # Word is nil; that means we did not get a token
443 # Exit the loop, but only if the line wasn't empty
444 break unless words.empty?
447 while lookahead != :eof && lookahead != "\n"
451 parse_error "Unexpected character (#{lookahead}) in input"
454 # Word is not nil - we got a token
455 if words.empty? && word.kind_of?(::Symbol) && word.to_s[-1] == ?:
456 # First word is a label
457 words = [:label, word.to_s[0..-2].to_sym]
460 # Add word to statement
466 # We have a line of input. Conditionals and function declarations
467 # must be handled specially, because they consist of more than one
470 # Nothing to parse; return nil
472 elsif words[0] == :function
473 # Function declaration. Parse function body
474 body = parse_body :function
475 [:function, words[1..-1]] + body
476 elsif is_conditional?(words[0])
477 parse_conditional1 words[0], words[1..-1]
478 elsif words[0] == :block || words[0] == :group
479 body = parse_body words[0]
482 # Statement or data declaration; simply return it
487 # Skips whitespace, newlines, and comments before a top-level incantation.
488 def skip_to_next_top_level
499 while lookahead != :eof && lookahead != "\n"
508 # Consumes characters until a character other than space or tab is
511 while lookahead == " " || lookahead == "\t"
516 # Splits a parsed if-clause into two parts:
517 # 1. The list of statements making up the clause proper
518 # 2. The condition for the next clause:
519 # - If there is no next clause, nil
520 # - If the next clause is introduced by else without a condition, :else
521 # - If the next clause is introduced by else iflt x y, [:iflt, [:x, :y]]
522 # - And so on for other if.. instances
523 def split_if_clause clause
525 if last.respond_to?(:[]) && last.length > 0 && last[0] == :else
526 clause = clause[0..-2]
529 [clause, [last[1], last[2..-1]]]
540 # Tries to parse a symbol, number, string, or at-expression. If
541 # such a token starts at the current position, it is parsed and returned.
542 # Else, nil is returned.
548 # Digit; parse number
551 # Letter, underscore, or backslash; parse symbol
552 # Note: \w matches digits, too, so keep this case after \d
555 # Double quote; parse string
558 # Parse at-expression.
559 # '@' must be followed by a number or symbol.
567 parse_error "Invalid character (#{lookahead}) " +
568 "in at-expression; expecting number or symbol"
573 # Must be followed by a symbol.
574 if lookahead !~ SYMBOL_STARTER
575 parse_error "'%' must be followed by a symbol"
579 # No valid starter for a token, return nil
584 # Evaluates _block_ and checks that the result is a valid top-level
586 def validate_top_level &block
591 Validator.validate_top_level result
594 rescue Validator::ValidationError => e
595 parse_error e.message
600 # Evaluates block, keeping track of @start_line, @start_column
601 # at the beginning of the block, and @text during the evaluation
603 def with_position &block
605 old_line = @start_line
606 old_column = @start_column
609 # Evaluate block with new values
612 @start_column = @column
619 @start_line = old_line
620 @start_column = old_column
621 @text = old_text + @text
625 # Ensures that any exceptions that escape from block are instances of
627 def wrap_exceptions &block
631 # Already an instance of Parser::Error; pass it through.
634 # Some other error; wrap in ParserInternalError.
635 wrapped = ParserInternalError.new(e, @input_name, @line,
637 wrapped.set_backtrace e.backtrace