1 require 'voodoo/validator'
5 # The parser reads Voodoo[http://inglorion.net/documents/designs/voodoo/]
6 # source code and turns it into Ruby[http://www.ruby-lang.org/] objects.
8 # The public interface to Parser consists of the methods #new and
13 # require 'voodoo/parser'
15 # File.open('example.voo') do |infile|
16 # parser = Voodoo::Parser.new infile
18 # while (element = parser.parse_top_level)
19 # puts element.inspect
23 NUMBER_STARTER = /\d|-/
25 SYMBOL_STARTER = /[[:alpha:]]|\\/
27 # Creates a parser using the specified object as input.
28 # The input object must support a method +getc+, which must
29 # return the next character of the input, or +nil+ to indicate
30 # the end of the input has been reached.
33 @input_name = input.respond_to?(:path) ? input.path : nil
34 @start_line = @line = 1
35 @start_column = @column = 0
40 # Base class for errors reported from the parser.
41 # This provides methods to get the name of the input being processed,
42 # as well as the start_line, start_column, and text of the code
43 # that triggered the error.
44 class Error < StandardError
45 def initialize message, input_name, start_line, start_column, text
47 @input_name = input_name
48 @start_line = start_line
49 @start_column = start_column
53 attr_reader :input_name, :start_line, :start_column, :text
56 # Class for parse errors.
57 # A ParseError indicates an error in the code being parsed.
58 # For other errors that the parser may raise, see ParserInternalError.
59 class ParseError < Parser::Error
60 def initialize message, input_name, start_line, start_column, text
61 super message, input_name, start_line, start_column, text
65 # Class for parser internal errors.
66 # A ParserInternalError indicates an error in the parser that is not
67 # flagged as an error in the code being parsed. Possible causes
68 # include I/O errors while reading code, as well as bugs in the
71 # The +cause+ attribute indicates the initial cause for the error.
72 # The other attributes of ParserInternalError are inherited from
73 # Parser::Error and indicate the input that was being
74 # processed when the error occurred.
75 class ParserInternalError < Parser::Error
76 def initialize cause, input_name, start_line, start_column, text
77 super cause.message, input_name, start_line, start_column, text
84 # Class wrapping multiple Parser::Errors.
85 class MultipleErrors < Parser::Error
88 super(nil, errors[0].input_name, errors[0].start_line,
89 errors[0].start_column, nil)
96 msg = "Multiple errors:\n\n"
97 @errors.each do |error|
98 msg << error.input_name << ":" if error.input_name
99 msg << "#{error.start_line}: " << error.message
101 msg << "\n\n #{error.text.gsub("\n", "\n ")}"
112 texts = @errors.map {|error| error.text}
113 @text = texts.join "\n"
119 # Parses a top-level element.
120 # Returns an array containing the parts of the element.
122 # Some examples (Voodoo code, Ruby return values in comments):
125 # # [:section, :functions]
128 # # [:call, :foo, :x, 12]
131 # # [:set, :x, :add, :x, 42]
134 # # [:"set-byte", [:"@", :x], 1, 10]
141 # # [:ifeq, [:x, :y], [[:set, :z, :equal]], [[:set, :z, :"not-equal"]]]
150 # # [:function, [:x, :y], [:let, :z, :add, :x, :y], [:return, :z]]
155 # Skip whitespace, comments, and empty lines
156 skip_to_next_top_level
158 validate_top_level do
159 parse_top_level_nonvalidating
164 # Parses a body for a function, a conditional, or a block
171 kind_text = 'function definition'
173 kind_text = kind.to_s
179 statement = parse_top_level_nonvalidating
182 parse_error "End of input while inside #{kind_text}", nil
184 elsif statement[0] == :end
187 elsif kind == :conditional && statement[0] == :else
188 # Done parsing body, but there is another one coming up
192 # Should be a normal statement. Validate it, then add it to body
193 if statement[0] == :function
194 parse_error "Function definitions are only allowed at top-level"
197 Validator.validate_statement statement
199 rescue Validator::ValidationError => e
200 parse_error e.message
205 # Got some kind of error. Still try to parse the rest of the body.
210 # Raise error if we had just one.
211 # If we had more than one, raise a MultipleErrors instance.
212 if errors.length == 1
214 elsif errors.length > 1
215 raise MultipleErrors.new errors
222 # Parses an escape sequence.
223 # This method should be called while the lookahead is the escape
224 # character (backslash). It decodes the escape sequence and returns
225 # the result as a string.
232 parse_error "Unexpected end of input in escape sequence", nil
241 # \r is carriage return
245 # \xXX is byte with hex value XX
247 @column = @column + 2
250 result = [code].pack('H2')
252 # \<newline> is line continuation character
254 # Skip indentation of next line
255 while lookahead =~ /\s/
260 # Default to just passing on next character
269 # This method should be called while the lookahead is the first
270 # character of the number.
275 while lookahead =~ /\d/
284 # This method should be called while the lookahead is the opening
296 result << parse_escape
307 # This method should be called while the lookahead is the first
308 # character of the symbol name.
312 while lookahead != :eof
320 # Colon parsed as last character of the symbol name
337 # Consumes the current lookahead character.
338 # The character is appended to @text.
350 # Tests if a symbol is a label
352 symbol.to_s[-1] == ?:
355 # Tests if a symbol is a conditional starter
356 def is_conditional? symbol
357 [:ifeq, :ifge, :ifgt, :ifle, :iflt, :ifne].member? symbol
360 # Returns the current lookahead character,
361 # or +:eof+ when the end of the input has been reached.
364 @lookahead = @input.getc
368 @lookahead = @lookahead.chr
369 @column = @column.succ
375 # Parses a conditional statement
376 def parse_conditional1 condition, operands
377 # Parse first clause and condition for next clause
378 consequent, next_condition = split_if_clause parse_body(:conditional)
379 if next_condition == nil
382 elsif next_condition == :else
383 # Next clause is else without if
384 alternative = parse_body :conditional
386 # Next clause is else with if
387 alternative = [parse_conditional1(next_condition[0],
390 [condition, operands, consequent, alternative]
393 # Raises a ParseError at the current input position
394 def parse_error message, text = @text
395 # Create the error object
396 error = ParseError.new(message, @input_name, @start_line,
399 # Set a backtrace to the calling method
400 error.set_backtrace caller
402 # If we are not at a new line, skip until the next line
403 while @column > 1 && lookahead != :eof
411 # Parses a top-level directive without validating it
412 def parse_top_level_nonvalidating
413 # Skip whitespace, comments, and empty lines
414 skip_to_next_top_level
420 word = try_parse_token
422 # Word is nil; that means we did not get a token
430 # Exit the loop, but only if the line wasn't empty
431 break unless words.empty?
434 while lookahead != :eof && lookahead != "\n"
438 parse_error "Unexpected character (#{lookahead}) in input"
441 # Word is not nil - we got a token
442 if words.empty? && word.kind_of?(::Symbol) && word.to_s[-1] == ?:
443 # First word is a label
444 words = [:label, word.to_s[0..-2].to_sym]
447 # Add word to statement
453 # We have a line of input. Conditionals and function declarations
454 # must be handled specially, because they consist of more than one
457 # Nothing to parse; return nil
459 elsif words[0] == :function
460 # Function declaration. Parse function body
461 body = parse_body :function
462 [:function, words[1..-1]] + body
463 elsif is_conditional?(words[0])
464 parse_conditional1 words[0], words[1..-1]
465 elsif words[0] == :block
466 body = parse_body :block
469 # Statement or data declaration; simply return it
474 # Skips whitespace, newlines, and comments before a top-level directive
475 def skip_to_next_top_level
486 while lookahead != :eof && lookahead != "\n"
495 # Consumes characters until a character other than space or tab is
498 while lookahead == " " || lookahead == "\t"
503 # Splits a parsed if-clause into two parts:
504 # 1. The list of statements making up the clause proper
505 # 2. The condition for the next clause:
506 # - If there is no next clause, nil
507 # - If the next clause is introduced by else without a condition, :else
508 # - If the next clause is introduced by else iflt x y, [:iflt, [:x, :y]]
509 # - And so on for other if.. instances
510 def split_if_clause clause
512 if last.respond_to?(:[]) && last.length > 0 && last[0] == :else
513 clause = clause[0..-2]
516 [clause, [last[1], last[2..-1]]]
527 # Tries to parse a symbol, number, string, at-expression, or a '%'
528 # followed by a symbol. If such a token starts at the current position,
529 # it is parsed and returned. Else, nil is returned.
535 # Digit; parse number
538 # Letter, underscore, or backslash; parse symbol
539 # Note: \w matches digits, too, so keep this case after \d
542 # Double quote; parse string
545 # Parse at-expression.
546 # '@' must be followed by a number or symbol.
554 parse_error "Invalid character (#{lookahead}) " +
555 "in at-expression; expecting number or symbol"
560 # Must be followed by a symbol.
561 if lookahead !~ SYMBOL_STARTER
562 parse_error "'%' must be followed by a symbol"
566 # No valid starter for a token, return nil
571 # Evaluate block and check that the result is a valid top-level
573 def validate_top_level &block
578 Validator.validate_top_level result
581 rescue Validator::ValidationError => e
582 parse_error e.message
587 # Evaluate block, keeping track of @start_line, @start_column
588 # at the beginning of the block, and @text during the evaluation
590 def with_position &block
592 old_line = @start_line
593 old_column = @start_column
596 # Evaluate block with new values
599 @start_column = @column
606 @start_line = old_line
607 @start_column = old_column
608 @text = old_text + @text
612 # Ensures that any exceptions that escape from block are instances of
614 def wrap_exceptions &block
618 # Already an instance of Parser::Error; pass it through.
621 # Some other error; wrap in ParserInternalError.
622 wrapped = ParserInternalError.new(e, @input_name, @line,
624 wrapped.set_backtrace e.backtrace