From dc636a025413969046f71ef21e69136396329487 Mon Sep 17 00:00:00 2001 From: inglorion Date: Sat, 3 Oct 2009 10:56:24 +0200 Subject: [PATCH] Removed common NasmGenerator code from I386NasmGenerator. --- lib/ruby/voodoo/generators/amd64_nasm_generator.rb | 3 +- lib/ruby/voodoo/generators/i386_nasm_generator.rb | 891 ++++----------------- 2 files changed, 163 insertions(+), 731 deletions(-) rewrite lib/ruby/voodoo/generators/i386_nasm_generator.rb (83%) diff --git a/lib/ruby/voodoo/generators/amd64_nasm_generator.rb b/lib/ruby/voodoo/generators/amd64_nasm_generator.rb index ca78de9..6bfdc44 100644 --- a/lib/ruby/voodoo/generators/amd64_nasm_generator.rb +++ b/lib/ruby/voodoo/generators/amd64_nasm_generator.rb @@ -118,7 +118,6 @@ module Voodoo end end - # Emit function epilogue. # Emit function prologue. def emit_function_prologue formals = [] emit "push rbp\nmov rbp, rsp\n" @@ -154,7 +153,7 @@ module Voodoo (i >= 0 || func != args[x[1]]) # Save value newsym = @environment.gensym - let newsym, symbol + let newsym, arg # Change reference if i >= 0 args[i] = newsym diff --git a/lib/ruby/voodoo/generators/i386_nasm_generator.rb b/lib/ruby/voodoo/generators/i386_nasm_generator.rb dissimilarity index 83% index 277f3ec..b625cbd 100644 --- a/lib/ruby/voodoo/generators/i386_nasm_generator.rb +++ b/lib/ruby/voodoo/generators/i386_nasm_generator.rb @@ -1,729 +1,162 @@ -require 'voodoo/generators/common_code_generator' - -module Voodoo - # Call frames: - # - # argn - # : - # arg1 <-- ebp + 8 - # oldeip <-- ebp + 4 - # oldebp <-- ebp - # local1 <-- ebp - 4 - # local2 <-- ebp - 8 - # : - # localn <-- esp - - ## Class to generate i386 assembly code for the Netwide Assembler (NASM) - class I386NasmGenerator < NasmGenerator - WORDSIZE = 4 - - def initialize params - # Number of bytes in a word - @WORDSIZE = 4 - # Word name in NASM lingo - @WORD_NAME = 'dword' - # Default alignment for code - @CODE_ALIGNMENT = 0 - # Default alignment for data - @DATA_ALIGNMENT = @WORDSIZE - # Default 
alignment for functions - @FUNCTION_ALIGNMENT = 16 - # Register used for return values - @RETURN_REG = 'eax' - # Register used as scratch register - @SCRATCH_REG = 'ebx' - # Accumulator index - @AX = 'eax' - # Base index - @BX = 'ebx' - # Count index - @CX = 'ecx' - # Data index - @DX = 'edx' - super params - end - - def output_file_name input_name - input_name.sub(/\.voo$/, '') + '.asm' - end - - # Emit code for a binary operation - def binop op, target, x, y - if target == x - binop2 op, target, y - elsif symmetric_operation?(op) && y == target - binop2 op, target, x - else - # Cases that are handled specially - return div(target, x, y) if op == :div - return mod(target, x, y) if op == :mod - return mul(target, x, y) if op == :mul - - target_ref = load_value target, "eax" - x_ref = load_value x, "edx" - y_ref = load_value y, "ebx" - - if memory_operand?(target_ref) - if memory_operand?(x_ref) || memory_operand?(y_ref) - emit "mov ecx, #{x_ref}\n" - emit "#{op} ecx, #{y_ref}\n" - emit "mov #{target_ref}, ecx\n" - else - emit "mov dword #{target_ref}, #{x_ref}\n" - emit "#{op} dword #{target_ref}, #{y_ref}\n" - end - else - raise "Can't happen: target_ref is #{target_ref.inspect}" - end - end - end - - # Emit code for a binary operation where the first operand - # is also the target - def binop2 op, target, y - # Cases that are handled specially - return div2(target, y) if op == :div - return mod2(target, y) if op == :mod - return mul2(target, y) if op == :mul - - target_ref = load_value target, "ebx" - y_ref = load_value y, "edx" - if memory_operand?(target_ref) && memory_operand?(y_ref) - emit "mov eax, #{y_ref}\n" - emit "#{op} dword #{target_ref}, eax\n" - else - emit "#{op} dword #{target_ref}, #{y_ref}\n" - end - end - - # tests if a value is an at-reference - def at_reference? value - value.to_s[0] == ?@ - end - - # tests if op is a binary operation - def binop? 
op - [:div, :mod, :sub].member?(op) || symmetric_operation?(op) - end - - def byte value - emit "db #{value}\n" - end - - def call func, *args - emit "; call #{func} #{args.join ' '}\n" - revargs = args.reverse - revargs.each { |arg| push arg } - use_value "call", func - if args.length > 0 - emit "add esp, #{WORDSIZE * args.length}\n" - end - end - - # Emit a comment - def comment text - emit ";#{text}\n" - end - - # Divide x by y and store the result in target - def div target, x, y - eval_div x, y - target_ref = load_value target, "ebx" - emit "mov #{target_ref}, eax\n" - end - - # Divide target by x and store the result in target - def div2 target, x - eval_div target, x - target_ref = load_value target, "ebx" - emit "mov #{target_ref}, eax\n" - end - - # end a function body - def end_function - emit "; end function\n\n" - if @environment == @top_level - raise "Cannot end function when not in a function" - else - @environment = @top_level - end - end - - def end_if - label = @if_labels.pop - emit "#{label}:\n" - end - - # evaluates an expr and stores the result in eax - def eval_expr words - target_ref = "eax" - if words.length == 1 - if words[0] == '0' - emit "xor #{target_ref}, #{target_ref}\n" - else - value_ref = load_value words[0], "ebx" - emit "mov #{target_ref}, #{value_ref}\n" - end - else - op = words[0] - case op - when :call - call *words[1..-1] - when :div - eval_div words[1], words[2] - when :'get-byte' - emit "xor dword #{target_ref}, #{target_ref}\n" - address_ref = load_address words[1], words[2], 1 - emit "mov byte al, #{address_ref}\n" - when :'get-word' - value_ref = load_address words[1], words[2], WORDSIZE - emit "mov dword #{target_ref}, #{value_ref}\n" - when :mod - eval_div words[1], words[2] - emit "mov eax, edx\n" - when :mul - eval_mul target_ref, words[1], words[2] - when :not - x_ref = load_value words[1], "edx" - emit "mov #{target_ref}, #{x_ref}\n" - emit "not #{target_ref}\n" - else - if binop?(op) - x_ref = load_value words[1], 
"edx" - y_ref = load_value words[2], "ebx" - emit "mov #{target_ref}, #{x_ref}\n" - emit "#{op} #{target_ref}, #{y_ref}\n" - else - raise "Not a magic word: #{words[0]}" - end - end - end - end - - # Divide x by y, leaving the quotient in eax and the remainder in edx - def eval_div x, y - x_ref = load_value x, "ebx" - y_ref = load_value y, "ecx" - emit "mov eax, #{x_ref}\n" - emit "xor edx, edx\n" - if immediate_operand?(y_ref) - emit "mov ecx, #{y_ref}\n" - emit "idiv ecx\n" - else - emit "idiv dword #{y_ref}\n" - end - end - - # Multiply x by y and store the result in target - def eval_mul target_ref, x, y - # Assumes target_ref is not ebx or edx - x_ref = load_value x, "edx" - y_ref = load_value y, "ebx" - - if immediate_operand? x_ref - if immediate_operand? y_ref - emit "mov #{target_ref}, #{x_ref}\n" - emit "imul #{target_ref}, #{y_ref}\n" - else - emit "imul #{target_ref}, #{y_ref}, #{x_ref}\n" - end - elsif immediate_operand? y_ref - emit "imul #{target_ref}, #{x_ref}, #{y_ref}\n" - else - emit "mov #{target_ref}, #{x_ref}\n" - emit "imul dword #{y_ref}\n" - end - end - - def export *symbols - emit "global #{symbols.join ', '}\n" - end - - # start a function definition - def begin_function *args - emit "\n; function #{args.join ' '}\n" - environment = Environment.new @environment - environment.add_args args - @environment = environment - emit "push ebp\nmov ebp, esp\n" - end - - # tests if a symbol is a global variable - def global? 
symbol - @environment[symbol] == nil - end - - def goto value - emit "; goto #{value}\n" - value_ref = load_value value, "eax" - emit "jmp #{value_ref}\n" - end - - def ifelse - emit "; else\n" - newlabel = @environment.gensym - emit "jmp #{newlabel}\n" - label = @if_labels.pop - emit "#{label}:\n" - @if_labels.push newlabel - end - - def if_epilogue truelabel, falselabel - emit "jmp #{falselabel}\n" - emit "#{truelabel}:\n" - end - - # Tests if x is equal to y - def ifeq x, y - emit "; ifeq #{x} #{y}\n" - truelabel, falselabel = if_prologue x, y - emit "cmp eax, edx\n" - emit "je #{truelabel}\n" - if_epilogue truelabel, falselabel - end - - # Tests if x is greater than or equal to y - def ifge x, y - emit "; ifge #{x} #{y}\n" - truelabel, falselabel = if_prologue x, y - emit "cmp eax, edx\n" - emit "jge #{truelabel}\n" - if_epilogue truelabel, falselabel - end - - # Tests if x is strictly greater than y - def ifgt x, y - emit "; ifgt #{x} #{y}\n" - truelabel, falselabel = if_prologue x, y - emit "cmp eax, edx\n" - emit "jg #{truelabel}\n" - if_epilogue truelabel, falselabel - end - - # Tests if x is less than or equal to y - def ifle x, y - emit "; ifle #{x} #{y}\n" - truelabel, falselabel = if_prologue x, y - emit "cmp eax, edx\n" - emit "jle #{truelabel}\n" - if_epilogue truelabel, falselabel - end - - # Tests if x is strictly less than y - def iflt x, y - emit "; iflt #{x} #{y}\n" - truelabel, falselabel = if_prologue x, y - emit "cmp eax, edx\n" - emit "jl #{truelabel}\n" - if_epilogue truelabel, falselabel - end - - # Tests if x different from y - def ifne x, y - emit "; ifne #{x} #{y}\n" - truelabel, falselabel = if_prologue x, y - emit "cmp eax, edx\n" - emit "jne #{truelabel}\n" - if_epilogue truelabel, falselabel - end - - def if_prologue x, y = nil - ref2 = load_value y, "edx" if y - ref = load_value x, "eax" - emit "mov edx, #{ref2}\n" if y && ref2 != "edx" - emit "mov eax, #{ref}\n" unless ref == "eax" - - truelabel = @environment.gensym - falselabel = 
@environment.gensym - @if_labels.push falselabel - [truelabel, falselabel] - end - - # Tests if an operand is an immediate operand - def immediate_operand? operand - integer?(operand) || (global?(operand) && operand !~ /^e[abcd]x$/) - end - - def import *symbols - emit "extern #{symbols.join ', '}\n" - end - - # tests if a value is an integer - def integer? value - value.kind_of? Integer - end - - def label symbol - emit "#{symbol}:\n" - end - - def let symbol, *words - emit "; let #{symbol} #{words.join ' '}\n" - @environment.add_local symbol - eval_expr words - emit "push eax\n" - end - - def load_address base, offset, scale - base_ref = load_value base, "ebx" - offset_ref = load_value offset, "ecx" - - if offset_ref == '0' - if integer? base_ref - "[#{base_ref}]" - else - emit "mov ebx, #{base_ref}\n" - "[ebx]" - end - elsif base_ref == '0' - if integer? offset_ref - "[#{offset_ref.to_i * scale}]" - else - emit "mov ecx, #{offset_ref}\n" - "[ecx * #{scale}]" - end - elsif integer? base_ref - if integer? offset_ref - "[#{base_ref.to_i + (offset_ref.to_i * scale)}]" - else - emit "mov ecx, #{offset_ref}\n" - "[#{base_ref} + ecx * #{scale}]" - end - elsif integer? offset_ref - emit "mov ebx, #{base_ref}\n" - "[ebx + #{offset_ref.to_i * scale}]" - else - emit "mov ebx, #{base_ref}\n" - emit "mov ecx, #{offset_ref}\n" - "[ebx + ecx * #{scale}]" - end - end - - # symbol -> value of symbol - # @symbol -> value at address in symbol - # number -> number - def load_value value, free_register - if value_reference? value - symbol_value value_symbol(value) - elsif at_reference? value - symbol = value_symbol value - if global? symbol - "[" + symbol + "]" - else - emit "mov #{free_register}, #{symbol_value symbol}\n" - "[" + free_register + "]" - end - elsif integer? value - value - else - raise "Invalid value: #{value.inspect}" - end - end - - # Tests if an operand is a memory operand - def memory_operand? 
operand - operand[0] == ?[ - end - - # Divide x by y and store the remainder in target - def mod target, x, y - eval_div x, y - target_ref = load_value target, "ebx" - emit "mov #{target_ref}, edx\n" - end - - # Divide target by x and store the remainder in target - def mod2 target, x - eval_div target, x - target_ref = load_value target, "ebx" - emit "mov #{target_ref}, edx\n" - end - - # Multiply x by y and store the result in target - def mul target, x, y - # Assumes that eval_mul does not clobber ecx - target_ref = load_value target, "ecx" - eval_mul 'eax', x, y - emit "mov #{target_ref}, eax\n" - end - - # Multiply target by x, storing the result in target - def mul2 target, x - target_ref = load_value target, "ebx" - x_ref = load_value x, "edx" - emit "mov eax, #{target_ref}\n" - if immediate_operand?(x_ref) - emit "imul eax, #{x_ref}\n" - else - emit "imul dword #{x_ref}\n" - end - emit "mov #{target_ref}, eax\n" - end - - def public_label label - emit "global #{label}\n#{label}:\n" - end - - def push value - #emit "; push #{value}\n" - value_ref = load_value value, "ebx" - emit "push dword #{value_ref}\n" - end - - def ret *words - emit "; return #{words.join ' '}\n" - eval_expr words - emit "mov esp, ebp\npop ebp\nret\n" - end - - # Evaluate the expr in words and store the result in target - def set target, *words - if integer? 
target - raise "Cannot change value of integer #{target}" - elsif value_reference?(target) && global?(target) - raise "Cannot change value of global #{target}" - end - - emit "; set #{target} #{words.join ' '}\n" - if words.length == 1 - if words[0] == target - emit "; nothing to do; destination equals source\n" - else - target_ref = load_value target, "ebx" - if integer?(words[0]) - if words[0].to_i == 0 - # Set destination to 0 - emit "xor eax, eax\nmov #{target_ref}, eax\n" - else - # Load immediate - emit "mov dword #{target_ref}, #{words[0]}\n" - end - else - # Copy source to destination - eval_expr words - emit "mov dword #{target_ref}, eax\n" - end - end - else - op = words[0] - - if words.length == 3 && binop?(op) - # Binary operation - binop op, target, words[1], words[2] - else - # Not a binary operation - eval_expr words - target_ref = load_value target, "ebx" - emit "mov dword #{target_ref}, eax\n" - end - end - end - - def set_byte base, offset, value - emit "; set-byte #{base} #{offset} #{value}\n" - value_ref = load_value value, "eax" - addr_ref = load_address base, offset, 1 - emit "mov byte #{addr_ref}, #{value_ref}\n" - end - - def set_word base, offset, value - emit "; set-word #{base} #{offset} #{value}\n" - value_ref = load_value value, "eax" - addr_ref = load_address base, offset, WORDSIZE - emit "mov dword #{addr_ref}, #{value_ref}\n" - end - - def string str - code = '' - in_quote = false - str.each_byte do |b| - if b >= 32 && b < 128 - if in_quote - code << b.chr - else - code << "'" + b.chr - in_quote = true - end - else - if in_quote - code << "',#{b}" - in_quote = false - else - code << ",#{b}" - end - end - end - emit "db #{code}\n" - end - - def symbol_value symbol - x = @environment[symbol] - if x - case x[0] - when :arg - "[ebp + #{WORDSIZE * x[1] + (2 * WORDSIZE)}]" - when :local - "[ebp - #{WORDSIZE * x[1] + WORDSIZE}]" - else - raise "Invalid variable type: #{x[0]}" - end - else - # Assume global - symbol - end - end - - # Test if 
op is a symmetric operation (i.e. it will yield the - # same result if the order of its source operands is changed). - def symmetric_operation? op - [:add, :and, :mul, :or, :xor].member? op - end - - def tail_call fun, *args - emit "; tail-call #{fun} #{args.join ' '}\n" - if args.length > @environment.args - # Not enough space to do proper tail call; do normal call instead - emit "; not enough space for proper tail call; changed to regular call\n" - ret :call, fun, *args - else - # Any value in the current frame that is passed to the called - # function must be copied to a local variable if it would otherwise - # be overwritten before it is used - i = args.length - 1 - while i >= -1 - arg = (i >= 0) ? args[i] : fun - - if value_reference?(arg) || at_reference?(arg) - symbol = value_symbol arg - x = @environment[symbol] - if x && x[0] == :arg && x[1] < args.length && x[1] > i && - (i >= 0 || fun != args[x[1]]) - # Save value - newsym = @environment.gensym - let newsym, symbol - # Change reference - newref = at_reference?(arg) ? "@#{newsym}" : newsym - if i >= 0 - args[i] = newref - else - fun = newref - end - end - end - i = i - 1 - end - - # Set arguments - if args.length > 0 - (args.length - 1 .. 0).each do |i| - arg = args[i] - - value_ref = load_value arg, "eax" - newarg_ref = "[ebp + #{(i + 2) * WORDSIZE}]" - # Elide code if source is same as destination - unless value_ref == newarg_ref - emit "mov [ebp + #{(i + 2) * WORDSIZE}], #{value_ref}\n" - end - end - end - - # Tail call - emit "mov esp, ebp\npop ebp\n" - use_value "jmp", fun - end - end - - def use_value operation, value - value_ref = load_value value, "eax" - emit "#{operation} #{value_ref}\n" - end - - # tests if a value is a value reference - def value_reference? value - !integer?(value) && value.to_s =~ /^\.?(\w|-)+$/ - end - - # extracts the symbol name from a value - def value_symbol value - value.to_s[0] == ?@ ? 
value.to_s[1..-1].to_sym : value - end - - def word value - emit "dd #{value}\n" - end - - def write io - io.puts "bits 32\n\n" - @sections.each do |section,code| - unless code.empty? - case section - when :code - section_name = '.text' - when :data - section_name = '.data' - when :functions - section_name = '.text' - else - section_name = section.to_s - end - io.puts "section #{section_name}" - io.puts code - io.puts - end - end - end - - class Environment - @@gensym_counter = 0 - - attr_reader :args, :locals, :symbols - - def initialize parent = nil - ## Parent environment - @parent = parent - ## Symbol lookup table - @symbols = parent ? parent.symbols.dup : {} - ## Number of arguments - @args = parent ? parent.args : 0 - ## Number of local variables - @locals = parent ? parent.locals : 0 - end - - def add_arg symbol - @symbols[symbol] = [:arg, @args] - @args = @args + 1 - end - - def add_args symbols - symbols.each { |sym| add_arg sym } - end - - def add_local symbol - @symbols[symbol] = [:local, @locals] - @locals = @locals + 1 - end - - def add_locals symbols - symbols.each { |sym| add_local sym } - end - - def gensym - @@gensym_counter = @@gensym_counter + 1 - ".G#{@@gensym_counter}" - end - - def [] symbol - @symbols[symbol] - end - - def self.initial_environment - Environment.new - end - end - end - - # Register class - Voodoo::CodeGenerator.register_generator I386NasmGenerator, - :architecture => :i386, - :format => :nasm -end +require 'voodoo/generators/common_code_generator' + +module Voodoo + # = i386 NASM Code Generator + # + # The i386 NASM code generator generates i386 assembly code for use with + # the {Netwide Assembler}[http://www.nasm.us/]. 
+ # + # == Call Frames + # + # Call frames have the following layout: + # + # argn + # : + # arg1 <-- ebp + 8 + # oldeip <-- ebp + 4 + # oldebp <-- ebp + # local1 <-- ebp - 4 + # local2 <-- ebp - 8 + # : + # localn <-- esp + # + class I386NasmGenerator < NasmGenerator + WORDSIZE = 4 + + def initialize params + # Number of bytes in a word + @WORDSIZE = 4 + # Word name in NASM lingo + @WORD_NAME = 'dword' + # Default alignment for code + @CODE_ALIGNMENT = 0 + # Default alignment for data + @DATA_ALIGNMENT = @WORDSIZE + # Default alignment for functions + @FUNCTION_ALIGNMENT = 16 + # Register used for return values + @RETURN_REG = 'eax' + # Register used as scratch register + @SCRATCH_REG = 'ebx' + # Accumulator index + @AX = 'eax' + # Base index + @BX = 'ebx' + # Count index + @CX = 'ecx' + # Data index + @DX = 'edx' + super params + end + + # Call a function + def call func, *args + emit "; call #{func} #{args.join ' '}\n" + revargs = args.reverse + revargs.each { |arg| push arg } + use_value "call", func + if args.length > 0 + emit "add esp, #{WORDSIZE * args.length}\n" + end + end + + # Emit function prologue. 
+ def emit_function_prologue formals = [] + emit "push ebp\nmov ebp, esp\n" + end + + # Load the value of the nth argument + def load_arg n, reg = @SCRATCH_REG + "[ebp + #{n * @WORDSIZE + 8}]" + end + + # Load the value of the nth local variable + def load_local n, reg = @SCRATCH_REG + "[ebp - #{(n + 1) * @WORDSIZE}]" + end + + # Introduce a new local variable + def let symbol, *words + emit "; let #{symbol} #{words.join ' '}\n" + @environment.add_local symbol + eval_expr words + emit "push eax\n" + end + + # Push a word on the stack + def push value + #emit "; push #{value}\n" + value_ref = load_value value, "ebx" + emit "push dword #{value_ref}\n" + end + + # Call a function, re-using the current call frame if possible + def tail_call fun, *args + emit "; tail-call #{fun} #{args.join ' '}\n" + if args.length > @environment.args + # Not enough space to do proper tail call; do normal call instead + emit "; not enough space for proper tail call; changed to regular call\n" + ret :call, fun, *args + else + # Any value in the current frame that is passed to the called + # function must be copied to a local variable if it would otherwise + # be overwritten before it is used + i = args.length - 1 + while i >= -1 + arg = (i >= 0) ? args[i] : fun + + if symbol?(arg) + x = @environment[arg] + if x && x[0] == :arg && x[1] < args.length && x[1] > i && + (i >= 0 || fun != args[x[1]]) + # Save value + newsym = @environment.gensym + let newsym, arg + # Change reference + if i >= 0 + args[i] = newsym + else + fun = newsym + end + end + end + i = i - 1 + end + + # Set arguments from last to first (a descending Range would be empty) + if args.length > 0
+ (args.length - 1).downto(0) do |i| + arg = args[i] + + value_ref = load_value arg, "eax" + newarg_ref = "[ebp + #{(i + 2) * WORDSIZE}]" + # Elide code if source is same as destination + unless value_ref == newarg_ref + emit "mov [ebp + #{(i + 2) * WORDSIZE}], #{value_ref}\n" + end + end + end + + # Tail call + emit "mov esp, ebp\npop ebp\n" + use_value "jmp", fun + end + end + + def use_value operation, value + value_ref = load_value value, "eax" + emit "#{operation} #{value_ref}\n" + end + + # Define a machine word with the given value + def word value + emit "dd #{value}\n" + end + + end + + # Register class + Voodoo::CodeGenerator.register_generator I386NasmGenerator, + :architecture => :i386, + :format => :nasm +end -- 2.11.4.GIT