Lookahead improvements: EOF handling and better ambiguity detection.
[gazelle.git] / compiler / bc.lua
blobc3a81777d20b2567a47d8faebd8029264d3400aa
1 --[[--------------------------------------------------------------------
3 Gazelle: a system for building fast, reusable parsers
5 bc.lua
7 A quick and dirty module for writing files in Bitcode format.
9 Copyright (c) 2007 Joshua Haberman. See LICENSE for details.
11 --------------------------------------------------------------------]]--
13 module("bc", package.seeall)
-- Abbreviation IDs with a fixed meaning.  Each one is written using the
-- current abbreviation width to introduce the corresponding construct
-- (end of block, subblock header, abbreviation definition, unabbreviated
-- record).
END_BLOCK       = 0
ENTER_SUBBLOCK  = 1
DEFINE_ABBREV   = 2
UNABBREV_RECORD = 3

-- Encoding selectors, emitted as 3-bit fields when an abbreviation operand
-- is defined.  ENCODING_CHAR6 is declared but not used by this module.
ENCODING_FIXED = 1
ENCODING_VBR   = 2
ENCODING_ARRAY = 3
ENCODING_CHAR6 = 4

-- Not referenced inside this module.  NOTE(review): presumably the block ID
-- of the standard BLOCKINFO block and the record code of its SETBID record;
-- confirm against callers.
BLOCKINFO = 0
SETBID    = 1
-- Abbreviation operand whose value is fixed when the abbreviation is
-- defined.  The value is written into the abbreviation definition itself, so
-- records using the abbreviation supply no argument for it.
LiteralOp = {name="LiteralOp"}

-- value: the literal number stored in the abbreviation definition.
function LiteralOp:new(value)
  local op = newobject(self)
  op.value = value
  return op
end
-- Abbreviation operand encoded as a variable-bit-rate integer.
VBROp = {name="VBROp"}

-- bits: width of each VBR chunk used when a value for this operand is
-- written.
function VBROp:new(bits)
  local obj = newobject(self)
  obj.bits = bits
  -- The original also did `obj.name = name`, but no `name` is in scope here:
  -- it read an undefined global and stored nil (or, worse, whatever unrelated
  -- global happened to be called `name`).  The assignment is dropped.
  return obj
end
-- Abbreviation operand encoded as a fixed-width integer.
FixedOp = {name="FixedOp"}

-- bits: number of bits used when a value for this operand is written.
function FixedOp:new(bits)
  local obj = newobject(self)
  obj.bits = bits
  -- The original also did `obj.name = name`, but no `name` is in scope here:
  -- it read an undefined global and stored nil (or, worse, whatever unrelated
  -- global happened to be called `name`).  The assignment is dropped.
  return obj
end
-- Abbreviation operand holding a variable-length sequence of values.
ArrayOp = {name="ArrayOp"}

-- elem_type: the op (e.g. a VBROp or FixedOp) used to encode every element
-- of the array.
function ArrayOp:new(elem_type)
  local obj = newobject(self)
  obj.elem_type = elem_type
  -- The original also did `obj.name = name`, but no `name` is in scope here:
  -- it read an undefined global and stored nil (or, worse, whatever unrelated
  -- global happened to be called `name`).  The assignment is dropped.
  return obj
end
-- Writer for a single Bitcode output file.
File = {name="File"}

-- Creates `filename`, writes the header ("BC" followed by app_magic_number)
-- and returns a writer positioned just after it.
--
-- filename:         path of the file to create (truncated if it exists).
-- app_magic_number: string written verbatim right after "BC".
--
-- Raises an error if the file cannot be opened.
function File:new(filename, app_magic_number)
  local obj = newobject(self)

  -- The output is binary, so open with "b": in text mode some platforms
  -- translate newline bytes, which corrupts the data and invalidates the
  -- seek offsets used to back-patch block lengths.
  local handle, err = io.open(filename, "wb")
  if not handle then
    error("bc.File: cannot open output file: " .. tostring(err))
  end
  obj.file = handle

  -- Bits are accumulated here until a whole byte can be flushed.
  obj.current_byte = 0
  obj.current_bits = 0

  obj.file:write("BC")
  obj.file:write(app_magic_number)

  obj.current_abbrev_width = 2

  -- Counts bytes flushed by write_fixed only; the header written above is
  -- not included.  NOTE(review): alignment is computed from this counter, so
  -- it is relative to the end of the header -- presumably the header is
  -- expected to be 4 bytes long; confirm with callers.
  obj.offset = 0

  -- One entry per open subblock: {previous abbrev width, file position}.
  obj.stack = {}
  return obj
end
-- Witness the joy of trying to do bitwise manipulation in a language that
-- has no bitwise operators.
--
-- Appends the low `bits` bits of `val` to the output, least-significant bit
-- first.  Bits are packed into the pending byte from its low end upwards;
-- every time the pending byte fills up it is written to the file and the
-- byte counter (self.offset) is advanced.
function File:write_fixed(val, bits)
  while bits > 0 do
    -- Take as many bits as still fit into the pending byte.
    local take = math.min(8 - self.current_bits, bits)
    local chunk_range = 2 ^ take
    local chunk = val % chunk_range

    -- Shift the chunk past the bits already stored and merge it in.
    self.current_byte = self.current_byte + chunk * (2 ^ self.current_bits)
    self.current_bits = self.current_bits + take

    if self.current_bits == 8 then
      self.file:write(string.char(self.current_byte))
      self.current_byte, self.current_bits = 0, 0
      self.offset = self.offset + 1
    end

    -- Drop the bits just consumed and continue with the rest.
    val = math.floor(val / chunk_range)
    bits = bits - take
  end
end
-- Pads the output with zero bits until the byte counter (self.offset) is a
-- multiple of four and no partial byte is pending.
function File:align_32_bits()
  -- First complete the byte currently being assembled, if any.
  local pending_bits = self.current_bits
  if pending_bits > 0 then
    self:write_fixed(0, 8 - pending_bits)
  end

  -- Then add whole zero bytes up to the next 4-byte boundary.  The counter
  -- is read only now because the step above may have advanced it.
  local misaligned = self.offset % 4
  if misaligned > 0 then
    self:write_fixed(0, (4 - misaligned) * 8)
  end
end
-- Writes `val` as a variable-bit-rate integer made of `bits`-wide chunks.
--
-- Each chunk carries (bits-1) payload bits in its low positions; its top bit
-- is set when more chunks follow.  Payload groups are emitted
-- least-significant first.
--
-- val:  non-negative integer to encode.
-- bits: chunk width; must be at least 2 (a 1-bit chunk has no payload).
--
-- Raises an error if `bits` is smaller than 2.
function File:write_vbr(val, bits)
  if bits < 2 then
    -- With no payload bits the encoding can never terminate.
    error("VBR chunk width must be at least 2 bits", 2)
  end

  local high_bit = 2 ^ (bits-1)

  -- A continuation chunk is needed exactly while the remaining value does
  -- not fit into the payload bits.  Comparing against high_bit is exact,
  -- whereas deriving the bit count from math.log(val)/math.log(2) depends on
  -- floating-point rounding and can be off by one near powers of two, which
  -- either pads the encoding with a spurious chunk or truncates the value.
  while val >= high_bit do
    self:write_fixed(high_bit + (val % high_bit), bits)
    val = math.floor(val / high_bit)
  end

  -- Final chunk: continuation bit clear.
  self:write_fixed(val, bits)
end
-- Opens a nested block with ID `block_id`.
--
-- Emits the ENTER_SUBBLOCK header (block ID as VBR-8, new abbreviation width
-- as VBR-4), switches to that width, aligns the output and reserves a 32-bit
-- slot that end_subblock later overwrites.  The previous abbreviation width
-- and the file position just after the reserved slot are pushed on
-- self.stack.
function File:enter_subblock(block_id)
  local NEW_ABBREV_WIDTH = 4  -- no need to make this configurable at the moment
  local previous_width = self.current_abbrev_width

  self:write_fixed(ENTER_SUBBLOCK, previous_width)
  self:write_vbr(block_id, 8)
  self:write_vbr(NEW_ABBREV_WIDTH, 4)
  self.current_abbrev_width = NEW_ABBREV_WIDTH
  self:align_32_bits()

  self:write_fixed(0, 32)  -- we'll fill this in later
  local body_start = self.file:seek()
  self.stack[#self.stack + 1] = {previous_width, body_start}
end
-- Closes the innermost open subblock.
--
-- Emits END_BLOCK, aligns the output, restores the abbreviation width that
-- was active before the block was entered, and back-patches the 32-bit slot
-- reserved by enter_subblock with the block body's size in 32-bit words.
--
-- block_id: accepted for symmetry with enter_subblock; not used.
--
-- Raises an error if no subblock is open.
function File:end_subblock(block_id)
  -- Check before writing anything: an unbalanced call would otherwise emit
  -- END_BLOCK and then fail with an opaque "bad argument to 'unpack'" error.
  if #self.stack == 0 then
    error("end_subblock called with no open subblock", 2)
  end

  self:write_fixed(END_BLOCK, self.current_abbrev_width)
  self:align_32_bits()

  -- Frame layout: {abbrev width before the block, position after the slot}.
  local frame = table.remove(self.stack)
  local block_offset = frame[2]
  self.current_abbrev_width = frame[1]

  -- The output is aligned at this point, so no partial byte is pending and
  -- the length can be written straight over the reserved slot, which sits
  -- in the 4 bytes just before block_offset.
  local current_offset = self.file:seek()
  self.file:seek("set", block_offset - 4)
  self:write_fixed((current_offset - block_offset) / 4, 32)
  self.file:seek("set", current_offset)
end
-- Writes a record without using an abbreviation.
--
-- Layout: UNABBREV_RECORD marker (current abbreviation width), then the
-- record id, the operand count and every operand, each as VBR-6.
--
-- id:  record code.
-- ...: the record's operands (numbers).
function File:write_unabbreviated_record(id, ...)
  local operands = {...}
  self:write_fixed(UNABBREV_RECORD, self.current_abbrev_width)
  self:write_vbr(id, 6)
  self:write_vbr(#operands, 6)
  for operand in each(operands) do
    self:write_vbr(operand, 6)
  end
end
-- Writes one scalar value using the encoding described by `op`.
--
-- val: number to write.
-- op:  a VBROp or FixedOp; its `bits` field selects the width.
--
-- Raises an error for any other kind of op.
function File:write_abbreviated_val(val, op)
  local kind = op.class

  if kind == VBROp then
    self:write_vbr(val, op.bits)
    return
  end

  if kind == FixedOp then
    self:write_fixed(val, op.bits)
    return
  end

  error("Unknown op type!")
end
-- Writes a record using a previously defined abbreviation.
--
-- abbreviation: table returned by define_abbreviation (fields `id`, `ops`).
-- ...:          one argument per entry of abbreviation.ops.  For an ArrayOp
--               the argument is either a string (written byte by byte) or
--               an array of numbers; for any other op it is a number.
--
-- Raises an error if the number of arguments does not match the number of
-- ops in the abbreviation.
function File:write_abbreviated_record(abbreviation, ...)
  local args = {...}
  if #args ~= #abbreviation.ops then
    error("Wrong number of arguments for abbreviated record")
  end

  self:write_fixed(abbreviation.id, self.current_abbrev_width)

  for i, arg in ipairs(args) do
    local op = abbreviation.ops[i]
    if op.class ~= ArrayOp then
      self:write_abbreviated_val(arg, op)
    elseif type(arg) == "string" then
      -- Arrays are prefixed with their length as VBR-6.
      local len = #arg
      self:write_vbr(len, 6)
      -- Fetch bytes one at a time.  Expanding the whole string with
      -- arg:byte(1, len) pushes every byte onto the stack at once and fails
      -- with "string slice too long" for long strings.
      for pos = 1, len do
        self:write_abbreviated_val(arg:byte(pos), op.elem_type)
      end
    else
      self:write_vbr(#arg, 6)
      for elem in each(arg) do
        self:write_abbreviated_val(elem, op.elem_type)
      end
    end
  end
end
-- Writes the definition of a single abbreviation operand.
--
-- A LiteralOp is written as a 1 bit followed by its value as VBR-8.  A VBROp
-- or FixedOp is written as a 0 bit, a 3-bit encoding selector and the op's
-- width as VBR-5.
--
-- Raises an error for any other kind of op.
function File:write_abbrev_op(arg)
  local kind = arg.class

  if kind == LiteralOp then
    self:write_fixed(1, 1)
    self:write_vbr(arg.value, 8)
    return
  end

  -- VBR and fixed operands share one layout and differ only in the selector.
  local encoding
  if kind == VBROp then
    encoding = ENCODING_VBR
  elseif kind == FixedOp then
    encoding = ENCODING_FIXED
  else
    error("Unknown/unhandled op type")
  end

  self:write_fixed(0, 1)
  self:write_fixed(encoding, 3)
  self:write_vbr(arg.bits, 5)
end
-- Writes an abbreviation definition and returns a handle for it.
--
-- abbrev_id: ID that records using this abbreviation are written with.
-- ...:       the operand descriptions (LiteralOp, VBROp, FixedOp, ArrayOp),
--            in order.
--
-- Returns a table {id=abbrev_id, ops={...}} where `ops` lists the
-- non-literal operands, i.e. those for which write_abbreviated_record
-- expects an argument.
--
-- Raises an error if no operands are given.
function File:define_abbreviation(abbrev_id, ...)
  local args = {...}

  -- Reject an empty definition before anything is written.  Previously this
  -- case emitted DEFINE_ABBREV and then crashed indexing a nil operand.
  if #args == 0 then
    error("define_abbreviation requires at least one operand", 2)
  end

  local abbrev = {id=abbrev_id, ops={}}
  self:write_fixed(DEFINE_ABBREV, self.current_abbrev_width)

  -- A trailing array takes two slots in the written definition: the array
  -- marker itself and its element type.
  local op_count = #args
  if args[op_count].class == ArrayOp then
    op_count = op_count + 1
  end
  self:write_vbr(op_count, 5)

  for arg in each(args) do
    -- Literals are part of the definition only; they take no argument.
    if arg.class ~= LiteralOp then
      table.insert(abbrev.ops, arg)
    end

    if arg.class == ArrayOp then
      self:write_fixed(0, 1)
      self:write_fixed(ENCODING_ARRAY, 3)
      self:write_abbrev_op(arg.elem_type)
    else
      self:write_abbrev_op(arg)
    end
  end

  return abbrev
end
247 -- vim:et:sts=2:sw=2