add avm2 exception handler support to assembler
[swf2.git] / asm / asm.lisp
blobaf3b82bfe670a20402f0991526f7c3c3d69ec3cf
1 (in-package :avm2-asm)
4 ;;; for now just using keywords as opcode names...
5 ;;; * possibly would be better to use an equal or equal-p hash table and look
6 ;;; up opcodes by name instead of symbol?
7 ;;; * functions have some nice properties also:
8 ;;; + arglist hints from slime autodoc
9 ;;; + backtraces show specific opcode when we get an error in a opcode
10 ;;; + assemble is a bit shorter
11 ;;; - name clashes with CL functions make it messier though, so storing
12 ;;; opcodes in a hash...
13 (defparameter *opcodes* (make-hash-table))
14 (defparameter *disassemble-opcodes* (make-hash-table))
16 (defparameter +need-args+ #x01)
17 (defparameter +need-activation+ #x02)
18 (defparameter +need-rest+ #x04)
19 (defparameter +has-optional+ #x08)
20 (defparameter +set-dxns+ #x40)
21 (defparameter +has-param-names+ #x80)
23 (defclass method-body ()
24 ((method-id :initarg method :accessor method-id)
25 (max-stack :initarg max-stack :accessor max-stack)
26 (local-count :initarg local-count :accessor local-count)
27 (init-scope-depth :initarg init-scope-depth :accessor init-scope-depth)
28 (max-scope-depth :initarg max-scope-depth :accessor max-scope-depth)
29 (code :initarg code :accessor code)
30 ;; stored in an array since we need to keep them in order
31 (exceptions :initform (make-array 4 :adjustable t :fill-pointer 0) :accessor exceptions)
32 ;; mapping of exception target label names to indices in exceptions array
33 (exception-names :initform () :accessor exception-names)
34 (traits :initform nil :initarg traits :accessor traits)
35 ;; temporaries for tracking values during assembly
36 (current-stack :initform 0 :accessor current-stack)
37 (current-scope :initform 2 :initarg current-scope :accessor current-scope)
38 (flags :initform 0 :accessor flags)
39 (label :initform () :accessor label)
40 (fixups :initform () :accessor fixups)))
43 (defparameter *current-method* nil)
44 (defparameter *code-offset* 0)
46 (defun assemble (forms)
47 "simple assembler, returns sequence of octets containing the
48 bytecode corresponding to forms, interns stuff as needed, or
49 optionally uses constant pool indices (with no error checking
50 currently) when operand is a list of the form (:id ###). "
51 (let ((*code-offset* 0))
52 (loop for i in (peephole forms)
53 for opcode = (gethash (car i) *opcodes*)
54 for octets = (when opcode (apply opcode (cdr i)))
55 if opcode
56 append octets
57 ;;and do (format t "assemble ~s-> ~s ofs = ~s + ~s ~%"
58 ;; i octets *code-offset* (length octets))
59 and do (incf *code-offset* (length octets))
60 else do (error "invalid opcode ~s " i))))
62 (defun assemble-method-body (forms &key (init-scope 0)
63 (max-scope 1 max-scope-p)
64 (max-stack 1 max-stack-p))
65 (let ((*current-method* (make-instance 'method-body
66 'local-count 1
67 'max-stack 1
68 'init-scope-depth init-scope
69 'max-scope-depth init-scope
70 'current-scope init-scope)))
71 (setf (code *current-method*)
72 (assemble forms))
73 (when max-stack-p
74 (setf (max-stack *current-method*) max-stack))
75 (when max-scope-p
76 (setf (max-scope-depth *current-method*) (+ init-scope max-scope)))
77 (when (fixups *current-method*)
78 ;; fix any fixups
79 (loop for (label addr base) in (fixups *current-method*)
80 for dest = (cdr (assoc label (label *current-method*)))
81 when dest
82 do (replace (code *current-method*)
83 (u24-to-sequence (- dest base))
84 :start1 (+ 1 addr ))
85 else do (error "!!!!! unknown fixup ~s !!! ~%" label)))
86 ;; update exception table with addresses of labels
87 (flet ((ensure-label (name)
88 (or (cdr (assoc name (label *current-method*)))
89 (error "unknown label ~s in avm2 exception handler" name))))
90 (loop for i below (length (exceptions *current-method*))
91 for ex = (aref (exceptions *current-method*) i)
92 do (setf (from ex) (ensure-label (from ex))
93 (to ex) (ensure-label (to ex))
94 (target ex)(ensure-label (target ex)))))
95 *current-method*))
98 (defun u16-to-sequence (u16)
99 (list
100 (ldb (byte 8 0) u16)
101 (ldb (byte 8 8) u16)))
103 (defun u24-to-sequence (u24)
104 (list
105 (ldb (byte 8 0) u24)
106 (ldb (byte 8 8) u24)
107 (ldb (byte 8 16) u24)))
109 (defun double-to-sequence (double)
110 (loop with d = (ieee-floats::encode-float64 double)
111 for i from 0 below 64 by 8
112 collect (ldb (byte 8 i) d)))
115 (defun counted-s24-to-sequence (seq)
116 (append
117 (variable-length-encode (length seq))
118 (mapcan 'u24-to-sequence seq)))
120 (defun count+1-s24-to-sequence (seq)
121 (append
122 (variable-length-encode (1- (length seq)))
123 (mapcan 'u24-to-sequence seq)))
125 (defun variable-length-encode (integer)
126 (loop
127 for i = integer then i2
128 for i2 = (ash i -7)
129 for b = (ldb (byte 7 0) i)
130 for done = (or (= i2 0) (= i2 -1))
131 when (not done)
132 do (setf b (logior #x80 b))
133 collect b
134 until done))
136 ;;; fixme: these should probably avoid repeated elt calls if seq is a list
137 (defun decode-u16 (sequence &key (start 0))
138 (values
139 (logior (elt sequence start)
140 (ash (elt sequence (1+ start)) 8))
141 (+ 2 start)))
143 (defun decode-u24 (sequence &key (start 0))
144 (values
145 (logior (elt sequence start)
146 (ash (elt sequence (+ 1 start)) 8)
147 (ash (elt sequence (+ 2 start)) 16))
148 (+ 3 start)))
150 (defun decode-variable-length (sequence &key (start 0))
151 (loop with sum = 0
152 for i from start
153 for offset from 0 by 7
154 for j = (elt sequence i)
155 do (setf (ldb (byte 7 offset) sum) (ldb (byte 7 0) j))
156 while (logbitp 7 j)
157 finally (return (values sum (1+ i)))))
159 (defun decode-counted-s24 (sequence &key (start 0))
160 (multiple-value-bind (count start)
161 (decode-variable-length sequence :start start)
162 (values
163 (loop repeat (1+ count)
164 with value
165 do (setf (values value start) (decode-u24 sequence :start start))
166 collect value)
167 start)))
169 ;;; new types for automatic interning
170 ;;; (many of these probably just map to the same qname code, but
171 ;;; separating just in case)
172 ;; string-u30 int-u30 uint-u30 double-u30 namespace-q30 multiname-q30 class-u30
173 ;; fix runtime-name-count? or just set arg to index after interning
174 ;; and before calling arg count stuff?
176 ;;; todo: figure out if these need handled:
177 ;;; slot-index for :get-slot/:set-slot/etc
179 ;(decode-u16 (u16-to-sequence 12345))
180 ;(decode-u24 (u24-to-sequence 12345))
181 ;(decode-u24 (u24-to-sequence 123456))
182 ;(decode-variable-length (variable-length-encode 1))
183 ;(decode-variable-length (variable-length-encode 127))
184 ;(decode-variable-length (variable-length-encode 128))
185 ;(decode-variable-length (variable-length-encode 256))
186 ;(decode-variable-length (variable-length-encode 12345))
187 ;(decode-variable-length (variable-length-encode 123456789))
188 ;(decode-counted-s24 (counted-s24-to-sequence '(1 2 3 4 5)))
189 ;(decode-counted-s24 (counted-s24-to-sequence '(12345 2 345678 4 5)))
190 (decode-variable-length '(#b10000010 #b1)) ; 130
191 (decode-variable-length '(#b1)) ; 1
192 (decode-variable-length '(#b10010110 #b11))
194 (defun avm2-disassemble (sequence &key (start 0))
195 (loop
196 for length = (length sequence)
197 with op = nil
198 for byte = (elt sequence start)
199 for dis = (gethash byte *disassemble-opcodes*)
200 ;;do (format t "op=~s byte=~s start=~s cur-seq=~{ ~2,'0x~}~% dis=~s ~%"
201 ;; op byte start (coerce
202 ;; (subseq sequence start (min length
203 ;; (+ start 8))) 'list) dis)
204 ;; (finish-output)
205 do (incf start)
206 when dis
207 do (setf (values op start) (funcall dis sequence :start start))
208 ;;and do (format t "op -> ~s start -> ~s~%" op start)
209 and collect op
210 else do (error "invalid byte ~s at ~d " byte start)
211 while (< start length)))
214 ;;; these don't actually work in general, since they don't take
215 ;;; branching into account, but simplifies things for now...
216 (defun adjust-stack (pop push)
217 (when *current-method*
218 (decf (current-stack *current-method*) pop)
219 ;;(when (< (current-stack *current-method*) 0)
220 ;; (error "assembler error : stack underflow !"))
221 ;; be conservative, probably should warn once compiler is smarter...
222 (when (< (current-stack *current-method*) 0)
223 (setf (current-stack *current-method*) 0))
224 (incf (current-stack *current-method*) push)
225 (when (> (current-stack *current-method*)
226 (max-stack *current-method*))
227 (setf (max-stack *current-method*)
228 (current-stack *current-method*)))))
230 (defun adjust-scope (pop push)
231 (when *current-method*
232 (decf (current-scope *current-method*) pop)
233 ;;(when (< (current-scope *current-method*) 0)
234 ;; (error "assembler error : scope underflow !"))
235 (incf (current-scope *current-method*) push)
236 (when (> (current-scope *current-method*)
237 (max-scope-depth *current-method*))
238 (setf (max-scope-depth *current-method*)
239 (current-scope *current-method*)))))
241 (macrolet
242 ((make-interner (intern-name lookup-name interner pool)
243 `(progn
244 (defun ,intern-name (value)
245 (if (typep value '(cons (eql :id)))
246 (second value)
247 (,interner value)))
248 (defun ,lookup-name (value)
249 (if *assembler-context*
250 (aref (,pool *assembler-context*) value)
251 (list :id value))))))
253 (make-interner asm-intern-string lookup-string avm2-string strings)
254 ;; fixme: avm2-intern-* can break if first thing interned is wrong type
255 (make-interner asm-intern-int lookup-int avm2-intern-int ints)
256 (make-interner asm-intern-uint lookup-uint avm2-intern-uint uints)
257 (make-interner asm-intern-double lookup-double avm2-intern-double doubles)
258 (make-interner asm-intern-namespace lookup-namespace avm2-ns-intern namespaces)
259 (make-interner asm-intern-method lookup-method intern-method-id method-infos))
260 ;; (asm-intern-string "foo")
261 ;; (asm-intern-string '(:id 2))
262 ;; (asm-intern-string :id)
263 ;; (asm-intern-int 1232)
264 ;; (asm-intern-int '(:id 3))
265 ;; x(asm-intern-int :id) ;; should fail even if no ints interned yet, but doesn't
268 (defun symbol-to-qname-list (name &key init-cap)
269 ;; just a quick hack for now, doesn't actually try to determine if
270 ;; there is a valid property or not...
271 (let ((package (symbol-package name))
272 (sym (coerce
273 (loop
274 for prev = (if init-cap #\- #\Space) then c
275 for c across (symbol-name name)
276 when (or (not (alpha-char-p prev)) (char/= c #\-))
277 collect (if (char= prev #\-)
278 (char-upcase c)
279 (char-downcase c)))
280 'string)))
281 (if (eql package (find-package :keyword))
282 (setf package "")
283 (setf package (string-downcase (or (package-name package) ""))))
284 (values (list :qname package sym) sym)))
286 ;; fixme: not sure we want this anymore, instead store a symbol->qname
287 ;; hash in compiler-context, and use that for lookups?
288 ;;; --- still used by defun stuff, so keeping for now... not calling automatically any more though, need to actually have a valid *symbol-table*
289 (defun symbol-to-qname-old (name &key init-cap)
290 ;; just a quick hack for now, doesn't actually try to determine if
291 ;; there is a valid property or not...
292 (let ((package (symbol-package name))
293 (sym (coerce
294 (loop
295 for prev = (if init-cap #\- #\Space) then c
296 for c across (symbol-name name)
297 when (or (not (alpha-char-p prev)) (char/= c #\-))
298 collect (if (char= prev #\-)
299 (char-upcase c)
300 (char-downcase c)))
301 'string)))
302 (if (eql package (find-package :keyword))
303 (setf package "")
304 (setf package (string-downcase (or (package-name package) ""))))
305 (values (avm2-asm::qname package sym) sym)))
307 (defun asm-intern-multiname (mn)
308 (typecase mn
309 ((integer 0 0) 0) ;; shortcut for (:id 0)
310 ((cons (eql :qname)) (apply 'qname (cdr mn)))
311 ((cons (eql :multiname-l)) (apply 'intern-multiname-l +multiname-l+ (cdr mn)))
312 ;; todo: add other types of multinames
313 ((cons (eql :id)) (second mn))
314 (symbol (apply 'qname (cdr (symbol-to-qname-list mn)))) ;; not sure if this is good or not, needed for calling as-yet undefined functions though...
315 (t (parsed-qname mn))))
316 ;; (asm-intern-multiname '(:qname "foo" "bar"))
317 ;; (asm-intern-multiname '(:id 321))
318 ;; (asm-intern-multiname "foo:bax")
319 ;; (asm-intern-multiname '(:qname "foo" "bax"))
320 ;; (asm-intern-multiname '(:qname "foo" "bax"))
321 ;; x(asm-intern-multiname 'cos) ;; not sure if we should support symbols or not
322 ;;(intern-multiname +multiname-l+ "" "") (elt (multinames *assembler-context*) 1)
325 (defparameter *multiname-kinds* (make-hash-table))
326 (setf (gethash +qname+ *multiname-kinds*) :qname)
327 (setf (gethash +qname-a+ *multiname-kinds*) :qname-a)
328 (setf (gethash +rt-qname+ *multiname-kinds*) :rt-qname)
329 (setf (gethash +rt-qname-a+ *multiname-kinds*) :rt-qname-a)
330 (setf (gethash +rt-qname-l+ *multiname-kinds*) :rt-qname-l)
331 (setf (gethash +rt-qname-la+ *multiname-kinds*) :rt-qname-la)
332 (setf (gethash +multiname+ *multiname-kinds*) :multiname)
333 (setf (gethash +multiname-a+ *multiname-kinds*) :multiname-a)
334 (setf (gethash +multiname-l+ *multiname-kinds*) :multiname-l)
335 (setf (gethash +multiname-la+ *multiname-kinds*) :multiname-la)
337 (defun lookup-multiname (id)
338 (if (boundp '*assembler-context*)
339 (destructuring-bind (kind ns name)
340 (elt (multinames *assembler-context*) id)
341 (list (gethash kind *multiname-kinds* kind)
342 (elt (strings *assembler-context*)
343 (second (elt (namespaces *assembler-context*) ns)))
344 (elt (strings *assembler-context*) name)))
345 (list :id id)))
347 (defun label-to-offset (name op)
348 (let ((dest (gensym "DEST-"))
349 (here (gensym "HERE-"))
350 (ofs (if (eq op :lookup-switch) 0 4)))
351 `(when (symbolp ,name)
352 (let ((,dest (cdr (assoc ,name (label *current-method*))))
353 (,here *code-offset*))
354 (unless ,dest
355 (push (list ,name ,here (+ ,here ,ofs)) (fixups *current-method*))
356 (setf ,dest (+ 4 ,here)))
357 (setf ,name (- ,dest ,here ,ofs))))))
359 (defun labels-to-offsets (name)
360 (let ((dest (gensym "DEST-"))
361 (here (gensym "HERE-"))
362 (i (gensym "I-"))
363 (j (gensym "J-")))
364 `(setf ,name
365 (loop with ,here = *code-offset*
366 for ,i in ,name
367 for ,j from 4 by 4
368 when (symbolp ,i)
369 collect
370 (let ((,dest (cdr (assoc ,i (label *current-method*)))))
371 (unless ,dest
372 (push (list ,i (+ ,here ,j) ,here)
373 (fixups *current-method*))
374 (setf ,dest ,here))
375 (- ,dest ,here 0))
376 else collect ,i
377 ))))
379 (defun asm-intern-exception (exception)
380 (cond
381 ;; allow (:id ##) to specify index directly
382 ((and (consp exception) (eq (first exception) :id))
383 (second exception))
384 ;; look up by name
385 ((cdr (assoc exception (exception-names *current-method*))))
386 ;;TODO: should we handle calling :new-catch before the
387 ;; target label has been seen in the asm?
388 (t (error "unknown exception block name ~s" exception))))
390 (defmacro define-ops (&body ops)
391 (let ((coders
392 ;; type tag , encoder , optional interner
393 `((u8 list)
394 (u16 u16-to-sequence)
395 (u24 u24-to-sequence)
396 (s24 u24-to-sequence)
397 (ofs24 u24-to-sequence) ;; for using labels directly in branches
398 (u30 variable-length-encode)
399 (q30 variable-length-encode) ;; hack for name interning
400 (u32 variable-length-encode)
401 (s32 variable-length-encode)
402 (double double-to-sequence)
403 (counted-s24 counted-s24-to-sequence)
404 (counted-ofs24 count+1-s24-to-sequence)
406 (string-u30 variable-length-encode asm-intern-string)
407 (int-u30 variable-length-encode asm-intern-int)
408 (uint-u30 variable-length-encode asm-intern-uint)
409 (double-u30 variable-length-encode asm-intern-double)
410 (namespace-q30 variable-length-encode asm-intern-namespace)
411 (multiname-q30 variable-length-encode asm-intern-multiname)
412 (class-u30 variable-length-encode asm-intern-class)
413 (method-u30 variable-length-encode asm-intern-method)
414 (exception-u30 variable-length-encode asm-intern-exception)
416 (decoders
417 ;; type tag, decoder, optional constant pool lookup function
418 `((u8 (lambda (s &key (start 0)) (elt s start)))
419 (u16 decode-u16)
420 (u24 decode-u24)
421 (s24 decode-u24)
422 (ofs24 decode-u24) ;; for using labels directly in branches
423 (u30 decode-variable-length)
424 (q30 decode-variable-length) ;; hack for name interning
425 (u32 decode-variable-length)
426 (s32 decode-variable-length)
427 (double (lambda (s) (error "not done")))
428 (counted-s24 decode-counted-s24)
429 (counted-ofs24 decode-counted-s24) ;; array of ofs24 in lookupswitch
431 (string-u30 decode-variable-length lookup-string)
432 (int-u30 decode-variable-length lookup-int)
433 (uint-u30 decode-variable-length lookup-uint)
434 (double-u30 decode-variable-length lookup-double)
435 (namespace-q30 decode-variable-length lookup-namespace)
436 (multiname-q30 decode-variable-length lookup-multiname)
437 (class-u30 decode-variable-length lookup-class)
438 (method-u30 decode-variable-length lookup-method)
439 (exception-u30 decode-variable-length) ;; todo: add lookup
441 (flet ((defop (name args opcode
442 &optional (pop 0) (push 0) (pop-scope 0) (push-scope 0) (local 0) (flag 0))
443 `(setf (gethash ',name *opcodes*)
444 (flet ((,name (,@(mapcar 'car args))
445 ,@(when args `((declare (ignorable ,@(mapcar 'car args)))))
446 ,@(loop with op-name = name
447 for (name type) in args
448 for interner = (third (assoc type coders))
449 when interner
450 collect `(setf ,name (,interner ,name))
451 when (eq 'ofs24 type)
452 collect (label-to-offset name op-name)
453 when (eq 'counted-ofs24 type)
454 collect (labels-to-offsets name))
455 ,@(unless (and (numberp pop) (numberp push) (= 0 pop push))
456 `((adjust-stack ,pop ,push)))
457 ,@(unless (and (numberp pop-scope) (numberp push-scope)
458 (= 0 pop-scope push-scope))
459 `((adjust-scope ,pop-scope ,push-scope)))
460 ,@(unless (and (numberp local) (zerop local))
461 `((when (and *current-method*
462 (> ,local (local-count *current-method*)))
463 (setf (local-count *current-method*) ,local))))
464 ,@(unless (and (numberp flag) (zerop flag))
465 `((when *current-method*
466 (setf (flags *current-method*)
467 (logior ,local (flags *current-method*))))))
468 ,(if (null args)
469 `(list ,opcode)
470 `(append
471 (list ,opcode)
472 ,@(loop
473 for (name type) in args
474 for encoder = (second (assoc type coders))
475 when encoder
476 collect `(,encoder ,name))))))
477 #',name)))
478 ;; fixme: gensyms
479 (defop-disasm (name args opcode &rest ignore)
480 (declare (ignore ignore))
481 `(setf (gethash ,opcode *disassemble-opcodes*)
482 (flet ((,name (sequence &key (start 0))
483 (declare (ignorable sequence start))
484 (values
485 ,(if (null args)
486 `(list ',name)
487 `(let (junk)
488 (list ',name
489 ,@(loop for (name type) in args
490 for (nil decoder lookup) = (assoc type decoders)
491 collect`(progn
492 (setf (values junk start)
493 (,decoder sequence :start start))
494 ,@(when lookup
495 `((,lookup junk))))))))
496 start)))
497 #',name
498 ))))
499 `(progn
500 ,@(loop for op in ops
501 collect (apply #'defop op)
502 collect (apply #'defop-disasm op))))))
505 (defmacro define-asm-macro (name (&rest args) &body body)
506 `(setf (gethash ',name *opcodes*)
507 (lambda (,@args)
508 ,@(if (stringp (car body))
509 (cdr body) ;; drop docstring ;TODO: store docstring somewhere?
510 body))))
512 ;;; not sure if these should be handled like this or not...
513 (define-asm-macro :%label (name)
514 (push (cons name *code-offset*) (label *current-method*))
515 (assemble `((:label))))
518 (define-asm-macro :%dlabel (name)
519 ;; !!!! if this gets moved somewhere before the peephole optimizer, make
520 ;; !!!! sure it leaves a nop of some sort in the instruction stream so we
521 ;; !!!! don't combine stuff on either side of a jump target
522 "for forward jumps, exception ranges, etc. that don't need an actual
523 jump instruction in the bytecode"
524 (push (cons name *code-offset*) (label *current-method*))
525 nil)
527 (define-asm-macro :%exception (name start end &optional (type-name 0) (var-name 0))
528 ;; !!!! if this gets moved somewhere before the peephole optimizer, make
529 ;; !!!! sure it leaves a nop of some sort in the instruction stream so we
530 ;; !!!! don't combine stuff on either side of a jump target
532 "create an exception handler block named NAME, active between the
533 labels START and END, catching objects of type TYPE-NAME (default to *)
534 using VAR-NAME as name for :new-catch slot (default to no name)"
535 (push (cons name *code-offset*) (label *current-method*))
536 ;; vm pushes thrown object onto stack, so adjust stack depth
537 (adjust-stack 0 1)
538 ;; save the exception data
539 (let ((index (length (exceptions *current-method*))))
540 (vector-push-extend
541 (make-instance 'exception-info
542 'from start
543 'to end
544 'target name
545 'exc-type (asm-intern-multiname type-name)
546 'var-name (asm-intern-multiname var-name))
547 (exceptions *current-method*)
548 (length (exceptions *current-method*)))
549 (push (cons name index)
550 (exception-names *current-method*)))
551 nil)
554 (defmacro with-assembler-context (&body body)
555 `(let ((*assembler-context* (make-instance 'assembler-context)))
556 ,@body))
558 ;;; not sure if this should be asm level or not...
559 (define-asm-macro :%array-read (index)
560 (assemble `((:push-int ,index)
561 (:get-property (:multiname-l "" "")))))