get rid of extra special forms
[swf2.git] / asm / asm.lisp
blobd0e39de486250b076ec8ee4ff0897f64c8d99684
1 (in-package :avm2-asm)
4 ;;; for now just using keywords as opcode names...
5 ;;; * possibly would be better to use an equal or equal-p hash table and look
6 ;;; up opcodes by name instead of symbol?
7 ;;; * functions have some nice properties also:
8 ;;; + arglist hints from slime autodoc
9 ;;; + backtraces show specific opcode when we get an error in a opcode
10 ;;; + assemble is a bit shorter
11 ;;; - name clashes with CL functions make it messier though, so storing
12 ;;; opcodes in a hash...
13 (defparameter *opcodes* (make-hash-table))
14 (defparameter *disassemble-opcodes* (make-hash-table))
16 (defparameter +need-args+ #x01)
17 (defparameter +need-activation+ #x02)
18 (defparameter +need-rest+ #x04)
19 (defparameter +has-optional+ #x08)
20 (defparameter +set-dxns+ #x40)
21 (defparameter +has-param-names+ #x80)
23 (defclass method-body ()
24 ((method-id :initarg method :accessor method-id)
25 (max-stack :initarg max-stack :accessor max-stack)
26 (local-count :initarg local-count :accessor local-count)
27 (init-scope-depth :initarg init-scope-depth :accessor init-scope-depth)
28 (max-scope-depth :initarg max-scope-depth :accessor max-scope-depth)
29 (code :initarg code :accessor code)
30 (exceptions :initform nil :initarg exceptions :accessor exceptions)
31 (traits :initform nil :initarg traits :accessor traits)
32 ;; temporaries for tracking values during assembly
33 (current-stack :initform 0 :accessor current-stack)
34 (current-scope :initform 2 :initarg current-scope :accessor current-scope)
35 (flags :initform 0 :accessor flags)
36 (label :initform () :accessor label)
37 (fixups :initform () :accessor fixups)))
40 (defparameter *current-method* nil)
41 (defparameter *code-offset* 0)
43 (defun assemble (forms)
44 "simple assembler, returns sequence of octets containing the
45 bytecode corresponding to forms, interns stuff as needed, or
46 optionally uses constant pool indices (with no error checking
47 currently) when operand is a list of the form (:id ###). "
48 (let ((*code-offset* 0))
49 (loop for i in (peephole forms)
50 for opcode = (gethash (car i) *opcodes*)
51 for octets = (when opcode (apply opcode (cdr i)))
52 if opcode
53 append octets
54 ;;and do (format t "assemble ~s-> ~s ofs = ~s + ~s ~%"
55 ;; i octets *code-offset* (length octets))
56 and do (incf *code-offset* (length octets))
57 else do (error "invalid opcode ~s " i))))
59 (defun assemble-method-body (forms &key (init-scope 0)
60 (max-scope 1 max-scope-p)
61 (max-stack 1 max-stack-p))
62 (let ((*current-method* (make-instance 'method-body
63 'local-count 1
64 'max-stack 1
65 'init-scope-depth init-scope
66 'max-scope-depth init-scope
67 'current-scope init-scope)))
68 (setf (code *current-method*)
69 (assemble forms))
70 (when max-stack-p
71 (setf (max-stack *current-method*) max-stack))
72 (when max-scope-p
73 (setf (max-scope-depth *current-method*) (+ init-scope max-scope)))
74 (when (fixups *current-method*)
75 ;; fix any fixups
76 (loop for (label . addr) in (fixups *current-method*)
77 for dest = (cdr (assoc label (label *current-method*)))
78 when dest
79 do (replace (code *current-method*)
80 (u24-to-sequence (- dest addr 4))
81 :start1 (+ 1 addr ))
82 ;;and do (format t "fixup ~s ~%" label)
83 else do (format t "!!!!! unknown fixup ~s !!! ~%" label)))
84 *current-method*))
87 (defun u16-to-sequence (u16)
88 (list
89 (ldb (byte 8 0) u16)
90 (ldb (byte 8 8) u16)))
92 (defun u24-to-sequence (u24)
93 (list
94 (ldb (byte 8 0) u24)
95 (ldb (byte 8 8) u24)
96 (ldb (byte 8 16) u24)))
98 (defun double-to-sequence (double)
99 (loop with d = (ieee-floats::encode-float64 double)
100 for i from 0 below 64 by 8
101 collect (ldb (byte 8 i) d)))
104 (defun counted-s24-to-sequence (seq)
105 (append
106 (variable-length-encode (length seq))
107 (mapcan 'u24-to-sequence seq)))
109 (defun count+1-s24-to-sequence (seq)
110 (append
111 (variable-length-encode (1- (length seq)))
112 (mapcan 'u24-to-sequence seq)))
114 (defun variable-length-encode (integer)
115 (loop
116 for i = integer then i2
117 for i2 = (ash i -7)
118 for b = (ldb (byte 7 0) i)
119 for done = (or (= i2 0) (= i2 -1))
120 when (not done)
121 do (setf b (logior #x80 b))
122 collect b
123 until done))
125 ;;; fixme: these should probably avoid repeated elt calls if seq is a list
126 (defun decode-u16 (sequence &key (start 0))
127 (values
128 (logior (elt sequence start)
129 (ash (elt sequence (1+ start)) 8))
130 (+ 2 start)))
132 (defun decode-u24 (sequence &key (start 0))
133 (values
134 (logior (elt sequence start)
135 (ash (elt sequence (+ 1 start)) 8)
136 (ash (elt sequence (+ 2 start)) 16))
137 (+ 3 start)))
139 (defun decode-variable-length (sequence &key (start 0))
140 (loop with sum = 0
141 for i from start
142 for offset from 0 by 7
143 for j = (elt sequence i)
144 ;;do (format t "sum = ~s, j=~s b=~s ofs=~s s2=~s~%"
145 ;; sum j (ldb (byte 7 0) j) offset
146 ;; (dpb (ldb (byte 7 0) j) (byte 7 offset) sum))
147 do (setf (ldb (byte 7 offset) sum) (ldb (byte 7 0) j))
148 while (logbitp 7 j)
149 finally (return (values sum (1+ i)))))
151 (defun decode-counted-s24 (sequence &key (start 0))
152 (multiple-value-bind (count start)
153 (decode-variable-length sequence :start start)
154 (values
155 (loop repeat (1+ count)
156 with value
157 do (setf (values value start) (decode-u24 sequence :start start))
158 collect value)
159 start)))
161 ;;; new types for automatic interning
162 ;;; (many of these probably just map to the same qname code, but
163 ;;; separating just in case)
164 ;; string-u30 int-u30 uint-u30 double-u30 namespace-q30 multiname-q30 class-u30
165 ;; fix runtime-name-count? or just set arg to index after interning
166 ;; and before calling arg count stuff?
168 ;;; todo: figure out if these need handled:
169 ;;; method-index arg for :new-function
170 ;;; slot-index for :get-slot/:set-slot/etc
171 ;;; exception-index for new-catch
173 ;(decode-u16 (u16-to-sequence 12345))
174 ;(decode-u24 (u24-to-sequence 12345))
175 ;(decode-u24 (u24-to-sequence 123456))
176 ;(decode-variable-length (variable-length-encode 1))
177 ;(decode-variable-length (variable-length-encode 127))
178 ;(decode-variable-length (variable-length-encode 128))
179 ;(decode-variable-length (variable-length-encode 256))
180 ;(decode-variable-length (variable-length-encode 12345))
181 ;(decode-variable-length (variable-length-encode 123456789))
182 ;(decode-counted-s24 (counted-s24-to-sequence '(1 2 3 4 5)))
183 ;(decode-counted-s24 (counted-s24-to-sequence '(12345 2 345678 4 5)))
184 (decode-variable-length '(#b10000010 #b1)) ; 130
185 (decode-variable-length '(#b1)) ; 1
186 (decode-variable-length '(#b10010110 #b11))
188 (defun avm2-disassemble (sequence &key (start 0))
189 (loop
190 for length = (length sequence)
191 with op = nil
192 for byte = (elt sequence start)
193 for dis = (gethash byte *disassemble-opcodes*)
194 do (format t "op=~s byte=~s start=~s cur-seq=~{ ~2,'0x~}~% dis=~s ~%"
195 op byte start (coerce
196 (subseq sequence start (min length
197 (+ start 8))) 'list) dis)
198 (finish-output)
199 do (incf start)
200 when dis
201 do (setf (values op start) (funcall dis sequence :start start))
202 and do (format t "op -> ~s start -> ~s~%" op start)
203 and collect op
204 else do (error "invalid byte ~s at ~d " byte start)
205 while (< start length)))
208 ;;; these don't actually work in general, since they don't take
209 ;;; branching into account, but simplifies things for now...
210 (defun adjust-stack (pop push)
211 (when *current-method*
212 (decf (current-stack *current-method*) pop)
213 ;;(when (< (current-stack *current-method*) 0)
214 ;; (error "assembler error : stack underflow !"))
215 (incf (current-stack *current-method*) push)
216 (when (> (current-stack *current-method*)
217 (max-stack *current-method*))
218 (setf (max-stack *current-method*)
219 (current-stack *current-method*)))))
221 (defun adjust-scope (pop push)
222 (when *current-method*
223 (decf (current-scope *current-method*) pop)
224 ;;(when (< (current-scope *current-method*) 0)
225 ;; (error "assembler error : scope underflow !"))
226 (incf (current-scope *current-method*) push)
227 (when (> (current-scope *current-method*)
228 (max-scope-depth *current-method*))
229 (setf (max-scope-depth *current-method*)
230 (current-scope *current-method*)))))
232 (macrolet
233 ((make-interner (intern-name lookup-name interner pool)
234 `(progn
235 (defun ,intern-name (value)
236 (if (typep value '(cons (eql :id)))
237 (second value)
238 (,interner value)))
239 (defun ,lookup-name (value)
240 (if *assembler-context*
241 (aref (,pool *assembler-context*) value)
242 (list :id value))))))
244 (make-interner asm-intern-string lookup-string avm2-string strings)
245 ;; fixme: avm2-intern-* can break if first thing interned is wrong type
246 (make-interner asm-intern-int lookup-int avm2-intern-int ints)
247 (make-interner asm-intern-uint lookup-uint avm2-intern-uint uints)
248 (make-interner asm-intern-double lookup-double avm2-intern-double doubles)
249 (make-interner asm-intern-namespace lookup-namespace avm2-ns-intern namespaces))
250 ;; (asm-intern-string "foo")
251 ;; (asm-intern-string '(:id 2))
252 ;; (asm-intern-string :id)
253 ;; (asm-intern-int 1232)
254 ;; (asm-intern-int '(:id 3))
255 ;; x(asm-intern-int :id) ;; should fail even if no ints interned yet, but doesn't
258 (defun symbol-to-qname-list (name &key init-cap)
259 ;; just a quick hack for now, doesn't actually try to determine if
260 ;; there is a valid property or not...
261 (let ((package (symbol-package name))
262 (sym (coerce
263 (loop
264 for prev = (if init-cap #\- #\Space) then c
265 for c across (symbol-name name)
266 when (or (not (alpha-char-p prev)) (char/= c #\-))
267 collect (if (char= prev #\-)
268 (char-upcase c)
269 (char-downcase c)))
270 'string)))
271 (if (eql package (find-package :keyword))
272 (setf package "")
273 (setf package (string-downcase (or (package-name package) ""))))
274 (values (list :qname package sym) sym)))
276 ;; fixme: not sure we want this anymore, instead store a symbol->qname
277 ;; hash in compiler-context, and use that for lookups?
278 ;;; --- still used by defun stuff, so keeping for now... not calling automatically any more though, need to actually have a valid *symbol-table*
279 (defun symbol-to-qname-old (name &key init-cap)
280 ;; just a quick hack for now, doesn't actually try to determine if
281 ;; there is a valid property or not...
282 (let ((package (symbol-package name))
283 (sym (coerce
284 (loop
285 for prev = (if init-cap #\- #\Space) then c
286 for c across (symbol-name name)
287 when (or (not (alpha-char-p prev)) (char/= c #\-))
288 collect (if (char= prev #\-)
289 (char-upcase c)
290 (char-downcase c)))
291 'string)))
292 (if (eql package (find-package :keyword))
293 (setf package "")
294 (setf package (string-downcase (or (package-name package) ""))))
295 (values (avm2-asm::qname package sym) sym)))
297 (defun asm-intern-multiname (mn)
298 (typecase mn
299 ((cons (eql :qname)) (apply 'qname (cdr mn)))
300 ((cons (eql :multiname-l)) (apply 'intern-multiname-l +multiname-l+ (cdr mn)))
301 ;; todo: add other types of multinames
302 ((cons (eql :id)) (second mn))
303 (symbol (apply 'qname (cdr (symbol-to-qname-list mn)))) ;; not sure if this is good or not, needed for calling as-yet undefined functions though...
304 (t (parsed-qname mn))))
305 ;; (asm-intern-multiname '(:qname "foo" "bar"))
306 ;; (asm-intern-multiname '(:id 321))
307 ;; (asm-intern-multiname "foo:bax")
308 ;; (asm-intern-multiname '(:qname "foo" "bax"))
309 ;; (asm-intern-multiname '(:qname "foo" "bax"))
310 ;; x(asm-intern-multiname 'cos) ;; not sure if we should support symbols or not
311 ;;(intern-multiname +multiname-l+ "" "") (elt (multinames *assembler-context*) 1)
314 (defparameter *multiname-kinds* (make-hash-table))
315 (setf (gethash +qname+ *multiname-kinds*) :qname)
316 (setf (gethash +qname-a+ *multiname-kinds*) :qname-a)
317 (setf (gethash +rt-qname+ *multiname-kinds*) :rt-qname)
318 (setf (gethash +rt-qname-a+ *multiname-kinds*) :rt-qname-a)
319 (setf (gethash +rt-qname-l+ *multiname-kinds*) :rt-qname-l)
320 (setf (gethash +rt-qname-la+ *multiname-kinds*) :rt-qname-la)
321 (setf (gethash +multiname+ *multiname-kinds*) :multiname)
322 (setf (gethash +multiname-a+ *multiname-kinds*) :multiname-a)
323 (setf (gethash +multiname-l+ *multiname-kinds*) :multiname-l)
324 (setf (gethash +multiname-la+ *multiname-kinds*) :multiname-la)
326 (defun lookup-multiname (id)
327 (if (boundp '*assembler-context*)
328 (destructuring-bind (kind ns name)
329 (elt (multinames *assembler-context*) id)
330 (list (gethash kind *multiname-kinds* kind)
331 (elt (strings *assembler-context*)
332 (second (elt (namespaces *assembler-context*) ns)))
333 (elt (strings *assembler-context*) name)))
334 (list :id id)))
336 (defun label-to-offset (name op)
337 (let ((dest (gensym "DEST-"))
338 (here (gensym "HERE-"))
339 (ofs (if (eq op :lookup-switch) 0 4)))
340 `(when (symbolp ,name)
341 (let ((,dest (cdr (assoc ,name (label *current-method*))))
342 (,here *code-offset*))
343 (unless ,dest
344 (push (cons ,name ,here) (fixups *current-method*))
345 (setf ,dest (+ 4 ,here)))
346 (setf ,name (- ,dest ,here ,ofs))))))
348 (defun labels-to-offsets (name)
349 (let ((dest (gensym "DEST-"))
350 (here (gensym "HERE-"))
351 (i (gensym "I-"))
352 (j (gensym "J-")))
353 `(setf ,name
354 (loop with ,here = *code-offset*
355 for ,i in ,name
356 for ,j from 4 by 4
357 when (symbolp ,i)
358 collect
359 (let ((,dest (cdr (assoc ,i (label *current-method*)))))
360 (unless ,dest
361 (push (cons ,i ,j) (fixups *current-method*))
362 (setf ,dest ,i))
363 (- ,dest ,here 0))
364 else collect ,i
365 ))))
367 (defmacro define-ops (&body ops)
368 (let ((coders
369 ;; type tag , encoder , optional interner
370 `((u8 list)
371 (u16 u16-to-sequence)
372 (u24 u24-to-sequence)
373 (s24 u24-to-sequence)
374 (ofs24 u24-to-sequence) ;; for using labels directly in branches
375 (u30 variable-length-encode)
376 (q30 variable-length-encode) ;; hack for name interning
377 (u32 variable-length-encode)
378 (s32 variable-length-encode)
379 (double double-to-sequence)
380 (counted-s24 counted-s24-to-sequence)
381 (counted-ofs24 count+1-s24-to-sequence)
383 (string-u30 variable-length-encode asm-intern-string)
384 (int-u30 variable-length-encode asm-intern-int)
385 (uint-u30 variable-length-encode asm-intern-uint)
386 (double-u30 variable-length-encode asm-intern-double)
387 (namespace-q30 variable-length-encode asm-intern-namespace)
388 (multiname-q30 variable-length-encode asm-intern-multiname)
389 (class-u30 variable-length-encode asm-intern-class)
391 (decoders
392 ;; type tag, decoder, optional constant pool lookup function
393 `((u8 (lambda (s &key (start 0)) (elt s start)))
394 (u16 decode-u16)
395 (u24 decode-u24)
396 (s24 decode-u24)
397 (ofs24 decode-u24) ;; for using labels directly in branches
398 (u30 decode-variable-length)
399 (q30 decode-variable-length) ;; hack for name interning
400 (u32 decode-variable-length)
401 (s32 decode-variable-length)
402 (double (lambda (s) (error "not done")))
403 (counted-s24 decode-counted-s24)
404 (counted-ofs24 decode-counted-s24) ;; array of ofs24 in lookupswitch
406 (string-u30 decode-variable-length lookup-string)
407 (int-u30 decode-variable-length lookup-int)
408 (uint-u30 decode-variable-length lookup-uint)
409 (double-u30 decode-variable-length lookup-double)
410 (namespace-q30 decode-variable-length lookup-namespace)
411 (multiname-q30 decode-variable-length lookup-multiname)
412 (class-u30 decode-variable-length lookup-class)
414 (flet ((defop (name args opcode
415 &optional (pop 0) (push 0) (pop-scope 0) (push-scope 0) (local 0) (flag 0))
416 `(setf (gethash ',name *opcodes*)
417 (lambda (,@(mapcar 'car args) ;;&aux (#:debug-name ',name)
419 ,@(when args `((declare (ignorable ,@(mapcar 'car args)))))
420 ;;(format t "assemble ~a ~%" ',name)
421 ,@(loop with op-name = name
422 for (name type) in args
423 for interner = (third (assoc type coders))
424 when interner
425 collect `(setf ,name (,interner ,name))
426 ;;when (eq 'q30 type)
427 ;;collect `(when (and (consp ,name)
428 ;; (eql 'qname (car ,name)))
429 ;; (setf ,name (apply 'qname (rest ,name))))
430 when (eq 'ofs24 type)
431 collect (label-to-offset name op-name)
432 when (eq 'counted-ofs24 type)
433 collect (labels-to-offsets name))
434 ,@(unless (and (numberp pop) (numberp push) (= 0 pop push))
435 `((adjust-stack ,pop ,push)))
436 ,@(unless (and (numberp pop-scope) (numberp push-scope)
437 (= 0 pop-scope push-scope))
438 `((adjust-scope ,pop-scope ,push-scope)))
439 ,@(unless (and (numberp local) (zerop local))
440 `((when (and *current-method*
441 (> ,local (local-count *current-method*)))
442 (setf (local-count *current-method*) ,local))))
443 ,@(unless (and (numberp flag) (zerop flag))
444 `((when *current-method*
445 (setf (flags *current-method*)
446 (logior ,local (flags *current-method*))))))
447 ,(if (null args)
448 `(list ,opcode)
449 `(append
450 (list ,opcode)
451 ,@(loop
452 for (name type) in args
453 for encoder = (second (assoc type coders))
454 when encoder
455 collect `(,encoder ,name)))))))
456 ;; fixme: gensyms
457 (defop-disasm (name args opcode &rest ignore)
458 (declare (ignore ignore))
459 `(setf (gethash ,opcode *disassemble-opcodes*)
460 (lambda (sequence &key (start 0);; &aux (#:debug-name ',name)
462 (declare (ignorable sequence start))
463 (values
464 ,(if (null args)
465 `(list ',name)
466 `(let (junk)
467 ;;(declare (ignore junk))
468 (list ',name
469 ,@(loop for (name type) in args
470 for (nil decoder lookup) = (assoc type decoders)
471 collect`(progn
472 (setf (values junk start)
473 (,decoder sequence :start start))
474 ,@(when lookup
475 `((,lookup junk))))))))
476 start)))))
477 `(progn
478 ,@(loop for op in ops
479 collect (apply #'defop op)
480 collect (apply #'defop-disasm op))))))
483 (defmacro define-asm-macro (name (&rest args) &body body)
484 `(setf (gethash ',name *opcodes*)
485 (lambda (,@args)
486 ,@body)))
488 ;;; not sure if these should be handled like this or not...
489 (define-asm-macro :%label (name)
490 (push (cons name *code-offset*) (label *current-method*))
491 (assemble `((:label))))
494 (define-asm-macro :%dlabel (name)
495 ;; !!!! if this gets moved somewhere before the peephole optimizer, make
496 ;; !!!! sure it leaves a nop of some sort in the instruction stream so we
497 ;; !!!! don't combine stuff on either side of a jump target
498 ;; for forward jumps, just mark the location but don't put a label instr
499 (push (cons name *code-offset*) (label *current-method*))
500 nil)
503 (defmacro with-assembler-context (&body body)
504 `(let ((*assembler-context* (make-instance 'assembler-context)))
505 ,@body))
507 ;;; not sure if this should be asm level or not...
508 (define-asm-macro :%array-read (index)
509 (assemble `((:push-int ,index)
510 (:get-property (:multiname-l "" "")))))