1 ------------------------------------------------------------------------------
2 -- DynASM x86/x64 module.
4 -- Copyright (C) 2005-2011 Mike Pall. All rights reserved.
5 -- See dynasm.lua for full copyright notice.
6 ------------------------------------------------------------------------------
10 -- Module information:
12 arch
= x64
and "x64" or "x86",
13 description
= "DynASM x86/x64 module",
16 release
= "2011-05-05",
21 -- Exported glue functions for the arch-specific module.
22 local _M
= { _info
= _info
}
24 -- Cache library functions.
25 local type, tonumber, pairs
, ipairs
= type, tonumber, pairs
, ipairs
26 local assert, unpack
, setmetatable
= assert, unpack
, setmetatable
28 local sub
, format, byte
, char
= _s
.sub
, _s
.format, _s
.byte
, _s
.char
29 local find
, match
, gmatch
, gsub = _s
.find
, _s
.match
, _s
.gmatch
, _s
.gsub
30 local concat
, sort = table.concat
, table.sort
32 -- Inherited tables and callbacks.
34 local wline
, werror
, wfatal
, wwarn
37 -- CHECK: Keep this in sync with the C code!
38 local action_names
= {
39 -- int arg, 1 buffer pos:
40 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
41 -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
42 "VREG", "SPACE", -- !x64: VREG support NYI.
43 -- ptrdiff_t arg, 1 buffer pos (address): !x64
45 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
47 -- action arg (1 byte) or int arg, 1 buffer pos (link):
49 -- action arg (1 byte) or int arg, 1 buffer pos (offset):
50 "LABEL_LG", "LABEL_PC",
51 -- action arg (1 byte), 1 buffer pos (offset):
53 -- action args (2 bytes), no buffer pos.
55 -- action arg (1 byte), no buffer pos.
57 -- no action arg, no buffer pos.
59 -- action arg (1 byte), no buffer pos, terminal action:
61 -- no args, no buffer pos, terminal action:
65 -- Maximum number of section buffer positions for dasm_put().
66 -- CHECK: Keep this in sync with the C code!
67 local maxsecpos
= 25 -- Keep this low, to avoid excessively long C lines.
69 -- Action name -> action number (dynamically generated below).
71 -- First action number. Everything below does not need to be escaped.
72 local actfirst
= 256-#action_names
74 -- Action list buffer and string (only used to remove dupes).
78 -- Argument list for next dasm_put(). Start with offset 0 into action list.
81 -- Current number of section buffer positions for dasm_put().
84 ------------------------------------------------------------------------------
-- Assign consecutive action numbers to the action names, in list order.
-- The first name gets actfirst, the last one ends up at 255.
for idx = 1, #action_names do
  map_action[action_names[idx]] = actfirst + (idx - 1)
end
92 -- Dump action names and numbers.
93 local function dumpactions(out
)
94 out
:write("DynASM encoding engine action codes:\n")
95 for n
,name
in ipairs(action_names
) do
96 local num
= map_action
[name
]
97 out
:write(format(" %-10s %02X %d\n", name
, num
, num
))
102 -- Write action list buffer as a huge static C array.
103 local function writeactions(out
, name
)
105 local last
= actlist
[nn
] or 255
106 actlist
[nn
] = nil -- Remove last byte.
107 if nn
== 0 then nn
= 1 end
108 out
:write("static const unsigned char ", name
, "[", nn
, "] = {\n")
110 for n
,b
in ipairs(actlist
) do
113 assert(out
:write(s
, "\n"))
117 out
:write(s
, last
, "\n};\n\n") -- Add last byte back.
120 ------------------------------------------------------------------------------
-- Append one raw byte to the action list.
-- n must be an integral number in the range 0..255, otherwise the
-- assertion fails with "byte out of range". No escaping is done here.
local function wputxb(n)
  local is_byte = n >= 0 and n <= 255 and n % 1 == 0
  assert(is_byte, "byte out of range")
  local al = actlist
  al[#al + 1] = n
end
-- Append the action code for an action name to the action list.
--   action: action name; must be a key of map_action.
--   a:      optional argument, queued for the next dasm_put() call.
--   num:    optional number of buffer positions to account for.
-- The buffer position counter only advances if a or num is given;
-- it advances by num, or by 1 if num is omitted.
local function waction(action, a, num)
  local code = map_action[action]
  assert(code, "bad action name `"..action.."'")
  wputxb(code)
  if a then
    local args = actargs
    args[#args + 1] = a
  end
  if a or num then
    secpos = secpos + (num or 1)
  end
end
-- Emit a call to one of the embedded DynASM C functions.
--   func: function name suffix, i.e. "put" produces dasm_put(...).
--   args: list of C argument strings, joined with ", " after Dst.
-- NOTE(review): the meaning of the second argument (true) passed to
-- wline() is not visible in this part of the file -- confirm in dynasm.lua.
local function wcall(func, args)
  local arglist = concat(args, ", ")
  local stmt = format("dasm_%s(Dst, %s);", func, arglist)
  wline(stmt, true)
end
140 -- Delete duplicate action list chunks. A tad slow, but so what.
141 local function dedupechunk(offset
)
142 local al
, as
= actlist
, actstr
143 local chunk
= char(unpack(al
, offset
+1, #al
))
144 local orig
= find(as
, chunk
, 1, true)
146 actargs
[1] = orig
-1 -- Replace with original offset.
147 for i
=offset
+1,#al
do al
[i
] = nil end -- Kill dupe.
153 -- Flush action list (intervening C code or buffer pos overflow).
154 local function wflush(term
)
155 local offset
= actargs
[1]
156 if #actlist
== offset
then return end -- Nothing to flush.
157 if not term
then waction("STOP") end -- Terminate action list.
159 wcall("put", actargs
) -- Add call to dasm_put().
160 actargs
= { #actlist
} -- Actionlist offset is 1st arg to next dasm_put().
161 secpos
= 1 -- The actionlist offset occupies a buffer position, too.
165 local function wputb(n
)
166 if n
>= actfirst
then waction("ESC") end -- Need to escape byte.
170 ------------------------------------------------------------------------------
172 -- Global label name -> global label number. With auto assignment on 1st use.
173 local next_global
= 10
174 local map_global
= setmetatable({}, { __index
= function(t
, name
)
175 if not match(name
, "^[%a_][%w_@]*$") then werror("bad global label") end
176 local n
= next_global
177 if n
> 246 then werror("too many global labels") end
183 -- Dump global labels.
184 local function dumpglobals(out
, lvl
)
186 for name
, n
in pairs(map_global
) do t
[n
] = name
end
187 out
:write("Global labels:\n")
188 for i
=10,next_global
-1 do
189 out
:write(format(" %s\n", t
[i
]))
194 -- Write global label enum.
195 local function writeglobals(out
, prefix
)
197 for name
, n
in pairs(map_global
) do t
[n
] = name
end
198 out
:write("enum {\n")
199 for i
=10,next_global
-1 do
200 out
:write(" ", prefix
, gsub(t
[i
], "@.*", ""), ",\n")
202 out
:write(" ", prefix
, "_MAX\n};\n")
205 -- Write global label names.
206 local function writeglobalnames(out
, name
)
208 for name
, n
in pairs(map_global
) do t
[n
] = name
end
209 out
:write("static const char *const ", name
, "[] = {\n")
210 for i
=10,next_global
-1 do
211 out
:write(" \"", t
[i
], "\",\n")
213 out
:write(" (const char *)0\n};\n")
216 ------------------------------------------------------------------------------
218 -- Extern label name -> extern label number. With auto assignment on 1st use.
219 local next_extern
= -1
220 local map_extern
= setmetatable({}, { __index
= function(t
, name
)
221 -- No restrictions on the name for now.
222 local n
= next_extern
223 if n
< -256 then werror("too many extern labels") end
229 -- Dump extern labels.
230 local function dumpexterns(out
, lvl
)
232 for name
, n
in pairs(map_extern
) do t
[-n
] = name
end
233 out
:write("Extern labels:\n")
234 for i
=1,-next_extern
-1 do
235 out
:write(format(" %s\n", t
[i
]))
240 -- Write extern label names.
241 local function writeexternnames(out
, name
)
243 for name
, n
in pairs(map_extern
) do t
[-n
] = name
end
244 out
:write("static const char *const ", name
, "[] = {\n")
245 for i
=1,-next_extern
-1 do
246 out
:write(" \"", t
[i
], "\",\n")
248 out
:write(" (const char *)0\n};\n")
251 ------------------------------------------------------------------------------
253 -- Arch-specific maps.
254 local map_archdef
= {} -- Ext. register name -> int. name.
255 local map_reg_rev
= {} -- Int. register name -> ext. name.
256 local map_reg_num
= {} -- Int. register name -> register number.
257 local map_reg_opsize
= {} -- Int. register name -> operand size.
258 local map_reg_valid_base
= {} -- Int. register name -> valid base register?
259 local map_reg_valid_index
= {} -- Int. register name -> valid index register?
260 local map_reg_needrex
= {} -- Int. register name -> need rex vs. no rex.
261 local reg_list
= {} -- Canonical list of int. register names.
263 local map_type
= {} -- Type name -> { ctype, reg }
264 local ctypenum
= 0 -- Type number (for _PTx macros).
266 local addrsize
= x64
and "q" or "d" -- Size for address operands.
268 -- Helper functions to fill register maps.
269 local function mkrmap(sz
, cl
, names
)
270 local cname
= format("@%s", sz
)
271 reg_list
[#reg_list
+1] = cname
272 map_archdef
[cl
] = cname
273 map_reg_rev
[cname
] = cl
274 map_reg_num
[cname
] = -1
275 map_reg_opsize
[cname
] = sz
276 if sz
== addrsize
or sz
== "d" then
277 map_reg_valid_base
[cname
] = true
278 map_reg_valid_index
[cname
] = true
281 for n
,name
in ipairs(names
) do
282 local iname
= format("@%s%x", sz
, n
-1)
283 reg_list
[#reg_list
+1] = iname
284 map_archdef
[name
] = iname
285 map_reg_rev
[iname
] = name
286 map_reg_num
[iname
] = n
-1
287 map_reg_opsize
[iname
] = sz
288 if sz
== "b" and n
> 4 then map_reg_needrex
[iname
] = false end
289 if sz
== addrsize
or sz
== "d" then
290 map_reg_valid_base
[iname
] = true
291 map_reg_valid_index
[iname
] = true
295 for i
=0,(x64
and sz
~= "f") and 15 or 7 do
296 local needrex
= sz
== "b" and i
> 3
297 local iname
= format("@%s%x%s", sz
, i
, needrex
and "R" or "")
298 if needrex
then map_reg_needrex
[iname
] = true end
300 if sz
== "o" then name
= format("xmm%d", i
)
301 elseif sz
== "f" then name
= format("st%d", i
)
302 else name
= format("r%d%s", i
, sz
== addrsize
and "" or sz
) end
303 map_archdef
[name
] = iname
304 if not map_reg_rev
[iname
] then
305 reg_list
[#reg_list
+1] = iname
306 map_reg_rev
[iname
] = name
307 map_reg_num
[iname
] = i
308 map_reg_opsize
[iname
] = sz
309 if sz
== addrsize
or sz
== "d" then
310 map_reg_valid_base
[iname
] = true
311 map_reg_valid_index
[iname
] = true
315 reg_list
[#reg_list
+1] = ""
318 -- Integer registers (qword, dword, word and byte sized).
320 mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
322 mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
323 mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
324 mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
325 map_reg_valid_index
[map_archdef
.esp
] = false
326 if x64
then map_reg_valid_index
[map_archdef
.rsp
] = false end
327 map_archdef
["Ra"] = "@"..addrsize
329 -- FP registers (internally tword sized, but use "f" as operand size).
332 -- SSE registers (oword sized, but qword and dword accessible).
335 -- Operand size prefixes to codes.
337 byte
= "b", word
= "w", dword
= "d", qword
= "q", oword
= "o", tword
= "t",
341 -- Operand size code to number.
342 local map_opsizenum
= {
343 b
= 1, w
= 2, d
= 4, q
= 8, o
= 16, t
= 10,
346 -- Operand size code to name.
347 local map_opsizename
= {
348 b
= "byte", w
= "word", d
= "dword", q
= "qword", o
= "oword", t
= "tword",
352 -- Valid index register scale factors.
354 ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3,
359 o
= 0, no
= 1, b
= 2, nb
= 3, e
= 4, ne
= 5, be
= 6, nbe
= 7,
360 s
= 8, ns
= 9, p
= 10, np
= 11, l
= 12, nl
= 13, le
= 14, nle
= 15,
361 c
= 2, nae
= 2, nc
= 3, ae
= 3, z
= 4, nz
= 5, na
= 6, a
= 7,
362 pe
= 10, po
= 11, nge
= 12, ge
= 13, ng
= 14, g
= 15,
-- Reverse defines for registers: substitute every "@" followed by
-- alphanumerics in s with its entry in map_reg_rev. Matches without an
-- entry are left as they are (gsub semantics for a table replacement).
-- Returns the resulting string plus the number of matches, as gsub does.
function _M.revdef(s)
  local text, nmatch = gsub(s, "@%w+", map_reg_rev)
  return text, nmatch
end
371 -- Dump register names and numbers
372 local function dumpregs(out
)
373 out
:write("Register names, sizes and internal numbers:\n")
374 for _
,reg
in ipairs(reg_list
) do
378 local name
= map_reg_rev
[reg
]
379 local num
= map_reg_num
[reg
]
380 local opsize
= map_opsizename
[map_reg_opsize
[reg]]
381 out
:write(format(" %-5s %-8s %s\n", name
, opsize
,
382 num
< 0 and "(variable)" or num
))
387 ------------------------------------------------------------------------------
389 -- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
390 local function wputlabel(aprefix
, imm
, num
)
391 if type(imm
) == "number" then
394 wputxb(aprefix
== "IMM_" and 0 or 1)
397 waction(aprefix
.."LG", nil, num
);
401 waction(aprefix
.."PC", imm
, num
)
405 -- Put signed byte or arg.
406 local function wputsbarg(n
)
407 if type(n
) == "number" then
408 if n
< -128 or n
> 127 then
409 werror("signed immediate byte out of range")
411 if n
< 0 then n
= n
+ 256 end
413 else waction("IMM_S", n
) end
416 -- Put unsigned byte or arg.
417 local function wputbarg(n
)
418 if type(n
) == "number" then
419 if n
< 0 or n
> 255 then
420 werror("unsigned immediate byte out of range")
423 else waction("IMM_B", n
) end
-- Put unsigned word or arg.
-- A number is range-checked against 0..65535 and written as two bytes,
-- low byte first. Anything else is passed on as the argument of an
-- IMM_W action.
local function wputwarg(n)
  if type(n) ~= "number" then
    waction("IMM_W", n)
    return
  end
  if n < 0 or n > 65535 then
    werror("unsigned immediate word out of range")
  end
  local lo = n % 256
  local hi = (n - lo) / 256
  wputb(lo)
  wputb(hi)
end
436 -- Put signed or unsigned dword or arg.
437 local function wputdarg(n
)
439 if tn
== "number" then
440 if n
< 0 then n
= n
+ 4294967296 end
441 local r
= n
%256; n
= (n
-r
)/256; wputb(r
);
442 r
= n
%256; n
= (n
-r
)/256; wputb(r
);
443 r
= n
%256; n
= (n
-r
)/256; wputb(r
); wputb(n
);
444 elseif tn
== "table" then
445 wputlabel("IMM_", n
[1], 1)
-- Put operand-size dependent number or arg.
--   sz: operand size code. "w", "b" and "s" select the word, unsigned
--       byte and signed byte writers. "d", "q" or no size at all fall
--       back to the dword writer. Any other code is reported via werror().
--   n:  number or arg, handed through to the selected writer.
local function wputszarg(sz, n)
  if sz == "w" then
    wputwarg(n)
  elseif sz == "b" then
    wputbarg(n)
  elseif sz == "s" then
    wputsbarg(n)
  elseif sz == "d" or sz == "q" or not sz then
    wputdarg(n)
  else
    werror("bad operand size")
  end
end
460 -- Put multi-byte opcode with operand-size dependent modifications.
461 local function wputop(sz
, op
, rex
)
463 if rex
~= 0 and not x64
then werror("bad operand size") end
464 if sz
== "w" then wputb(102) end
465 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
466 if op
>= 4294967296 then r
= op
%4294967296 wputb((op
-r
)/4294967296) op
= r
end
467 if op
>= 16777216 then r
= op
% 16777216 wputb((op
-r
) / 16777216) op
= r
end
470 local opc3
= op
- op
% 256
471 if opc3
== 0x0f3a00 or opc3
== 0x0f3800 then
472 wputb(64 + rex
% 16); rex
= 0
475 r
= op
% 65536 wputb((op
-r
) / 65536) op
= r
479 local b
= (op
-r
) / 256
480 if b
== 15 and rex
~= 0 then wputb(64 + rex
% 16); rex
= 0 end
484 if rex
~= 0 then wputb(64 + rex
% 16) end
485 if sz
== "b" then op
= op
- 1 end
-- Put ModRM or SIB formatted byte.
--   m:  value for the top two bits, must be below 4.
--   s:  value for the middle three bits, must be below 16.
--   rm: value for the low three bits, must be below 16.
-- Only the low three bits of s and rm end up in the byte (both are
-- reduced modulo 8). The vs and vrm parameters are accepted, but are
-- not used by this function.
local function wputmodrm(m, s, rm, vs, vrm)
  assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
  local top = 64 * m
  local mid = 8 * (s % 8)
  local low = rm % 8
  wputb(top + mid + low)
end
495 -- Put ModRM/SIB plus optional displacement.
496 local function wputmrmsib(t
, imark
, s
, vsreg
)
498 local reg
, xreg
= t
.reg
, t
.xreg
499 if reg
and reg
< 0 then reg
= 0; vreg
= t
.vreg
end
500 if xreg
and xreg
< 0 then xreg
= 0; vxreg
= t
.vxreg
end
501 if s
< 0 then s
= 0 end
504 if sub(t
.mode
, 1, 1) == "r" then
506 if vsreg
then waction("VREG", vsreg
); wputxb(2) end
507 if vreg
then waction("VREG", vreg
); wputxb(0) end
512 local tdisp
= type(disp
)
517 -- Indexed mode with index register only.
518 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
520 if imark
== "I" then waction("MARK") end
521 if vsreg
then waction("VREG", vsreg
); wputxb(2) end
522 wputmodrm(t
.xsc
, xreg
, 5)
523 if vxreg
then waction("VREG", vxreg
); wputxb(3) end
525 -- Pure 32 bit displacement.
526 if x64
and tdisp
~= "table" then
527 wputmodrm(0, s
, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
528 if imark
== "I" then waction("MARK") end
532 wputmodrm(0, s
, 5) -- [disp|rip-label] -> (0, s, ebp)
533 if imark
== "I" then waction("MARK") end
535 if vsreg
then waction("VREG", vsreg
); wputxb(2) end
537 if riprel
then -- Emit rip-relative displacement.
538 if match("UWSiI", imark
) then
539 werror("NYI: rip-relative displacement followed by immediate")
541 -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
542 wputlabel("REL_", disp
[1], 2)
550 if tdisp
== "number" then -- Check displacement size at assembly time.
551 if disp
== 0 and (reg
%8) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
552 if not vreg
then m
= 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
553 elseif disp
>= -128 and disp
<= 127 then m
= 1
555 elseif tdisp
== "table" then
559 -- Index register present or esp as base register: need SIB encoding.
560 if xreg
or (reg
%8) == 4 then
561 wputmodrm(m
or 2, s
, 4) -- ModRM.
562 if m
== nil or imark
== "I" then waction("MARK") end
563 if vsreg
then waction("VREG", vsreg
); wputxb(2) end
564 wputmodrm(t
.xsc
or 0, xreg
or 4, reg
) -- SIB.
565 if vxreg
then waction("VREG", vxreg
); wputxb(3) end
566 if vreg
then waction("VREG", vreg
); wputxb(1) end
568 wputmodrm(m
or 2, s
, reg
) -- ModRM.
569 if (imark
== "I" and (m
== 1 or m
== 2)) or
570 (m
== nil and (vsreg
or vreg
)) then waction("MARK") end
571 if vsreg
then waction("VREG", vsreg
); wputxb(2) end
572 if vreg
then waction("VREG", vreg
); wputxb(1) end
576 if m
== 1 then wputsbarg(disp
)
577 elseif m
== 2 then wputdarg(disp
)
578 elseif m
== nil then waction("DISP", disp
) end
581 ------------------------------------------------------------------------------
583 -- Return human-readable operand mode string.
584 local function opmodestr(op
, args
)
588 m
[#m
+1] = sub(a
.mode
, 1, 1)..(a
.opsize
or "?")
590 return op
.." "..concat(m
, ",")
593 -- Convert number to valid integer or nil.
594 local function toint(expr
)
595 local n
= tonumber(expr
)
597 if n
% 1 ~= 0 or n
< -2147483648 or n
> 4294967295 then
598 werror("bad integer number `"..expr
.."'")
604 -- Parse immediate expression.
605 local function immexpr(expr
)
607 if sub(expr
, 1, 1) == "&" then
608 return "iPJ", format("(ptrdiff_t)(%s)", sub(expr
,2))
611 local prefix
= sub(expr
, 1, 2)
612 -- =>expr (pc label reference)
613 if prefix
== "=>" then
614 return "iJ", sub(expr
, 3)
616 -- ->name (global label reference)
617 if prefix
== "->" then
618 return "iJ", map_global
[sub(expr
, 3)]
621 -- [<>][1-9] (local label reference)
622 local dir
, lnum
= match(expr
, "^([<>])([1-9])$")
623 if dir
then -- Fwd: 247-255, Bkwd: 1-9.
624 return "iJ", lnum
+ (dir
== ">" and 246 or 0)
627 local extname
= match(expr
, "^extern%s+(%S+)$")
629 return "iJ", map_extern
[extname
]
632 -- expr (interpreted as immediate)
636 -- Parse displacement expression: +-num, +-expr, +-opsize*num
637 local function dispexpr(expr
)
638 local disp
= expr
== "" and 0 or toint(expr
)
639 if disp
then return disp
end
640 local c
, dispt
= match(expr
, "^([+-])%s*(.+)$")
644 werror("bad displacement expression `"..expr
.."'")
646 local opsize
, tailops
= match(dispt
, "^(%w+)%s*%*%s*(.+)$")
647 local ops
, imm
= map_opsize
[opsize
], toint(tailops
)
649 if c
== "-" then imm
= -imm
end
650 return imm
*map_opsizenum
[ops
]
652 local mode
, iexpr
= immexpr(dispt
)
654 if c
== "-" then werror("cannot invert label reference") end
657 return expr
-- Need to return original signed expression.
660 -- Parse register or type expression.
661 local function rtexpr(expr
)
662 if not expr
then return end
663 local tname
, ovreg
= match(expr
, "^([%w_]+):(@[%w_]+)$")
664 local tp
= map_type
[tname
or expr
]
666 local reg
= ovreg
or tp
.reg
667 local rnum
= map_reg_num
[reg
]
669 werror("type `"..(tname
or expr
).."' needs a register override")
671 if not map_reg_valid_base
[reg
] then
672 werror("bad base register override `"..(map_reg_rev
[reg
] or reg
).."'")
676 return expr
, map_reg_num
[expr
]
679 -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
680 local function parseoperand(param
)
684 local opsize
, tailops
= match(param
, "^(%w+)%s*(.+)$")
686 t
.opsize
= map_opsize
[opsize
]
687 if t
.opsize
then expr
= tailops
end
690 local br
= match(expr
, "^%[%s*(.-)%s*%]$")
698 t
.mode
= x64
and "xm" or "xmO"
704 local reg
, tailr
= match(br
, "^([@%w_:]+)%s*(.*)$")
705 reg
, t
.reg
, tp
= rtexpr(reg
)
708 t
.mode
= x64
and "xm" or "xmO"
709 t
.disp
= dispexpr("+"..br
)
714 t
.vreg
, tailr
= match(tailr
, "^(%b())(.*)$")
715 if not t
.vreg
then werror("bad variable register expression") end
718 -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
719 local xsc
, tailsc
= match(tailr
, "^%*%s*([1248])%s*(.*)$")
721 if not map_reg_valid_index
[reg
] then
722 werror("bad index register `"..map_reg_rev
[reg
].."'")
729 t
.disp
= dispexpr(tailsc
)
732 if not map_reg_valid_base
[reg
] then
733 werror("bad base register `"..map_reg_rev
[reg
].."'")
736 -- [reg] or [reg+-disp]
737 t
.disp
= toint(tailr
) or (tailr
== "" and 0)
738 if t
.disp
then break end
741 local xreg
, tailx
= match(tailr
, "^+%s*([@%w_:]+)%s*(.*)$")
742 xreg
, t
.xreg
, tp
= rtexpr(xreg
)
745 t
.disp
= dispexpr(tailr
)
748 if not map_reg_valid_index
[xreg
] then
749 werror("bad index register `"..map_reg_rev
[xreg
].."'")
753 t
.vxreg
, tailx
= match(tailx
, "^(%b())(.*)$")
754 if not t
.vxreg
then werror("bad variable register expression") end
758 local xsc
, tailsc
= match(tailx
, "^%*%s*([1248])%s*(.*)$")
764 -- [...] or [...+-disp] or [...+-expr]
765 t
.disp
= dispexpr(tailx
)
768 local imm
= toint(expr
)
769 if not imm
and sub(expr
, 1, 1) == "*" and t
.opsize
then
770 imm
= toint(sub(expr
, 2))
772 imm
= imm
* map_opsizenum
[t
.opsize
]
777 if t
.opsize
then werror("bad operand size override") end
779 if imm
== 1 then m
= m
.."1" end
780 if imm
>= 4294967168 and imm
<= 4294967295 then imm
= imm
-4294967296 end
781 if imm
>= -128 and imm
<= 127 then m
= m
.."S" end
788 local reg
, tailr
= match(expr
, "^([@%w_:]+)%s*(.*)$")
789 reg
, t
.reg
, tp
= rtexpr(reg
)
792 t
.vreg
, tailr
= match(tailr
, "^(%b())(.*)$")
793 if not t
.vreg
then werror("bad variable register expression") end
797 if t
.opsize
then werror("bad operand size override") end
798 t
.opsize
= map_reg_opsize
[reg
]
799 if t
.opsize
== "f" then
800 t
.mode
= t
.reg
== 0 and "fF" or "f"
802 if reg
== "@w4" or (x64
and reg
== "@d4") then
803 wwarn("bad idea, try again with `"..(x64
and "rsp'" or "esp'"))
805 t
.mode
= t
.reg
== 0 and "rmR" or (reg
== "@b1" and "rmC" or "rm")
807 t
.needrex
= map_reg_needrex
[reg
]
811 -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
812 if not tp
then werror("bad operand `"..param
.."'") end
814 t
.disp
= format(tp
.ctypefmt
, tailr
)
816 t
.mode
, t
.imm
= immexpr(expr
)
817 if sub(t
.mode
, -1) == "J" then
818 if t
.opsize
and t
.opsize
~= addrsize
then
819 werror("bad operand size override")
829 ------------------------------------------------------------------------------
830 -- x86 Template String Description
831 -- ===============================
833 -- Each template string is a list of [match:]pattern pairs,
834 -- separated by "|". The first match wins. No match means a
835 -- bad or unsupported combination of operand modes or sizes.
837 -- The match part and the ":" is omitted if the operation has
838 -- no operands. Otherwise the first N characters are matched
839 -- against the mode strings of each of the N operands.
841 -- The mode string for each operand type is (see parseoperand()):
842 -- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
843 -- FP register: "f", +"F" for st0
844 -- Index operand: "xm", +"O" for [disp] (pure offset)
845 -- Immediate: "i", +"S" for signed 8 bit, +"1" for 1,
846 -- +"I" for arg, +"P" for pointer
847 -- Any: +"J" for valid jump targets
849 -- So a match character "m" (mixed) matches both an integer register
850 -- and an index operand (to be encoded with the ModRM/SIB scheme).
851 -- But "r" matches only a register and "x" only an index operand
852 -- (e.g. for FP memory access operations).
854 -- The operand size match string starts right after the mode match
855 -- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
856 -- The effective data size of the operation is matched against this list.
858 -- If only the regular "b", "w", "d", "q", "t" operand sizes are
859 -- present, then all operands must be the same size. Unspecified sizes
860 -- are ignored, but at least one operand must have a size or the pattern
861 -- won't match (use the "byte", "word", "dword", "qword", "tword"
862 -- operand size overrides. E.g.: mov dword [eax], 1).
864 -- If the list has a "1" or "2" prefix, the operand size is taken
865 -- from the respective operand and any other operand sizes are ignored.
866 -- If the list contains only ".", all operand sizes are ignored.
867 -- If the list has a "/" prefix, the concatenated (mixed) operand sizes
868 -- are compared to the match.
870 -- E.g. "rrdw" matches for either two dword registers or two word
871 -- registers. "Fx2dq" matches an st0 operand plus an index operand
872 -- pointing to a dword (float) or qword (double).
874 -- Every character after the ":" is part of the pattern string:
875 -- Hex chars are accumulated to form the opcode (left to right).
876 -- "n" disables the standard opcode mods
877 -- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
879 -- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode.
880 -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
881 -- The spare 3 bits are either filled with the last hex digit or
882 -- the result from a previous "r"/"R". The opcode is restored.
884 -- All of the following characters force a flush of the opcode:
885 -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
886 -- "S" stores a signed 8 bit immediate from the last operand.
887 -- "U" stores an unsigned 8 bit immediate from the last operand.
888 -- "W" stores an unsigned 16 bit immediate from the last operand.
889 -- "i" stores an operand sized immediate from the last operand.
890 -- "I" ditto, but generates an action code to optionally modify
891 -- the opcode (+2) for a signed 8 bit immediate.
892 -- "J" generates one of the REL action codes from the last operand.
894 ------------------------------------------------------------------------------
896 -- Template strings for x86 instructions. Ordered by first opcode byte.
897 -- Unimplemented opcodes (deliberate omissions) are marked with *.
904 -- 0F: two byte opcode prefix
923 inc_1
= x64
and "m:FF0m" or "rdw:40r|m:FF0m",
924 dec_1
= x64
and "m:FF1m" or "rdw:48r|m:FF1m",
925 push_1
= (x64
and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
926 "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
927 pop_1
= x64
and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
928 -- 60: *pusha, *pushad, *pushaw
929 -- 61: *popa, *popad, *popaw
931 -- 63: x86: *arpl mw,rw
932 movsxd_2
= x64
and "rm/qd:63rM",
936 a16_0
= not x64
and "67" or nil,
937 a32_0
= x64
and "67",
939 -- 69: imul rdw,mdw,idw
941 -- 6B: imul rdw,mdw,S
945 -- 6F: *outsd, *outsw
951 test_2
= "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
959 lea_2
= "rx1dq:8DrM",
963 xchg_2
= "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
974 pushfd_0
= not x64
and "9C",
975 pushfq_0
= x64
and "9C",
977 popfd_0
= not x64
and "9D",
978 popfq_0
= x64
and "9D",
981 mov_2
= "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
1025 -- D8-DF: floating point ops
1029 -- E3: *jcxz, *jecxz
1034 call_1
= x64
and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
1035 jmp_1
= x64
and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
1051 -- F6: test... mb,i; div... mb
1052 -- F7: test... mdw,i; div... mdw
1069 imul_2
= "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
1070 imul_3
= "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",
1072 movzx_2
= "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
1073 movsx_2
= "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",
1075 bswap_1
= "rqd:0FC8r",
1076 bsf_2
= "rmqdw:0FBCrM",
1077 bsr_2
= "rmqdw:0FBDrM",
1078 bt_2
= "mrqdw:0FA3Rm|miqdw:0FBA4mU",
1079 btc_2
= "mrqdw:0FBBRm|miqdw:0FBA7mU",
1080 btr_2
= "mrqdw:0FB3Rm|miqdw:0FBA6mU",
1081 bts_2
= "mrqdw:0FABRm|miqdw:0FBA5mU",
1083 rdtsc_0
= "0F31", -- P1+
1084 cpuid_0
= "0FA2", -- P1+
1086 -- floating point ops
1087 fst_1
= "ff:DDD0r|xd:D92m|xq:nDD2m",
1088 fstp_1
= "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
1089 fld_1
= "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",
1091 fpop_0
= "DDD8", -- Alias for fstp st0.
1093 fist_1
= "xw:nDF2m|xd:DB2m",
1094 fistp_1
= "xw:nDF3m|xd:DB3m|xq:nDF7m",
1095 fild_1
= "xw:nDF0m|xd:DB0m|xq:nDF5m",
1098 fxch_1
= "ff:D9C8r",
1099 fxch_2
= "fFf:D9C8r|Fff:D9C8R",
1101 fucom_1
= "ff:DDE0r",
1102 fucom_2
= "Fff:DDE0R",
1103 fucomp_1
= "ff:DDE8r",
1104 fucomp_2
= "Fff:DDE8R",
1105 fucomi_1
= "ff:DBE8r", -- P6+
1106 fucomi_2
= "Fff:DBE8R", -- P6+
1107 fucomip_1
= "ff:DFE8r", -- P6+
1108 fucomip_2
= "Fff:DFE8R", -- P6+
1109 fcomi_1
= "ff:DBF0r", -- P6+
1110 fcomi_2
= "Fff:DBF0R", -- P6+
1111 fcomip_1
= "ff:DFF0r", -- P6+
1112 fcomip_2
= "Fff:DFF0R", -- P6+
1116 fldcw_1
= "xw:nD95m",
1117 fstcw_1
= "xw:n9BD97m",
1118 fnstcw_1
= "xw:nD97m",
1119 fstsw_1
= "Rw:n9BDFE0|xw:n9BDD7m",
1120 fnstsw_1
= "Rw:nDFE0|xw:nDD7m",
1125 -- D9D1-D9DF: unassigned
1162 andnpd_2
= "rmo:660F55rM",
1163 andnps_2
= "rmo:0F55rM",
1164 andpd_2
= "rmo:660F54rM",
1165 andps_2
= "rmo:0F54rM",
1166 clflush_1
= "x.:0FAE7m",
1167 cmppd_3
= "rmio:660FC2rMU",
1168 cmpps_3
= "rmio:0FC2rMU",
1169 cmpsd_3
= "rrio:F20FC2rMU|rxi/oq:",
1170 cmpss_3
= "rrio:F30FC2rMU|rxi/od:",
1171 comisd_2
= "rro:660F2FrM|rx/oq:",
1172 comiss_2
= "rro:0F2FrM|rx/od:",
1173 cvtdq2pd_2
= "rro:F30FE6rM|rx/oq:",
1174 cvtdq2ps_2
= "rmo:0F5BrM",
1175 cvtpd2dq_2
= "rmo:F20FE6rM",
1176 cvtpd2ps_2
= "rmo:660F5ArM",
1177 cvtpi2pd_2
= "rx/oq:660F2ArM",
1178 cvtpi2ps_2
= "rx/oq:0F2ArM",
1179 cvtps2dq_2
= "rmo:660F5BrM",
1180 cvtps2pd_2
= "rro:0F5ArM|rx/oq:",
1181 cvtsd2si_2
= "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
1182 cvtsd2ss_2
= "rro:F20F5ArM|rx/oq:",
1183 cvtsi2sd_2
= "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
1184 cvtsi2ss_2
= "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
1185 cvtss2sd_2
= "rro:F30F5ArM|rx/od:",
1186 cvtss2si_2
= "rr/do:F20F2CrM|rr/qo:|rxd:|rx/qd:",
1187 cvttpd2dq_2
= "rmo:660FE6rM",
1188 cvttps2dq_2
= "rmo:F30F5BrM",
1189 cvttsd2si_2
= "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
1190 cvttss2si_2
= "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
1191 ldmxcsr_1
= "xd:0FAE2m",
1192 lfence_0
= "0FAEE8",
1193 maskmovdqu_2
= "rro:660FF7rM",
1194 mfence_0
= "0FAEF0",
1195 movapd_2
= "rmo:660F28rM|mro:660F29Rm",
1196 movaps_2
= "rmo:0F28rM|mro:0F29Rm",
1197 movd_2
= "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
1198 movdqa_2
= "rmo:660F6FrM|mro:660F7FRm",
1199 movdqu_2
= "rmo:F30F6FrM|mro:F30F7FRm",
1200 movhlps_2
= "rro:0F12rM",
1201 movhpd_2
= "rx/oq:660F16rM|xr/qo:n660F17Rm",
1202 movhps_2
= "rx/oq:0F16rM|xr/qo:n0F17Rm",
1203 movlhps_2
= "rro:0F16rM",
1204 movlpd_2
= "rx/oq:660F12rM|xr/qo:n660F13Rm",
1205 movlps_2
= "rx/oq:0F12rM|xr/qo:n0F13Rm",
1206 movmskpd_2
= "rr/do:660F50rM",
1207 movmskps_2
= "rr/do:0F50rM",
1208 movntdq_2
= "xro:660FE7Rm",
1209 movnti_2
= "xrqd:0FC3Rm",
1210 movntpd_2
= "xro:660F2BRm",
1211 movntps_2
= "xro:0F2BRm",
1212 movq_2
= "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
1213 movsd_2
= "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
1214 movss_2
= "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
1215 movupd_2
= "rmo:660F10rM|mro:660F11Rm",
1216 movups_2
= "rmo:0F10rM|mro:0F11Rm",
1217 orpd_2
= "rmo:660F56rM",
1218 orps_2
= "rmo:0F56rM",
1219 packssdw_2
= "rmo:660F6BrM",
1220 packsswb_2
= "rmo:660F63rM",
1221 packuswb_2
= "rmo:660F67rM",
1222 paddb_2
= "rmo:660FFCrM",
1223 paddd_2
= "rmo:660FFErM",
1224 paddq_2
= "rmo:660FD4rM",
1225 paddsb_2
= "rmo:660FECrM",
1226 paddsw_2
= "rmo:660FEDrM",
1227 paddusb_2
= "rmo:660FDCrM",
1228 paddusw_2
= "rmo:660FDDrM",
1229 paddw_2
= "rmo:660FFDrM",
1230 pand_2
= "rmo:660FDBrM",
1231 pandn_2
= "rmo:660FDFrM",
1233 pavgb_2
= "rmo:660FE0rM",
1234 pavgw_2
= "rmo:660FE3rM",
1235 pcmpeqb_2
= "rmo:660F74rM",
1236 pcmpeqd_2
= "rmo:660F76rM",
1237 pcmpeqw_2
= "rmo:660F75rM",
1238 pcmpgtb_2
= "rmo:660F64rM",
1239 pcmpgtd_2
= "rmo:660F66rM",
1240 pcmpgtw_2
= "rmo:660F65rM",
1241 pextrw_3
= "rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
1242 pinsrw_3
= "rri/od:660FC4rMU|rxi/ow:",
1243 pmaddwd_2
= "rmo:660FF5rM",
1244 pmaxsw_2
= "rmo:660FEErM",
1245 pmaxub_2
= "rmo:660FDErM",
1246 pminsw_2
= "rmo:660FEArM",
1247 pminub_2
= "rmo:660FDArM",
1248 pmovmskb_2
= "rr/do:660FD7rM",
1249 pmulhuw_2
= "rmo:660FE4rM",
1250 pmulhw_2
= "rmo:660FE5rM",
1251 pmullw_2
= "rmo:660FD5rM",
1252 pmuludq_2
= "rmo:660FF4rM",
1253 por_2
= "rmo:660FEBrM",
1254 prefetchnta_1
= "xb:n0F180m",
1255 prefetcht0_1
= "xb:n0F181m",
1256 prefetcht1_1
= "xb:n0F182m",
1257 prefetcht2_1
= "xb:n0F183m",
1258 psadbw_2
= "rmo:660FF6rM",
1259 pshufd_3
= "rmio:660F70rMU",
1260 pshufhw_3
= "rmio:F30F70rMU",
1261 pshuflw_3
= "rmio:F20F70rMU",
1262 pslld_2
= "rmo:660FF2rM|rio:660F726mU",
1263 pslldq_2
= "rio:660F737mU",
1264 psllq_2
= "rmo:660FF3rM|rio:660F736mU",
1265 psllw_2
= "rmo:660FF1rM|rio:660F716mU",
1266 psrad_2
= "rmo:660FE2rM|rio:660F724mU",
1267 psraw_2
= "rmo:660FE1rM|rio:660F714mU",
1268 psrld_2
= "rmo:660FD2rM|rio:660F722mU",
1269 psrldq_2
= "rio:660F733mU",
1270 psrlq_2
= "rmo:660FD3rM|rio:660F732mU",
1271 psrlw_2
= "rmo:660FD1rM|rio:660F712mU",
1272 psubb_2
= "rmo:660FF8rM",
1273 psubd_2
= "rmo:660FFArM",
1274 psubq_2
= "rmo:660FFBrM",
1275 psubsb_2
= "rmo:660FE8rM",
1276 psubsw_2
= "rmo:660FE9rM",
1277 psubusb_2
= "rmo:660FD8rM",
1278 psubusw_2
= "rmo:660FD9rM",
1279 psubw_2
= "rmo:660FF9rM",
1280 punpckhbw_2
= "rmo:660F68rM",
1281 punpckhdq_2
= "rmo:660F6ArM",
1282 punpckhqdq_2
= "rmo:660F6DrM",
1283 punpckhwd_2
= "rmo:660F69rM",
1284 punpcklbw_2
= "rmo:660F60rM",
1285 punpckldq_2
= "rmo:660F62rM",
1286 punpcklqdq_2
= "rmo:660F6CrM",
1287 punpcklwd_2
= "rmo:660F61rM",
1288 pxor_2
= "rmo:660FEFrM",
1289 rcpps_2
= "rmo:0F53rM",
1290 rcpss_2
= "rro:F30F53rM|rx/od:",
1291 rsqrtps_2
= "rmo:0F52rM",
1292 rsqrtss_2
= "rmo:F30F52rM",
1293 sfence_0
= "0FAEF8",
1294 shufpd_3
= "rmio:660FC6rMU",
1295 shufps_3
= "rmio:0FC6rMU",
1296 stmxcsr_1
= "xd:0FAE3m",
1297 ucomisd_2
= "rro:660F2ErM|rx/oq:",
1298 ucomiss_2
= "rro:0F2ErM|rx/od:",
1299 unpckhpd_2
= "rmo:660F15rM",
1300 unpckhps_2
= "rmo:0F15rM",
1301 unpcklpd_2
= "rmo:660F14rM",
1302 unpcklps_2
= "rmo:0F14rM",
1303 xorpd_2
= "rmo:660F57rM",
1304 xorps_2
= "rmo:0F57rM",
1307 fisttp_1
= "xw:nDF1m|xd:DB1m|xq:nDD1m",
1308 addsubpd_2
= "rmo:660FD0rM",
1309 addsubps_2
= "rmo:F20FD0rM",
1310 haddpd_2
= "rmo:660F7CrM",
1311 haddps_2
= "rmo:F20F7CrM",
1312 hsubpd_2
= "rmo:660F7DrM",
1313 hsubps_2
= "rmo:F20F7DrM",
1314 lddqu_2
= "rxo:F20FF0rM",
1315 movddup_2
= "rmo:F20F12rM",
1316 movshdup_2
= "rmo:F30F16rM",
1317 movsldup_2
= "rmo:F30F12rM",
1320 pabsb_2
= "rmo:660F381CrM",
1321 pabsd_2
= "rmo:660F381ErM",
1322 pabsw_2
= "rmo:660F381DrM",
1323 palignr_3
= "rmio:660F3A0FrMU",
1324 phaddd_2
= "rmo:660F3802rM",
1325 phaddsw_2
= "rmo:660F3803rM",
1326 phaddw_2
= "rmo:660F3801rM",
1327 phsubd_2
= "rmo:660F3806rM",
1328 phsubsw_2
= "rmo:660F3807rM",
1329 phsubw_2
= "rmo:660F3805rM",
1330 pmaddubsw_2
= "rmo:660F3804rM",
1331 pmulhrsw_2
= "rmo:660F380BrM",
1332 pshufb_2
= "rmo:660F3800rM",
1333 psignb_2
= "rmo:660F3808rM",
1334 psignd_2
= "rmo:660F380ArM",
1335 psignw_2
= "rmo:660F3809rM",
1338 blendpd_3
= "rmio:660F3A0DrMU",
1339 blendps_3
= "rmio:660F3A0CrMU",
1340 blendvpd_3
= "rmRo:660F3815rM",
1341 blendvps_3
= "rmRo:660F3814rM",
1342 dppd_3
= "rmio:660F3A41rMU",
1343 dpps_3
= "rmio:660F3A40rMU",
1344 extractps_3
= "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
1345 insertps_3
= "rrio:660F3A41rMU|rxi/od:",
1346 movntdqa_2
= "rmo:660F382ArM",
1347 mpsadbw_3
= "rmio:660F3A42rMU",
1348 packusdw_2
= "rmo:660F382BrM",
1349 pblendvb_3
= "rmRo:660F3810rM",
1350 pblendw_3
= "rmio:660F3A0ErMU",
1351 pcmpeqq_2
= "rmo:660F3829rM",
1352 pextrb_3
= "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
1353 pextrd_3
= "mri/do:660F3A16RmU",
1354 pextrq_3
= "mri/qo:660F3A16RmU",
1355 -- pextrw is SSE2, mem operand is SSE4.1 only
1356 phminposuw_2
= "rmo:660F3841rM",
1357 pinsrb_3
= "rri/od:660F3A20nrMU|rxi/ob:",
1358 pinsrd_3
= "rmi/od:660F3A22rMU",
1359 pinsrq_3
= "rmi/oq:660F3A22rXMU",
1360 pmaxsb_2
= "rmo:660F383CrM",
1361 pmaxsd_2
= "rmo:660F383DrM",
1362 pmaxud_2
= "rmo:660F383FrM",
1363 pmaxuw_2
= "rmo:660F383ErM",
1364 pminsb_2
= "rmo:660F3838rM",
1365 pminsd_2
= "rmo:660F3839rM",
1366 pminud_2
= "rmo:660F383BrM",
1367 pminuw_2
= "rmo:660F383ArM",
1368 pmovsxbd_2
= "rro:660F3821rM|rx/od:",
1369 pmovsxbq_2
= "rro:660F3822rM|rx/ow:",
1370 pmovsxbw_2
= "rro:660F3820rM|rx/oq:",
1371 pmovsxdq_2
= "rro:660F3825rM|rx/oq:",
1372 pmovsxwd_2
= "rro:660F3823rM|rx/oq:",
1373 pmovsxwq_2
= "rro:660F3824rM|rx/od:",
1374 pmovzxbd_2
= "rro:660F3831rM|rx/od:",
1375 pmovzxbq_2
= "rro:660F3832rM|rx/ow:",
1376 pmovzxbw_2
= "rro:660F3830rM|rx/oq:",
1377 pmovzxdq_2
= "rro:660F3835rM|rx/oq:",
1378 pmovzxwd_2
= "rro:660F3833rM|rx/oq:",
1379 pmovzxwq_2
= "rro:660F3834rM|rx/od:",
1380 pmuldq_2
= "rmo:660F3828rM",
1381 pmulld_2
= "rmo:660F3840rM",
1382 ptest_2
= "rmo:660F3817rM",
1383 roundpd_3
= "rmio:660F3A09rMU",
1384 roundps_3
= "rmio:660F3A08rMU",
1385 roundsd_3
= "rrio:660F3A0BrMU|rxi/oq:",
1386 roundss_3
= "rrio:660F3A0ArMU|rxi/od:",
1389 crc32_2
= "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
1390 pcmpestri_3
= "rmio:660F3A61rMU",
1391 pcmpestrm_3
= "rmio:660F3A60rMU",
1392 pcmpgtq_2
= "rmo:660F3837rM",
1393 pcmpistri_3
= "rmio:660F3A63rMU",
1394 pcmpistrm_3
= "rmio:660F3A62rMU",
1395 popcnt_2
= "rmqdw:F30FB8rM",
1398 extrq_2
= "rro:660F79rM",
1399 extrq_3
= "riio:660F780mUU",
1400 insertq_2
= "rro:F20F79rM",
1401 insertq_4
= "rriio:F20F78rMUU",
1402 lzcnt_2
= "rmqdw:F30FBDrM",
1403 movntsd_2
= "xr/qo:nF20F2BRm",
1404 movntss_2
= "xr/do:F30F2BRm",
1405 -- popcnt is also in SSE4.2
1408 ------------------------------------------------------------------------------
-- Integer arithmetic ops. All eight mnemonics share one template; only the
-- opcode group number (0-7) differs. The reg/mem and accumulator forms use
-- opcodes at group*8 + 1/3/5, the immediate forms (81/83) carry the group
-- number as the spare digit in front of the ModRM marker.
do
  local template =
    "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi"
  local groups = {
    add = 0, ["or"] = 1, adc = 2, sbb = 3,
    ["and"] = 4, sub = 5, xor = 6, cmp = 7,
  }
  for mnemonic, group in pairs(groups) do
    local base = group * 8
    map_op[mnemonic.."_2"] =
      format(template, base + 1, base + 3, group, group, base + 5, group)
  end
end
-- Shift and rotate ops. Three forms per mnemonic: shift by 1 (D1), shift by
-- cl (D3) and shift by an immediate (C1). The number is the spare digit that
-- precedes the ModRM marker; sal shares its number with shl.
do
  local shifts = {
    rol = 0, ror = 1, rcl = 2, rcr = 3,
    shl = 4, shr = 5, sar = 7, sal = 4,
  }
  for mnemonic, ext in pairs(shifts) do
    map_op[mnemonic.."_2"] =
      format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", ext, ext, ext)
  end
end
-- Conditional ops: register one jump, one set and one cmov template for
-- every condition code listed in map_cc.
for cond, code in pairs(map_cc) do
  local jcc = format("J.:n0F8%XJ", code) -- short: 7%X
  local setcc = format("mb:n0F9%X2m", code)
  local cmovcc = format("rmqdw:0F4%XrM", code) -- P6+
  map_op["j"..cond.."_1"] = jcc
  map_op["set"..cond.."_1"] = setcc
  map_op["cmov"..cond.."_2"] = cmovcc
end
-- FP arithmetic ops.
-- n is the operation number (0-7). It is the spare digit of the memory forms
-- and, scaled by 8 and added to 0xC0, the second opcode byte of the D8
-- register forms.
for name, n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
                      sub = 4, subr = 5, div = 6, divr = 7 } do
  local fn = "f"..name
  local nc = 0xC0 + 8 * n
  -- Opcode byte for the DC/DE forms: identical to nc for n < 4, otherwise
  -- swapped with the neighbouring op of the pair (sub<->subr, div<->divr).
  local nr = nc
  if n >= 4 then
    if n % 2 == 0 then nr = nc + 8 else nr = nc - 8 end
  end
  map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
  if n ~= 2 and n ~= 3 then
    -- Everything except com/comp also gets a DC form and the p-suffixed ops.
    map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n)
    map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
    map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
  else
    map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n)
  end
  map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
end
-- FP conditional moves.
-- The eight conditions form two groups of four. The position inside a group
-- steps the 16 bit opcode by 8; the second group (n >= 4) adds 4*64 = 0x100.
for cc, n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
  local low = n % 4
  local nc = 0xDAC0 + low * 8 + (n - low) * 64
  local hex = format("%04X", nc)
  map_op["fcmov"..cc.."_1"] = "ff:"..hex.."r" -- P6+
  map_op["fcmov"..cc.."_2"] = "Fff:"..hex.."R" -- P6+
end
-- SSE FP arithmetic ops.
-- Every operation is registered in four flavours that differ only in the
-- opcode prefix: none (ps), F3 (ss), 66 (pd) and F2 (sd). n supplies the last
-- hex digit of the 0F5x opcode.
for name, n in pairs{ sqrt = 1, add = 8, mul = 9,
                      sub = 12, min = 13, div = 14, max = 15 } do
  local x = format("%X", n)
  map_op[name.."ps_2"] = "rmo:0F5"..x.."rM"
  map_op[name.."ss_2"] = "rro:F30F5"..x.."rM|rx/od:"
  map_op[name.."pd_2"] = "rmo:660F5"..x.."rM"
  map_op[name.."sd_2"] = "rro:F20F5"..x.."rM|rx/oq:"
end
1466 ------------------------------------------------------------------------------
1468 -- Process pattern string.
1469 local function dopattern(pat
, args
, sz
, op
, needrex
)
1476 -- Limit number of section buffer positions used by a single dasm_put().
1477 -- A single opcode needs a maximum of 5 positions.
1478 if secpos
+5 > maxsecpos
then wflush() end
1480 -- Process each character.
1481 for c
in gmatch(pat
.."|", ".") do
1482 if match(c
, "%x") then -- Hex digit.
1483 digit
= byte(c
) - 48
1484 if digit
> 48 then digit
= digit
- 39
1485 elseif digit
> 16 then digit
= digit
- 7 end
1486 opcode
= opcode
*16 + digit
1488 elseif c
== "n" then -- Disable operand size mods for opcode.
1490 elseif c
== "X" then -- Force REX.W.
1492 elseif c
== "r" then -- Merge 1st operand regno. into opcode.
1493 addin
= args
[1]; opcode
= opcode
+ (addin
.reg
% 8)
1494 if narg
< 2 then narg
= 2 end
1495 elseif c
== "R" then -- Merge 2nd operand regno. into opcode.
1496 addin
= args
[2]; opcode
= opcode
+ (addin
.reg
% 8)
1498 elseif c
== "m" or c
== "M" then -- Encode ModRM/SIB.
1502 opcode
= opcode
- (s
%8) -- Undo regno opcode merge.
1504 s
= opcode
% 16 -- Undo last digit.
1505 opcode
= (opcode
- s
) / 16
1507 local nn
= c
== "m" and 1 or 2
1509 if narg
<= nn
then narg
= nn
+ 1 end
1510 if szov
== "q" and rex
== 0 then rex
= rex
+ 8 end
1511 if t
.reg
and t
.reg
> 7 then rex
= rex
+ 1 end
1512 if t
.xreg
and t
.xreg
> 7 then rex
= rex
+ 2 end
1513 if s
> 7 then rex
= rex
+ 4 end
1514 if needrex
then rex
= rex
+ 16 end
1515 wputop(szov
, opcode
, rex
); opcode
= nil
1516 local imark
= sub(pat
, -1) -- Force a mark (ugly).
1517 -- Put ModRM/SIB with regno/last digit as spare.
1518 wputmrmsib(t
, imark
, s
, addin
and addin
.vreg
)
1521 if opcode
then -- Flush opcode.
1522 if szov
== "q" and rex
== 0 then rex
= rex
+ 8 end
1523 if needrex
then rex
= rex
+ 16 end
1524 if addin
and addin
.reg
== -1 then
1525 wputop(szov
, opcode
- 7, rex
)
1526 waction("VREG", addin
.vreg
); wputxb(0)
1528 if addin
and addin
.reg
> 7 then rex
= rex
+ 1 end
1529 wputop(szov
, opcode
, rex
)
1533 if c
== "|" then break end
1534 if c
== "o" then -- Offset (pure 32 bit displacement).
1535 wputdarg(args
[1].disp
); if narg
< 2 then narg
= 2 end
1536 elseif c
== "O" then
1537 wputdarg(args
[2].disp
); narg
= 3
1539 -- Anything else is an immediate operand.
1540 local a
= args
[narg
]
1542 local mode
, imm
= a
.mode
, a
.imm
1543 if mode
== "iJ" and not match("iIJ", c
) then
1544 werror("bad operand size for label")
1548 elseif c
== "U" then
1550 elseif c
== "W" then
1552 elseif c
== "i" or c
== "I" then
1553 if mode
== "iJ" then
1554 wputlabel("IMM_", imm
, 1)
1555 elseif mode
== "iI" and c
== "I" then
1556 waction(sz
== "w" and "IMM_WB" or "IMM_DB", imm
)
1560 elseif c
== "J" then
1561 if mode
== "iPJ" then
1562 waction("REL_A", imm
) -- !x64 (secpos)
1564 wputlabel("REL_", imm
, 2)
1567 werror("bad char `"..c
.."' in pattern `"..pat
.."' for `"..op
.."'")
1574 ------------------------------------------------------------------------------
-- Mapping of operand modes to short names. Suppress output with '#'.
-- Keys are the single mode characters used in the match part of a template.
local map_modename = {
  r = "reg",
  R = "eax",
  C = "cl",
  x = "mem",
  m = "mrm",
  i = "imm",
  f = "stx",
  F = "st0",
  J = "lbl",
  ["1"] = "1",
  -- Modes mapped to '#' are left out of the help output.
  I = "#",
  S = "#",
  O = "#",
}
-- Return a table/string showing all possible operand modes.
-- template: full template string with '|'-separated alternatives.
-- nparams:  number of operands; only the first nparams mode characters of
--           each alternative are looked at.
-- Returns "" for zero-operand opcodes, otherwise a list with one
-- "mode, mode, ..." string per alternative. Alternatives whose description
-- contains '#' are left out.
local function templatehelp(template, nparams)
  if nparams == 0 then return "" end
  local function modesuffix(c)
    return ", "..map_modename[c]
  end
  local lines = {}
  for alt in gmatch(template, "[^%|]+") do
    -- Only the first result of gsub (the string) is wanted here.
    local rest = gsub(sub(alt, 2, nparams), ".", modesuffix)
    local text = map_modename[sub(alt, 1, 1)]..rest
    if not match(text, "#") then lines[#lines+1] = text end
  end
  return lines
end
-- Match operand modes against mode match part of template.
-- tm:   one template alternative; its i-th character is used as a Lua
--       pattern against the mode string of the i-th operand.
-- args: list of parsed operands (each with a .mode string).
-- Returns true if every operand matches, nothing otherwise.
local function matchtm(tm, args)
  for i, operand in ipairs(args) do
    if match(operand.mode, sub(tm, i, i)) == nil then return end
  end
  return true
end
1605 -- Handle opcodes defined with template strings.
1606 map_op
[".template__"] = function(params
, template
, nparams
)
1607 if not params
then return templatehelp(template
, nparams
) end
1610 -- Zero-operand opcodes have no match part.
1611 if #params
== 0 then
1612 dopattern(template
, args
, "d", params
.op
, nil)
1616 -- Determine common operand size (coerce undefined size) or flag as mixed.
1617 local sz
, szmix
, needrex
1618 for i
,p
in ipairs(params
) do
1619 args
[i
] = parseoperand(p
)
1620 local nsz
= args
[i
].opsize
1622 if sz
and sz
~= nsz
then szmix
= true else sz
= nsz
end
1624 local nrex
= args
[i
].needrex
1626 if needrex
== nil then
1628 elseif needrex
~= nrex
then
1629 werror("bad mix of byte-addressable registers")
1634 -- Try all match:pattern pairs (separated by '|').
1635 local gotmatch
, lastpat
1636 for tm
in gmatch(template
, "[^%|]+") do
1637 -- Split off size match (starts after mode match) and pattern string.
1638 local szm
, pat
= match(tm
, "^(.-):(.*)$", #args
+1)
1639 if pat
== "" then pat
= lastpat
else lastpat
= pat
end
1640 if matchtm(tm
, args
) then
1641 local prefix
= sub(szm
, 1, 1)
1642 if prefix
== "/" then -- Match both operand sizes.
1643 if args
[1].opsize
== sub(szm
, 2, 2) and
1644 args
[2].opsize
== sub(szm
, 3, 3) then
1645 dopattern(pat
, args
, sz
, params
.op
, needrex
) -- Process pattern.
1648 else -- Match common operand size.
1650 if szm
== "" then szm
= x64
and "qdwb" or "dwb" end -- Default sizes.
1651 if prefix
== "1" then szp
= args
[1].opsize
; szmix
= nil
1652 elseif prefix
== "2" then szp
= args
[2].opsize
; szmix
= nil end
1653 if not szmix
and (prefix
== "." or match(szm
, szp
or "#")) then
1654 dopattern(pat
, args
, szp
, params
.op
, needrex
) -- Process pattern.
1662 local msg
= "bad operand mode"
1665 msg
= "mixed operand size"
1667 msg
= sz
and "bad operand size" or "missing operand size"
1671 werror(msg
.." in `"..opmodestr(params
.op
, args
).."'")
1674 ------------------------------------------------------------------------------
1676 -- x64-specific opcode for 64 bit immediates and displacements.
1678 function map_op
.mov64_2(params
)
1679 if not params
then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
1680 if secpos
+2 > maxsecpos
then wflush() end
1681 local opcode
, op64
, sz
, rex
1682 local op64
= match(params
[1], "^%[%s*(.-)%s*%]$")
1684 local a
= parseoperand(params
[2])
1685 if a
.mode
~= "rmR" then werror("bad operand mode") end
1687 rex
= sz
== "q" and 8 or 0
1690 op64
= match(params
[2], "^%[%s*(.-)%s*%]$")
1691 local a
= parseoperand(params
[1])
1693 if a
.mode
~= "rmR" then werror("bad operand mode") end
1695 rex
= sz
== "q" and 8 or 0
1698 if sub(a
.mode
, 1, 1) ~= "r" or a
.opsize
~= "q" then
1699 werror("bad operand mode")
1702 opcode
= 0xb8 + (a
.reg
%8) -- !x64: no VREG support.
1703 rex
= a
.reg
> 7 and 9 or 8
1706 wputop(sz
, opcode
, rex
)
1707 waction("IMM_D", format("(unsigned int)(%s)", op64
))
1708 waction("IMM_D", format("(unsigned int)((%s)>>32)", op64
))
1712 ------------------------------------------------------------------------------
-- Pseudo-opcodes for data storage.
-- The operand size is taken from the second character of the directive name
-- (params.op); "a" is replaced by the current addrsize. Every operand must be
-- an immediate whose explicit size, if any, agrees with the directive.
-- Called without params it returns the help string.
local function op_data(params)
  if not params then return "imm..." end
  local size = sub(params.op, 2, 2)
  if size == "a" then size = addrsize end
  for _, operand in ipairs(params) do
    local a = parseoperand(operand)
    local is_imm = sub(a.mode, 1, 1) == "i"
    local size_ok = not a.opsize or a.opsize == size
    if not (is_imm and size_ok) then
      werror("bad mode or size in `"..operand.."'")
    end
    if a.mode ~= "iJ" then
      wputszarg(size, a.imm)
    else
      -- Label operand: emitted through the label path instead.
      wputlabel("IMM_", a.imm, 1)
    end
    -- Keep room for the next operand in the current dasm_put().
    if secpos+2 > maxsecpos then wflush() end
  end
end
-- All data storage directives share the op_data handler.
for _, directive in ipairs{ ".byte_*", ".sbyte_*", ".word_*", ".dword_*", ".aword_*" } do
  map_op[directive] = op_data
end
1739 ------------------------------------------------------------------------------
-- Pseudo-opcode to mark the position where the action list is to be emitted.
-- Without params the help string is returned. Otherwise wline is handed a
-- callback which calls writeactions with the output and the given name.
map_op[".actionlist_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    local function emit(out)
      writeactions(out, cvar)
    end
    wline(emit)
    return
  end
  return "cvar"
end
-- Pseudo-opcode to mark the position where the global enum is to be emitted.
-- Without params the help string is returned. Otherwise wline is handed a
-- callback which calls writeglobals with the output and the given prefix.
map_op[".globals_1"] = function(params)
  if params then
    local enumprefix = params[1] -- No syntax check. You get to keep the pieces.
    local function emit(out)
      writeglobals(out, enumprefix)
    end
    wline(emit)
    return
  end
  return "prefix"
end
-- Pseudo-opcode to mark the position where the global names are to be emitted.
-- Without params the help string is returned. Otherwise wline is handed a
-- callback which calls writeglobalnames with the output and the given name.
map_op[".globalnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    local function emit(out)
      writeglobalnames(out, cvar)
    end
    wline(emit)
    return
  end
  return "cvar"
end
-- Pseudo-opcode to mark the position where the extern names are to be emitted.
-- Without params the help string is returned. Otherwise wline is handed a
-- callback which calls writeexternnames with the output and the given name.
map_op[".externnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    local function emit(out)
      writeexternnames(out, cvar)
    end
    wline(emit)
    return
  end
  return "cvar"
end
1769 ------------------------------------------------------------------------------
1771 -- Label pseudo-opcode (converted from trailing colon form).
1772 map_op
[".label_2"] = function(params
)
1773 if not params
then return "[1-9] | ->global | =>pcexpr [, addr]" end
1774 if secpos
+2 > maxsecpos
then wflush() end
1775 local a
= parseoperand(params
[1])
1776 local mode
, imm
= a
.mode
, a
.imm
1777 if type(imm
) == "number" and (mode
== "iJ" or (imm
>= 1 and imm
<= 9)) then
1778 -- Local label (1: ... 9:) or global label (->global:).
1779 waction("LABEL_LG", nil, 1)
1781 elseif mode
== "iJ" then
1782 -- PC label (=>pcexpr:).
1783 waction("LABEL_PC", imm
)
1785 werror("bad label definition")
1787 -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
1788 local addr
= params
[2]
1790 local a
= parseoperand(addr
)
1791 if a
.mode
== "iPJ" then
1792 waction("SETLABEL", a
.imm
)
1794 werror("bad label assignment")
1798 map_op
[".label_1"] = map_op
[".label_2"]
1800 ------------------------------------------------------------------------------
1802 -- Alignment pseudo-opcode.
1803 map_op
[".align_1"] = function(params
)
1804 if not params
then return "numpow2" end
1805 if secpos
+1 > maxsecpos
then wflush() end
1806 local align
= tonumber(params
[1]) or map_opsizenum
[map_opsize
[params
[1]]
]
1809 -- Must be a power of 2 in the range (2 ... 256).
1813 waction("ALIGN", nil, 1)
1814 wputxb(align
-1) -- Action byte is 2**n-1.
1819 werror("bad alignment")
1822 -- Spacing pseudo-opcode.
1823 map_op
[".space_2"] = function(params
)
1824 if not params
then return "num [, filler]" end
1825 if secpos
+1 > maxsecpos
then wflush() end
1826 waction("SPACE", params
[1])
1827 local fill
= params
[2]
1829 fill
= tonumber(fill
)
1830 if not fill
or fill
< 0 or fill
> 255 then werror("bad filler") end
1834 map_op
[".space_1"] = map_op
[".space_2"]
1836 ------------------------------------------------------------------------------
1838 -- Pseudo-opcode for (primitive) type definitions (map to C types).
1839 map_op
[".type_3"] = function(params
, nparams
)
1841 return nparams
== 2 and "name, ctype" or "name, ctype, reg"
1843 local name
, ctype
, reg
= params
[1], params
[2], params
[3]
1844 if not match(name
, "^[%a_][%w_]*$") then
1845 werror("bad type name `"..name
.."'")
1847 local tp
= map_type
[name
]
1849 werror("duplicate type `"..name
.."'")
1851 if reg
and not map_reg_valid_base
[reg
] then
1852 werror("bad base register `"..(map_reg_rev
[reg
] or reg
).."'")
1854 -- Add #type to defines. A bit unclean to put it in map_archdef.
1855 map_archdef
["#"..name
] = "sizeof("..ctype
..")"
1856 -- Add new type and emit shortcut define.
1857 local num
= ctypenum
+ 1
1860 ctypefmt
= format("Dt%X(%%s)", num
),
1863 wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num
, ctype
))
1866 map_op
[".type_2"] = map_op
[".type_3"]
1868 -- Dump type definitions.
1869 local function dumptypes(out
, lvl
)
1871 for name
in pairs(map_type
) do t
[#t
+1] = name
end
1873 out
:write("Type definitions:\n")
1874 for _
,name
in ipairs(t
) do
1875 local tp
= map_type
[name
]
1876 local reg
= tp
.reg
and map_reg_rev
[tp
.reg
] or ""
1877 out
:write(format(" %-20s %-20s %s\n", name
, tp
.ctype
, reg
))
1882 ------------------------------------------------------------------------------
1884 -- Set the current section.
1885 function _M
.section(num
)
1888 wflush(true) -- SECTION is a terminal action.
1891 ------------------------------------------------------------------------------
1893 -- Dump architecture description.
1894 function _M
.dumparch(out
)
1895 out
:write(format("DynASM %s version %s, released %s\n\n",
1896 _info
.arch
, _info
.version
, _info
.release
))
1901 -- Dump all user defined elements.
1902 function _M
.dumpdef(out
, lvl
)
1904 dumpglobals(out
, lvl
)
1905 dumpexterns(out
, lvl
)
1908 ------------------------------------------------------------------------------
1910 -- Pass callbacks from/to the DynASM core.
1911 function _M
.passcb(wl
, we
, wf
, ww
)
1912 wline
, werror
, wfatal
, wwarn
= wl
, we
, wf
, ww
-- Setup the arch-specific module.
-- Stores the architecture name and the option table passed in by the caller
-- in g_arch and g_opt.
function _M.setup(arch, opt)
  g_arch = arch
  g_opt = opt
end
-- Merge the core maps and the arch-specific maps.
-- Lookups that miss in map_op fall back to map_coreop, lookups that miss in
-- map_def fall back to map_archdef. Returns map_op and map_def.
function _M.mergemaps(map_coreop, map_def)
  local op_fallback = { __index = map_coreop }
  local def_fallback = { __index = map_archdef }
  setmetatable(map_op, op_fallback)
  setmetatable(map_def, def_fallback)
  return map_op, map_def
end
1930 ------------------------------------------------------------------------------