1 ------------------------------------------------------------------------------
2 -- DynASM x86/x64 module.
4 -- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
5 -- See dynasm.lua for full copyright notice.
6 ------------------------------------------------------------------------------
10 -- Module information:
12 arch
= x64
and "x64" or "x86",
13 description
= "DynASM x86/x64 module",
16 release
= "2011-05-05",
21 -- Exported glue functions for the arch-specific module.
22 local _M
= { _info
= _info
}
24 -- Cache library functions.
25 local type, tonumber, pairs
, ipairs
= type, tonumber, pairs
, ipairs
26 local assert, unpack
, setmetatable
= assert, unpack
or table.unpack
, setmetatable
28 local sub
, format, byte
, char
= _s
.sub
, _s
.format, _s
.byte
, _s
.char
29 local find
, match
, gmatch
, gsub = _s
.find
, _s
.match
, _s
.gmatch
, _s
.gsub
30 local concat
, sort = table.concat
, table.sort
31 local bit
= bit
or require("bit")
32 local band
, shl
, shr
= bit
.band
, bit
.lshift
, bit
.rshift
34 -- Inherited tables and callbacks.
36 local wline
, werror
, wfatal
, wwarn
39 -- CHECK: Keep this in sync with the C code!
40 local action_names
= {
41 -- int arg, 1 buffer pos:
42 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
43 -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
44 "VREG", "SPACE", -- !x64: VREG support NYI.
45 -- ptrdiff_t arg, 1 buffer pos (address): !x64
47 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
49 -- action arg (1 byte) or int arg, 1 buffer pos (link):
51 -- action arg (1 byte) or int arg, 1 buffer pos (offset):
52 "LABEL_LG", "LABEL_PC",
53 -- action arg (1 byte), 1 buffer pos (offset):
55 -- action args (2 bytes), no buffer pos.
57 -- action arg (1 byte), no buffer pos.
59 -- no action arg, no buffer pos.
61 -- action arg (1 byte), no buffer pos, terminal action:
63 -- no args, no buffer pos, terminal action:
67 -- Maximum number of section buffer positions for dasm_put().
68 -- CHECK: Keep this in sync with the C code!
69 local maxsecpos
= 25 -- Keep this low, to avoid excessively long C lines.
71 -- Action name -> action number (dynamically generated below).
73 -- First action number. Everything below does not need to be escaped.
74 local actfirst
= 256-#action_names
76 -- Action list buffer and string (only used to remove dupes).
80 -- Argument list for next dasm_put(). Start with offset 0 into action list.
83 -- Current number of section buffer positions for dasm_put().
86 ------------------------------------------------------------------------------
-- Assign consecutive action numbers to the action names, in list order.
-- The first name gets actfirst, the second actfirst+1, and so on.
for idx = 1, #action_names do
  map_action[action_names[idx]] = actfirst + idx - 1
end
94 -- Dump action names and numbers.
95 local function dumpactions(out
)
96 out
:write("DynASM encoding engine action codes:\n")
97 for n
,name
in ipairs(action_names
) do
98 local num
= map_action
[name
]
99 out
:write(format(" %-10s %02X %d\n", name
, num
, num
))
104 -- Write action list buffer as a huge static C array.
105 local function writeactions(out
, name
)
107 local last
= actlist
[nn
] or 255
108 actlist
[nn
] = nil -- Remove last byte.
109 if nn
== 0 then nn
= 1 end
110 out
:write("static const unsigned char ", name
, "[", nn
, "] = {\n")
112 for n
,b
in ipairs(actlist
) do
115 assert(out
:write(s
, "\n"))
119 out
:write(s
, last
, "\n};\n\n") -- Add last byte back.
122 ------------------------------------------------------------------------------
-- Append one raw byte to the end of the action list.
--   n: value to append; must be an integer in the range 0..255,
--      otherwise the assertion fails with "byte out of range".
local function wputxb(n)
  local is_byte = n >= 0 and n <= 255 and n % 1 == 0
  assert(is_byte, "byte out of range")
  actlist[#actlist+1] = n
end
-- Append an action code to the action list and account for its argument.
--   action: action name; must be a key of map_action, else the assert fails.
--   a:      optional argument, queued in actargs when given.
--   num:    optional number of section buffer positions to advance
--           (defaults to 1 when only `a` is given).
-- secpos is left untouched when neither `a` nor `num` is supplied.
local function waction(action, a, num)
  local code = assert(map_action[action], "bad action name `"..action.."'")
  wputxb(code)
  if a then
    actargs[#actargs+1] = a
    secpos = secpos + (num or 1)
  elseif num then
    secpos = secpos + num
  end
end
-- Emit a call to one of the embedded DynASM C functions.
--   func: suffix of the C function name (the call is to dasm_<func>).
--   args: list of argument strings; joined with ", " after the leading Dst.
-- The finished statement is handed to wline with a second argument of true.
-- NOTE(review): the meaning of that flag is defined by wline, which is not
-- visible here -- confirm against dynasm.lua.
local function wcall(func, args)
  local arglist = concat(args, ", ")
  local stmt = format("dasm_%s(Dst, %s);", func, arglist)
  wline(stmt, true)
end
142 -- Delete duplicate action list chunks. A tad slow, but so what.
143 local function dedupechunk(offset
)
144 local al
, as
= actlist
, actstr
145 local chunk
= char(unpack(al
, offset
+1, #al
))
146 local orig
= find(as
, chunk
, 1, true)
148 actargs
[1] = orig
-1 -- Replace with original offset.
149 for i
=offset
+1,#al
do al
[i
] = nil end -- Kill dupe.
155 -- Flush action list (intervening C code or buffer pos overflow).
156 local function wflush(term
)
157 local offset
= actargs
[1]
158 if #actlist
== offset
then return end -- Nothing to flush.
159 if not term
then waction("STOP") end -- Terminate action list.
161 wcall("put", actargs
) -- Add call to dasm_put().
162 actargs
= { #actlist
} -- Actionlist offset is 1st arg to next dasm_put().
163 secpos
= 1 -- The actionlist offset occupies a buffer position, too.
167 local function wputb(n
)
168 if n
>= actfirst
then waction("ESC") end -- Need to escape byte.
172 ------------------------------------------------------------------------------
174 -- Global label name -> global label number. With auto assignment on 1st use.
175 local next_global
= 10
176 local map_global
= setmetatable({}, { __index
= function(t
, name
)
177 if not match(name
, "^[%a_][%w_@]*$") then werror("bad global label") end
178 local n
= next_global
179 if n
> 246 then werror("too many global labels") end
185 -- Dump global labels.
186 local function dumpglobals(out
, lvl
)
188 for name
, n
in pairs(map_global
) do t
[n
] = name
end
189 out
:write("Global labels:\n")
190 for i
=10,next_global
-1 do
191 out
:write(format(" %s\n", t
[i
]))
196 -- Write global label enum.
197 local function writeglobals(out
, prefix
)
199 for name
, n
in pairs(map_global
) do t
[n
] = name
end
200 out
:write("enum {\n")
201 for i
=10,next_global
-1 do
202 out
:write(" ", prefix
, gsub(t
[i
], "@.*", ""), ",\n")
204 out
:write(" ", prefix
, "_MAX\n};\n")
207 -- Write global label names.
208 local function writeglobalnames(out
, name
)
210 for name
, n
in pairs(map_global
) do t
[n
] = name
end
211 out
:write("static const char *const ", name
, "[] = {\n")
212 for i
=10,next_global
-1 do
213 out
:write(" \"", t
[i
], "\",\n")
215 out
:write(" (const char *)0\n};\n")
218 ------------------------------------------------------------------------------
220 -- Extern label name -> extern label number. With auto assignment on 1st use.
221 local next_extern
= -1
222 local map_extern
= setmetatable({}, { __index
= function(t
, name
)
223 -- No restrictions on the name for now.
224 local n
= next_extern
225 if n
< -256 then werror("too many extern labels") end
231 -- Dump extern labels.
232 local function dumpexterns(out
, lvl
)
234 for name
, n
in pairs(map_extern
) do t
[-n
] = name
end
235 out
:write("Extern labels:\n")
236 for i
=1,-next_extern
-1 do
237 out
:write(format(" %s\n", t
[i
]))
242 -- Write extern label names.
243 local function writeexternnames(out
, name
)
245 for name
, n
in pairs(map_extern
) do t
[-n
] = name
end
246 out
:write("static const char *const ", name
, "[] = {\n")
247 for i
=1,-next_extern
-1 do
248 out
:write(" \"", t
[i
], "\",\n")
250 out
:write(" (const char *)0\n};\n")
253 ------------------------------------------------------------------------------
255 -- Arch-specific maps.
256 local map_archdef
= {} -- Ext. register name -> int. name.
257 local map_reg_rev
= {} -- Int. register name -> ext. name.
258 local map_reg_num
= {} -- Int. register name -> register number.
259 local map_reg_opsize
= {} -- Int. register name -> operand size.
260 local map_reg_valid_base
= {} -- Int. register name -> valid base register?
261 local map_reg_valid_index
= {} -- Int. register name -> valid index register?
262 local map_reg_needrex
= {} -- Int. register name -> need rex vs. no rex.
263 local reg_list
= {} -- Canonical list of int. register names.
265 local map_type
= {} -- Type name -> { ctype, reg }
266 local ctypenum
= 0 -- Type number (for _PTx macros).
268 local addrsize
= x64
and "q" or "d" -- Size for address operands.
270 -- Helper functions to fill register maps.
271 local function mkrmap(sz
, cl
, names
)
272 local cname
= format("@%s", sz
)
273 reg_list
[#reg_list
+1] = cname
274 map_archdef
[cl
] = cname
275 map_reg_rev
[cname
] = cl
276 map_reg_num
[cname
] = -1
277 map_reg_opsize
[cname
] = sz
278 if sz
== addrsize
or sz
== "d" then
279 map_reg_valid_base
[cname
] = true
280 map_reg_valid_index
[cname
] = true
283 for n
,name
in ipairs(names
) do
284 local iname
= format("@%s%x", sz
, n
-1)
285 reg_list
[#reg_list
+1] = iname
286 map_archdef
[name
] = iname
287 map_reg_rev
[iname
] = name
288 map_reg_num
[iname
] = n
-1
289 map_reg_opsize
[iname
] = sz
290 if sz
== "b" and n
> 4 then map_reg_needrex
[iname
] = false end
291 if sz
== addrsize
or sz
== "d" then
292 map_reg_valid_base
[iname
] = true
293 map_reg_valid_index
[iname
] = true
297 for i
=0,(x64
and sz
~= "f") and 15 or 7 do
298 local needrex
= sz
== "b" and i
> 3
299 local iname
= format("@%s%x%s", sz
, i
, needrex
and "R" or "")
300 if needrex
then map_reg_needrex
[iname
] = true end
302 if sz
== "o" then name
= format("xmm%d", i
)
303 elseif sz
== "f" then name
= format("st%d", i
)
304 else name
= format("r%d%s", i
, sz
== addrsize
and "" or sz
) end
305 map_archdef
[name
] = iname
306 if not map_reg_rev
[iname
] then
307 reg_list
[#reg_list
+1] = iname
308 map_reg_rev
[iname
] = name
309 map_reg_num
[iname
] = i
310 map_reg_opsize
[iname
] = sz
311 if sz
== addrsize
or sz
== "d" then
312 map_reg_valid_base
[iname
] = true
313 map_reg_valid_index
[iname
] = true
317 reg_list
[#reg_list
+1] = ""
320 -- Integer registers (qword, dword, word and byte sized).
322 mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
324 mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
325 mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
326 mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
327 map_reg_valid_index
[map_archdef
.esp
] = false
328 if x64
then map_reg_valid_index
[map_archdef
.rsp
] = false end
329 map_archdef
["Ra"] = "@"..addrsize
331 -- FP registers (internally tword sized, but use "f" as operand size).
334 -- SSE registers (oword sized, but qword and dword accessible).
337 -- Operand size prefixes to codes.
339 byte
= "b", word
= "w", dword
= "d", qword
= "q", oword
= "o", tword
= "t",
343 -- Operand size code to number.
344 local map_opsizenum
= {
345 b
= 1, w
= 2, d
= 4, q
= 8, o
= 16, t
= 10,
348 -- Operand size code to name.
349 local map_opsizename
= {
350 b
= "byte", w
= "word", d
= "dword", q
= "qword", o
= "oword", t
= "tword",
354 -- Valid index register scale factors.
356 ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3,
361 o
= 0, no
= 1, b
= 2, nb
= 3, e
= 4, ne
= 5, be
= 6, nbe
= 7,
362 s
= 8, ns
= 9, p
= 10, np
= 11, l
= 12, nl
= 13, le
= 14, nle
= 15,
363 c
= 2, nae
= 2, nc
= 3, ae
= 3, z
= 4, nz
= 5, na
= 6, a
= 7,
364 pe
= 10, po
= 11, nge
= 12, ge
= 13, ng
= 14, g
= 15,
-- Reverse defines for registers.
-- Every "@" followed by alphanumerics in s is looked up in map_reg_rev and
-- replaced by the mapped name; tokens without an entry are kept as they are.
-- Returns the resulting string and the number of matches, as gsub does.
function _M.revdef(s)
  local replaced, count = gsub(s, "@%w+", map_reg_rev)
  return replaced, count
end
373 -- Dump register names and numbers
374 local function dumpregs(out
)
375 out
:write("Register names, sizes and internal numbers:\n")
376 for _
,reg
in ipairs(reg_list
) do
380 local name
= map_reg_rev
[reg
]
381 local num
= map_reg_num
[reg
]
382 local opsize
= map_opsizename
[map_reg_opsize
[reg]]
383 out
:write(format(" %-5s %-8s %s\n", name
, opsize
,
384 num
< 0 and "(variable)" or num
))
389 ------------------------------------------------------------------------------
391 -- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
392 local function wputlabel(aprefix
, imm
, num
)
393 if type(imm
) == "number" then
396 wputxb(aprefix
== "IMM_" and 0 or 1)
399 waction(aprefix
.."LG", nil, num
);
403 waction(aprefix
.."PC", imm
, num
)
407 -- Put signed byte or arg.
408 local function wputsbarg(n
)
409 if type(n
) == "number" then
410 if n
< -128 or n
> 127 then
411 werror("signed immediate byte out of range")
413 if n
< 0 then n
= n
+ 256 end
415 else waction("IMM_S", n
) end
418 -- Put unsigned byte or arg.
419 local function wputbarg(n
)
420 if type(n
) == "number" then
421 if n
< 0 or n
> 255 then
422 werror("unsigned immediate byte out of range")
425 else waction("IMM_B", n
) end
-- Put an unsigned 16 bit word, or defer it to an IMM_W action.
--   n: a number is range-checked (any bit above the low 16 is reported
--      through werror) and emitted as two bytes, low byte first;
--      any other value is passed on as the argument of an IMM_W action.
local function wputwarg(n)
  if type(n) ~= "number" then
    waction("IMM_W", n)
    return
  end
  if shr(n, 16) ~= 0 then
    werror("unsigned immediate word out of range")
  end
  wputb(band(n, 255))
  wputb(shr(n, 8))
end
438 -- Put signed or unsigned dword or arg.
439 local function wputdarg(n
)
441 if tn
== "number" then
443 wputb(band(shr(n
, 8), 255))
444 wputb(band(shr(n
, 16), 255))
446 elseif tn
== "table" then
447 wputlabel("IMM_", n
[1], 1)
-- Put a number or arg whose width depends on the operand size code.
--   sz: "w" -> word, "b" -> unsigned byte, "s" -> signed byte,
--       "d", "q" or no size at all -> dword (the default).
--       Any other code is reported through werror.
--   n:  value or arg, forwarded unchanged to the size-specific writer.
local function wputszarg(sz, n)
  if sz == "w" then
    wputwarg(n)
  elseif sz == "b" then
    wputbarg(n)
  elseif sz == "s" then
    wputsbarg(n)
  elseif sz == "d" or sz == "q" or not sz then
    wputdarg(n)
  else
    werror("bad operand size")
  end
end
462 -- Put multi-byte opcode with operand-size dependent modifications.
463 local function wputop(sz
, op
, rex
)
465 if rex
~= 0 and not x64
then werror("bad operand size") end
466 if sz
== "w" then wputb(102) end
467 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
468 if op
>= 4294967296 then r
= op
%4294967296 wputb((op
-r
)/4294967296) op
= r
end
469 if op
>= 16777216 then wputb(shr(op
, 24)); op
= band(op
, 0xffffff) end
472 local opc3
= band(op
, 0xffff00)
473 if opc3
== 0x0f3a00 or opc3
== 0x0f3800 then
474 wputb(64 + band(rex
, 15)); rex
= 0
477 wputb(shr(op
, 16)); op
= band(op
, 0xffff)
481 if b
== 15 and rex
~= 0 then wputb(64 + band(rex
, 15)); rex
= 0 end
485 if rex
~= 0 then wputb(64 + band(rex
, 15)) end
486 if sz
== "b" then op
= op
- 1 end
-- Put one ModRM or SIB formatted byte.
--   m:  top field, must be < 4; placed in bits 6-7.
--   s:  middle field, must be < 16; only its low 3 bits go into bits 3-5.
--   rm: bottom field, must be < 16; only its low 3 bits go into bits 0-2.
--   vs, vrm: accepted but not used by this function body.
-- Out-of-range operands fail the assertion with "bad modrm operands".
local function wputmodrm(m, s, rm, vs, vrm)
  local in_range = m < 4 and s < 16 and rm < 16
  assert(in_range, "bad modrm operands")
  local top = shl(m, 6)
  local middle = shl(band(s, 7), 3)
  local bottom = band(rm, 7)
  wputb(top + middle + bottom)
end
496 -- Put ModRM/SIB plus optional displacement.
497 local function wputmrmsib(t
, imark
, s
, vsreg
)
499 local reg
, xreg
= t
.reg
, t
.xreg
500 if reg
and reg
< 0 then reg
= 0; vreg
= t
.vreg
end
501 if xreg
and xreg
< 0 then xreg
= 0; vxreg
= t
.vxreg
end
502 if s
< 0 then s
= 0 end
505 if sub(t
.mode
, 1, 1) == "r" then
507 if vsreg
then waction("VREG", vsreg
); wputxb(2) end
508 if vreg
then waction("VREG", vreg
); wputxb(0) end
513 local tdisp
= type(disp
)
518 -- Indexed mode with index register only.
519 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
521 if imark
== "I" then waction("MARK") end
522 if vsreg
then waction("VREG", vsreg
); wputxb(2) end
523 wputmodrm(t
.xsc
, xreg
, 5)
524 if vxreg
then waction("VREG", vxreg
); wputxb(3) end
526 -- Pure 32 bit displacement.
527 if x64
and tdisp
~= "table" then
528 wputmodrm(0, s
, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
529 if imark
== "I" then waction("MARK") end
533 wputmodrm(0, s
, 5) -- [disp|rip-label] -> (0, s, ebp)
534 if imark
== "I" then waction("MARK") end
536 if vsreg
then waction("VREG", vsreg
); wputxb(2) end
538 if riprel
then -- Emit rip-relative displacement.
539 if match("UWSiI", imark
) then
540 werror("NYI: rip-relative displacement followed by immediate")
542 -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
543 wputlabel("REL_", disp
[1], 2)
551 if tdisp
== "number" then -- Check displacement size at assembly time.
552 if disp
== 0 and band(reg
, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
553 if not vreg
then m
= 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
554 elseif disp
>= -128 and disp
<= 127 then m
= 1
556 elseif tdisp
== "table" then
560 -- Index register present or esp as base register: need SIB encoding.
561 if xreg
or band(reg
, 7) == 4 then
562 wputmodrm(m
or 2, s
, 4) -- ModRM.
563 if m
== nil or imark
== "I" then waction("MARK") end
564 if vsreg
then waction("VREG", vsreg
); wputxb(2) end
565 wputmodrm(t
.xsc
or 0, xreg
or 4, reg
) -- SIB.
566 if vxreg
then waction("VREG", vxreg
); wputxb(3) end
567 if vreg
then waction("VREG", vreg
); wputxb(1) end
569 wputmodrm(m
or 2, s
, reg
) -- ModRM.
570 if (imark
== "I" and (m
== 1 or m
== 2)) or
571 (m
== nil and (vsreg
or vreg
)) then waction("MARK") end
572 if vsreg
then waction("VREG", vsreg
); wputxb(2) end
573 if vreg
then waction("VREG", vreg
); wputxb(1) end
577 if m
== 1 then wputsbarg(disp
)
578 elseif m
== 2 then wputdarg(disp
)
579 elseif m
== nil then waction("DISP", disp
) end
582 ------------------------------------------------------------------------------
584 -- Return human-readable operand mode string.
585 local function opmodestr(op
, args
)
589 m
[#m
+1] = sub(a
.mode
, 1, 1)..(a
.opsize
or "?")
591 return op
.." "..concat(m
, ",")
594 -- Convert number to valid integer or nil.
595 local function toint(expr
)
596 local n
= tonumber(expr
)
598 if n
% 1 ~= 0 or n
< -2147483648 or n
> 4294967295 then
599 werror("bad integer number `"..expr
.."'")
605 -- Parse immediate expression.
606 local function immexpr(expr
)
608 if sub(expr
, 1, 1) == "&" then
609 return "iPJ", format("(ptrdiff_t)(%s)", sub(expr
,2))
612 local prefix
= sub(expr
, 1, 2)
613 -- =>expr (pc label reference)
614 if prefix
== "=>" then
615 return "iJ", sub(expr
, 3)
617 -- ->name (global label reference)
618 if prefix
== "->" then
619 return "iJ", map_global
[sub(expr
, 3)]
622 -- [<>][1-9] (local label reference)
623 local dir
, lnum
= match(expr
, "^([<>])([1-9])$")
624 if dir
then -- Fwd: 247-255, Bkwd: 1-9.
625 return "iJ", lnum
+ (dir
== ">" and 246 or 0)
628 local extname
= match(expr
, "^extern%s+(%S+)$")
630 return "iJ", map_extern
[extname
]
633 -- expr (interpreted as immediate)
637 -- Parse displacement expression: +-num, +-expr, +-opsize*num
638 local function dispexpr(expr
)
639 local disp
= expr
== "" and 0 or toint(expr
)
640 if disp
then return disp
end
641 local c
, dispt
= match(expr
, "^([+-])%s*(.+)$")
645 werror("bad displacement expression `"..expr
.."'")
647 local opsize
, tailops
= match(dispt
, "^(%w+)%s*%*%s*(.+)$")
648 local ops
, imm
= map_opsize
[opsize
], toint(tailops
)
650 if c
== "-" then imm
= -imm
end
651 return imm
*map_opsizenum
[ops
]
653 local mode
, iexpr
= immexpr(dispt
)
655 if c
== "-" then werror("cannot invert label reference") end
658 return expr
-- Need to return original signed expression.
661 -- Parse register or type expression.
662 local function rtexpr(expr
)
663 if not expr
then return end
664 local tname
, ovreg
= match(expr
, "^([%w_]+):(@[%w_]+)$")
665 local tp
= map_type
[tname
or expr
]
667 local reg
= ovreg
or tp
.reg
668 local rnum
= map_reg_num
[reg
]
670 werror("type `"..(tname
or expr
).."' needs a register override")
672 if not map_reg_valid_base
[reg
] then
673 werror("bad base register override `"..(map_reg_rev
[reg
] or reg
).."'")
677 return expr
, map_reg_num
[expr
]
680 -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
681 local function parseoperand(param
)
685 local opsize
, tailops
= match(param
, "^(%w+)%s*(.+)$")
687 t
.opsize
= map_opsize
[opsize
]
688 if t
.opsize
then expr
= tailops
end
691 local br
= match(expr
, "^%[%s*(.-)%s*%]$")
699 t
.mode
= x64
and "xm" or "xmO"
705 local reg
, tailr
= match(br
, "^([@%w_:]+)%s*(.*)$")
706 reg
, t
.reg
, tp
= rtexpr(reg
)
709 t
.mode
= x64
and "xm" or "xmO"
710 t
.disp
= dispexpr("+"..br
)
715 t
.vreg
, tailr
= match(tailr
, "^(%b())(.*)$")
716 if not t
.vreg
then werror("bad variable register expression") end
719 -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
720 local xsc
, tailsc
= match(tailr
, "^%*%s*([1248])%s*(.*)$")
722 if not map_reg_valid_index
[reg
] then
723 werror("bad index register `"..map_reg_rev
[reg
].."'")
730 t
.disp
= dispexpr(tailsc
)
733 if not map_reg_valid_base
[reg
] then
734 werror("bad base register `"..map_reg_rev
[reg
].."'")
737 -- [reg] or [reg+-disp]
738 t
.disp
= toint(tailr
) or (tailr
== "" and 0)
739 if t
.disp
then break end
742 local xreg
, tailx
= match(tailr
, "^+%s*([@%w_:]+)%s*(.*)$")
743 xreg
, t
.xreg
, tp
= rtexpr(xreg
)
746 t
.disp
= dispexpr(tailr
)
749 if not map_reg_valid_index
[xreg
] then
750 werror("bad index register `"..map_reg_rev
[xreg
].."'")
754 t
.vxreg
, tailx
= match(tailx
, "^(%b())(.*)$")
755 if not t
.vxreg
then werror("bad variable register expression") end
759 local xsc
, tailsc
= match(tailx
, "^%*%s*([1248])%s*(.*)$")
765 -- [...] or [...+-disp] or [...+-expr]
766 t
.disp
= dispexpr(tailx
)
769 local imm
= toint(expr
)
770 if not imm
and sub(expr
, 1, 1) == "*" and t
.opsize
then
771 imm
= toint(sub(expr
, 2))
773 imm
= imm
* map_opsizenum
[t
.opsize
]
778 if t
.opsize
then werror("bad operand size override") end
780 if imm
== 1 then m
= m
.."1" end
781 if imm
>= 4294967168 and imm
<= 4294967295 then imm
= imm
-4294967296 end
782 if imm
>= -128 and imm
<= 127 then m
= m
.."S" end
789 local reg
, tailr
= match(expr
, "^([@%w_:]+)%s*(.*)$")
790 reg
, t
.reg
, tp
= rtexpr(reg
)
793 t
.vreg
, tailr
= match(tailr
, "^(%b())(.*)$")
794 if not t
.vreg
then werror("bad variable register expression") end
798 if t
.opsize
then werror("bad operand size override") end
799 t
.opsize
= map_reg_opsize
[reg
]
800 if t
.opsize
== "f" then
801 t
.mode
= t
.reg
== 0 and "fF" or "f"
803 if reg
== "@w4" or (x64
and reg
== "@d4") then
804 wwarn("bad idea, try again with `"..(x64
and "rsp'" or "esp'"))
806 t
.mode
= t
.reg
== 0 and "rmR" or (reg
== "@b1" and "rmC" or "rm")
808 t
.needrex
= map_reg_needrex
[reg
]
812 -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
813 if not tp
then werror("bad operand `"..param
.."'") end
815 t
.disp
= format(tp
.ctypefmt
, tailr
)
817 t
.mode
, t
.imm
= immexpr(expr
)
818 if sub(t
.mode
, -1) == "J" then
819 if t
.opsize
and t
.opsize
~= addrsize
then
820 werror("bad operand size override")
830 ------------------------------------------------------------------------------
831 -- x86 Template String Description
832 -- ===============================
834 -- Each template string is a list of [match:]pattern pairs,
835 -- separated by "|". The first match wins. No match means a
836 -- bad or unsupported combination of operand modes or sizes.
838 -- The match part and the ":" is omitted if the operation has
839 -- no operands. Otherwise the first N characters are matched
840 -- against the mode strings of each of the N operands.
842 -- The mode string for each operand type is (see parseoperand()):
843 -- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
844 -- FP register: "f", +"F" for st0
845 -- Index operand: "xm", +"O" for [disp] (pure offset)
846 -- Immediate: "i", +"S" for signed 8 bit, +"1" for 1,
847 -- +"I" for arg, +"P" for pointer
848 -- Any: +"J" for valid jump targets
850 -- So a match character "m" (mixed) matches both an integer register
851 -- and an index operand (to be encoded with the ModRM/SIB scheme).
852 -- But "r" matches only a register and "x" only an index operand
853 -- (e.g. for FP memory access operations).
855 -- The operand size match string starts right after the mode match
856 -- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
857 -- The effective data size of the operation is matched against this list.
859 -- If only the regular "b", "w", "d", "q", "t" operand sizes are
860 -- present, then all operands must be the same size. Unspecified sizes
861 -- are ignored, but at least one operand must have a size or the pattern
862 -- won't match (use the "byte", "word", "dword", "qword", "tword"
863 -- operand size overrides. E.g.: mov dword [eax], 1).
865 -- If the list has a "1" or "2" prefix, the operand size is taken
866 -- from the respective operand and any other operand sizes are ignored.
867 -- If the list contains only ".", all operand sizes are ignored.
868 -- If the list has a "/" prefix, the concatenated (mixed) operand sizes
869 -- are compared to the match.
871 -- E.g. "rrdw" matches for either two dword registers or two word
872 -- registers. "Fx2dq" matches an st0 operand plus an index operand
873 -- pointing to a dword (float) or qword (double).
875 -- Every character after the ":" is part of the pattern string:
876 -- Hex chars are accumulated to form the opcode (left to right).
877 -- "n" disables the standard opcode mods
878 -- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
880 -- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode.
881 -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
882 -- The spare 3 bits are either filled with the last hex digit or
883 -- the result from a previous "r"/"R". The opcode is restored.
885 -- All of the following characters force a flush of the opcode:
886 -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
887 -- "S" stores a signed 8 bit immediate from the last operand.
888 -- "U" stores an unsigned 8 bit immediate from the last operand.
889 -- "W" stores an unsigned 16 bit immediate from the last operand.
890 -- "i" stores an operand sized immediate from the last operand.
891 -- "I" ditto, but generates an action code to optionally modify
892 -- the opcode (+2) for a signed 8 bit immediate.
893 -- "J" generates one of the REL action codes from the last operand.
895 ------------------------------------------------------------------------------
897 -- Template strings for x86 instructions. Ordered by first opcode byte.
898 -- Unimplemented opcodes (deliberate omissions) are marked with *.
905 -- 0F: two byte opcode prefix
924 inc_1
= x64
and "m:FF0m" or "rdw:40r|m:FF0m",
925 dec_1
= x64
and "m:FF1m" or "rdw:48r|m:FF1m",
926 push_1
= (x64
and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
927 "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
928 pop_1
= x64
and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
929 -- 60: *pusha, *pushad, *pushaw
930 -- 61: *popa, *popad, *popaw
932 -- 63: x86: *arpl mw,rw
933 movsxd_2
= x64
and "rm/qd:63rM",
937 a16_0
= not x64
and "67" or nil,
938 a32_0
= x64
and "67",
940 -- 69: imul rdw,mdw,idw
942 -- 6B: imul rdw,mdw,S
946 -- 6F: *outsd, *outsw
952 test_2
= "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
960 lea_2
= "rx1dq:8DrM",
964 xchg_2
= "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
975 pushfd_0
= not x64
and "9C",
976 pushfq_0
= x64
and "9C",
978 popfd_0
= not x64
and "9D",
979 popfq_0
= x64
and "9D",
982 mov_2
= "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
1026 -- D8-DF: floating point ops
1030 -- E3: *jcxz, *jecxz
1035 call_1
= x64
and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
1036 jmp_1
= x64
and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
1052 -- F6: test... mb,i; div... mb
1053 -- F7: test... mdw,i; div... mdw
1070 imul_2
= "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
1071 imul_3
= "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",
1073 movzx_2
= "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
1074 movsx_2
= "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",
1076 bswap_1
= "rqd:0FC8r",
1077 bsf_2
= "rmqdw:0FBCrM",
1078 bsr_2
= "rmqdw:0FBDrM",
1079 bt_2
= "mrqdw:0FA3Rm|miqdw:0FBA4mU",
1080 btc_2
= "mrqdw:0FBBRm|miqdw:0FBA7mU",
1081 btr_2
= "mrqdw:0FB3Rm|miqdw:0FBA6mU",
1082 bts_2
= "mrqdw:0FABRm|miqdw:0FBA5mU",
1084 shld_3
= "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
1085 shrd_3
= "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
1087 rdtsc_0
= "0F31", -- P1+
1088 rdpmc_0
= "0F33", -- P6+
1089 cpuid_0
= "0FA2", -- P1+
1091 -- floating point ops
1092 fst_1
= "ff:DDD0r|xd:D92m|xq:nDD2m",
1093 fstp_1
= "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
1094 fld_1
= "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",
1096 fpop_0
= "DDD8", -- Alias for fstp st0.
1098 fist_1
= "xw:nDF2m|xd:DB2m",
1099 fistp_1
= "xw:nDF3m|xd:DB3m|xq:nDF7m",
1100 fild_1
= "xw:nDF0m|xd:DB0m|xq:nDF5m",
1103 fxch_1
= "ff:D9C8r",
1104 fxch_2
= "fFf:D9C8r|Fff:D9C8R",
1106 fucom_1
= "ff:DDE0r",
1107 fucom_2
= "Fff:DDE0R",
1108 fucomp_1
= "ff:DDE8r",
1109 fucomp_2
= "Fff:DDE8R",
1110 fucomi_1
= "ff:DBE8r", -- P6+
1111 fucomi_2
= "Fff:DBE8R", -- P6+
1112 fucomip_1
= "ff:DFE8r", -- P6+
1113 fucomip_2
= "Fff:DFE8R", -- P6+
1114 fcomi_1
= "ff:DBF0r", -- P6+
1115 fcomi_2
= "Fff:DBF0R", -- P6+
1116 fcomip_1
= "ff:DFF0r", -- P6+
1117 fcomip_2
= "Fff:DFF0R", -- P6+
1121 fldenv_1
= "x.:D94m",
1122 fnstenv_1
= "x.:D96m",
1123 fstenv_1
= "x.:9BD96m",
1124 fldcw_1
= "xw:nD95m",
1125 fstcw_1
= "xw:n9BD97m",
1126 fnstcw_1
= "xw:nD97m",
1127 fstsw_1
= "Rw:n9BDFE0|xw:n9BDD7m",
1128 fnstsw_1
= "Rw:nDFE0|xw:nDD7m",
1133 -- D9D1-D9DF: unassigned
1170 andnpd_2
= "rmo:660F55rM",
1171 andnps_2
= "rmo:0F55rM",
1172 andpd_2
= "rmo:660F54rM",
1173 andps_2
= "rmo:0F54rM",
1174 clflush_1
= "x.:0FAE7m",
1175 cmppd_3
= "rmio:660FC2rMU",
1176 cmpps_3
= "rmio:0FC2rMU",
1177 cmpsd_3
= "rrio:F20FC2rMU|rxi/oq:",
1178 cmpss_3
= "rrio:F30FC2rMU|rxi/od:",
1179 comisd_2
= "rro:660F2FrM|rx/oq:",
1180 comiss_2
= "rro:0F2FrM|rx/od:",
1181 cvtdq2pd_2
= "rro:F30FE6rM|rx/oq:",
1182 cvtdq2ps_2
= "rmo:0F5BrM",
1183 cvtpd2dq_2
= "rmo:F20FE6rM",
1184 cvtpd2ps_2
= "rmo:660F5ArM",
1185 cvtpi2pd_2
= "rx/oq:660F2ArM",
1186 cvtpi2ps_2
= "rx/oq:0F2ArM",
1187 cvtps2dq_2
= "rmo:660F5BrM",
1188 cvtps2pd_2
= "rro:0F5ArM|rx/oq:",
1189 cvtsd2si_2
= "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
1190 cvtsd2ss_2
= "rro:F20F5ArM|rx/oq:",
1191 cvtsi2sd_2
= "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
1192 cvtsi2ss_2
= "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
1193 cvtss2sd_2
= "rro:F30F5ArM|rx/od:",
1194 cvtss2si_2
= "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
1195 cvttpd2dq_2
= "rmo:660FE6rM",
1196 cvttps2dq_2
= "rmo:F30F5BrM",
1197 cvttsd2si_2
= "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
1198 cvttss2si_2
= "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
1199 fxsave_1
= "x.:0FAE0m",
1200 fxrstor_1
= "x.:0FAE1m",
1201 ldmxcsr_1
= "xd:0FAE2m",
1202 lfence_0
= "0FAEE8",
1203 maskmovdqu_2
= "rro:660FF7rM",
1204 mfence_0
= "0FAEF0",
1205 movapd_2
= "rmo:660F28rM|mro:660F29Rm",
1206 movaps_2
= "rmo:0F28rM|mro:0F29Rm",
1207 movd_2
= "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
1208 movdqa_2
= "rmo:660F6FrM|mro:660F7FRm",
1209 movdqu_2
= "rmo:F30F6FrM|mro:F30F7FRm",
1210 movhlps_2
= "rro:0F12rM",
1211 movhpd_2
= "rx/oq:660F16rM|xr/qo:n660F17Rm",
1212 movhps_2
= "rx/oq:0F16rM|xr/qo:n0F17Rm",
1213 movlhps_2
= "rro:0F16rM",
1214 movlpd_2
= "rx/oq:660F12rM|xr/qo:n660F13Rm",
1215 movlps_2
= "rx/oq:0F12rM|xr/qo:n0F13Rm",
1216 movmskpd_2
= "rr/do:660F50rM",
1217 movmskps_2
= "rr/do:0F50rM",
1218 movntdq_2
= "xro:660FE7Rm",
1219 movnti_2
= "xrqd:0FC3Rm",
1220 movntpd_2
= "xro:660F2BRm",
1221 movntps_2
= "xro:0F2BRm",
1222 movq_2
= "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
1223 movsd_2
= "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
1224 movss_2
= "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
1225 movupd_2
= "rmo:660F10rM|mro:660F11Rm",
1226 movups_2
= "rmo:0F10rM|mro:0F11Rm",
1227 orpd_2
= "rmo:660F56rM",
1228 orps_2
= "rmo:0F56rM",
1229 packssdw_2
= "rmo:660F6BrM",
1230 packsswb_2
= "rmo:660F63rM",
1231 packuswb_2
= "rmo:660F67rM",
1232 paddb_2
= "rmo:660FFCrM",
1233 paddd_2
= "rmo:660FFErM",
1234 paddq_2
= "rmo:660FD4rM",
1235 paddsb_2
= "rmo:660FECrM",
1236 paddsw_2
= "rmo:660FEDrM",
1237 paddusb_2
= "rmo:660FDCrM",
1238 paddusw_2
= "rmo:660FDDrM",
1239 paddw_2
= "rmo:660FFDrM",
1240 pand_2
= "rmo:660FDBrM",
1241 pandn_2
= "rmo:660FDFrM",
1243 pavgb_2
= "rmo:660FE0rM",
1244 pavgw_2
= "rmo:660FE3rM",
1245 pcmpeqb_2
= "rmo:660F74rM",
1246 pcmpeqd_2
= "rmo:660F76rM",
1247 pcmpeqw_2
= "rmo:660F75rM",
1248 pcmpgtb_2
= "rmo:660F64rM",
1249 pcmpgtd_2
= "rmo:660F66rM",
1250 pcmpgtw_2
= "rmo:660F65rM",
1251 pextrw_3
= "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
1252 pinsrw_3
= "rri/od:660FC4rMU|rxi/ow:",
1253 pmaddwd_2
= "rmo:660FF5rM",
1254 pmaxsw_2
= "rmo:660FEErM",
1255 pmaxub_2
= "rmo:660FDErM",
1256 pminsw_2
= "rmo:660FEArM",
1257 pminub_2
= "rmo:660FDArM",
1258 pmovmskb_2
= "rr/do:660FD7rM",
1259 pmulhuw_2
= "rmo:660FE4rM",
1260 pmulhw_2
= "rmo:660FE5rM",
1261 pmullw_2
= "rmo:660FD5rM",
1262 pmuludq_2
= "rmo:660FF4rM",
1263 por_2
= "rmo:660FEBrM",
1264 prefetchnta_1
= "xb:n0F180m",
1265 prefetcht0_1
= "xb:n0F181m",
1266 prefetcht1_1
= "xb:n0F182m",
1267 prefetcht2_1
= "xb:n0F183m",
1268 psadbw_2
= "rmo:660FF6rM",
1269 pshufd_3
= "rmio:660F70rMU",
1270 pshufhw_3
= "rmio:F30F70rMU",
1271 pshuflw_3
= "rmio:F20F70rMU",
1272 pslld_2
= "rmo:660FF2rM|rio:660F726mU",
1273 pslldq_2
= "rio:660F737mU",
1274 psllq_2
= "rmo:660FF3rM|rio:660F736mU",
1275 psllw_2
= "rmo:660FF1rM|rio:660F716mU",
1276 psrad_2
= "rmo:660FE2rM|rio:660F724mU",
1277 psraw_2
= "rmo:660FE1rM|rio:660F714mU",
1278 psrld_2
= "rmo:660FD2rM|rio:660F722mU",
1279 psrldq_2
= "rio:660F733mU",
1280 psrlq_2
= "rmo:660FD3rM|rio:660F732mU",
1281 psrlw_2
= "rmo:660FD1rM|rio:660F712mU",
1282 psubb_2
= "rmo:660FF8rM",
1283 psubd_2
= "rmo:660FFArM",
1284 psubq_2
= "rmo:660FFBrM",
1285 psubsb_2
= "rmo:660FE8rM",
1286 psubsw_2
= "rmo:660FE9rM",
1287 psubusb_2
= "rmo:660FD8rM",
1288 psubusw_2
= "rmo:660FD9rM",
1289 psubw_2
= "rmo:660FF9rM",
1290 punpckhbw_2
= "rmo:660F68rM",
1291 punpckhdq_2
= "rmo:660F6ArM",
1292 punpckhqdq_2
= "rmo:660F6DrM",
1293 punpckhwd_2
= "rmo:660F69rM",
1294 punpcklbw_2
= "rmo:660F60rM",
1295 punpckldq_2
= "rmo:660F62rM",
1296 punpcklqdq_2
= "rmo:660F6CrM",
1297 punpcklwd_2
= "rmo:660F61rM",
1298 pxor_2
= "rmo:660FEFrM",
1299 rcpps_2
= "rmo:0F53rM",
1300 rcpss_2
= "rro:F30F53rM|rx/od:",
1301 rsqrtps_2
= "rmo:0F52rM",
1302 rsqrtss_2
= "rmo:F30F52rM",
1303 sfence_0
= "0FAEF8",
1304 shufpd_3
= "rmio:660FC6rMU",
1305 shufps_3
= "rmio:0FC6rMU",
1306 stmxcsr_1
= "xd:0FAE3m",
1307 ucomisd_2
= "rro:660F2ErM|rx/oq:",
1308 ucomiss_2
= "rro:0F2ErM|rx/od:",
1309 unpckhpd_2
= "rmo:660F15rM",
1310 unpckhps_2
= "rmo:0F15rM",
1311 unpcklpd_2
= "rmo:660F14rM",
1312 unpcklps_2
= "rmo:0F14rM",
1313 xorpd_2
= "rmo:660F57rM",
1314 xorps_2
= "rmo:0F57rM",
1317 fisttp_1
= "xw:nDF1m|xd:DB1m|xq:nDD1m",
1318 addsubpd_2
= "rmo:660FD0rM",
1319 addsubps_2
= "rmo:F20FD0rM",
1320 haddpd_2
= "rmo:660F7CrM",
1321 haddps_2
= "rmo:F20F7CrM",
1322 hsubpd_2
= "rmo:660F7DrM",
1323 hsubps_2
= "rmo:F20F7DrM",
1324 lddqu_2
= "rxo:F20FF0rM",
1325 movddup_2
= "rmo:F20F12rM",
1326 movshdup_2
= "rmo:F30F16rM",
1327 movsldup_2
= "rmo:F30F12rM",
1330 pabsb_2
= "rmo:660F381CrM",
1331 pabsd_2
= "rmo:660F381ErM",
1332 pabsw_2
= "rmo:660F381DrM",
1333 palignr_3
= "rmio:660F3A0FrMU",
1334 phaddd_2
= "rmo:660F3802rM",
1335 phaddsw_2
= "rmo:660F3803rM",
1336 phaddw_2
= "rmo:660F3801rM",
1337 phsubd_2
= "rmo:660F3806rM",
1338 phsubsw_2
= "rmo:660F3807rM",
1339 phsubw_2
= "rmo:660F3805rM",
1340 pmaddubsw_2
= "rmo:660F3804rM",
1341 pmulhrsw_2
= "rmo:660F380BrM",
1342 pshufb_2
= "rmo:660F3800rM",
1343 psignb_2
= "rmo:660F3808rM",
1344 psignd_2
= "rmo:660F380ArM",
1345 psignw_2
= "rmo:660F3809rM",
1348 blendpd_3
= "rmio:660F3A0DrMU",
1349 blendps_3
= "rmio:660F3A0CrMU",
1350 blendvpd_3
= "rmRo:660F3815rM",
1351 blendvps_3
= "rmRo:660F3814rM",
1352 dppd_3
= "rmio:660F3A41rMU",
1353 dpps_3
= "rmio:660F3A40rMU",
1354 extractps_3
= "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
1355 insertps_3
= "rrio:660F3A41rMU|rxi/od:",
1356 movntdqa_2
= "rxo:660F382ArM",
1357 mpsadbw_3
= "rmio:660F3A42rMU",
1358 packusdw_2
= "rmo:660F382BrM",
1359 pblendvb_3
= "rmRo:660F3810rM",
1360 pblendw_3
= "rmio:660F3A0ErMU",
1361 pcmpeqq_2
= "rmo:660F3829rM",
1362 pextrb_3
= "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
1363 pextrd_3
= "mri/do:660F3A16RmU",
1364 pextrq_3
= "mri/qo:660F3A16RmU",
1365 -- pextrw is SSE2, mem operand is SSE4.1 only
1366 phminposuw_2
= "rmo:660F3841rM",
1367 pinsrb_3
= "rri/od:660F3A20nrMU|rxi/ob:",
1368 pinsrd_3
= "rmi/od:660F3A22rMU",
1369 pinsrq_3
= "rmi/oq:660F3A22rXMU",
1370 pmaxsb_2
= "rmo:660F383CrM",
1371 pmaxsd_2
= "rmo:660F383DrM",
1372 pmaxud_2
= "rmo:660F383FrM",
1373 pmaxuw_2
= "rmo:660F383ErM",
1374 pminsb_2
= "rmo:660F3838rM",
1375 pminsd_2
= "rmo:660F3839rM",
1376 pminud_2
= "rmo:660F383BrM",
1377 pminuw_2
= "rmo:660F383ArM",
1378 pmovsxbd_2
= "rro:660F3821rM|rx/od:",
1379 pmovsxbq_2
= "rro:660F3822rM|rx/ow:",
1380 pmovsxbw_2
= "rro:660F3820rM|rx/oq:",
1381 pmovsxdq_2
= "rro:660F3825rM|rx/oq:",
1382 pmovsxwd_2
= "rro:660F3823rM|rx/oq:",
1383 pmovsxwq_2
= "rro:660F3824rM|rx/od:",
1384 pmovzxbd_2
= "rro:660F3831rM|rx/od:",
1385 pmovzxbq_2
= "rro:660F3832rM|rx/ow:",
1386 pmovzxbw_2
= "rro:660F3830rM|rx/oq:",
1387 pmovzxdq_2
= "rro:660F3835rM|rx/oq:",
1388 pmovzxwd_2
= "rro:660F3833rM|rx/oq:",
1389 pmovzxwq_2
= "rro:660F3834rM|rx/od:",
1390 pmuldq_2
= "rmo:660F3828rM",
1391 pmulld_2
= "rmo:660F3840rM",
1392 ptest_2
= "rmo:660F3817rM",
1393 roundpd_3
= "rmio:660F3A09rMU",
1394 roundps_3
= "rmio:660F3A08rMU",
1395 roundsd_3
= "rrio:660F3A0BrMU|rxi/oq:",
1396 roundss_3
= "rrio:660F3A0ArMU|rxi/od:",
1399 crc32_2
= "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
1400 pcmpestri_3
= "rmio:660F3A61rMU",
1401 pcmpestrm_3
= "rmio:660F3A60rMU",
1402 pcmpgtq_2
= "rmo:660F3837rM",
1403 pcmpistri_3
= "rmio:660F3A63rMU",
1404 pcmpistrm_3
= "rmio:660F3A62rMU",
1405 popcnt_2
= "rmqdw:F30FB8rM",
1408 extrq_2
= "rro:660F79rM",
1409 extrq_3
= "riio:660F780mUU",
1410 insertq_2
= "rro:F20F79rM",
1411 insertq_4
= "rriio:F20F78rMUU",
1412 lzcnt_2
= "rmqdw:F30FBDrM",
1413 movntsd_2
= "xr/qo:nF20F2BRm",
1414 movntss_2
= "xr/do:F30F2BRm",
1415 -- popcnt is also in SSE4.2
1418 ------------------------------------------------------------------------------
-- Arithmetic ops.
-- The operation index is used as the single hex digit after 81/83 and,
-- shifted left by 3, as the base of the two-digit opcodes.
do
  local arith_index = {
    add = 0, ["or"] = 1, adc = 2, sbb = 3,
    ["and"] = 4, sub = 5, xor = 6, cmp = 7,
  }
  local arith_template =
    "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi"
  for mnemonic, idx in pairs(arith_index) do
    local base = shl(idx, 3)
    map_op[mnemonic.."_2"] =
      format(arith_template, 1+base, 3+base, idx, idx, 5+base, idx)
  end
end
-- Shift ops.
-- The operation index becomes the spare digit of the D1/D3/C1 opcodes.
-- Note that sal and shl share index 4.
do
  local shift_index = {
    rol = 0, ror = 1, rcl = 2, rcr = 3,
    shl = 4, shr = 5, sar = 7, sal = 4,
  }
  local shift_template = "m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU"
  for mnemonic, idx in pairs(shift_index) do
    map_op[mnemonic.."_2"] = format(shift_template, idx, idx, idx)
  end
end
-- Conditional ops.
-- One jcc/setcc/cmovcc template is generated per entry of map_cc; the
-- condition number is the low hex digit of the opcode.
for cond, code in pairs(map_cc) do
  local jump_key = "j"..cond.."_1"
  local set_key = "set"..cond.."_1"
  local cmov_key = "cmov"..cond.."_2"
  map_op[jump_key] = format("J.:n0F8%XJ", code) -- short: 7%X
  map_op[set_key] = format("mb:n0F9%X2m", code)
  map_op[cmov_key] = format("rmqdw:0F4%XrM", code) -- P6+
end
-- FP arithmetic ops.
-- n is the operation index: it is the spare digit for the memory forms and,
-- shifted left by 3 and added to 0xC0, the second opcode byte (nc) of the
-- register forms. nr is the second byte used for the DC/DE forms: identical
-- to nc for n < 4, otherwise swapped pairwise (+8 for even n, -8 for odd n).
for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
                     sub = 4, subr = 5, div = 6, divr = 7 } do
  local nc = 0xc0 + shl(n, 3)
  local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8))
  local fn = "f"..name
  map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
  if n == 2 or n == 3 then
    -- com/comp: only the st0-destination form, no popping variants.
    map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n)
  else
    map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n)
    map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
    map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
  end
  map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
end
-- FP conditional moves.
-- The low two bits of the condition index go to bits 3-4 of the opcode, and
-- bit 2 of the index is moved up to bit 8.
do
  local fcmov_index = { b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 }
  for cond, idx in pairs(fcmov_index) do
    local low_bits = shl(band(idx, 3), 3)
    local high_bit = shl(band(idx, 4), 6)
    local opc = 0xdac0 + low_bits + high_bit
    map_op["fcmov"..cond.."_1"] = format("ff:%04Xr", opc) -- P6+
    map_op["fcmov"..cond.."_2"] = format("Fff:%04XR", opc) -- P6+
  end
end
-- SSE FP arithmetic ops.
-- Each base name gets packed/scalar single/double variants; the index is
-- the low hex digit of the 0F5x opcode.
do
  local sse_index = {
    sqrt = 1, add = 8, mul = 9,
    sub = 12, min = 13, div = 14, max = 15,
  }
  -- suffix -> template (one %X slot for the index).
  local sse_variants = {
    ps_2 = "rmo:0F5%XrM",
    ss_2 = "rro:F30F5%XrM|rx/od:",
    pd_2 = "rmo:660F5%XrM",
    sd_2 = "rro:F20F5%XrM|rx/oq:",
  }
  for base, idx in pairs(sse_index) do
    for suffix, tmpl in pairs(sse_variants) do
      map_op[base..suffix] = format(tmpl, idx)
    end
  end
end
1475 ------------------------------------------------------------------------------
-- Process pattern string.
--   pat     : pattern part of a template (hex opcode digits plus action chars).
--   args    : parsed operands (as returned by parseoperand).
--   sz      : operand size used for immediates and opcode size overrides.
--   op      : opcode name, only used in error messages.
--   needrex : if true, 16 is added to the rex value passed to wputop.
local function dopattern(pat, args, sz, op, needrex)
  local addin            -- Operand whose regno was merged into the opcode.
  local opcode = 0       -- Accumulated opcode; nil once it has been emitted.
  local szov = sz        -- Size passed to wputop ('n' clears it).
  local narg = 1         -- Index of the next operand to consume as immediate.
  local rex = 0

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 5 positions.
  if secpos+5 > maxsecpos then wflush() end

  -- Process each character. The appended "|" forces a final opcode flush.
  for c in gmatch(pat.."|", ".") do
    if match(c, "%x") then -- Hex digit.
      opcode = opcode*16 + tonumber(c, 16)
      addin = nil
    elseif c == "n" then -- Disable operand size mods for opcode.
      szov = nil
    elseif c == "X" then -- Force REX.W.
      rex = 8
    elseif c == "r" then -- Merge 1st operand regno. into opcode.
      addin = args[1]; opcode = opcode + (addin.reg % 8)
      if narg < 2 then narg = 2 end
    elseif c == "R" then -- Merge 2nd operand regno. into opcode.
      addin = args[2]; opcode = opcode + (addin.reg % 8)
      narg = 3
    elseif c == "m" or c == "M" then -- Encode ModRM/SIB.
      local s
      if addin then
        s = addin.reg
        opcode = opcode - band(s, 7) -- Undo regno opcode merge.
      else
        s = band(opcode, 15) -- Undo last digit.
        opcode = shr(opcode, 4)
      end
      local nn = c == "m" and 1 or 2
      local t = args[nn]
      if narg <= nn then narg = nn + 1 end
      if szov == "q" and rex == 0 then rex = rex + 8 end
      if t.reg and t.reg > 7 then rex = rex + 1 end
      if t.xreg and t.xreg > 7 then rex = rex + 2 end
      if s > 7 then rex = rex + 4 end
      if needrex then rex = rex + 16 end
      wputop(szov, opcode, rex); opcode = nil
      local imark = sub(pat, -1) -- Force a mark (ugly).
      -- Put ModRM/SIB with regno/last digit as spare.
      wputmrmsib(t, imark, s, addin and addin.vreg)
      addin = nil
    else
      if opcode then -- Flush opcode.
        if szov == "q" and rex == 0 then rex = rex + 8 end
        if needrex then rex = rex + 16 end
        if addin and addin.reg == -1 then
          -- Variable register: undo the merged (-1 % 8) == 7 and emit VREG.
          wputop(szov, opcode - 7, rex)
          waction("VREG", addin.vreg); wputxb(0)
        else
          if addin and addin.reg > 7 then rex = rex + 1 end
          wputop(szov, opcode, rex)
        end
        opcode = nil
      end
      if c == "|" then break end
      if c == "o" then -- Offset (pure 32 bit displacement).
        wputdarg(args[1].disp); if narg < 2 then narg = 2 end
      elseif c == "O" then
        wputdarg(args[2].disp); narg = 3
      else
        -- Anything else is an immediate operand.
        local a = args[narg]
        narg = narg + 1
        local mode, imm = a.mode, a.imm
        if mode == "iJ" and not match("iIJ", c) then
          werror("bad operand size for label")
        end
        if c == "S" then
          wputsbarg(imm)
        elseif c == "U" then
          wputbarg(imm)
        elseif c == "W" then
          wputwarg(imm)
        elseif c == "i" or c == "I" then
          if mode == "iJ" then
            wputlabel("IMM_", imm, 1)
          elseif mode == "iI" and c == "I" then
            waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
          else
            wputszarg(sz, imm)
          end
        elseif c == "J" then
          if mode == "iPJ" then
            waction("REL_A", imm) -- !x64 (secpos)
          else
            wputlabel("REL_", imm, 2)
          end
        else
          werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
        end
      end
    end
  end
end
1583 ------------------------------------------------------------------------------
-- Mapping of operand modes to short names. Suppress output with '#'.
local map_modename = {
  r = "reg",
  R = "eax",
  C = "cl",
  x = "mem",
  m = "mrm",
  i = "imm",
  f = "stx",
  F = "st0",
  J = "lbl",
  ["1"] = "1",
  -- Modes marked '#' cause the whole help entry to be dropped.
  I = "#",
  S = "#",
  O = "#",
}
-- Return a table/string showing all possible operand modes.
--   template : '|'-separated list of match:pattern pairs.
--   nparams  : number of operands; the first nparams chars of each pair are
--              its operand modes.
-- Returns "" for zero-operand opcodes, otherwise a list of strings such as
-- "reg, mrm". Entries containing a suppressed mode ('#') are left out.
local function templatehelp(template, nparams)
  if nparams == 0 then return "" end
  local t = {}
  for tm in gmatch(template, "[^%|]+") do
    local s = map_modename[sub(tm, 1, 1)]
    -- gsub returns two values; the concatenation keeps only the string.
    s = s..gsub(sub(tm, 2, nparams), ".", function(c)
      return ", "..map_modename[c]
    end)
    if not match(s, "#") then t[#t+1] = s end
  end
  return t
end
-- Match operand modes against mode match part of template.
-- The i-th character of tm is used as a pattern against the mode string of
-- the i-th operand. Returns true if all operands match, nil otherwise.
local function matchtm(tm, args)
  for i=1,#args do
    if not match(args[i].mode, sub(tm, i, i)) then return end
  end
  return true
end
-- Handle opcodes defined with template strings.
--   params   : operand strings plus params.op (opcode name); nil requests
--              the help text for the template.
--   template : '|'-separated match:pattern pairs.
--   nparams  : number of operands encoded in the opcode name.
-- Emits the first pair whose modes and sizes fit; raises an error otherwise.
map_op[".template__"] = function(params, template, nparams)
  if not params then return templatehelp(template, nparams) end
  local args = {}

  -- Zero-operand opcodes have no match part.
  if #params == 0 then
    dopattern(template, args, "d", params.op, nil)
    return
  end

  -- Determine common operand size (coerce undefined size) or flag as mixed.
  local sz, szmix, needrex
  for i,p in ipairs(params) do
    args[i] = parseoperand(p)
    local nsz = args[i].opsize
    if nsz then
      if sz and sz ~= nsz then szmix = true else sz = nsz end
    end
    local nrex = args[i].needrex
    if nrex ~= nil then
      if needrex == nil then
        needrex = nrex
      elseif needrex ~= nrex then
        werror("bad mix of byte-addressable registers")
      end
    end
  end

  -- Try all match:pattern pairs (separated by '|').
  local gotmatch, lastpat
  for tm in gmatch(template, "[^%|]+") do
    -- Split off size match (starts after mode match) and pattern string.
    local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
    -- An empty pattern reuses the pattern of the preceding pair.
    if pat == "" then pat = lastpat else lastpat = pat end
    if matchtm(tm, args) then
      local prefix = sub(szm, 1, 1)
      if prefix == "/" then -- Match both operand sizes.
        if args[1].opsize == sub(szm, 2, 2) and
           args[2].opsize == sub(szm, 3, 3) then
          dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
          return
        end
      else -- Match common operand size.
        local szp = sz
        if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
        -- Prefix '1'/'2': take the size from that operand, ignore mixing.
        if prefix == "1" then szp = args[1].opsize; szmix = nil
        elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
        if not szmix and (prefix == "." or match(szm, szp or "#")) then
          dopattern(pat, args, szp, params.op, needrex) -- Process pattern.
          return
        end
      end
      gotmatch = true
    end
  end

  -- Nothing fit: pick the most specific error message.
  local msg = "bad operand mode"
  if gotmatch then
    if szmix then
      msg = "mixed operand size"
    else
      msg = sz and "bad operand size" or "missing operand size"
    end
  end

  werror(msg.." in `"..opmodestr(params.op, args).."'")
end
1683 ------------------------------------------------------------------------------
-- x64-specific opcode for 64 bit immediates and displacements.
-- Accepted forms: "reg, imm", "reg, [disp]" and "[disp], reg". For the two
-- [disp] forms the register operand must have mode "rmR". The 64 bit value
-- is emitted as two IMM_D actions (low half, then the value shifted by 32).
if x64 then
  function map_op.mov64_2(params)
    if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
    if secpos+2 > maxsecpos then wflush() end
    local opcode, sz, rex, vreg
    local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
    if op64 then
      -- [disp], reg
      local a = parseoperand(params[2])
      if a.mode ~= "rmR" then werror("bad operand mode") end
      sz = a.opsize
      rex = sz == "q" and 8 or 0
      opcode = 0xa3
    else
      op64 = match(params[2], "^%[%s*(.-)%s*%]$")
      local a = parseoperand(params[1])
      if op64 then
        -- reg, [disp]
        if a.mode ~= "rmR" then werror("bad operand mode") end
        sz = a.opsize
        rex = sz == "q" and 8 or 0
        opcode = 0xa1
      else
        -- reg, imm: needs a 64 bit register.
        if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then
          werror("bad operand mode")
        end
        op64 = params[2]
        if a.reg == -1 then
          -- Variable register: regno is supplied via a VREG action.
          vreg = a.vreg
          opcode = 0xb8
        else
          opcode = 0xb8 + band(a.reg, 7)
        end
        rex = a.reg > 7 and 9 or 8
      end
    end
    wputop(sz, opcode, rex)
    if vreg then waction("VREG", vreg); wputxb(0) end
    waction("IMM_D", format("(unsigned int)(%s)", op64))
    waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
  end
end
1727 ------------------------------------------------------------------------------
-- Pseudo-opcodes for data storage.
-- The element size is the 2nd character of the opcode name (e.g. "b" for
-- ".byte"); "a" is replaced by addrsize. Every operand must be an immediate
-- whose explicit size, if any, equals the element size.
local function op_data(params)
  if not params then return "imm..." end
  local sz = sub(params.op, 2, 2)
  if sz == "a" then sz = addrsize end
  for _,p in ipairs(params) do
    local a = parseoperand(p)
    if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
      werror("bad mode or size in `"..p.."'")
    end
    if a.mode == "iJ" then
      wputlabel("IMM_", a.imm, 1)
    else
      wputszarg(sz, a.imm)
    end
    -- Keep room in the section buffer for the next element.
    if secpos+2 > maxsecpos then wflush() end
  end
end
-- All data storage pseudo-opcodes share the op_data handler.
for _, data_op in ipairs{ ".byte_*", ".sbyte_*", ".word_*", ".dword_*",
                          ".aword_*" } do
  map_op[data_op] = op_data
end
1754 ------------------------------------------------------------------------------
-- Pseudo-opcode to mark the position where the action list is to be emitted.
-- Without params the operand help string is returned.
map_op[".actionlist_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    -- Queue a deferred line that calls writeactions with this name.
    wline(function(out) writeactions(out, cvar) end)
    return
  end
  return "cvar"
end
-- Pseudo-opcode to mark the position where the global enum is to be emitted.
-- Without params the operand help string is returned.
map_op[".globals_1"] = function(params)
  if params then
    local enum_prefix = params[1] -- No syntax check. You get to keep the pieces.
    -- Queue a deferred line that calls writeglobals with this prefix.
    wline(function(out) writeglobals(out, enum_prefix) end)
    return
  end
  return "prefix"
end
-- Pseudo-opcode to mark the position where the global names are to be emitted.
-- Without params the operand help string is returned.
map_op[".globalnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    -- Queue a deferred line that calls writeglobalnames with this name.
    wline(function(out) writeglobalnames(out, cvar) end)
    return
  end
  return "cvar"
end
-- Pseudo-opcode to mark the position where the extern names are to be emitted.
-- Without params the operand help string is returned.
map_op[".externnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    -- Queue a deferred line that calls writeexternnames with this name.
    wline(function(out) writeexternnames(out, cvar) end)
    return
  end
  return "cvar"
end
1784 ------------------------------------------------------------------------------
-- Label pseudo-opcode (converted from trailing colon form).
--   params[1] : label (1-9, ->global or =>pcexpr).
--   params[2] : optional address to assign to the label.
map_op[".label_2"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
  if secpos+2 > maxsecpos then wflush() end
  local a = parseoperand(params[1])
  local mode, imm = a.mode, a.imm
  if type(imm) == "number" and (mode == "iJ" or (imm >= 1 and imm <= 9)) then
    -- Local label (1: ... 9:) or global label (->global:).
    waction("LABEL_LG", nil, 1)
    wputxb(imm) -- Label number follows as the action arg byte.
  elseif mode == "iJ" then
    -- PC label (=>pcexpr:).
    waction("LABEL_PC", imm)
  else
    werror("bad label definition")
  end
  -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
  local addr = params[2]
  if addr then
    local a = parseoperand(addr)
    if a.mode == "iPJ" then
      waction("SETLABEL", a.imm)
    else
      werror("bad label assignment")
    end
  end
end
map_op[".label_1"] = map_op[".label_2"]
1815 ------------------------------------------------------------------------------
-- Alignment pseudo-opcode.
-- The operand is either a number or an operand size name that is looked up
-- via map_opsize/map_opsizenum. Anything that is not a power of 2 in the
-- accepted range raises "bad alignment".
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
  if align then
    local x = align
    -- Must be a power of 2 in the range (2 ... 256).
    for i=1,8 do
      x = x / 2
      if x == 1 then
        waction("ALIGN", nil, 1)
        wputxb(align-1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end
-- Spacing pseudo-opcode.
--   params[1] : amount of space, passed through as the SPACE action argument.
--   params[2] : optional filler byte (0 ... 255); defaults to 0.
map_op[".space_2"] = function(params)
  if not params then return "num [, filler]" end
  if secpos+1 > maxsecpos then wflush() end
  waction("SPACE", params[1])
  local fill = params[2]
  if fill then
    fill = tonumber(fill)
    if not fill or fill < 0 or fill > 255 then werror("bad filler") end
  end
  wputxb(fill or 0)
end
map_op[".space_1"] = map_op[".space_2"]
1851 ------------------------------------------------------------------------------
-- Pseudo-opcode for (primitive) type definitions (map to C types).
--   params[1] : type name (must be a plain identifier and not yet defined).
--   params[2] : C type.
--   params[3] : optional base register (must be in map_reg_valid_base).
map_op[".type_3"] = function(params, nparams)
  if not params then
    return nparams == 2 and "name, ctype" or "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  local tp = map_type[name]
  if tp then
    werror("duplicate type `"..name.."'")
  end
  if reg and not map_reg_valid_base[reg] then
    werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  map_type[name] = {
    ctype = ctype,
    ctypefmt = format("Dt%X(%%s)", num),
    reg = reg,
  }
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  ctypenum = num
end
map_op[".type_2"] = map_op[".type_3"]
-- Dump type definitions.
-- Writes one line per entry of map_type (name, C type, base register name)
-- to out, sorted by type name. lvl is accepted but not used here.
local function dumptypes(out, lvl)
  local t = {}
  for name in pairs(map_type) do t[#t+1] = name end
  sort(t) -- pairs() order is unspecified; sort for stable output.
  out:write("Type definitions:\n")
  for _,name in ipairs(t) do
    local tp = map_type[name]
    local reg = tp.reg and map_reg_rev[tp.reg] or ""
    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
  end
  out:write("\n")
end
1897 ------------------------------------------------------------------------------
-- Set the current section.
-- Emits a SECTION action followed by the section number as its argument
-- byte, then flushes.
function _M.section(num)
  waction("SECTION")
  wputxb(num)
  wflush(true) -- SECTION is a terminal action.
end
1906 ------------------------------------------------------------------------------
-- Dump architecture description.
-- Writes the module banner from _info, then the register and action dumps.
-- NOTE(review): dumpregs/dumpactions are expected to be defined earlier in
-- this file -- confirm the names against the definitions.
function _M.dumparch(out)
  out:write(format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release))
  dumpregs(out)
  dumpactions(out)
end
-- Dump all user defined elements.
-- Types, globals and externs are written to out in that order; lvl is
-- passed through to each dump function.
function _M.dumpdef(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end
1923 ------------------------------------------------------------------------------
-- Pass callbacks from/to the DynASM core.
-- Stores the core's line/error/fatal/warning writers in the module-level
-- upvalues and hands back this module's flush function.
-- NOTE(review): the core presumably uses the returned function as its flush
-- callback -- confirm against the caller in dynasm.lua.
function _M.passcb(wl, we, wf, ww)
  wline, werror, wfatal, wwarn = wl, we, wf, ww
  return wflush
end
-- Setup the arch-specific module.
-- Records the architecture and option values passed in by the caller.
function _M.setup(arch, opt)
  g_arch = arch
  g_opt = opt
end
-- Merge the core maps and the arch-specific maps.
-- Lookups missing from map_op fall back to map_coreop; lookups missing from
-- map_def fall back to map_archdef. Returns both primary tables.
function _M.mergemaps(map_coreop, map_def)
  local op_fallback = { __index = map_coreop }
  local def_fallback = { __index = map_archdef }
  setmetatable(map_op, op_fallback)
  setmetatable(map_def, def_fallback)
  return map_op, map_def
end
1945 ------------------------------------------------------------------------------