1 ------------------------------------------------------------------------------
2 -- DynASM x86/x64 module.
4 -- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
5 -- See dynasm.lua for full copyright notice.
6 ------------------------------------------------------------------------------
10 -- Module information:
12 arch
= x64
and "x64" or "x86",
13 description
= "DynASM x86/x64 module",
16 release
= "2015-10-18",
-- Table of glue functions exported by this arch-specific module.
-- The module information table is attached under the `_info` key.
local _M = {}
_M._info = _info
24 -- Cache library functions.
25 local type, tonumber, pairs
, ipairs
= type, tonumber, pairs
, ipairs
26 local assert, unpack
, setmetatable
= assert, unpack
or table.unpack
, setmetatable
28 local sub
, format, byte
, char
= _s
.sub
, _s
.format, _s
.byte
, _s
.char
29 local find
, match
, gmatch
, gsub = _s
.find
, _s
.match
, _s
.gmatch
, _s
.gsub
30 local concat
, sort, remove = table.concat
, table.sort, table.remove
31 local bit
= bit
or require("bit")
32 local band
, bxor
, shl
, shr
= bit
.band
, bit
.bxor
, bit
.lshift
, bit
.rshift
34 -- Inherited tables and callbacks.
36 local wline
, werror
, wfatal
, wwarn
39 -- CHECK: Keep this in sync with the C code!
40 local action_names
= {
41 -- int arg, 1 buffer pos:
42 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
43 -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
45 -- ptrdiff_t arg, 1 buffer pos (address): !x64
47 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
49 -- action arg (1 byte) or int arg, 1 buffer pos (link):
51 -- action arg (1 byte) or int arg, 1 buffer pos (offset):
52 "LABEL_LG", "LABEL_PC",
53 -- action arg (1 byte), 1 buffer pos (offset):
55 -- action args (2 bytes), no buffer pos.
57 -- action arg (1 byte), no buffer pos.
59 -- no action arg, no buffer pos.
61 -- action arg (1 byte), no buffer pos, terminal action:
63 -- no args, no buffer pos, terminal action:
67 -- Maximum number of section buffer positions for dasm_put().
68 -- CHECK: Keep this in sync with the C code!
69 local maxsecpos
= 25 -- Keep this low, to avoid excessively long C lines.
71 -- Action name -> action number (dynamically generated below).
73 -- First action number. Everything below does not need to be escaped.
74 local actfirst
= 256-#action_names
76 -- Action list buffer and string (only used to remove dupes).
80 -- Argument list for next dasm_put(). Start with offset 0 into action list.
83 -- Current number of section buffer positions for dasm_put().
86 -- VREG kind encodings, pre-shifted by 5 bits.
88 ["modrm.rm.m"] = 0x00,
89 ["modrm.rm.r"] = 0x20,
98 -- Current number of VREG actions contributing to REX/VEX shrinkage.
99 local vreg_shrink_count
= 0
101 ------------------------------------------------------------------------------
-- Compute action numbers for action names.
-- The i-th entry of action_names (1-based) is assigned actfirst + i - 1.
for idx = 1, #action_names do
  map_action[action_names[idx]] = actfirst + idx - 1
end
109 -- Dump action names and numbers.
110 local function dumpactions(out
)
111 out
:write("DynASM encoding engine action codes:\n")
112 for n
,name
in ipairs(action_names
) do
113 local num
= map_action
[name
]
114 out
:write(format(" %-10s %02X %d\n", name
, num
, num
))
119 -- Write action list buffer as a huge static C array.
120 local function writeactions(out
, name
)
122 local last
= actlist
[nn
] or 255
123 actlist
[nn
] = nil -- Remove last byte.
124 if nn
== 0 then nn
= 1 end
125 out
:write("static const unsigned char ", name
, "[", nn
, "] = {\n")
127 for n
,b
in ipairs(actlist
) do
130 assert(out
:write(s
, "\n"))
134 out
:write(s
, last
, "\n};\n\n") -- Add last byte back.
137 ------------------------------------------------------------------------------
-- Append one byte to the action list.
-- n must be an integral number in the range 0..255, otherwise the
-- assertion fails with "byte out of range".
local function wputxb(n)
  local is_byte = n >= 0 and n <= 255 and n % 1 == 0
  assert(is_byte, "byte out of range")
  local al = actlist
  al[#al+1] = n
end
-- Add action to list with optional arg. Advance buffer pos, too.
--   action: action name; must be a key of map_action.
--   a:      optional argument, appended to actargs.
--   num:    optional number of buffer positions to advance by (default 1).
-- The buffer position is only advanced if an argument or a count is given.
local function waction(action, a, num)
  local code = map_action[action]
  wputxb(assert(code, "bad action name `"..action.."'"))
  if a then
    actargs[#actargs+1] = a
    secpos = secpos + (num or 1)
  elseif num then
    secpos = secpos + num
  end
end
152 -- Optionally add a VREG action.
153 local function wvreg(kind
, vreg
, psz
, sk
, defer
)
154 if not vreg
then return end
155 waction("VREG", vreg
)
156 local b
= assert(map_vreg
[kind
], "bad vreg kind `"..vreg
.."'")
157 if b
< (sk
or 0) then
158 vreg_shrink_count
= vreg_shrink_count
+ 1
161 b
= b
+ vreg_shrink_count
* 8
162 vreg_shrink_count
= 0
164 wputxb(b
+ (psz
or 0))
-- Emit a line calling embedded DynASM C code: dasm_<func>(Dst, <args>);
--   func: suffix of the C function name (prefixed with "dasm_").
--   args: list of argument expressions, joined with ", ".
local function wcall(func, args)
  local arglist = concat(args, ", ")
  local stmt = format("dasm_%s(Dst, %s);", func, arglist)
  wline(stmt, true)
end
172 -- Delete duplicate action list chunks. A tad slow, but so what.
173 local function dedupechunk(offset
)
174 local al
, as
= actlist
, actstr
175 local chunk
= char(unpack(al
, offset
+1, #al
))
176 local orig
= find(as
, chunk
, 1, true)
178 actargs
[1] = orig
-1 -- Replace with original offset.
179 for i
=offset
+1,#al
do al
[i
] = nil end -- Kill dupe.
185 -- Flush action list (intervening C code or buffer pos overflow).
186 local function wflush(term
)
187 local offset
= actargs
[1]
188 if #actlist
== offset
then return end -- Nothing to flush.
189 if not term
then waction("STOP") end -- Terminate action list.
191 wcall("put", actargs
) -- Add call to dasm_put().
192 actargs
= { #actlist
} -- Actionlist offset is 1st arg to next dasm_put().
193 secpos
= 1 -- The actionlist offset occupies a buffer position, too.
197 local function wputb(n
)
198 if n
>= actfirst
then waction("ESC") end -- Need to escape byte.
202 ------------------------------------------------------------------------------
204 -- Global label name -> global label number. With auto assignment on 1st use.
205 local next_global
= 10
206 local map_global
= setmetatable({}, { __index
= function(t
, name
)
207 if not match(name
, "^[%a_][%w_@]*$") then werror("bad global label") end
208 local n
= next_global
209 if n
> 246 then werror("too many global labels") end
215 -- Dump global labels.
216 local function dumpglobals(out
, lvl
)
218 for name
, n
in pairs(map_global
) do t
[n
] = name
end
219 out
:write("Global labels:\n")
220 for i
=10,next_global
-1 do
221 out
:write(format(" %s\n", t
[i
]))
226 -- Write global label enum.
227 local function writeglobals(out
, prefix
)
229 for name
, n
in pairs(map_global
) do t
[n
] = name
end
230 out
:write("enum {\n")
231 for i
=10,next_global
-1 do
232 out
:write(" ", prefix
, gsub(t
[i
], "@.*", ""), ",\n")
234 out
:write(" ", prefix
, "_MAX\n};\n")
237 -- Write global label names.
238 local function writeglobalnames(out
, name
)
240 for name
, n
in pairs(map_global
) do t
[n
] = name
end
241 out
:write("static const char *const ", name
, "[] = {\n")
242 for i
=10,next_global
-1 do
243 out
:write(" \"", t
[i
], "\",\n")
245 out
:write(" (const char *)0\n};\n")
248 ------------------------------------------------------------------------------
250 -- Extern label name -> extern label number. With auto assignment on 1st use.
251 local next_extern
= -1
252 local map_extern
= setmetatable({}, { __index
= function(t
, name
)
253 -- No restrictions on the name for now.
254 local n
= next_extern
255 if n
< -256 then werror("too many extern labels") end
261 -- Dump extern labels.
262 local function dumpexterns(out
, lvl
)
264 for name
, n
in pairs(map_extern
) do t
[-n
] = name
end
265 out
:write("Extern labels:\n")
266 for i
=1,-next_extern
-1 do
267 out
:write(format(" %s\n", t
[i
]))
272 -- Write extern label names.
273 local function writeexternnames(out
, name
)
275 for name
, n
in pairs(map_extern
) do t
[-n
] = name
end
276 out
:write("static const char *const ", name
, "[] = {\n")
277 for i
=1,-next_extern
-1 do
278 out
:write(" \"", t
[i
], "\",\n")
280 out
:write(" (const char *)0\n};\n")
283 ------------------------------------------------------------------------------
285 -- Arch-specific maps.
286 local map_archdef
= {} -- Ext. register name -> int. name.
287 local map_reg_rev
= {} -- Int. register name -> ext. name.
288 local map_reg_num
= {} -- Int. register name -> register number.
289 local map_reg_opsize
= {} -- Int. register name -> operand size.
290 local map_reg_valid_base
= {} -- Int. register name -> valid base register?
291 local map_reg_valid_index
= {} -- Int. register name -> valid index register?
292 local map_reg_needrex
= {} -- Int. register name -> need rex vs. no rex.
293 local reg_list
= {} -- Canonical list of int. register names.
295 local map_type
= {} -- Type name -> { ctype, reg }
296 local ctypenum
= 0 -- Type number (for _PTx macros).
298 local addrsize
= x64
and "q" or "d" -- Size for address operands.
300 -- Helper functions to fill register maps.
301 local function mkrmap(sz
, cl
, names
)
302 local cname
= format("@%s", sz
)
303 reg_list
[#reg_list
+1] = cname
304 map_archdef
[cl
] = cname
305 map_reg_rev
[cname
] = cl
306 map_reg_num
[cname
] = -1
307 map_reg_opsize
[cname
] = sz
308 if sz
== addrsize
or sz
== "d" then
309 map_reg_valid_base
[cname
] = true
310 map_reg_valid_index
[cname
] = true
313 for n
,name
in ipairs(names
) do
314 local iname
= format("@%s%x", sz
, n
-1)
315 reg_list
[#reg_list
+1] = iname
316 map_archdef
[name
] = iname
317 map_reg_rev
[iname
] = name
318 map_reg_num
[iname
] = n
-1
319 map_reg_opsize
[iname
] = sz
320 if sz
== "b" and n
> 4 then map_reg_needrex
[iname
] = false end
321 if sz
== addrsize
or sz
== "d" then
322 map_reg_valid_base
[iname
] = true
323 map_reg_valid_index
[iname
] = true
327 for i
=0,(x64
and sz
~= "f") and 15 or 7 do
328 local needrex
= sz
== "b" and i
> 3
329 local iname
= format("@%s%x%s", sz
, i
, needrex
and "R" or "")
330 if needrex
then map_reg_needrex
[iname
] = true end
332 if sz
== "o" or sz
== "y" then name
= format("%s%d", cl
, i
)
333 elseif sz
== "f" then name
= format("st%d", i
)
334 else name
= format("r%d%s", i
, sz
== addrsize
and "" or sz
) end
335 map_archdef
[name
] = iname
336 if not map_reg_rev
[iname
] then
337 reg_list
[#reg_list
+1] = iname
338 map_reg_rev
[iname
] = name
339 map_reg_num
[iname
] = i
340 map_reg_opsize
[iname
] = sz
341 if sz
== addrsize
or sz
== "d" then
342 map_reg_valid_base
[iname
] = true
343 map_reg_valid_index
[iname
] = true
347 reg_list
[#reg_list
+1] = ""
350 -- Integer registers (qword, dword, word and byte sized).
352 mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
354 mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
355 mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
356 mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
357 map_reg_valid_index
[map_archdef
.esp
] = false
358 if x64
then map_reg_valid_index
[map_archdef
.rsp
] = false end
359 if x64
then map_reg_needrex
[map_archdef
.Rb
] = true end
360 map_archdef
["Ra"] = "@"..addrsize
362 -- FP registers (internally tword sized, but use "f" as operand size).
365 -- SSE registers (oword sized, but qword and dword accessible).
368 -- AVX registers (yword sized, but oword, qword and dword accessible).
371 -- Operand size prefixes to codes.
373 byte
= "b", word
= "w", dword
= "d", qword
= "q", oword
= "o", yword
= "y",
374 tword
= "t", aword
= addrsize
,
377 -- Operand size code to number.
378 local map_opsizenum
= {
379 b
= 1, w
= 2, d
= 4, q
= 8, o
= 16, y
= 32, t
= 10,
382 -- Operand size code to name.
383 local map_opsizename
= {
384 b
= "byte", w
= "word", d
= "dword", q
= "qword", o
= "oword", y
= "yword",
385 t
= "tword", f
= "fpword",
388 -- Valid index register scale factors.
390 ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3,
395 o
= 0, no
= 1, b
= 2, nb
= 3, e
= 4, ne
= 5, be
= 6, nbe
= 7,
396 s
= 8, ns
= 9, p
= 10, np
= 11, l
= 12, nl
= 13, le
= 14, nle
= 15,
397 c
= 2, nae
= 2, nc
= 3, ae
= 3, z
= 4, nz
= 5, na
= 6, a
= 7,
398 pe
= 10, po
= 11, nge
= 12, ge
= 13, ng
= 14, g
= 15,
-- Reverse defines for registers.
-- Every internal register name in s ("@" followed by alphanumerics) is looked
-- up in map_reg_rev and replaced by the external name found there; matches
-- without an entry are kept unchanged.
-- Returns only the resulting string.
function _M.revdef(s)
  -- The extra parentheses drop the substitution count that gsub returns as a
  -- second value, so it cannot leak into a caller's argument list.
  return (gsub(s, "@%w+", map_reg_rev))
end
407 -- Dump register names and numbers
408 local function dumpregs(out
)
409 out
:write("Register names, sizes and internal numbers:\n")
410 for _
,reg
in ipairs(reg_list
) do
414 local name
= map_reg_rev
[reg
]
415 local num
= map_reg_num
[reg
]
416 local opsize
= map_opsizename
[map_reg_opsize
[reg]]
417 out
:write(format(" %-5s %-8s %s\n", name
, opsize
,
418 num
< 0 and "(variable)" or num
))
423 ------------------------------------------------------------------------------
425 -- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
426 local function wputlabel(aprefix
, imm
, num
)
427 if type(imm
) == "number" then
430 wputxb(aprefix
== "IMM_" and 0 or 1)
433 waction(aprefix
.."LG", nil, num
);
437 waction(aprefix
.."PC", imm
, num
)
441 -- Put signed byte or arg.
442 local function wputsbarg(n
)
443 if type(n
) == "number" then
444 if n
< -128 or n
> 127 then
445 werror("signed immediate byte out of range")
447 if n
< 0 then n
= n
+ 256 end
449 else waction("IMM_S", n
) end
452 -- Put unsigned byte or arg.
453 local function wputbarg(n
)
454 if type(n
) == "number" then
455 if n
< 0 or n
> 255 then
456 werror("unsigned immediate byte out of range")
459 else waction("IMM_B", n
) end
-- Put unsigned word or arg.
-- A numeric n is range-checked and emitted as two bytes, low byte first.
-- Any other n is handed to an IMM_W action as its argument.
local function wputwarg(n)
  if type(n) ~= "number" then
    waction("IMM_W", n)
    return
  end
  if shr(n, 16) ~= 0 then
    werror("unsigned immediate word out of range")
  end
  wputb(band(n, 255))
  wputb(shr(n, 8))
end
472 -- Put signed or unsigned dword or arg.
473 local function wputdarg(n
)
475 if tn
== "number" then
477 wputb(band(shr(n
, 8), 255))
478 wputb(band(shr(n
, 16), 255))
480 elseif tn
== "table" then
481 wputlabel("IMM_", n
[1], 1)
-- Put operand-size dependent number or arg (defaults to dword).
--   sz: operand size code. "w" -> word, "b" -> unsigned byte,
--       "s" -> signed byte, "d", "q" or no size -> dword.
--       Any other size is reported as an error.
--   n:  number or arg, passed through to the selected writer.
local function wputszarg(sz, n)
  if sz == "w" then
    wputwarg(n)
  elseif sz == "b" then
    wputbarg(n)
  elseif sz == "s" then
    wputsbarg(n)
  elseif not sz or sz == "d" or sz == "q" then
    wputdarg(n)
  else
    werror("bad operand size")
  end
end
496 -- Put multi-byte opcode with operand-size dependent modifications.
497 local function wputop(sz
, op
, rex
, vex
, vregr
, vregxb
)
498 local psz
, sk
= 0, nil
501 if vex
.m
== 1 and band(rex
, 11) == 0 then
502 if x64
and vregxb
then
503 sk
= map_vreg
["modrm.reg"]
506 tail
= shl(bxor(band(rex
, 4), 4), 5)
512 wputb(shl(bxor(band(rex
, 7), 7), 5) + vex
.m
)
513 tail
= shl(band(rex
, 8), 4)
516 local reg
, vreg
= 0, nil
519 if not reg
then werror("bad vex operand") end
520 if reg
< 0 then reg
= 0; vreg
= vex
.v
.vreg
end
522 if sz
== "y" or vex
.l
then tail
= tail
+ 4 end
523 wputb(tail
+ shl(bxor(reg
, 15), 3) + vex
.p
)
526 if op
>= 256 then werror("bad vex opcode") end
529 if not x64
then werror("bad operand size") end
530 elseif (vregr
or vregxb
) and x64
then
532 sk
= map_vreg
["vex.v"]
536 if sz
== "w" then wputb(102) end
537 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
538 if op
>= 4294967296 then r
= op
%4294967296 wputb((op
-r
)/4294967296) op
= r
end
539 if op
>= 16777216 then wputb(shr(op
, 24)); op
= band(op
, 0xffffff) end
542 local opc3
= band(op
, 0xffff00)
543 if opc3
== 0x0f3a00 or opc3
== 0x0f3800 then
544 wputb(64 + band(rex
, 15)); rex
= 0; psz
= 2
547 wputb(shr(op
, 16)); op
= band(op
, 0xffff); psz
= psz
+ 1
551 if b
== 15 and rex
~= 0 then wputb(64 + band(rex
, 15)); rex
= 0; psz
= 2 end
552 wputb(b
); op
= band(op
, 255); psz
= psz
+ 1
554 if rex
~= 0 then wputb(64 + band(rex
, 15)); psz
= 2 end
555 if sz
== "b" then op
= op
- 1 end
-- Put ModRM or SIB formatted byte.
-- Layout of the emitted byte: m in bits 7-6, the low 3 bits of s in bits 5-3
-- and the low 3 bits of rm in bits 2-0.
-- The vs and vrm parameters are accepted but not used here.
local function wputmodrm(m, s, rm, vs, vrm)
  assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
  local top = shl(m, 6)
  local mid = shl(band(s, 7), 3)
  local low = band(rm, 7)
  wputb(top + mid + low)
end
566 -- Put ModRM/SIB plus optional displacement.
567 local function wputmrmsib(t
, imark
, s
, vsreg
, psz
, sk
)
569 local reg
, xreg
= t
.reg
, t
.xreg
570 if reg
and reg
< 0 then reg
= 0; vreg
= t
.vreg
end
571 if xreg
and xreg
< 0 then xreg
= 0; vxreg
= t
.vxreg
end
572 if s
< 0 then s
= 0 end
575 if sub(t
.mode
, 1, 1) == "r" then
577 wvreg("modrm.reg", vsreg
, psz
+1, sk
, vreg
)
578 wvreg("modrm.rm.r", vreg
, psz
+1, sk
)
583 local tdisp
= type(disp
)
588 -- Indexed mode with index register only.
589 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
591 if imark
== "I" then waction("MARK") end
592 wvreg("modrm.reg", vsreg
, psz
+1, sk
, vxreg
)
593 wputmodrm(t
.xsc
, xreg
, 5)
594 wvreg("sib.index", vxreg
, psz
+2, sk
)
596 -- Pure 32 bit displacement.
597 if x64
and tdisp
~= "table" then
598 wputmodrm(0, s
, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
599 wvreg("modrm.reg", vsreg
, psz
+1, sk
)
600 if imark
== "I" then waction("MARK") end
604 wputmodrm(0, s
, 5) -- [disp|rip-label] -> (0, s, ebp)
605 wvreg("modrm.reg", vsreg
, psz
+1, sk
)
606 if imark
== "I" then waction("MARK") end
609 if riprel
then -- Emit rip-relative displacement.
610 if match("UWSiI", imark
) then
611 werror("NYI: rip-relative displacement followed by immediate")
613 -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
614 wputlabel("REL_", disp
[1], 2)
622 if tdisp
== "number" then -- Check displacement size at assembly time.
623 if disp
== 0 and band(reg
, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
624 if not vreg
then m
= 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
625 elseif disp
>= -128 and disp
<= 127 then m
= 1
627 elseif tdisp
== "table" then
631 -- Index register present or esp as base register: need SIB encoding.
632 if xreg
or band(reg
, 7) == 4 then
633 wputmodrm(m
or 2, s
, 4) -- ModRM.
634 if m
== nil or imark
== "I" then waction("MARK") end
635 wvreg("modrm.reg", vsreg
, psz
+1, sk
, vxreg
or vreg
)
636 wputmodrm(t
.xsc
or 0, xreg
or 4, reg
) -- SIB.
637 wvreg("sib.index", vxreg
, psz
+2, sk
, vreg
)
638 wvreg("sib.base", vreg
, psz
+2, sk
)
640 wputmodrm(m
or 2, s
, reg
) -- ModRM.
641 if (imark
== "I" and (m
== 1 or m
== 2)) or
642 (m
== nil and (vsreg
or vreg
)) then waction("MARK") end
643 wvreg("modrm.reg", vsreg
, psz
+1, sk
, vreg
)
644 wvreg("modrm.rm.m", vreg
, psz
+1, sk
)
648 if m
== 1 then wputsbarg(disp
)
649 elseif m
== 2 then wputdarg(disp
)
650 elseif m
== nil then waction("DISP", disp
) end
653 ------------------------------------------------------------------------------
655 -- Return human-readable operand mode string.
656 local function opmodestr(op
, args
)
660 m
[#m
+1] = sub(a
.mode
, 1, 1)..(a
.opsize
or "?")
662 return op
.." "..concat(m
, ",")
665 -- Convert number to valid integer or nil.
666 local function toint(expr
)
667 local n
= tonumber(expr
)
669 if n
% 1 ~= 0 or n
< -2147483648 or n
> 4294967295 then
670 werror("bad integer number `"..expr
.."'")
676 -- Parse immediate expression.
677 local function immexpr(expr
)
679 if sub(expr
, 1, 1) == "&" then
680 return "iPJ", format("(ptrdiff_t)(%s)", sub(expr
,2))
683 local prefix
= sub(expr
, 1, 2)
684 -- =>expr (pc label reference)
685 if prefix
== "=>" then
686 return "iJ", sub(expr
, 3)
688 -- ->name (global label reference)
689 if prefix
== "->" then
690 return "iJ", map_global
[sub(expr
, 3)]
693 -- [<>][1-9] (local label reference)
694 local dir
, lnum
= match(expr
, "^([<>])([1-9])$")
695 if dir
then -- Fwd: 247-255, Bkwd: 1-9.
696 return "iJ", lnum
+ (dir
== ">" and 246 or 0)
699 local extname
= match(expr
, "^extern%s+(%S+)$")
701 return "iJ", map_extern
[extname
]
704 -- expr (interpreted as immediate)
708 -- Parse displacement expression: +-num, +-expr, +-opsize*num
709 local function dispexpr(expr
)
710 local disp
= expr
== "" and 0 or toint(expr
)
711 if disp
then return disp
end
712 local c
, dispt
= match(expr
, "^([+-])%s*(.+)$")
716 werror("bad displacement expression `"..expr
.."'")
718 local opsize
, tailops
= match(dispt
, "^(%w+)%s*%*%s*(.+)$")
719 local ops
, imm
= map_opsize
[opsize
], toint(tailops
)
721 if c
== "-" then imm
= -imm
end
722 return imm
*map_opsizenum
[ops
]
724 local mode
, iexpr
= immexpr(dispt
)
726 if c
== "-" then werror("cannot invert label reference") end
729 return expr
-- Need to return original signed expression.
732 -- Parse register or type expression.
733 local function rtexpr(expr
)
734 if not expr
then return end
735 local tname
, ovreg
= match(expr
, "^([%w_]+):(@[%w_]+)$")
736 local tp
= map_type
[tname
or expr
]
738 local reg
= ovreg
or tp
.reg
739 local rnum
= map_reg_num
[reg
]
741 werror("type `"..(tname
or expr
).."' needs a register override")
743 if not map_reg_valid_base
[reg
] then
744 werror("bad base register override `"..(map_reg_rev
[reg
] or reg
).."'")
748 return expr
, map_reg_num
[expr
]
751 -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
752 local function parseoperand(param
)
756 local opsize
, tailops
= match(param
, "^(%w+)%s*(.+)$")
758 t
.opsize
= map_opsize
[opsize
]
759 if t
.opsize
then expr
= tailops
end
762 local br
= match(expr
, "^%[%s*(.-)%s*%]$")
770 t
.mode
= x64
and "xm" or "xmO"
776 local reg
, tailr
= match(br
, "^([@%w_:]+)%s*(.*)$")
777 reg
, t
.reg
, tp
= rtexpr(reg
)
780 t
.mode
= x64
and "xm" or "xmO"
781 t
.disp
= dispexpr("+"..br
)
786 t
.vreg
, tailr
= match(tailr
, "^(%b())(.*)$")
787 if not t
.vreg
then werror("bad variable register expression") end
790 -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
791 local xsc
, tailsc
= match(tailr
, "^%*%s*([1248])%s*(.*)$")
793 if not map_reg_valid_index
[reg
] then
794 werror("bad index register `"..map_reg_rev
[reg
].."'")
801 t
.disp
= dispexpr(tailsc
)
804 if not map_reg_valid_base
[reg
] then
805 werror("bad base register `"..map_reg_rev
[reg
].."'")
808 -- [reg] or [reg+-disp]
809 t
.disp
= toint(tailr
) or (tailr
== "" and 0)
810 if t
.disp
then break end
813 local xreg
, tailx
= match(tailr
, "^+%s*([@%w_:]+)%s*(.*)$")
814 xreg
, t
.xreg
, tp
= rtexpr(xreg
)
817 t
.disp
= dispexpr(tailr
)
820 if not map_reg_valid_index
[xreg
] then
821 werror("bad index register `"..map_reg_rev
[xreg
].."'")
825 t
.vxreg
, tailx
= match(tailx
, "^(%b())(.*)$")
826 if not t
.vxreg
then werror("bad variable register expression") end
830 local xsc
, tailsc
= match(tailx
, "^%*%s*([1248])%s*(.*)$")
836 -- [...] or [...+-disp] or [...+-expr]
837 t
.disp
= dispexpr(tailx
)
840 local imm
= toint(expr
)
841 if not imm
and sub(expr
, 1, 1) == "*" and t
.opsize
then
842 imm
= toint(sub(expr
, 2))
844 imm
= imm
* map_opsizenum
[t
.opsize
]
849 if t
.opsize
then werror("bad operand size override") end
851 if imm
== 1 then m
= m
.."1" end
852 if imm
>= 4294967168 and imm
<= 4294967295 then imm
= imm
-4294967296 end
853 if imm
>= -128 and imm
<= 127 then m
= m
.."S" end
860 local reg
, tailr
= match(expr
, "^([@%w_:]+)%s*(.*)$")
861 reg
, t
.reg
, tp
= rtexpr(reg
)
864 t
.vreg
, tailr
= match(tailr
, "^(%b())(.*)$")
865 if not t
.vreg
then werror("bad variable register expression") end
869 if t
.opsize
then werror("bad operand size override") end
870 t
.opsize
= map_reg_opsize
[reg
]
871 if t
.opsize
== "f" then
872 t
.mode
= t
.reg
== 0 and "fF" or "f"
874 if reg
== "@w4" or (x64
and reg
== "@d4") then
875 wwarn("bad idea, try again with `"..(x64
and "rsp'" or "esp'"))
877 t
.mode
= t
.reg
== 0 and "rmR" or (reg
== "@b1" and "rmC" or "rm")
879 t
.needrex
= map_reg_needrex
[reg
]
883 -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
884 if not tp
then werror("bad operand `"..param
.."'") end
886 t
.disp
= format(tp
.ctypefmt
, tailr
)
888 t
.mode
, t
.imm
= immexpr(expr
)
889 if sub(t
.mode
, -1) == "J" then
890 if t
.opsize
and t
.opsize
~= addrsize
then
891 werror("bad operand size override")
901 ------------------------------------------------------------------------------
902 -- x86 Template String Description
903 -- ===============================
905 -- Each template string is a list of [match:]pattern pairs,
906 -- separated by "|". The first match wins. No match means a
907 -- bad or unsupported combination of operand modes or sizes.
909 -- The match part and the ":" is omitted if the operation has
910 -- no operands. Otherwise the first N characters are matched
911 -- against the mode strings of each of the N operands.
913 -- The mode string for each operand type is (see parseoperand()):
914 -- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
915 -- FP register: "f", +"F" for st0
916 -- Index operand: "xm", +"O" for [disp] (pure offset)
917 -- Immediate: "i", +"S" for signed 8 bit, +"1" for 1,
918 -- +"I" for arg, +"P" for pointer
919 -- Any: +"J" for valid jump targets
921 -- So a match character "m" (mixed) matches both an integer register
922 -- and an index operand (to be encoded with the ModRM/SIB scheme).
923 -- But "r" matches only a register and "x" only an index operand
924 -- (e.g. for FP memory access operations).
926 -- The operand size match string starts right after the mode match
927 -- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
928 -- The effective data size of the operation is matched against this list.
930 -- If only the regular "b", "w", "d", "q", "t" operand sizes are
931 -- present, then all operands must be the same size. Unspecified sizes
932 -- are ignored, but at least one operand must have a size or the pattern
933 -- won't match (use the "byte", "word", "dword", "qword", "tword"
934 -- operand size overrides. E.g.: mov dword [eax], 1).
936 -- If the list has a "1" or "2" prefix, the operand size is taken
937 -- from the respective operand and any other operand sizes are ignored.
938 -- If the list contains only ".", all operand sizes are ignored.
939 -- If the list has a "/" prefix, the concatenated (mixed) operand sizes
940 -- are compared to the match.
942 -- E.g. "rrdw" matches for either two dword registers or two word
943 -- registers. "Fx2dq" matches an st0 operand plus an index operand
944 -- pointing to a dword (float) or qword (double).
946 -- Every character after the ":" is part of the pattern string:
947 -- Hex chars are accumulated to form the opcode (left to right).
948 -- "n" disables the standard opcode mods
949 -- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
951 -- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode.
952 -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
953 -- The spare 3 bits are either filled with the last hex digit or
954 -- the result from a previous "r"/"R". The opcode is restored.
955 -- "u" Use VEX encoding, vvvv unused.
956 -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
957 -- removed from the list used by future characters).
960 -- All of the following characters force a flush of the opcode:
961 -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
962 -- "s" stores a 4 bit immediate from the last register operand,
963 -- followed by 4 zero bits.
964 -- "S" stores a signed 8 bit immediate from the last operand.
965 -- "U" stores an unsigned 8 bit immediate from the last operand.
966 -- "W" stores an unsigned 16 bit immediate from the last operand.
967 -- "i" stores an operand sized immediate from the last operand.
968 -- "I" ditto, but generates an action code to optionally modify
969 -- the opcode (+2) for a signed 8 bit immediate.
970 -- "J" generates one of the REL action codes from the last operand.
972 ------------------------------------------------------------------------------
974 -- Template strings for x86 instructions. Ordered by first opcode byte.
975 -- Unimplemented opcodes (deliberate omissions) are marked with *.
982 -- 0F: two byte opcode prefix
1001 inc_1
= x64
and "m:FF0m" or "rdw:40r|m:FF0m",
1002 dec_1
= x64
and "m:FF1m" or "rdw:48r|m:FF1m",
1003 push_1
= (x64
and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
1004 "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
1005 pop_1
= x64
and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
1006 -- 60: *pusha, *pushad, *pushaw
1007 -- 61: *popa, *popad, *popaw
1009 -- 63: x86: *arpl mw,rw
1010 movsxd_2
= x64
and "rm/qd:63rM",
1014 a16_0
= not x64
and "67" or nil,
1015 a32_0
= x64
and "67",
1017 -- 69: imul rdw,mdw,idw
1019 -- 6B: imul rdw,mdw,S
1023 -- 6F: *outsd, *outsw
1029 test_2
= "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
1037 lea_2
= "rx1dq:8DrM",
1041 xchg_2
= "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
1052 pushfd_0
= not x64
and "9C",
1053 pushfq_0
= x64
and "9C",
1055 popfd_0
= not x64
and "9D",
1056 popfq_0
= x64
and "9D",
1059 mov_2
= "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
1103 -- D8-DF: floating point ops
1107 -- E3: *jcxz, *jecxz
1112 call_1
= x64
and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
1113 jmp_1
= x64
and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
1129 -- F6: test... mb,i; div... mb
1130 -- F7: test... mdw,i; div... mdw
1147 imul_2
= "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
1148 imul_3
= "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",
1150 movzx_2
= "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
1151 movsx_2
= "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",
1153 bswap_1
= "rqd:0FC8r",
1154 bsf_2
= "rmqdw:0FBCrM",
1155 bsr_2
= "rmqdw:0FBDrM",
1156 bt_2
= "mrqdw:0FA3Rm|miqdw:0FBA4mU",
1157 btc_2
= "mrqdw:0FBBRm|miqdw:0FBA7mU",
1158 btr_2
= "mrqdw:0FB3Rm|miqdw:0FBA6mU",
1159 bts_2
= "mrqdw:0FABRm|miqdw:0FBA5mU",
1161 shld_3
= "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
1162 shrd_3
= "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
1164 rdtsc_0
= "0F31", -- P1+
1165 rdpmc_0
= "0F33", -- P6+
1166 cpuid_0
= "0FA2", -- P1+
1168 -- floating point ops
1169 fst_1
= "ff:DDD0r|xd:D92m|xq:nDD2m",
1170 fstp_1
= "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
1171 fld_1
= "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",
1173 fpop_0
= "DDD8", -- Alias for fstp st0.
1175 fist_1
= "xw:nDF2m|xd:DB2m",
1176 fistp_1
= "xw:nDF3m|xd:DB3m|xq:nDF7m",
1177 fild_1
= "xw:nDF0m|xd:DB0m|xq:nDF5m",
1180 fxch_1
= "ff:D9C8r",
1181 fxch_2
= "fFf:D9C8r|Fff:D9C8R",
1183 fucom_1
= "ff:DDE0r",
1184 fucom_2
= "Fff:DDE0R",
1185 fucomp_1
= "ff:DDE8r",
1186 fucomp_2
= "Fff:DDE8R",
1187 fucomi_1
= "ff:DBE8r", -- P6+
1188 fucomi_2
= "Fff:DBE8R", -- P6+
1189 fucomip_1
= "ff:DFE8r", -- P6+
1190 fucomip_2
= "Fff:DFE8R", -- P6+
1191 fcomi_1
= "ff:DBF0r", -- P6+
1192 fcomi_2
= "Fff:DBF0R", -- P6+
1193 fcomip_1
= "ff:DFF0r", -- P6+
1194 fcomip_2
= "Fff:DFF0R", -- P6+
1198 fldenv_1
= "x.:D94m",
1199 fnstenv_1
= "x.:D96m",
1200 fstenv_1
= "x.:9BD96m",
1201 fldcw_1
= "xw:nD95m",
1202 fstcw_1
= "xw:n9BD97m",
1203 fnstcw_1
= "xw:nD97m",
1204 fstsw_1
= "Rw:n9BDFE0|xw:n9BDD7m",
1205 fnstsw_1
= "Rw:nDFE0|xw:nDD7m",
1210 -- D9D1-D9DF: unassigned
1247 andnpd_2
= "rmo:660F55rM",
1248 andnps_2
= "rmo:0F55rM",
1249 andpd_2
= "rmo:660F54rM",
1250 andps_2
= "rmo:0F54rM",
1251 clflush_1
= "x.:0FAE7m",
1252 cmppd_3
= "rmio:660FC2rMU",
1253 cmpps_3
= "rmio:0FC2rMU",
1254 cmpsd_3
= "rrio:F20FC2rMU|rxi/oq:",
1255 cmpss_3
= "rrio:F30FC2rMU|rxi/od:",
1256 comisd_2
= "rro:660F2FrM|rx/oq:",
1257 comiss_2
= "rro:0F2FrM|rx/od:",
1258 cvtdq2pd_2
= "rro:F30FE6rM|rx/oq:",
1259 cvtdq2ps_2
= "rmo:0F5BrM",
1260 cvtpd2dq_2
= "rmo:F20FE6rM",
1261 cvtpd2ps_2
= "rmo:660F5ArM",
1262 cvtpi2pd_2
= "rx/oq:660F2ArM",
1263 cvtpi2ps_2
= "rx/oq:0F2ArM",
1264 cvtps2dq_2
= "rmo:660F5BrM",
1265 cvtps2pd_2
= "rro:0F5ArM|rx/oq:",
1266 cvtsd2si_2
= "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
1267 cvtsd2ss_2
= "rro:F20F5ArM|rx/oq:",
1268 cvtsi2sd_2
= "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
1269 cvtsi2ss_2
= "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
1270 cvtss2sd_2
= "rro:F30F5ArM|rx/od:",
1271 cvtss2si_2
= "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
1272 cvttpd2dq_2
= "rmo:660FE6rM",
1273 cvttps2dq_2
= "rmo:F30F5BrM",
1274 cvttsd2si_2
= "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
1275 cvttss2si_2
= "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
1276 fxsave_1
= "x.:0FAE0m",
1277 fxrstor_1
= "x.:0FAE1m",
1278 ldmxcsr_1
= "xd:0FAE2m",
1279 lfence_0
= "0FAEE8",
1280 maskmovdqu_2
= "rro:660FF7rM",
1281 mfence_0
= "0FAEF0",
1282 movapd_2
= "rmo:660F28rM|mro:660F29Rm",
1283 movaps_2
= "rmo:0F28rM|mro:0F29Rm",
1284 movd_2
= "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
1285 movdqa_2
= "rmo:660F6FrM|mro:660F7FRm",
1286 movdqu_2
= "rmo:F30F6FrM|mro:F30F7FRm",
1287 movhlps_2
= "rro:0F12rM",
1288 movhpd_2
= "rx/oq:660F16rM|xr/qo:n660F17Rm",
1289 movhps_2
= "rx/oq:0F16rM|xr/qo:n0F17Rm",
1290 movlhps_2
= "rro:0F16rM",
1291 movlpd_2
= "rx/oq:660F12rM|xr/qo:n660F13Rm",
1292 movlps_2
= "rx/oq:0F12rM|xr/qo:n0F13Rm",
1293 movmskpd_2
= "rr/do:660F50rM",
1294 movmskps_2
= "rr/do:0F50rM",
1295 movntdq_2
= "xro:660FE7Rm",
1296 movnti_2
= "xrqd:0FC3Rm",
1297 movntpd_2
= "xro:660F2BRm",
1298 movntps_2
= "xro:0F2BRm",
1299 movq_2
= "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
1300 movsd_2
= "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
1301 movss_2
= "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
1302 movupd_2
= "rmo:660F10rM|mro:660F11Rm",
1303 movups_2
= "rmo:0F10rM|mro:0F11Rm",
1304 orpd_2
= "rmo:660F56rM",
1305 orps_2
= "rmo:0F56rM",
1307 pextrw_3
= "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
1308 pinsrw_3
= "rri/od:660FC4rMU|rxi/ow:",
1309 pmovmskb_2
= "rr/do:660FD7rM",
1310 prefetchnta_1
= "xb:n0F180m",
1311 prefetcht0_1
= "xb:n0F181m",
1312 prefetcht1_1
= "xb:n0F182m",
1313 prefetcht2_1
= "xb:n0F183m",
1314 pshufd_3
= "rmio:660F70rMU",
1315 pshufhw_3
= "rmio:F30F70rMU",
1316 pshuflw_3
= "rmio:F20F70rMU",
1317 pslld_2
= "rmo:660FF2rM|rio:660F726mU",
1318 pslldq_2
= "rio:660F737mU",
1319 psllq_2
= "rmo:660FF3rM|rio:660F736mU",
1320 psllw_2
= "rmo:660FF1rM|rio:660F716mU",
1321 psrad_2
= "rmo:660FE2rM|rio:660F724mU",
1322 psraw_2
= "rmo:660FE1rM|rio:660F714mU",
1323 psrld_2
= "rmo:660FD2rM|rio:660F722mU",
1324 psrldq_2
= "rio:660F733mU",
1325 psrlq_2
= "rmo:660FD3rM|rio:660F732mU",
1326 psrlw_2
= "rmo:660FD1rM|rio:660F712mU",
1327 rcpps_2
= "rmo:0F53rM",
1328 rcpss_2
= "rro:F30F53rM|rx/od:",
1329 rsqrtps_2
= "rmo:0F52rM",
-- RSQRTSS reads a scalar single (xmm/m32). Keep the historical "rmo" form for
-- existing callers, and additionally accept an explicitly dword-sized memory
-- operand, matching what rcpss_2 already allows.
rsqrtss_2 = "rmo:F30F52rM|rx/od:",
1331 sfence_0
= "0FAEF8",
1332 shufpd_3
= "rmio:660FC6rMU",
1333 shufps_3
= "rmio:0FC6rMU",
1334 stmxcsr_1
= "xd:0FAE3m",
1335 ucomisd_2
= "rro:660F2ErM|rx/oq:",
1336 ucomiss_2
= "rro:0F2ErM|rx/od:",
1337 unpckhpd_2
= "rmo:660F15rM",
1338 unpckhps_2
= "rmo:0F15rM",
1339 unpcklpd_2
= "rmo:660F14rM",
1340 unpcklps_2
= "rmo:0F14rM",
1341 xorpd_2
= "rmo:660F57rM",
1342 xorps_2
= "rmo:0F57rM",
1345 fisttp_1
= "xw:nDF1m|xd:DB1m|xq:nDD1m",
1346 addsubpd_2
= "rmo:660FD0rM",
1347 addsubps_2
= "rmo:F20FD0rM",
1348 haddpd_2
= "rmo:660F7CrM",
1349 haddps_2
= "rmo:F20F7CrM",
1350 hsubpd_2
= "rmo:660F7DrM",
1351 hsubps_2
= "rmo:F20F7DrM",
1352 lddqu_2
= "rxo:F20FF0rM",
1353 movddup_2
= "rmo:F20F12rM",
1354 movshdup_2
= "rmo:F30F16rM",
1355 movsldup_2
= "rmo:F30F12rM",
1358 pabsb_2
= "rmo:660F381CrM",
1359 pabsd_2
= "rmo:660F381ErM",
1360 pabsw_2
= "rmo:660F381DrM",
1361 palignr_3
= "rmio:660F3A0FrMU",
1362 phaddd_2
= "rmo:660F3802rM",
1363 phaddsw_2
= "rmo:660F3803rM",
1364 phaddw_2
= "rmo:660F3801rM",
1365 phsubd_2
= "rmo:660F3806rM",
1366 phsubsw_2
= "rmo:660F3807rM",
1367 phsubw_2
= "rmo:660F3805rM",
1368 pmaddubsw_2
= "rmo:660F3804rM",
1369 pmulhrsw_2
= "rmo:660F380BrM",
1370 pshufb_2
= "rmo:660F3800rM",
1371 psignb_2
= "rmo:660F3808rM",
1372 psignd_2
= "rmo:660F380ArM",
1373 psignw_2
= "rmo:660F3809rM",
1376 blendpd_3
= "rmio:660F3A0DrMU",
1377 blendps_3
= "rmio:660F3A0CrMU",
1378 blendvpd_3
= "rmRo:660F3815rM",
1379 blendvps_3
= "rmRo:660F3814rM",
1380 dppd_3
= "rmio:660F3A41rMU",
1381 dpps_3
= "rmio:660F3A40rMU",
1382 extractps_3
= "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
-- INSERTPS is 66 0F 3A 21 /r ib. Opcode 0F3A41 is DPPD (see dppd_3), and the
-- AVX form vinsertps_4 already uses 21.
insertps_3 = "rrio:660F3A21rMU|rxi/od:",
1384 movntdqa_2
= "rxo:660F382ArM",
1385 mpsadbw_3
= "rmio:660F3A42rMU",
1386 packusdw_2
= "rmo:660F382BrM",
1387 pblendvb_3
= "rmRo:660F3810rM",
1388 pblendw_3
= "rmio:660F3A0ErMU",
1389 pcmpeqq_2
= "rmo:660F3829rM",
1390 pextrb_3
= "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
1391 pextrd_3
= "mri/do:660F3A16RmU",
1392 pextrq_3
= "mri/qo:660F3A16RmU",
1393 -- pextrw is SSE2, mem operand is SSE4.1 only
1394 phminposuw_2
= "rmo:660F3841rM",
1395 pinsrb_3
= "rri/od:660F3A20nrMU|rxi/ob:",
1396 pinsrd_3
= "rmi/od:660F3A22rMU",
1397 pinsrq_3
= "rmi/oq:660F3A22rXMU",
1398 pmaxsb_2
= "rmo:660F383CrM",
1399 pmaxsd_2
= "rmo:660F383DrM",
1400 pmaxud_2
= "rmo:660F383FrM",
1401 pmaxuw_2
= "rmo:660F383ErM",
1402 pminsb_2
= "rmo:660F3838rM",
1403 pminsd_2
= "rmo:660F3839rM",
1404 pminud_2
= "rmo:660F383BrM",
1405 pminuw_2
= "rmo:660F383ArM",
1406 pmovsxbd_2
= "rro:660F3821rM|rx/od:",
1407 pmovsxbq_2
= "rro:660F3822rM|rx/ow:",
1408 pmovsxbw_2
= "rro:660F3820rM|rx/oq:",
1409 pmovsxdq_2
= "rro:660F3825rM|rx/oq:",
1410 pmovsxwd_2
= "rro:660F3823rM|rx/oq:",
1411 pmovsxwq_2
= "rro:660F3824rM|rx/od:",
1412 pmovzxbd_2
= "rro:660F3831rM|rx/od:",
1413 pmovzxbq_2
= "rro:660F3832rM|rx/ow:",
1414 pmovzxbw_2
= "rro:660F3830rM|rx/oq:",
1415 pmovzxdq_2
= "rro:660F3835rM|rx/oq:",
1416 pmovzxwd_2
= "rro:660F3833rM|rx/oq:",
1417 pmovzxwq_2
= "rro:660F3834rM|rx/od:",
1418 pmuldq_2
= "rmo:660F3828rM",
1419 pmulld_2
= "rmo:660F3840rM",
1420 ptest_2
= "rmo:660F3817rM",
1421 roundpd_3
= "rmio:660F3A09rMU",
1422 roundps_3
= "rmio:660F3A08rMU",
1423 roundsd_3
= "rrio:660F3A0BrMU|rxi/oq:",
1424 roundss_3
= "rrio:660F3A0ArMU|rxi/od:",
1427 crc32_2
= "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
1428 pcmpestri_3
= "rmio:660F3A61rMU",
1429 pcmpestrm_3
= "rmio:660F3A60rMU",
1430 pcmpgtq_2
= "rmo:660F3837rM",
1431 pcmpistri_3
= "rmio:660F3A63rMU",
1432 pcmpistrm_3
= "rmio:660F3A62rMU",
1433 popcnt_2
= "rmqdw:F30FB8rM",
1436 extrq_2
= "rro:660F79rM",
1437 extrq_3
= "riio:660F780mUU",
1438 insertq_2
= "rro:F20F79rM",
1439 insertq_4
= "rriio:F20F78rMUU",
1440 lzcnt_2
= "rmqdw:F30FBDrM",
1441 movntsd_2
= "xr/qo:nF20F2BRm",
1442 movntss_2
= "xr/do:F30F2BRm",
1443 -- popcnt is also in SSE4.2
1446 aesdec_2
= "rmo:660F38DErM",
1447 aesdeclast_2
= "rmo:660F38DFrM",
1448 aesenc_2
= "rmo:660F38DCrM",
1449 aesenclast_2
= "rmo:660F38DDrM",
1450 aesimc_2
= "rmo:660F38DBrM",
1451 aeskeygenassist_3
= "rmio:660F3ADFrMU",
1452 pclmulqdq_3
= "rmio:660F3A44rMU",
1455 vaddsubpd_3
= "rrmoy:660FVD0rM",
1456 vaddsubps_3
= "rrmoy:F20FVD0rM",
1457 vandpd_3
= "rrmoy:660FV54rM",
1458 vandps_3
= "rrmoy:0FV54rM",
1459 vandnpd_3
= "rrmoy:660FV55rM",
1460 vandnps_3
= "rrmoy:0FV55rM",
1461 vblendpd_4
= "rrmioy:660F3AV0DrMU",
1462 vblendps_4
= "rrmioy:660F3AV0CrMU",
1463 vblendvpd_4
= "rrmroy:660F3AV4BrMs",
1464 vblendvps_4
= "rrmroy:660F3AV4ArMs",
1465 vbroadcastf128_2
= "rx/yo:660F38u1ArM",
1466 vcmppd_4
= "rrmioy:660FVC2rMU",
1467 vcmpps_4
= "rrmioy:0FVC2rMU",
1468 vcmpsd_4
= "rrrio:F20FVC2rMU|rrxi/ooq:",
1469 vcmpss_4
= "rrrio:F30FVC2rMU|rrxi/ood:",
1470 vcomisd_2
= "rro:660Fu2FrM|rx/oq:",
1471 vcomiss_2
= "rro:0Fu2FrM|rx/od:",
1472 vcvtdq2pd_2
= "rro:F30FuE6rM|rx/oq:|rm/yo:",
1473 vcvtdq2ps_2
= "rmoy:0Fu5BrM",
1474 vcvtpd2dq_2
= "rmoy:F20FuE6rM",
1475 vcvtpd2ps_2
= "rmoy:660Fu5ArM",
1476 vcvtps2dq_2
= "rmoy:660Fu5BrM",
1477 vcvtps2pd_2
= "rro:0Fu5ArM|rx/oq:|rm/yo:",
1478 vcvtsd2si_2
= "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
1479 vcvtsd2ss_3
= "rrro:F20FV5ArM|rrx/ooq:",
1480 vcvtsi2sd_3
= "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
1481 vcvtsi2ss_3
= "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
1482 vcvtss2sd_3
= "rrro:F30FV5ArM|rrx/ood:",
1483 vcvtss2si_2
= "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
1484 vcvttpd2dq_2
= "rmo:660FuE6rM|rm/oy:660FuLE6rM",
1485 vcvttps2dq_2
= "rmoy:F30Fu5BrM",
1486 vcvttsd2si_2
= "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
1487 vcvttss2si_2
= "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
1488 vdppd_4
= "rrmio:660F3AV41rMU",
1489 vdpps_4
= "rrmioy:660F3AV40rMU",
1490 vextractf128_3
= "mri/oy:660F3AuL19RmU",
1491 vextractps_3
= "mri/do:660F3Au17RmU",
1492 vhaddpd_3
= "rrmoy:660FV7CrM",
1493 vhaddps_3
= "rrmoy:F20FV7CrM",
1494 vhsubpd_3
= "rrmoy:660FV7DrM",
1495 vhsubps_3
= "rrmoy:F20FV7DrM",
1496 vinsertf128_4
= "rrmi/yyo:660F3AV18rMU",
1497 vinsertps_4
= "rrrio:660F3AV21rMU|rrxi/ood:",
1498 vldmxcsr_1
= "xd:0FuAE2m",
1499 vmaskmovps_3
= "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
1500 vmaskmovpd_3
= "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
1501 vmovapd_2
= "rmoy:660Fu28rM|mroy:660Fu29Rm",
1502 vmovaps_2
= "rmoy:0Fu28rM|mroy:0Fu29Rm",
1503 vmovd_2
= "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
1504 vmovq_2
= "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
1505 vmovddup_2
= "rmy:F20Fu12rM|rro:|rx/oq:",
1506 vmovhlps_3
= "rrro:0FV12rM",
1507 vmovhpd_2
= "xr/qo:660Fu17Rm",
1508 vmovhpd_3
= "rrx/ooq:660FV16rM",
1509 vmovhps_2
= "xr/qo:0Fu17Rm",
1510 vmovhps_3
= "rrx/ooq:0FV16rM",
1511 vmovlhps_3
= "rrro:0FV16rM",
1512 vmovlpd_2
= "xr/qo:660Fu13Rm",
1513 vmovlpd_3
= "rrx/ooq:660FV12rM",
1514 vmovlps_2
= "xr/qo:0Fu13Rm",
1515 vmovlps_3
= "rrx/ooq:0FV12rM",
1516 vmovmskpd_2
= "rr/do:660Fu50rM|rr/dy:660FuL50rM",
1517 vmovmskps_2
= "rr/do:0Fu50rM|rr/dy:0FuL50rM",
1518 vmovntpd_2
= "xroy:660Fu2BRm",
1519 vmovntps_2
= "xroy:0Fu2BRm",
1520 vmovsd_2
= "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
1521 vmovsd_3
= "rrro:F20FV10rM",
1522 vmovshdup_2
= "rmoy:F30Fu16rM",
1523 vmovsldup_2
= "rmoy:F30Fu12rM",
1524 vmovss_2
= "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
1525 vmovss_3
= "rrro:F30FV10rM",
1526 vmovupd_2
= "rmoy:660Fu10rM|mroy:660Fu11Rm",
1527 vmovups_2
= "rmoy:0Fu10rM|mroy:0Fu11Rm",
1528 vorpd_3
= "rrmoy:660FV56rM",
1529 vorps_3
= "rrmoy:0FV56rM",
1530 vpermilpd_3
= "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
1531 vpermilps_3
= "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
1532 vperm2f128_4
= "rrmiy:660F3AV06rMU",
1533 vptestpd_2
= "rmoy:660F38u0FrM",
1534 vptestps_2
= "rmoy:660F38u0ErM",
1535 vrcpps_2
= "rmoy:0Fu53rM",
1536 vrcpss_3
= "rrro:F30FV53rM|rrx/ood:",
1537 vrsqrtps_2
= "rmoy:0Fu52rM",
1538 vrsqrtss_3
= "rrro:F30FV52rM|rrx/ood:",
-- VROUNDPD/VROUNDPS have no VEX.vvvv source operand (dest, src/mem, imm8).
-- The VEX marker must therefore be `u' (vvvv unused). `V' would pull operand 2
-- out of the argument list as vvvv and leave the immediate to be encoded as
-- the ModRM operand. Same convention as the imm8 form of vpermilpd_3.
vroundpd_3 = "rmioy:660F3Au09rMU",
vroundps_3 = "rmioy:660F3Au08rMU",
1541 vroundsd_4
= "rrrio:660F3AV0BrMU|rrxi/ooq:",
1542 vroundss_4
= "rrrio:660F3AV0ArMU|rrxi/ood:",
1543 vshufpd_4
= "rrmioy:660FVC6rMU",
1544 vshufps_4
= "rrmioy:0FVC6rMU",
1545 vsqrtps_2
= "rmoy:0Fu51rM",
1546 vsqrtss_2
= "rro:F30Fu51rM|rx/od:",
1547 vsqrtpd_2
= "rmoy:660Fu51rM",
1548 vsqrtsd_2
= "rro:F20Fu51rM|rx/oq:",
1549 vstmxcsr_1
= "xd:0FuAE3m",
1550 vucomisd_2
= "rro:660Fu2ErM|rx/oq:",
1551 vucomiss_2
= "rro:0Fu2ErM|rx/od:",
1552 vunpckhpd_3
= "rrmoy:660FV15rM",
1553 vunpckhps_3
= "rrmoy:0FV15rM",
1554 vunpcklpd_3
= "rrmoy:660FV14rM",
1555 vunpcklps_3
= "rrmoy:0FV14rM",
1556 vxorpd_3
= "rrmoy:660FV57rM",
1557 vxorps_3
= "rrmoy:0FV57rM",
1558 vzeroall_0
= "0FuL77",
1559 vzeroupper_0
= "0Fu77",
1562 vbroadcastss_2
= "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
1563 vbroadcastsd_2
= "rx/yq:660F38u19rM|rr/yo:",
1564 -- *vgather* (!vsib)
1565 vpermpd_3
= "rmiy:660F3AuX01rMU",
1566 vpermps_3
= "rrmy:660F38V16rM",
1568 -- AVX, AVX2 integer ops
1569 -- In general, xmm requires AVX, ymm requires AVX2.
1570 vaesdec_3
= "rrmo:660F38VDErM",
1571 vaesdeclast_3
= "rrmo:660F38VDFrM",
1572 vaesenc_3
= "rrmo:660F38VDCrM",
1573 vaesenclast_3
= "rrmo:660F38VDDrM",
1574 vaesimc_2
= "rmo:660F38uDBrM",
1575 vaeskeygenassist_3
= "rmio:660F3AuDFrMU",
1576 vlddqu_2
= "rxoy:F20FuF0rM",
1577 vmaskmovdqu_2
= "rro:660FuF7rM",
1578 vmovdqa_2
= "rmoy:660Fu6FrM|mroy:660Fu7FRm",
1579 vmovdqu_2
= "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
1580 vmovntdq_2
= "xroy:660FuE7Rm",
1581 vmovntdqa_2
= "rxoy:660F38u2ArM",
1582 vmpsadbw_4
= "rrmioy:660F3AV42rMU",
1583 vpabsb_2
= "rmoy:660F38u1CrM",
1584 vpabsd_2
= "rmoy:660F38u1ErM",
1585 vpabsw_2
= "rmoy:660F38u1DrM",
1586 vpackusdw_3
= "rrmoy:660F38V2BrM",
1587 vpalignr_4
= "rrmioy:660F3AV0FrMU",
1588 vpblendvb_4
= "rrmroy:660F3AV4CrMs",
1589 vpblendw_4
= "rrmioy:660F3AV0ErMU",
1590 vpclmulqdq_4
= "rrmio:660F3AV44rMU",
1591 vpcmpeqq_3
= "rrmoy:660F38V29rM",
1592 vpcmpestri_3
= "rmio:660F3Au61rMU",
1593 vpcmpestrm_3
= "rmio:660F3Au60rMU",
1594 vpcmpgtq_3
= "rrmoy:660F38V37rM",
1595 vpcmpistri_3
= "rmio:660F3Au63rMU",
1596 vpcmpistrm_3
= "rmio:660F3Au62rMU",
1597 vpextrb_3
= "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
1598 vpextrw_3
= "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
1599 vpextrd_3
= "mri/do:660F3Au16RmU",
1600 vpextrq_3
= "mri/qo:660F3Au16RmU",
1601 vphaddw_3
= "rrmoy:660F38V01rM",
1602 vphaddd_3
= "rrmoy:660F38V02rM",
1603 vphaddsw_3
= "rrmoy:660F38V03rM",
1604 vphminposuw_2
= "rmo:660F38u41rM",
1605 vphsubw_3
= "rrmoy:660F38V05rM",
1606 vphsubd_3
= "rrmoy:660F38V06rM",
1607 vphsubsw_3
= "rrmoy:660F38V07rM",
1608 vpinsrb_4
= "rrri/ood:660F3AV20rMU|rrxi/oob:",
1609 vpinsrw_4
= "rrri/ood:660FVC4rMU|rrxi/oow:",
1610 vpinsrd_4
= "rrmi/ood:660F3AV22rMU",
1611 vpinsrq_4
= "rrmi/ooq:660F3AVX22rMU",
1612 vpmaddubsw_3
= "rrmoy:660F38V04rM",
1613 vpmaxsb_3
= "rrmoy:660F38V3CrM",
1614 vpmaxsd_3
= "rrmoy:660F38V3DrM",
1615 vpmaxuw_3
= "rrmoy:660F38V3ErM",
1616 vpmaxud_3
= "rrmoy:660F38V3FrM",
1617 vpminsb_3
= "rrmoy:660F38V38rM",
1618 vpminsd_3
= "rrmoy:660F38V39rM",
1619 vpminuw_3
= "rrmoy:660F38V3ArM",
1620 vpminud_3
= "rrmoy:660F38V3BrM",
1621 vpmovmskb_2
= "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
1622 vpmovsxbw_2
= "rroy:660F38u20rM|rx/oq:|rx/yo:",
1623 vpmovsxbd_2
= "rroy:660F38u21rM|rx/od:|rx/yq:",
1624 vpmovsxbq_2
= "rroy:660F38u22rM|rx/ow:|rx/yd:",
1625 vpmovsxwd_2
= "rroy:660F38u23rM|rx/oq:|rx/yo:",
1626 vpmovsxwq_2
= "rroy:660F38u24rM|rx/od:|rx/yq:",
1627 vpmovsxdq_2
= "rroy:660F38u25rM|rx/oq:|rx/yo:",
1628 vpmovzxbw_2
= "rroy:660F38u30rM|rx/oq:|rx/yo:",
1629 vpmovzxbd_2
= "rroy:660F38u31rM|rx/od:|rx/yq:",
1630 vpmovzxbq_2
= "rroy:660F38u32rM|rx/ow:|rx/yd:",
1631 vpmovzxwd_2
= "rroy:660F38u33rM|rx/oq:|rx/yo:",
1632 vpmovzxwq_2
= "rroy:660F38u34rM|rx/od:|rx/yq:",
1633 vpmovzxdq_2
= "rroy:660F38u35rM|rx/oq:|rx/yo:",
1634 vpmuldq_3
= "rrmoy:660F38V28rM",
1635 vpmulhrsw_3
= "rrmoy:660F38V0BrM",
1636 vpmulld_3
= "rrmoy:660F38V40rM",
1637 vpshufb_3
= "rrmoy:660F38V00rM",
1638 vpshufd_3
= "rmioy:660Fu70rMU",
1639 vpshufhw_3
= "rmioy:F30Fu70rMU",
1640 vpshuflw_3
= "rmioy:F20Fu70rMU",
1641 vpsignb_3
= "rrmoy:660F38V08rM",
1642 vpsignw_3
= "rrmoy:660F38V09rM",
1643 vpsignd_3
= "rrmoy:660F38V0ArM",
1644 vpslldq_3
= "rrioy:660Fv737mU",
1645 vpsllw_3
= "rrmoy:660FVF1rM|rrioy:660Fv716mU",
1646 vpslld_3
= "rrmoy:660FVF2rM|rrioy:660Fv726mU",
1647 vpsllq_3
= "rrmoy:660FVF3rM|rrioy:660Fv736mU",
1648 vpsraw_3
= "rrmoy:660FVE1rM|rrioy:660Fv714mU",
1649 vpsrad_3
= "rrmoy:660FVE2rM|rrioy:660Fv724mU",
1650 vpsrldq_3
= "rrioy:660Fv733mU",
1651 vpsrlw_3
= "rrmoy:660FVD1rM|rrioy:660Fv712mU",
1652 vpsrld_3
= "rrmoy:660FVD2rM|rrioy:660Fv722mU",
1653 vpsrlq_3
= "rrmoy:660FVD3rM|rrioy:660Fv732mU",
1654 vptest_2
= "rmoy:660F38u17rM",
1657 vbroadcasti128_2
= "rx/yo:660F38u5ArM",
1658 vinserti128_4
= "rrmi/yyo:660F3AV38rMU",
1659 vextracti128_3
= "mri/oy:660F3AuL39RmU",
1660 vpblendd_4
= "rrmioy:660F3AV02rMU",
1661 vpbroadcastb_2
= "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
1662 vpbroadcastw_2
= "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
1663 vpbroadcastd_2
= "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
1664 vpbroadcastq_2
= "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
1665 vpermd_3
= "rrmy:660F38V36rM",
1666 vpermq_3
= "rmiy:660F3AuX00rMU",
1667 -- *vpgather* (!vsib)
1668 vperm2i128_4
= "rrmiy:660F3AV46rMU",
1669 vpmaskmovd_3
= "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
1670 vpmaskmovq_3
= "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
1671 vpsllvd_3
= "rrmoy:660F38V47rM",
1672 vpsllvq_3
= "rrmoy:660F38VX47rM",
1673 vpsravd_3
= "rrmoy:660F38V46rM",
1674 vpsrlvd_3
= "rrmoy:660F38V45rM",
1675 vpsrlvq_3
= "rrmoy:660F38VX45rM",
1678 ------------------------------------------------------------------------------
-- Arithmetic ops.
-- For each mnemonic, n is inserted as the spare digit of the 81/83 immediate
-- forms, and 8*n+1 / 8*n+3 / 8*n+5 give the opcode bytes of the mr, rm and
-- accumulator-immediate forms.
for name, n in pairs{
  add = 0, ["or"] = 1, adc = 2, sbb = 3,
  ["and"] = 4, sub = 5, xor = 6, cmp = 7,
} do
  local n8 = shl(n, 3)
  map_op[name.."_2"] = format(
    "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi",
    1+n8, 3+n8, n, n, 5+n8, n)
end
-- Shift ops.
-- n is the spare digit for the D1 (by 1), D3 (by cl) and C1 (by imm8) forms.
-- `sal' shares its encoding with `shl'.
for name, n in pairs{
  rol = 0, ror = 1, rcl = 2, rcr = 3,
  shl = 4, shr = 5, sar = 7, sal = 4,
} do
  map_op[name.."_2"] = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", n, n, n)
end
-- Conditional ops: one jcc/setcc/cmovcc template per condition code in map_cc.
for cc, n in pairs(map_cc) do
  map_op["j"..cc.."_1"] = format("J.:n0F8%XJ", n) -- short: 7%X
  map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n)
  map_op["cmov"..cc.."_2"] = format("rmqdw:0F4%XrM", n) -- P6+
end
-- FP arithmetic ops.
-- nc is the register-form opcode byte (C0 + 8*n). nr is the byte used for the
-- DC/DE forms: identical to nc for n < 4, otherwise the sub/subr and div/divr
-- pairs are swapped (+8 for even n, -8 for odd n).
for name, n in pairs{
  add = 0, mul = 1, com = 2, comp = 3,
  sub = 4, subr = 5, div = 6, divr = 7,
} do
  local nc = 0xc0 + shl(n, 3)
  local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8))
  local fn = "f"..name
  map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
  if n == 2 or n == 3 then
    -- fcom/fcomp: no reversed-destination (DC) form and no popping variant.
    map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n)
  else
    map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n)
    map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
    map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
  end
  map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
end
-- FP conditional moves.
-- The low two bits of n select the C0/C8/D0/D8 row; bit 2 (the negated
-- conditions) switches the first opcode byte from DA to DB.
for cc, n in pairs{
  b = 0, e = 1, be = 2, u = 3,
  nb = 4, ne = 5, nbe = 6, nu = 7,
} do
  local nc = 0xdac0 + shl(band(n, 3), 3) + shl(band(n, 4), 6)
  map_op["fcmov"..cc.."_1"] = format("ff:%04Xr", nc) -- P6+
  map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
end
-- SSE / AVX FP arithmetic ops.
-- n is the low opcode nibble (0F 5n).
for name, n in pairs{
  sqrt = 1, add = 8, mul = 9,
  sub = 12, min = 13, div = 14, max = 15,
} do
  map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
  map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
  map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
  map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
  -- The vsqrt* templates are listed explicitly in map_op with their own
  -- operand counts, so the generic three-operand AVX forms are skipped for
  -- sqrt.
  -- NOTE(review): this guard is reconstructed from a line missing from the
  -- listing; confirm against the upstream file.
  if n ~= 1 then
    map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
    map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
    map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
    map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
  end
end
-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
-- Each entry maps a mnemonic to its final opcode byte. Both the two-operand
-- SSE2 template and the three-operand VEX template are generated from it.
for name, n in pairs{
  paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
  paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
  packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
  paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
  pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
  pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
  pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
  pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
  pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
  pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
  psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
  psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
  punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
  punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
  punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
} do
  map_op[name.."_2"] = format("rmo:660F%02XrM", n)
  map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
end
1763 ------------------------------------------------------------------------------
-- VEX marker characters used in pattern strings. The value is the index of
-- the operand that dopattern removes from the argument list to supply VEX.v;
-- `false' means no operand is consumed.
local map_vexarg = { u = false, v = 1, V = 2 }
1767 -- Process pattern string.
1768 local function dopattern(pat
, args
, sz
, op
, needrex
)
1769 local digit
, addin
, vex
1775 -- Limit number of section buffer positions used by a single dasm_put().
1776 -- A single opcode needs a maximum of 6 positions.
1777 if secpos
+6 > maxsecpos
then wflush() end
1779 -- Process each character.
1780 for c
in gmatch(pat
.."|", ".") do
1781 if match(c
, "%x") then -- Hex digit.
1782 digit
= byte(c
) - 48
1783 if digit
> 48 then digit
= digit
- 39
1784 elseif digit
> 16 then digit
= digit
- 7 end
1785 opcode
= opcode
*16 + digit
1787 elseif c
== "n" then -- Disable operand size mods for opcode.
1789 elseif c
== "X" then -- Force REX.W.
1791 elseif c
== "L" then -- Force VEX.L.
1793 elseif c
== "r" then -- Merge 1st operand regno. into opcode.
1794 addin
= args
[1]; opcode
= opcode
+ (addin
.reg
% 8)
1795 if narg
< 2 then narg
= 2 end
1796 elseif c
== "R" then -- Merge 2nd operand regno. into opcode.
1797 addin
= args
[2]; opcode
= opcode
+ (addin
.reg
% 8)
1799 elseif c
== "m" or c
== "M" then -- Encode ModRM/SIB.
1803 opcode
= opcode
- band(s
, 7) -- Undo regno opcode merge.
1805 s
= band(opcode
, 15) -- Undo last digit.
1806 opcode
= shr(opcode
, 4)
1808 local nn
= c
== "m" and 1 or 2
1810 if narg
<= nn
then narg
= nn
+ 1 end
1811 if szov
== "q" and rex
== 0 then rex
= rex
+ 8 end
1812 if t
.reg
and t
.reg
> 7 then rex
= rex
+ 1 end
1813 if t
.xreg
and t
.xreg
> 7 then rex
= rex
+ 2 end
1814 if s
> 7 then rex
= rex
+ 4 end
1815 if needrex
then rex
= rex
+ 16 end
1816 local psz
, sk
= wputop(szov
, opcode
, rex
, vex
, s
< 0, t
.vreg
or t
.vxreg
)
1818 local imark
= sub(pat
, -1) -- Force a mark (ugly).
1819 -- Put ModRM/SIB with regno/last digit as spare.
1820 wputmrmsib(t
, imark
, s
, addin
and addin
.vreg
, psz
, sk
)
1822 elseif map_vexarg
[c
] ~= nil then -- Encode using VEX prefix
1823 local b
= band(opcode
, 255); opcode
= shr(opcode
, 8)
1825 if b
== 0x38 then m
= 2
1826 elseif b
== 0x3a then m
= 3 end
1827 if m
~= 1 then b
= band(opcode
, 255); opcode
= shr(opcode
, 8) end
1829 werror("expected `0F', `0F38', or `0F3A' to precede `"..c
..
1830 "' in pattern `"..pat
.."' for `"..op
.."'")
1832 local v
= map_vexarg
[c
]
1833 if v
then v
= remove(args
, v
) end
1834 b
= band(opcode
, 255)
1836 if b
== 0x66 then p
= 1
1837 elseif b
== 0xf3 then p
= 2
1838 elseif b
== 0xf2 then p
= 3 end
1839 if p
~= 0 then opcode
= shr(opcode
, 8) end
1840 if opcode
~= 0 then wputop(nil, opcode
, 0); opcode
= 0 end
1841 vex
= { m
= m
, p
= p
, v
= v
}
1843 if opcode
then -- Flush opcode.
1844 if szov
== "q" and rex
== 0 then rex
= rex
+ 8 end
1845 if needrex
then rex
= rex
+ 16 end
1846 if addin
and addin
.reg
== -1 then
1847 local psz
, sk
= wputop(szov
, opcode
- 7, rex
, vex
, true)
1848 wvreg("opcode", addin
.vreg
, psz
, sk
)
1850 if addin
and addin
.reg
> 7 then rex
= rex
+ 1 end
1851 wputop(szov
, opcode
, rex
, vex
)
1855 if c
== "|" then break end
1856 if c
== "o" then -- Offset (pure 32 bit displacement).
1857 wputdarg(args
[1].disp
); if narg
< 2 then narg
= 2 end
1858 elseif c
== "O" then
1859 wputdarg(args
[2].disp
); narg
= 3
1861 -- Anything else is an immediate operand.
1862 local a
= args
[narg
]
1864 local mode
, imm
= a
.mode
, a
.imm
1865 if mode
== "iJ" and not match("iIJ", c
) then
1866 werror("bad operand size for label")
1870 elseif c
== "U" then
1872 elseif c
== "W" then
1874 elseif c
== "i" or c
== "I" then
1875 if mode
== "iJ" then
1876 wputlabel("IMM_", imm
, 1)
1877 elseif mode
== "iI" and c
== "I" then
1878 waction(sz
== "w" and "IMM_WB" or "IMM_DB", imm
)
1882 elseif c
== "J" then
1883 if mode
== "iPJ" then
1884 waction("REL_A", imm
) -- !x64 (secpos)
1886 wputlabel("REL_", imm
, 2)
1888 elseif c
== "s" then
1892 wvreg("imm.hi", a
.vreg
)
1897 werror("bad char `"..c
.."' in pattern `"..pat
.."' for `"..op
.."'")
1904 ------------------------------------------------------------------------------
-- Mapping of operand modes to short names. Suppress output with '#'.
local map_modename = {
  r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm",
  f = "stx", F = "st0", J = "lbl", ["1"] = "1",
  I = "#", S = "#", O = "#",
}
-- Return a table/string showing all possible operand modes.
-- template: the '|'-separated template string of an opcode.
-- nparams:  number of operands; 0 yields the empty string.
-- Each alternative contributes one "mode, mode, ..." line built from its
-- first nparams mode characters. Alternatives containing a mode that maps to
-- '#' are suppressed.
-- NOTE(review): the accumulator declaration and the final return were missing
-- from the listing and are reconstructed here; confirm against upstream.
local function templatehelp(template, nparams)
  if nparams == 0 then return "" end
  local t = {}
  for tm in gmatch(template, "[^%|]+") do
    local s = map_modename[sub(tm, 1, 1)]
    -- Only the first result of gsub (the string) takes part in the concat.
    s = s..gsub(sub(tm, 2, nparams), ".", function(c)
      return ", "..map_modename[c]
    end)
    if not match(s, "#") then t[#t+1] = s end
  end
  return t
end
-- Match operand modes against mode match part of template.
-- The i-th character of tm is used as a pattern against the mode string of
-- the i-th operand. Returns true if every operand matches, nothing otherwise.
-- NOTE(review): the loop header and the success return were missing from the
-- listing and are reconstructed here; confirm against upstream.
local function matchtm(tm, args)
  for i = 1, #args do
    if not match(args[i].mode, sub(tm, i, i)) then return end
  end
  return true
end
1935 -- Handle opcodes defined with template strings.
1936 map_op
[".template__"] = function(params
, template
, nparams
)
1937 if not params
then return templatehelp(template
, nparams
) end
1940 -- Zero-operand opcodes have no match part.
1941 if #params
== 0 then
1942 dopattern(template
, args
, "d", params
.op
, nil)
1946 -- Determine common operand size (coerce undefined size) or flag as mixed.
1947 local sz
, szmix
, needrex
1948 for i
,p
in ipairs(params
) do
1949 args
[i
] = parseoperand(p
)
1950 local nsz
= args
[i
].opsize
1952 if sz
and sz
~= nsz
then szmix
= true else sz
= nsz
end
1954 local nrex
= args
[i
].needrex
1956 if needrex
== nil then
1958 elseif needrex
~= nrex
then
1959 werror("bad mix of byte-addressable registers")
1964 -- Try all match:pattern pairs (separated by '|').
1965 local gotmatch
, lastpat
1966 for tm
in gmatch(template
, "[^%|]+") do
1967 -- Split off size match (starts after mode match) and pattern string.
1968 local szm
, pat
= match(tm
, "^(.-):(.*)$", #args
+1)
1969 if pat
== "" then pat
= lastpat
else lastpat
= pat
end
1970 if matchtm(tm
, args
) then
1971 local prefix
= sub(szm
, 1, 1)
1972 if prefix
== "/" then -- Exactly match leading operand sizes.
1973 for i
= #szm
,1,-1 do
1975 dopattern(pat
, args
, sz
, params
.op
, needrex
) -- Process pattern.
1977 elseif args
[i
-1].opsize
~= sub(szm
, i
, i
) then
1981 else -- Match common operand size.
1983 if szm
== "" then szm
= x64
and "qdwb" or "dwb" end -- Default sizes.
1984 if prefix
== "1" then szp
= args
[1].opsize
; szmix
= nil
1985 elseif prefix
== "2" then szp
= args
[2].opsize
; szmix
= nil end
1986 if not szmix
and (prefix
== "." or match(szm
, szp
or "#")) then
1987 dopattern(pat
, args
, szp
, params
.op
, needrex
) -- Process pattern.
1995 local msg
= "bad operand mode"
1998 msg
= "mixed operand size"
2000 msg
= sz
and "bad operand size" or "missing operand size"
2004 werror(msg
.." in `"..opmodestr(params
.op
, args
).."'")
2007 ------------------------------------------------------------------------------
2009 -- x64-specific opcode for 64 bit immediates and displacements.
2011 function map_op
.mov64_2(params
)
2012 if not params
then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
2013 if secpos
+2 > maxsecpos
then wflush() end
2014 local opcode
, op64
, sz
, rex
, vreg
2015 local op64
= match(params
[1], "^%[%s*(.-)%s*%]$")
2017 local a
= parseoperand(params
[2])
2018 if a
.mode
~= "rmR" then werror("bad operand mode") end
2020 rex
= sz
== "q" and 8 or 0
2023 op64
= match(params
[2], "^%[%s*(.-)%s*%]$")
2024 local a
= parseoperand(params
[1])
2026 if a
.mode
~= "rmR" then werror("bad operand mode") end
2028 rex
= sz
== "q" and 8 or 0
2031 if sub(a
.mode
, 1, 1) ~= "r" or a
.opsize
~= "q" then
2032 werror("bad operand mode")
2039 opcode
= 0xb8 + band(a
.reg
, 7)
2041 rex
= a
.reg
> 7 and 9 or 8
2044 local psz
, sk
= wputop(sz
, opcode
, rex
, nil, vreg
)
2045 wvreg("opcode", vreg
, psz
, sk
)
2046 waction("IMM_D", format("(unsigned int)(%s)", op64
))
2047 waction("IMM_D", format("(unsigned int)((%s)>>32)", op64
))
2051 ------------------------------------------------------------------------------
-- Pseudo-opcodes for data storage.
-- Called without params it returns the parameter description. Otherwise every
-- parameter must be an immediate whose explicit size, if any, matches the
-- size letter taken from the second character of the opcode name
-- (".byte" -> "b", ".aword" -> "a" -> addrsize, ...).
local function op_data(params)
  if not params then return "imm..." end
  local sz = sub(params.op, 2, 2)
  if sz == "a" then sz = addrsize end
  for _, p in ipairs(params) do
    local a = parseoperand(p)
    if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
      werror("bad mode or size in `"..p.."'")
    end
    if a.mode == "iJ" then
      wputlabel("IMM_", a.imm, 1)
    else
      wputszarg(sz, a.imm)
    end
    -- Flush before the next item could exceed the section position limit.
    if secpos+2 > maxsecpos then wflush() end
  end
end

map_op[".byte_*"] = op_data
map_op[".sbyte_*"] = op_data
map_op[".word_*"] = op_data
map_op[".dword_*"] = op_data
map_op[".aword_*"] = op_data
2078 ------------------------------------------------------------------------------
-- Pseudo-opcode to mark the position where the action list is to be emitted.
-- Without params it returns the parameter description. Otherwise it queues a
-- deferred writer via wline that calls writeactions with the given name.
map_op[".actionlist_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeactions(out, name) end)
end
-- Pseudo-opcode to mark the position where the global enum is to be emitted.
-- Without params it returns the parameter description. Otherwise it queues a
-- deferred writer via wline that calls writeglobals with the given prefix.
map_op[".globals_1"] = function(params)
  if not params then return "prefix" end
  local prefix = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobals(out, prefix) end)
end
-- Pseudo-opcode to mark the position where the global names are to be emitted.
-- Without params it returns the parameter description. Otherwise it queues a
-- deferred writer via wline that calls writeglobalnames with the given name.
map_op[".globalnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobalnames(out, name) end)
end
-- Pseudo-opcode to mark the position where the extern names are to be emitted.
-- Without params it returns the parameter description. Otherwise it queues a
-- deferred writer via wline that calls writeexternnames with the given name.
map_op[".externnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeexternnames(out, name) end)
end
2108 ------------------------------------------------------------------------------
2110 -- Label pseudo-opcode (converted from trailing colon form).
2111 map_op
[".label_2"] = function(params
)
2112 if not params
then return "[1-9] | ->global | =>pcexpr [, addr]" end
2113 if secpos
+2 > maxsecpos
then wflush() end
2114 local a
= parseoperand(params
[1])
2115 local mode
, imm
= a
.mode
, a
.imm
2116 if type(imm
) == "number" and (mode
== "iJ" or (imm
>= 1 and imm
<= 9)) then
2117 -- Local label (1: ... 9:) or global label (->global:).
2118 waction("LABEL_LG", nil, 1)
2120 elseif mode
== "iJ" then
2121 -- PC label (=>pcexpr:).
2122 waction("LABEL_PC", imm
)
2124 werror("bad label definition")
2126 -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
2127 local addr
= params
[2]
2129 local a
= parseoperand(addr
)
2130 if a
.mode
== "iPJ" then
2131 waction("SETLABEL", a
.imm
)
2133 werror("bad label assignment")
2137 map_op
[".label_1"] = map_op
[".label_2"]
2139 ------------------------------------------------------------------------------
2141 -- Alignment pseudo-opcode.
2142 map_op
[".align_1"] = function(params
)
2143 if not params
then return "numpow2" end
2144 if secpos
+1 > maxsecpos
then wflush() end
2145 local align
= tonumber(params
[1]) or map_opsizenum
[map_opsize
[params
[1]]
]
2148 -- Must be a power of 2 in the range (2 ... 256).
2152 waction("ALIGN", nil, 1)
2153 wputxb(align
-1) -- Action byte is 2**n-1.
2158 werror("bad alignment")
2161 -- Spacing pseudo-opcode.
2162 map_op
[".space_2"] = function(params
)
2163 if not params
then return "num [, filler]" end
2164 if secpos
+1 > maxsecpos
then wflush() end
2165 waction("SPACE", params
[1])
2166 local fill
= params
[2]
2168 fill
= tonumber(fill
)
2169 if not fill
or fill
< 0 or fill
> 255 then werror("bad filler") end
2173 map_op
[".space_1"] = map_op
[".space_2"]
2175 ------------------------------------------------------------------------------
2177 -- Pseudo-opcode for (primitive) type definitions (map to C types).
2178 map_op
[".type_3"] = function(params
, nparams
)
2180 return nparams
== 2 and "name, ctype" or "name, ctype, reg"
2182 local name
, ctype
, reg
= params
[1], params
[2], params
[3]
2183 if not match(name
, "^[%a_][%w_]*$") then
2184 werror("bad type name `"..name
.."'")
2186 local tp
= map_type
[name
]
2188 werror("duplicate type `"..name
.."'")
2190 if reg
and not map_reg_valid_base
[reg
] then
2191 werror("bad base register `"..(map_reg_rev
[reg
] or reg
).."'")
2193 -- Add #type to defines. A bit unclean to put it in map_archdef.
2194 map_archdef
["#"..name
] = "sizeof("..ctype
..")"
2195 -- Add new type and emit shortcut define.
2196 local num
= ctypenum
+ 1
2199 ctypefmt
= format("Dt%X(%%s)", num
),
2202 wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num
, ctype
))
2205 map_op
[".type_2"] = map_op
[".type_3"]
2207 -- Dump type definitions.
2208 local function dumptypes(out
, lvl
)
2210 for name
in pairs(map_type
) do t
[#t
+1] = name
end
2212 out
:write("Type definitions:\n")
2213 for _
,name
in ipairs(t
) do
2214 local tp
= map_type
[name
]
2215 local reg
= tp
.reg
and map_reg_rev
[tp
.reg
] or ""
2216 out
:write(format(" %-20s %-20s %s\n", name
, tp
.ctype
, reg
))
2221 ------------------------------------------------------------------------------
2223 -- Set the current section.
2224 function _M
.section(num
)
2227 wflush(true) -- SECTION is a terminal action.
2230 ------------------------------------------------------------------------------
2232 -- Dump architecture description.
2233 function _M
.dumparch(out
)
2234 out
:write(format("DynASM %s version %s, released %s\n\n",
2235 _info
.arch
, _info
.version
, _info
.release
))
2240 -- Dump all user defined elements.
2241 function _M
.dumpdef(out
, lvl
)
2243 dumpglobals(out
, lvl
)
2244 dumpexterns(out
, lvl
)
2247 ------------------------------------------------------------------------------
2249 -- Pass callbacks from/to the DynASM core.
2250 function _M
.passcb(wl
, we
, wf
, ww
)
2251 wline
, werror
, wfatal
, wwarn
= wl
, we
, wf
, ww
-- Setup the arch-specific module.
-- Stores the architecture name and the option table in g_arch and g_opt.
function _M.setup(arch, opt)
  g_arch, g_opt = arch, opt
end
-- Merge the core maps and the arch-specific maps.
-- Lookups that miss in map_op fall back to map_coreop, and lookups that miss
-- in map_def fall back to map_archdef. Returns both tables.
function _M.mergemaps(map_coreop, map_def)
  setmetatable(map_op, { __index = map_coreop })
  setmetatable(map_def, { __index = map_archdef })
  return map_op, map_def
end
2269 ------------------------------------------------------------------------------