----------------------------------------------------------------------------
-- LuaJIT x86/x64 disassembler module.
--
-- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.
--
-- Sending small code snippets to an external disassembler and mixing the
-- output with our own stuff was too fragile. So I had to bite the bullet
-- and write yet another x86 disassembler. Oh well ...
--
-- The output format is very similar to what ndisasm generates. But it has
-- been developed independently by looking at the opcode tables from the
-- Intel and AMD manuals. The supported instruction set is quite extensive
-- and reflects what a current generation Intel or AMD CPU implements in
-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
-- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM)
-- instructions.
--
-- Notes:
-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
-- * No attempt at optimization has been made -- it's fast enough for my needs.
-- * The public API may change when more architectures are added.
------------------------------------------------------------------------------
-- Cache the string library functions used throughout this module in locals.
local sub, byte, format = string.sub, string.byte, string.format
local match, gmatch, gsub = string.match, string.gmatch, string.gsub
local lower, rep = string.lower, string.rep
-- Bit operations module; tohex is cached in a local alongside it.
local bit = require("bit")
local tohex = bit.tohex
34 -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
37 [0]="addBmr","addVmr","addBrm","addVrm","addBai","addVai","push es","pop es",
38 "orBmr","orVmr","orBrm","orVrm","orBai","orVai","push cs","opc2*",
40 "adcBmr","adcVmr","adcBrm","adcVrm","adcBai","adcVai","push ss","pop ss",
41 "sbbBmr","sbbVmr","sbbBrm","sbbVrm","sbbBai","sbbVai","push ds","pop ds",
43 "andBmr","andVmr","andBrm","andVrm","andBai","andVai","es:seg","daa",
44 "subBmr","subVmr","subBrm","subVrm","subBai","subVai","cs:seg","das",
46 "xorBmr","xorVmr","xorBrm","xorVrm","xorBai","xorVai","ss:seg","aaa",
47 "cmpBmr","cmpVmr","cmpBrm","cmpVrm","cmpBai","cmpVai","ds:seg","aas",
49 "incVR","incVR","incVR","incVR","incVR","incVR","incVR","incVR",
50 "decVR","decVR","decVR","decVR","decVR","decVR","decVR","decVR",
52 "pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR",
53 "popUR","popUR","popUR","popUR","popUR","popUR","popUR","popUR",
55 "sz*pushaw,pusha","sz*popaw,popa","boundVrm","arplWmr",
56 "fs:seg","gs:seg","o16:","a16",
57 "pushUi","imulVrmi","pushBs","imulVrms",
58 "insb","insVS","outsb","outsVS",
60 "joBj","jnoBj","jbBj","jnbBj","jzBj","jnzBj","jbeBj","jaBj",
61 "jsBj","jnsBj","jpeBj","jpoBj","jlBj","jgeBj","jleBj","jgBj",
63 "arith!Bmi","arith!Vmi","arith!Bmi","arith!Vms",
64 "testBmr","testVmr","xchgBrm","xchgVrm",
65 "movBmr","movVmr","movBrm","movVrm",
66 "movVmg","leaVrm","movWgm","popUm",
68 "nop*xchgVaR|pause|xchgWaR|repne nop","xchgVaR","xchgVaR","xchgVaR",
69 "xchgVaR","xchgVaR","xchgVaR","xchgVaR",
70 "sz*cbw,cwde,cdqe","sz*cwd,cdq,cqo","call farViw","wait",
71 "sz*pushfw,pushf","sz*popfw,popf","sahf","lahf",
73 "movBao","movVao","movBoa","movVoa",
74 "movsb","movsVS","cmpsb","cmpsVS",
75 "testBai","testVai","stosb","stosVS",
76 "lodsb","lodsVS","scasb","scasVS",
78 "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
79 "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
81 "shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi",
82 "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
84 "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
85 "fp*0","fp*1","fp*2","fp*3","fp*4","fp*5","fp*6","fp*7",
87 "loopneBj","loopeBj","loopBj","sz*jcxzBj,jecxzBj,jrcxzBj",
88 "inBau","inVau","outBua","outVua",
89 "callVj","jmpVj","jmp farViw","jmpBj","inBad","inVad","outBda","outVda",
91 "lock:","int1","repne:rep","rep:","hlt","cmc","testb!Bm","testv!Vm",
92 "clc","stc","cli","sti","cld","std","incb!Bm","incd!Vm",
94 assert(#map_opc1_32
== 255)
-- Map for 1st opcode byte in 64 bit mode (overrides only).
-- Keys present here take precedence; every other opcode byte falls through
-- to map_opc1_32 via the __index metamethod. An entry of `false` is a real
-- key, so it does NOT fall through: the lookup yields false and the
-- dispatcher reports the opcode as unknown.
local map_opc1_64 = setmetatable({
  [0x06]=false, [0x07]=false, [0x0e]=false,
  [0x16]=false, [0x17]=false, [0x1e]=false, [0x1f]=false,
  [0x27]=false, [0x2f]=false, [0x37]=false, [0x3f]=false,
  [0x60]=false, [0x61]=false, [0x62]=false, [0x63]="movsxdVrDmt", [0x67]="a32:",
  [0x40]="rex*",   [0x41]="rex*b",   [0x42]="rex*x",   [0x43]="rex*xb",
  [0x44]="rex*r",  [0x45]="rex*rb",  [0x46]="rex*rx",  [0x47]="rex*rxb",
  [0x48]="rex*w",  [0x49]="rex*wb",  [0x4a]="rex*wx",  [0x4b]="rex*wxb",
  [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
  [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false,
  [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
}, { __index = map_opc1_32 })
110 -- Map for 2nd opcode byte (0F xx). True CISC hell. Hey, I told you.
111 -- Prefix dependent MMX/SSE opcodes: (none)|rep|o16|repne, -|F3|66|F2
114 [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
115 "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
117 "movupsXrm|movssXrm|movupdXrm|movsdXrm",
118 "movupsXmr|movssXmr|movupdXmr|movsdXmr",
119 "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
120 "movlpsXmr||movlpdXmr",
121 "unpcklpsXrm||unpcklpdXrm",
122 "unpckhpsXrm||unpckhpdXrm",
123 "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
124 "movhpsXmr||movhpdXmr",
125 "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
126 "hintnopVm","hintnopVm","hintnopVm","hintnopVm",
128 "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
129 "movapsXrm||movapdXrm",
130 "movapsXmr||movapdXmr",
131 "cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt",
132 "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
133 "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
134 "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
135 "ucomissXrm||ucomisdXrm",
136 "comissXrm||comisdXrm",
138 "wrmsr","rdtsc","rdmsr","rdpmc","sysenter","sysexit",nil,"getsec",
139 "opc3*38",nil,"opc3*3a",nil,nil,nil,nil,nil,
141 "cmovoVrm","cmovnoVrm","cmovbVrm","cmovnbVrm",
142 "cmovzVrm","cmovnzVrm","cmovbeVrm","cmovaVrm",
143 "cmovsVrm","cmovnsVrm","cmovpeVrm","cmovpoVrm",
144 "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
146 "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
147 "rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm",
148 "andpsXrm||andpdXrm","andnpsXrm||andnpdXrm",
149 "orpsXrm||orpdXrm","xorpsXrm||xorpdXrm",
150 "addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm",
151 "cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm",
152 "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
153 "subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm",
154 "divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm",
156 "punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm",
157 "pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm",
158 "punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm",
159 "||punpcklqdqXrm","||punpckhqdqXrm",
160 "movPrVSm","movqMrm|movdquXrm|movdqaXrm",
162 "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
163 "pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
164 "pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|",
165 "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
167 "||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm",
168 "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
170 "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
171 "jsVj","jnsVj","jpeVj","jpoVj","jlVj","jgeVj","jleVj","jgVj",
173 "setoBm","setnoBm","setbBm","setnbBm","setzBm","setnzBm","setbeBm","setaBm",
174 "setsBm","setnsBm","setpeBm","setpoBm","setlBm","setgeBm","setleBm","setgBm",
176 "push fs","pop fs","cpuid","btVmr","shldVmru","shldVmrc",nil,nil,
177 "push gs","pop gs","rsm","btsVmr","shrdVmru","shrdVmrc","fxsave!Dmp","imulVrm",
179 "cmpxchgBmr","cmpxchgVmr","$lssVrm","btrVmr",
180 "$lfsVrm","$lgsVrm","movzxVrBmt","movzxVrWmt",
181 "|popcntVrm","ud2Dp","bt!Vmu","btcVmr",
182 "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
185 "cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|",
186 "pinsrwPrWmu","pextrwDrPmu",
187 "shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp",
188 "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
190 "||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm",
191 "paddqPrm","pmullwPrm",
192 "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
193 "psubusbPrm","psubuswPrm","pminubPrm","pandPrm",
194 "paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm",
196 "pavgbPrm","psrawPrm","psradPrm","pavgwPrm",
197 "pmulhuwPrm","pmulhwPrm",
198 "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
199 "psubsbPrm","psubswPrm","pminswPrm","porPrm",
200 "paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm",
202 "|||lddquXrm","psllwPrm","pslldPrm","psllqPrm",
203 "pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$",
204 "psubbPrm","psubwPrm","psubdPrm","psubqPrm",
205 "paddbPrm","paddwPrm","padddPrm","ud",
207 assert(map_opc2
[255] == "ud")
209 -- Map for three-byte opcodes. Can't wait for their next invention.
211 ["38"] = { -- [66] 0f 38 xx
213 [0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm",
214 "pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm",
215 "psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm",
218 "||pblendvbXrma",nil,nil,nil,
219 "||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm",
221 "pabsbPrm","pabswPrm","pabsdPrm",nil,
223 "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
224 "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
225 "||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm",
228 "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
229 "||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm",
230 "||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm",
231 "||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm",
233 "||pmulddXrm","||phminposuwXrm",
235 [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
238 ["3a"] = { -- [66] 0f 3a xx
240 [0x00]=nil,nil,nil,nil,nil,nil,nil,nil,
241 "||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu",
242 "||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu",
245 "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
246 nil,nil,nil,nil,nil,nil,nil,nil,
248 "||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil,
250 [0x40] = "||dppsXrmu",
251 [0x41] = "||dppdXrmu",
252 [0x42] = "||mpsadbwXrmu",
254 [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
255 [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
259 -- Map for VMX/SVM opcodes 0F 01 C0-FF (sgdt group with register operands).
261 [0xc1]="vmcall",[0xc2]="vmlaunch",[0xc3]="vmresume",[0xc4]="vmxoff",
262 [0xc8]="monitor",[0xc9]="mwait",
263 [0xd8]="vmrun",[0xd9]="vmmcall",[0xda]="vmload",[0xdb]="vmsave",
264 [0xdc]="stgi",[0xdd]="clgi",[0xde]="skinit",[0xdf]="invlpga",
265 [0xf8]="swapgs",[0xf9]="rdtscp",
268 -- Map for FP opcodes. And you thought stack machines are simple?
270 -- D8-DF 00-BF: opcodes with a memory operand.
272 [0]="faddFm","fmulFm","fcomFm","fcompFm","fsubFm","fsubrFm","fdivFm","fdivrFm",
273 "fldFm",nil,"fstFm","fstpFm","fldenvVm","fldcwWm","fnstenvVm","fnstcwWm",
275 "fiaddDm","fimulDm","ficomDm","ficompDm",
276 "fisubDm","fisubrDm","fidivDm","fidivrDm",
278 "fildDm","fisttpDm","fistDm","fistpDm",nil,"fld twordFmp",nil,"fstp twordFmp",
280 "faddGm","fmulGm","fcomGm","fcompGm","fsubGm","fsubrGm","fdivGm","fdivrGm",
282 "fldGm","fisttpQm","fstGm","fstpGm","frstorDmp",nil,"fnsaveDmp","fnstswWm",
284 "fiaddWm","fimulWm","ficomWm","ficompWm",
285 "fisubWm","fisubrWm","fidivWm","fidivrWm",
287 "fildWm","fisttpWm","fistWm","fistpWm",
288 "fbld twordFmp","fildQm","fbstp twordFmp","fistpQm",
289 -- xx C0-FF: opcodes with a pseudo-register operand.
291 "faddFf","fmulFf","fcomFf","fcompFf","fsubFf","fsubrFf","fdivFf","fdivrFf",
293 "fldFf","fxchFf",{"fnop"},nil,
294 {"fchs","fabs",nil,nil,"ftst","fxam"},
295 {"fld1","fldl2t","fldl2e","fldpi","fldlg2","fldln2","fldz"},
296 {"f2xm1","fyl2x","fptan","fpatan","fxtract","fprem1","fdecstp","fincstp"},
297 {"fprem","fyl2xp1","fsqrt","fsincos","frndint","fscale","fsin","fcos"},
299 "fcmovbFf","fcmoveFf","fcmovbeFf","fcmovuFf",nil,{nil,"fucompp"},nil,nil,
301 "fcmovnbFf","fcmovneFf","fcmovnbeFf","fcmovnuFf",
302 {nil,nil,"fnclex","fninit"},"fucomiFf","fcomiFf",nil,
304 "fadd toFf","fmul toFf",nil,nil,
305 "fsub toFf","fsubr toFf","fdivr toFf","fdiv toFf",
307 "ffreeFf",nil,"fstFf","fstpFf","fucomFf","fucompFf",nil,nil,
309 "faddpFf","fmulpFf",nil,{nil,"fcompp"},
310 "fsubrpFf","fsubpFf","fdivrpFf","fdivpFf",
312 nil,nil,nil,nil,{"fnstsw ax"},"fucomipFf","fcomipFf",nil,
314 assert(map_opcfp
[126] == "fcomipFf")
-- Map for opcode groups. The subkey is sp from the ModRM byte.
-- Each group is indexed with sp+1 (bits 3..5 of the ModRM byte, plus one for
-- Lua's 1-based arrays). A nil slot means the encoding has no entry and is
-- reported as unknown by the dispatcher.
local map_opcgroup = {
  arith = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" },
  shift = { "rol", "ror", "rcl", "rcr", "shl", "shr", "sal", "sar" },
  testb = { "testBmi", "testBmi", "not", "neg", "mul", "imul", "div", "idiv" },
  testv = { "testVmi", "testVmi", "not", "neg", "mul", "imul", "div", "idiv" },
  incb = { "inc", "dec" },
  incd = { "inc", "dec", "callUmp", "$call farDmp",
	   "jmpUmp", "$jmp farDmp", "pushUm" },
  sldt = { "sldt", "str", "lldt", "ltr", "verr", "verw" },
  sgdt = { "vm*$sgdt", "vm*$sidt", "$lgdt", "vm*$lidt",
	   "smsw", nil, "lmsw", "vm*$invlpg" },
  bt = { nil, nil, nil, nil, "bt", "bts", "btr", "btc" },
  cmpxchg = { nil, "sz*,cmpxchg8bQmp,cmpxchg16bXmp", nil, nil,
	      nil, nil, "vmptrld|vmxon|vmclear", "vmptrst" },
  pshiftw = { nil, nil, "psrlw", nil, "psraw", nil, "psllw" },
  pshiftd = { nil, nil, "psrld", nil, "psrad", nil, "pslld" },
  pshiftq = { nil, nil, "psrlq", nil, nil, nil, "psllq" },
  pshiftdq = { nil, nil, "psrlq", "psrldq", nil, nil, "psllq", "pslldq" },
  fxsave = { "$fxsave", "$fxrstor", "$ldmxcsr", "$stmxcsr",
	     nil, "lfenceDp$", "mfenceDp$", "sfenceDp$clflush" },
  prefetch = { "prefetch", "prefetchw" },
  prefetcht = { "prefetchnta", "prefetcht0", "prefetcht1", "prefetcht2" },
}
341 ------------------------------------------------------------------------------
343 -- Maps for register names.
345 B
= { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
346 "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
347 B64
= { "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil",
348 "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
349 W
= { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
350 "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" },
351 D
= { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
352 "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" },
353 Q
= { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
354 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
355 M
= { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
356 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
357 X
= { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
358 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
-- Segment register names, indexed by sp+1 (used for the "g" operand pattern).
local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
362 -- Maps for size names.
364 B
= 1, W
= 2, D
= 4, Q
= 8, M
= 8, X
= 16,
366 local map_sz2prefix
= {
367 B
= "byte", W
= "word", D
= "dword",
369 M
= "qword", X
= "xword",
370 F
= "dword", G
= "qword", -- No need for sizes/register names for these two.
373 ------------------------------------------------------------------------------
375 -- Output a nicely formatted line with an opcode and operands.
376 local function putop(ctx
, text
, operands
)
377 local code
, pos
, hex
= ctx
.code
, ctx
.pos
, ""
378 local hmax
= ctx
.hexdump
380 for i
=ctx
.start
,pos
-1 do
381 hex
= hex
..format("%02X", byte(code
, i
, i
))
383 if #hex
> hmax
then hex
= sub(hex
, 1, hmax
)..". "
384 else hex
= hex
..rep(" ", hmax
-#hex
+2) end
386 if operands
then text
= text
.." "..operands
end
387 if ctx
.o16
then text
= "o16 "..text
; ctx
.o16
= false end
388 if ctx
.a32
then text
= "a32 "..text
; ctx
.a32
= false end
389 if ctx
.rep
then text
= ctx
.rep
.." "..text
; ctx
.rep
= false end
391 local t
= (ctx
.rexw
and "w" or "")..(ctx
.rexr
and "r" or "")..
392 (ctx
.rexx
and "x" or "")..(ctx
.rexb
and "b" or "")
393 if t
~= "" then text
= "rex."..t
.." "..text
end
394 ctx
.rexw
= false; ctx
.rexr
= false; ctx
.rexx
= false; ctx
.rexb
= false
398 local text2
, n
= gsub(text
, "%[", "["..ctx
.seg
..":")
399 if n
== 0 then text
= ctx
.seg
.." "..text
else text
= text2
end
402 if ctx
.lock
then text
= "lock "..text
; ctx
.lock
= false end
405 local sym
= ctx
.symtab
[imm
]
406 if sym
then text
= text
.."\t->"..sym
end
408 ctx
.out(format("%08x %s%s\n", ctx
.addr
+ctx
.start
, hex
, text
))
-- Clear all prefix flags.
-- Resets the o16, seg, lock, rep, rexw, rexr, rexx, rexb, rex and a32 fields
-- of the context to false. No other field of ctx is touched.
local function clearprefixes(ctx)
  ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
  ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
  ctx.rex = false; ctx.a32 = false
end
421 -- Fallback for incomplete opcodes at the end.
422 local function incomplete(ctx
)
425 return putop(ctx
, "(incomplete)")
428 -- Fallback for unknown opcodes.
429 local function unknown(ctx
)
431 return putop(ctx
, "(unknown)")
434 -- Return an immediate of the specified size.
435 local function getimm(ctx
, pos
, n
)
436 if pos
+n
-1 > ctx
.stop
then return incomplete(ctx
) end
437 local code
= ctx
.code
439 local b1
= byte(code
, pos
, pos
)
442 local b1
, b2
= byte(code
, pos
, pos
+1)
445 local b1
, b2
, b3
, b4
= byte(code
, pos
, pos
+3)
446 local imm
= b1
+b2
*256+b3
*65536+b4
*16777216
452 -- Process pattern string and generate the operands.
453 local function putpat(ctx
, name
, pat
)
454 local operands
, regs
, sz
, mode
, sp
, rm
, sc
, rx
, sdisp
455 local code
, pos
, stop
= ctx
.code
, ctx
.pos
, ctx
.stop
457 -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz
458 for p
in gmatch(pat
, ".") do
460 if p
== "V" or p
== "U" then
461 if ctx
.rexw
then sz
= "Q"; ctx
.rexw
= false
462 elseif ctx
.o16
then sz
= "W"; ctx
.o16
= false
463 elseif p
== "U" and ctx
.x64
then sz
= "Q"
467 if ctx
.rexw
then sz
= "Q"; ctx
.rexw
= false else sz
= "D" end
471 regs
= ctx
.rex
and map_regs
.B64
or map_regs
.B
472 elseif match(p
, "[WDQMXFG]") then
476 sz
= ctx
.o16
and "X" or "M"; ctx
.o16
= false
479 name
= name
..lower(sz
)
481 local imm
= getimm(ctx
, pos
, 1); if not imm
then return end
482 x
= imm
<= 127 and format("+0x%02x", imm
)
483 or format("-0x%02x", 256-imm
)
486 local imm
= getimm(ctx
, pos
, 1); if not imm
then return end
487 x
= format("0x%02x", imm
)
490 local imm
= getimm(ctx
, pos
, 2); if not imm
then return end
491 x
= format("0x%x", imm
)
493 elseif p
== "o" then -- [offset]
495 local imm1
= getimm(ctx
, pos
, 4); if not imm1
then return end
496 local imm2
= getimm(ctx
, pos
+4, 4); if not imm2
then return end
497 x
= format("[0x%08x%08x]", imm2
, imm1
)
500 local imm
= getimm(ctx
, pos
, 4); if not imm
then return end
501 x
= format("[0x%08x]", imm
)
504 elseif p
== "i" or p
== "I" then
505 local n
= map_sz2n
[sz
]
506 if n
== 8 and ctx
.x64
and p
== "I" then
507 local imm1
= getimm(ctx
, pos
, 4); if not imm1
then return end
508 local imm2
= getimm(ctx
, pos
+4, 4); if not imm2
then return end
509 x
= format("0x%08x%08x", imm2
, imm1
)
511 if n
== 8 then n
= 4 end
512 local imm
= getimm(ctx
, pos
, n
); if not imm
then return end
513 if sz
== "Q" and (imm
< 0 or imm
> 0x7fffffff) then
514 imm
= (0xffffffff+1)-imm
515 x
= format(imm
> 65535 and "-0x%08x" or "-0x%x", imm
)
517 x
= format(imm
> 65535 and "0x%08x" or "0x%x", imm
)
522 local n
= map_sz2n
[sz
]
523 if n
== 8 then n
= 4 end
524 local imm
= getimm(ctx
, pos
, n
); if not imm
then return end
525 if sz
== "B" and imm
> 127 then imm
= imm
-256
526 elseif imm
> 2147483647 then imm
= imm
-4294967296 end
528 imm
= imm
+ pos
+ ctx
.addr
529 if imm
> 4294967295 and not ctx
.x64
then imm
= imm
-4294967296 end
532 x
= format("word 0x%04x", imm
%65536)
534 local lo
= imm
% 0x1000000
535 x
= format("0x%02x%06x", (imm
-lo
) / 0x1000000, lo
)
540 local r
= byte(code
, pos
-1, pos
-1)%8
541 if ctx
.rexb
then r
= r
+ 8; ctx
.rexb
= false end
543 elseif p
== "a" then x
= regs
[1]
544 elseif p
== "c" then x
= "cl"
545 elseif p
== "d" then x
= "dx"
546 elseif p
== "1" then x
= "1"
551 if pos
> stop
then return incomplete(ctx
) end
552 mode
= byte(code
, pos
, pos
)
555 rm
= mode
%8; mode
= (mode
-rm
)/8
556 sp
= mode
%8; mode
= (mode
-sp
)/8
560 if pos
> stop
then return incomplete(ctx
) end
561 sc
= byte(code
, pos
, pos
)
563 rm
= sc
%8; sc
= (sc
-rm
)/8
564 rx
= sc
%8; sc
= (sc
-rx
)/8
565 if ctx
.rexx
then rx
= rx
+ 8; ctx
.rexx
= false end
566 if rx
== 4 then rx
= nil end
568 if mode
> 0 or rm
== 5 then
570 if dsz
~= 1 then dsz
= 4 end
571 local disp
= getimm(ctx
, pos
, dsz
); if not disp
then return end
572 if mode
== 0 then rm
= nil end
573 if rm
or rx
or (not sc
and ctx
.x64
and not ctx
.a32
) then
574 if dsz
== 1 and disp
> 127 then
575 sdisp
= format("-0x%x", 256-disp
)
576 elseif disp
>= 0 and disp
<= 0x7fffffff then
577 sdisp
= format("+0x%x", disp
)
579 sdisp
= format("-0x%x", (0xffffffff+1)-disp
)
582 sdisp
= format(ctx
.x64
and not ctx
.a32
and
583 not (disp
>= 0 and disp
<= 0x7fffffff)
584 and "0xffffffff%08x" or "0x%08x", disp
)
589 if rm
and ctx
.rexb
then rm
= rm
+ 8; ctx
.rexb
= false end
590 if ctx
.rexr
then sp
= sp
+ 8; ctx
.rexr
= false end
593 if mode
== 3 then x
= regs
[rm
+1]
595 local aregs
= ctx
.a32
and map_regs
.D
or ctx
.aregs
596 local srm
, srx
= "", ""
597 if rm
then srm
= aregs
[rm
+1]
598 elseif not sc
and ctx
.x64
and not ctx
.a32
then srm
= "rip" end
601 if rm
then srm
= srm
.."+" end
603 if sc
> 0 then srx
= srx
.."*"..(2^sc
) end
605 x
= format("[%s%s%s]", srm
, srx
, sdisp
)
608 (not match(pat
, "[aRrgp]") or match(pat
, "t")) then -- Yuck.
609 x
= map_sz2prefix
[sz
].." "..x
611 elseif p
== "r" then x
= regs
[sp
+1]
612 elseif p
== "g" then x
= map_segregs
[sp
+1]
613 elseif p
== "p" then -- Suppress prefix.
614 elseif p
== "f" then x
= "st"..rm
616 if sp
== 0 and ctx
.lock
and not ctx
.x64
then
617 x
= "CR8"; ctx
.lock
= false
621 elseif p
== "y" then x
= "DR"..sp
622 elseif p
== "z" then x
= "TR"..sp
625 error("bad pattern `"..pat
.."'")
628 if x
then operands
= operands
and operands
..", "..x
or x
end
631 return putop(ctx
, name
, operands
)
634 -- Forward declaration.
637 -- Fetch and cache MRM byte.
638 local function getmrm(ctx
)
642 if pos
> ctx
.stop
then return nil end
643 mrm
= byte(ctx
.code
, pos
, pos
)
650 -- Dispatch to handler depending on pattern.
651 local function dispatch(ctx
, opat
, patgrp
)
652 if not opat
then return unknown(ctx
) end
653 if match(opat
, "%|") then -- MMX/SSE variants depending on prefix.
656 p
= ctx
.rep
=="rep" and "%|([^%|]*)" or "%|[^%|]*%|[^%|]*%|([^%|]*)"
658 elseif ctx
.o16
then p
= "%|[^%|]*%|([^%|]*)"; ctx
.o16
= false
659 else p
= "^[^%|]*" end
660 opat
= match(opat
, p
)
661 if not opat
then return unknown(ctx
) end
662 -- ctx.rep = false; ctx.o16 = false
663 --XXX fails for 66 f2 0f 38 f1 06 crc32 eax,WORD PTR [esi]
664 --XXX remove in branches?
666 if match(opat
, "%$") then -- reg$mem variants.
667 local mrm
= getmrm(ctx
); if not mrm
then return incomplete(ctx
) end
668 opat
= match(opat
, mrm
>= 192 and "^[^%$]*" or "%$(.*)")
669 if opat
== "" then return unknown(ctx
) end
671 if opat
== "" then return unknown(ctx
) end
672 local name
, pat
= match(opat
, "^([a-z0-9 ]*)(.*)")
673 if pat
== "" and patgrp
then pat
= patgrp
end
674 return map_act
[sub(pat
, 1, 1)](ctx
, name
, pat
)
677 -- Get a pattern from an opcode map and dispatch to handler.
678 local function dispatchmap(ctx
, opcmap
)
680 local opat
= opcmap
[byte(ctx
.code
, pos
, pos
)]
683 return dispatch(ctx
, opat
)
686 -- Map for action codes. The key is the first char after the name.
688 -- Simple opcodes without operands.
689 [""] = function(ctx
, name
, pat
)
690 return putop(ctx
, name
)
693 -- Operand size chars fall right through.
694 B
= putpat
, W
= putpat
, D
= putpat
, Q
= putpat
,
695 V
= putpat
, U
= putpat
, T
= putpat
,
696 M
= putpat
, X
= putpat
, P
= putpat
,
697 F
= putpat
, G
= putpat
,
700 [":"] = function(ctx
, name
, pat
)
701 ctx
[pat
== ":" and name
or sub(pat
, 2)] = name
702 if ctx
.pos
- ctx
.start
> 5 then return unknown(ctx
) end -- Limit #prefixes.
705 -- Chain to special handler specified by name.
706 ["*"] = function(ctx
, name
, pat
)
707 return map_act
[name
](ctx
, name
, sub(pat
, 2))
710 -- Use named subtable for opcode group.
711 ["!"] = function(ctx
, name
, pat
)
712 local mrm
= getmrm(ctx
); if not mrm
then return incomplete(ctx
) end
713 return dispatch(ctx
, map_opcgroup
[name
][((mrm
-(mrm
%8))/8)%8+1], sub(pat
, 2))
716 -- o16,o32[,o64] variants.
717 sz
= function(ctx
, name
, pat
)
718 if ctx
.o16
then ctx
.o16
= false
720 pat
= match(pat
, ",(.*)")
722 local p
= match(pat
, ",(.*)")
723 if p
then pat
= p
; ctx
.rexw
= false end
726 pat
= match(pat
, "^[^,]*")
727 return dispatch(ctx
, pat
)
730 -- Two-byte opcode dispatch.
731 opc2
= function(ctx
, name
, pat
)
732 return dispatchmap(ctx
, map_opc2
)
735 -- Three-byte opcode dispatch.
736 opc3
= function(ctx
, name
, pat
)
737 return dispatchmap(ctx
, map_opc3
[pat
])
741 vm
= function(ctx
, name
, pat
)
742 return dispatch(ctx
, map_opcvm
[ctx
.mrm
])
745 -- Floating point opcode dispatch.
746 fp
= function(ctx
, name
, pat
)
747 local mrm
= getmrm(ctx
); if not mrm
then return incomplete(ctx
) end
749 local idx
= pat
*8 + ((mrm
-rm
)/8)%8
750 if mrm
>= 192 then idx
= idx
+ 64 end
751 local opat
= map_opcfp
[idx
]
752 if type(opat
) == "table" then opat
= opat
[rm
+1] end
753 return dispatch(ctx
, opat
)
757 rex
= function(ctx
, name
, pat
)
758 if ctx
.rex
then return unknown(ctx
) end -- Only 1 REX prefix allowed.
759 for p
in gmatch(pat
, ".") do ctx
["rex"..p
] = true end
763 -- Special case for nop with REX prefix.
764 nop
= function(ctx
, name
, pat
)
765 return dispatch(ctx
, ctx
.rex
and pat
or "nop")
769 ------------------------------------------------------------------------------
771 -- Disassemble a block of code.
772 local function disass_block(ctx
, ofs
, len
)
773 if not ofs
then ofs
= 0 end
774 local stop
= len
and ofs
+len
or #ctx
.code
782 while ctx
.pos
<= stop
do dispatchmap(ctx
, ctx
.map1
) end
783 if ctx
.pos
~= ctx
.start
then incomplete(ctx
) end
786 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
787 local function create(code
, addr
, out
)
790 ctx
.addr
= (addr
or 0) - 1
791 ctx
.out
= out
or io
.write
793 ctx
.disass
= disass_block
796 ctx
.map1
= map_opc1_32
797 ctx
.aregs
= map_regs
.D
801 local function create64(code
, addr
, out
)
802 local ctx
= create(code
, addr
, out
)
804 ctx
.map1
= map_opc1_64
805 ctx
.aregs
= map_regs
.Q
-- Simple API: disassemble code (a string) at address and output via out.
-- Builds a context with create(code, addr, out) and runs its disass method
-- with no offset/length arguments. Returns nothing.
local function disass(code, addr, out)
  create(code, addr, out):disass()
end
-- Simple API, 64 bit variant: disassemble code (a string) at address and
-- output via out, using a context built by create64. Returns nothing.
local function disass64(code, addr, out)
  create64(code, addr, out):disass()
end
-- Return register name for RID.
-- RIDs 0..7 index the 32 bit general purpose names (map_regs.D); RIDs from 8
-- upwards index the xmm names (map_regs.X), so RID 8 yields the first entry.
local function regname(r)
  if r < 8 then return map_regs.D[r+1] end
  return map_regs.X[r-7]
end
-- Return register name for RID (64 bit variant).
-- RIDs 0..15 index the 64 bit general purpose names (map_regs.Q); RIDs from
-- 16 upwards index the xmm names (map_regs.X), so RID 16 yields the first
-- entry.
local function regname64(r)
  if r < 16 then return map_regs.Q[r+1] end
  return map_regs.X[r-15]
end
829 -- Public module functions.
836 regname64
= regname64