1 ----------------------------------------------------------------------------
2 -- LuaJIT x86/x64 disassembler module.
4 -- Copyright (C) 2005-2011 Mike Pall. All rights reserved.
5 -- Released under the MIT license. See Copyright Notice in luajit.h
6 ----------------------------------------------------------------------------
7 -- This is a helper module used by the LuaJIT machine code dumper module.
9 -- Sending small code snippets to an external disassembler and mixing the
10 -- output with our own stuff was too fragile. So I had to bite the bullet
11 -- and write yet another x86 disassembler. Oh well ...
13 -- The output format is very similar to what ndisasm generates. But it has
14 -- been developed independently by looking at the opcode tables from the
15 -- Intel and AMD manuals. The supported instruction set is quite extensive
16 -- and reflects what a current generation Intel or AMD CPU implements in
17 -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
-- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM)
-- instructions.
--
-- Notes:
22 -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
23 -- * No attempt at optimization has been made -- it's fast enough for my needs.
24 -- * The public API may change when more architectures are added.
25 ------------------------------------------------------------------------------
-- Cache the string library functions used by the disassembler as locals,
-- one alias per statement.
-- Slicing, raw byte access and formatted output.
local sub = string.sub
local byte = string.byte
local format = string.format
-- Pattern matching and substitution.
local match = string.match
local gmatch = string.gmatch
local gsub = string.gsub
-- Case folding and padding.
local lower = string.lower
local rep = string.rep
32 -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
35 [0]="addBmr","addVmr","addBrm","addVrm","addBai","addVai","push es","pop es",
36 "orBmr","orVmr","orBrm","orVrm","orBai","orVai","push cs","opc2*",
38 "adcBmr","adcVmr","adcBrm","adcVrm","adcBai","adcVai","push ss","pop ss",
39 "sbbBmr","sbbVmr","sbbBrm","sbbVrm","sbbBai","sbbVai","push ds","pop ds",
41 "andBmr","andVmr","andBrm","andVrm","andBai","andVai","es:seg","daa",
42 "subBmr","subVmr","subBrm","subVrm","subBai","subVai","cs:seg","das",
44 "xorBmr","xorVmr","xorBrm","xorVrm","xorBai","xorVai","ss:seg","aaa",
45 "cmpBmr","cmpVmr","cmpBrm","cmpVrm","cmpBai","cmpVai","ds:seg","aas",
47 "incVR","incVR","incVR","incVR","incVR","incVR","incVR","incVR",
48 "decVR","decVR","decVR","decVR","decVR","decVR","decVR","decVR",
50 "pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR",
51 "popUR","popUR","popUR","popUR","popUR","popUR","popUR","popUR",
53 "sz*pushaw,pusha","sz*popaw,popa","boundVrm","arplWmr",
54 "fs:seg","gs:seg","o16:","a16",
55 "pushUi","imulVrmi","pushBs","imulVrms",
56 "insb","insVS","outsb","outsVS",
58 "joBj","jnoBj","jbBj","jnbBj","jzBj","jnzBj","jbeBj","jaBj",
59 "jsBj","jnsBj","jpeBj","jpoBj","jlBj","jgeBj","jleBj","jgBj",
61 "arith!Bmi","arith!Vmi","arith!Bmi","arith!Vms",
62 "testBmr","testVmr","xchgBrm","xchgVrm",
63 "movBmr","movVmr","movBrm","movVrm",
64 "movVmg","leaVrm","movWgm","popUm",
66 "nop*xchgVaR|pause|xchgWaR|repne nop","xchgVaR","xchgVaR","xchgVaR",
67 "xchgVaR","xchgVaR","xchgVaR","xchgVaR",
68 "sz*cbw,cwde,cdqe","sz*cwd,cdq,cqo","call farViw","wait",
69 "sz*pushfw,pushf","sz*popfw,popf","sahf","lahf",
71 "movBao","movVao","movBoa","movVoa",
72 "movsb","movsVS","cmpsb","cmpsVS",
73 "testBai","testVai","stosb","stosVS",
74 "lodsb","lodsVS","scasb","scasVS",
76 "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
77 "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
79 "shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi",
80 "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
82 "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
83 "fp*0","fp*1","fp*2","fp*3","fp*4","fp*5","fp*6","fp*7",
85 "loopneBj","loopeBj","loopBj","sz*jcxzBj,jecxzBj,jrcxzBj",
86 "inBau","inVau","outBua","outVua",
87 "callVj","jmpVj","jmp farViw","jmpBj","inBad","inVad","outBda","outVda",
89 "lock:","int1","repne:rep","rep:","hlt","cmc","testb!Bm","testv!Vm",
90 "clc","stc","cli","sti","cld","std","incb!Bm","incd!Vm",
92 assert(#map_opc1_32
== 255)
94 -- Map for 1st opcode byte in 64 bit mode (overrides only).
-- Start with the two opcodes that get a different pattern string; every key
-- that is not overridden here is looked up in map_opc1_32 via __index.
local map_opc1_64 = { [0x63] = "movsxdVrDmt", [0x67] = "a32:" }
do
  -- Opcodes stored as false: the entry is present, so the __index fallback
  -- to the 32 bit map is not consulted and the lookup yields a false value.
  local disabled = {
    0x06, 0x07, 0x0e,
    0x16, 0x17, 0x1e, 0x1f,
    0x27, 0x2f, 0x37, 0x3f,
    0x60, 0x61, 0x62,
    0x82, 0x9a, 0xc4, 0xc5, 0xce,
    0xd4, 0xd5, 0xd6, 0xea,
  }
  for i = 1, #disabled do
    map_opc1_64[disabled[i]] = false
  end

  -- 0x40-0x4f: "rex*" followed by one letter per set bit of the low nibble,
  -- always in the order w (8), r (4), x (2), b (1).
  local rexflags = { { 8, "w" }, { 4, "r" }, { 2, "x" }, { 1, "b" } }
  for nibble = 0, 15 do
    local letters, left = "", nibble
    for i = 1, #rexflags do
      local weight, letter = rexflags[i][1], rexflags[i][2]
      if left >= weight then
        letters = letters..letter
        left = left - weight
      end
    end
    map_opc1_64[0x40 + nibble] = "rex*"..letters
  end

  setmetatable(map_opc1_64, { __index = map_opc1_32 })
end
108 -- Map for 2nd opcode byte (0F xx). True CISC hell. Hey, I told you.
109 -- Prefix dependent MMX/SSE opcodes: (none)|rep|o16|repne, -|F3|66|F2
112 [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
113 "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
115 "movupsXrm|movssXrm|movupdXrm|movsdXrm",
116 "movupsXmr|movssXmr|movupdXmr|movsdXmr",
117 "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
118 "movlpsXmr||movlpdXmr",
119 "unpcklpsXrm||unpcklpdXrm",
120 "unpckhpsXrm||unpckhpdXrm",
121 "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
122 "movhpsXmr||movhpdXmr",
123 "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
124 "hintnopVm","hintnopVm","hintnopVm","hintnopVm",
126 "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
127 "movapsXrm||movapdXrm",
128 "movapsXmr||movapdXmr",
129 "cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt",
130 "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
131 "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
132 "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
133 "ucomissXrm||ucomisdXrm",
134 "comissXrm||comisdXrm",
136 "wrmsr","rdtsc","rdmsr","rdpmc","sysenter","sysexit",nil,"getsec",
137 "opc3*38",nil,"opc3*3a",nil,nil,nil,nil,nil,
139 "cmovoVrm","cmovnoVrm","cmovbVrm","cmovnbVrm",
140 "cmovzVrm","cmovnzVrm","cmovbeVrm","cmovaVrm",
141 "cmovsVrm","cmovnsVrm","cmovpeVrm","cmovpoVrm",
142 "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
144 "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
145 "rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm",
146 "andpsXrm||andpdXrm","andnpsXrm||andnpdXrm",
147 "orpsXrm||orpdXrm","xorpsXrm||xorpdXrm",
148 "addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm",
149 "cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm",
150 "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
151 "subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm",
152 "divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm",
154 "punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm",
155 "pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm",
156 "punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm",
157 "||punpcklqdqXrm","||punpckhqdqXrm",
158 "movPrVSm","movqMrm|movdquXrm|movdqaXrm",
160 "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
161 "pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
162 "pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|",
163 "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
165 "||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm",
166 "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
168 "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
169 "jsVj","jnsVj","jpeVj","jpoVj","jlVj","jgeVj","jleVj","jgVj",
171 "setoBm","setnoBm","setbBm","setnbBm","setzBm","setnzBm","setbeBm","setaBm",
172 "setsBm","setnsBm","setpeBm","setpoBm","setlBm","setgeBm","setleBm","setgBm",
174 "push fs","pop fs","cpuid","btVmr","shldVmru","shldVmrc",nil,nil,
175 "push gs","pop gs","rsm","btsVmr","shrdVmru","shrdVmrc","fxsave!Dmp","imulVrm",
177 "cmpxchgBmr","cmpxchgVmr","$lssVrm","btrVmr",
178 "$lfsVrm","$lgsVrm","movzxVrBmt","movzxVrWmt",
179 "|popcntVrm","ud2Dp","bt!Vmu","btcVmr",
180 "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
183 "cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|",
184 "pinsrwPrWmu","pextrwDrPmu",
185 "shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp",
186 "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
188 "||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm",
189 "paddqPrm","pmullwPrm",
190 "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
191 "psubusbPrm","psubuswPrm","pminubPrm","pandPrm",
192 "paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm",
194 "pavgbPrm","psrawPrm","psradPrm","pavgwPrm",
195 "pmulhuwPrm","pmulhwPrm",
196 "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
197 "psubsbPrm","psubswPrm","pminswPrm","porPrm",
198 "paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm",
200 "|||lddquXrm","psllwPrm","pslldPrm","psllqPrm",
201 "pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$",
202 "psubbPrm","psubwPrm","psubdPrm","psubqPrm",
203 "paddbPrm","paddwPrm","padddPrm","ud",
205 assert(map_opc2
[255] == "ud")
207 -- Map for three-byte opcodes. Can't wait for their next invention.
209 ["38"] = { -- [66] 0f 38 xx
211 [0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm",
212 "pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm",
213 "psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm",
216 "||pblendvbXrma",nil,nil,nil,
217 "||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm",
219 "pabsbPrm","pabswPrm","pabsdPrm",nil,
221 "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
222 "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
223 "||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm",
226 "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
227 "||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm",
228 "||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm",
229 "||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm",
231 "||pmulddXrm","||phminposuwXrm",
233 [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
236 ["3a"] = { -- [66] 0f 3a xx
238 [0x00]=nil,nil,nil,nil,nil,nil,nil,nil,
239 "||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu",
240 "||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu",
243 "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
244 nil,nil,nil,nil,nil,nil,nil,nil,
246 "||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil,
248 [0x40] = "||dppsXrmu",
249 [0x41] = "||dppdXrmu",
250 [0x42] = "||mpsadbwXrmu",
252 [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
253 [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
257 -- Map for VMX/SVM opcodes 0F 01 C0-FF (sgdt group with register operands).
259 [0xc1]="vmcall",[0xc2]="vmlaunch",[0xc3]="vmresume",[0xc4]="vmxoff",
260 [0xc8]="monitor",[0xc9]="mwait",
261 [0xd8]="vmrun",[0xd9]="vmmcall",[0xda]="vmload",[0xdb]="vmsave",
262 [0xdc]="stgi",[0xdd]="clgi",[0xde]="skinit",[0xdf]="invlpga",
263 [0xf8]="swapgs",[0xf9]="rdtscp",
266 -- Map for FP opcodes. And you thought stack machines are simple?
268 -- D8-DF 00-BF: opcodes with a memory operand.
270 [0]="faddFm","fmulFm","fcomFm","fcompFm","fsubFm","fsubrFm","fdivFm","fdivrFm",
271 "fldFm",nil,"fstFm","fstpFm","fldenvVm","fldcwWm","fnstenvVm","fnstcwWm",
273 "fiaddDm","fimulDm","ficomDm","ficompDm",
274 "fisubDm","fisubrDm","fidivDm","fidivrDm",
276 "fildDm","fisttpDm","fistDm","fistpDm",nil,"fld twordFmp",nil,"fstp twordFmp",
278 "faddGm","fmulGm","fcomGm","fcompGm","fsubGm","fsubrGm","fdivGm","fdivrGm",
280 "fldGm","fisttpQm","fstGm","fstpGm","frstorDmp",nil,"fnsaveDmp","fnstswWm",
282 "fiaddWm","fimulWm","ficomWm","ficompWm",
283 "fisubWm","fisubrWm","fidivWm","fidivrWm",
285 "fildWm","fisttpWm","fistWm","fistpWm",
286 "fbld twordFmp","fildQm","fbstp twordFmp","fistpQm",
287 -- xx C0-FF: opcodes with a pseudo-register operand.
289 "faddFf","fmulFf","fcomFf","fcompFf","fsubFf","fsubrFf","fdivFf","fdivrFf",
291 "fldFf","fxchFf",{"fnop"},nil,
292 {"fchs","fabs",nil,nil,"ftst","fxam"},
293 {"fld1","fldl2t","fldl2e","fldpi","fldlg2","fldln2","fldz"},
294 {"f2xm1","fyl2x","fptan","fpatan","fxtract","fprem1","fdecstp","fincstp"},
295 {"fprem","fyl2xp1","fsqrt","fsincos","frndint","fscale","fsin","fcos"},
297 "fcmovbFf","fcmoveFf","fcmovbeFf","fcmovuFf",nil,{nil,"fucompp"},nil,nil,
299 "fcmovnbFf","fcmovneFf","fcmovnbeFf","fcmovnuFf",
300 {nil,nil,"fnclex","fninit"},"fucomiFf","fcomiFf",nil,
302 "fadd toFf","fmul toFf",nil,nil,
303 "fsub toFf","fsubr toFf","fdivr toFf","fdiv toFf",
305 "ffreeFf",nil,"fstFf","fstpFf","fucomFf","fucompFf",nil,nil,
307 "faddpFf","fmulpFf",nil,{nil,"fcompp"},
308 "fsubrpFf","fsubpFf","fdivrpFf","fdivpFf",
310 nil,nil,nil,nil,{"fnstsw ax"},"fucomipFf","fcomipFf",nil,
312 assert(map_opcfp
[126] == "fcomipFf")
314 -- Map for opcode groups. The subkey is sp from the ModRM byte.
315 local map_opcgroup
= {
316 arith
= { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" },
317 shift
= { "rol", "ror", "rcl", "rcr", "shl", "shr", "sal", "sar" },
318 testb
= { "testBmi", "testBmi", "not", "neg", "mul", "imul", "div", "idiv" },
319 testv
= { "testVmi", "testVmi", "not", "neg", "mul", "imul", "div", "idiv" },
320 incb
= { "inc", "dec" },
321 incd
= { "inc", "dec", "callUmp", "$call farDmp",
322 "jmpUmp", "$jmp farDmp", "pushUm" },
323 sldt
= { "sldt", "str", "lldt", "ltr", "verr", "verw" },
324 sgdt
= { "vm*$sgdt", "vm*$sidt", "$lgdt", "vm*$lidt",
325 "smsw", nil, "lmsw", "vm*$invlpg" },
326 bt
= { nil, nil, nil, nil, "bt", "bts", "btr", "btc" },
327 cmpxchg
= { nil, "sz*,cmpxchg8bQmp,cmpxchg16bXmp", nil, nil,
328 nil, nil, "vmptrld|vmxon|vmclear", "vmptrst" },
329 pshiftw
= { nil, nil, "psrlw", nil, "psraw", nil, "psllw" },
330 pshiftd
= { nil, nil, "psrld", nil, "psrad", nil, "pslld" },
331 pshiftq
= { nil, nil, "psrlq", nil, nil, nil, "psllq" },
332 pshiftdq
= { nil, nil, "psrlq", "psrldq", nil, nil, "psllq", "pslldq" },
333 fxsave
= { "$fxsave", "$fxrstor", "$ldmxcsr", "$stmxcsr",
334 nil, "lfenceDp$", "mfenceDp$", "sfenceDp$clflush" },
335 prefetch
= { "prefetch", "prefetchw" },
336 prefetcht
= { "prefetchnta", "prefetcht0", "prefetcht1", "prefetcht2" },
339 ------------------------------------------------------------------------------
341 -- Maps for register names.
343 B
= { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
344 "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
345 B64
= { "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil",
346 "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
347 W
= { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
348 "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" },
349 D
= { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
350 "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" },
351 Q
= { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
352 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
353 M
= { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
354 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
355 X
= { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
356 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
-- Segment register names, looked up with a 1-based index (field value + 1).
-- The last two entries are placeholder names.
local map_segregs = {
  "es", "cs", "ss", "ds",
  "fs", "gs",
  "segr6", "segr7",
}
360 -- Maps for size names.
362 B
= 1, W
= 2, D
= 4, Q
= 8, M
= 8, X
= 16,
364 local map_sz2prefix
= {
365 B
= "byte", W
= "word", D
= "dword",
367 M
= "qword", X
= "xword",
368 F
= "dword", G
= "qword", -- No need for sizes/register names for these two.
371 ------------------------------------------------------------------------------
373 -- Output a nicely formatted line with an opcode and operands.
374 local function putop(ctx
, text
, operands
)
375 local code
, pos
, hex
= ctx
.code
, ctx
.pos
, ""
376 local hmax
= ctx
.hexdump
378 for i
=ctx
.start
,pos
-1 do
379 hex
= hex
..format("%02X", byte(code
, i
, i
))
381 if #hex
> hmax
then hex
= sub(hex
, 1, hmax
)..". "
382 else hex
= hex
..rep(" ", hmax
-#hex
+2) end
384 if operands
then text
= text
.." "..operands
end
385 if ctx
.o16
then text
= "o16 "..text
; ctx
.o16
= false end
386 if ctx
.a32
then text
= "a32 "..text
; ctx
.a32
= false end
387 if ctx
.rep
then text
= ctx
.rep
.." "..text
; ctx
.rep
= false end
389 local t
= (ctx
.rexw
and "w" or "")..(ctx
.rexr
and "r" or "")..
390 (ctx
.rexx
and "x" or "")..(ctx
.rexb
and "b" or "")
391 if t
~= "" then text
= "rex."..t
.." "..text
end
392 ctx
.rexw
= false; ctx
.rexr
= false; ctx
.rexx
= false; ctx
.rexb
= false
396 local text2
, n
= gsub(text
, "%[", "["..ctx
.seg
..":")
397 if n
== 0 then text
= ctx
.seg
.." "..text
else text
= text2
end
400 if ctx
.lock
then text
= "lock "..text
; ctx
.lock
= false end
403 local sym
= ctx
.symtab
[imm
]
404 if sym
then text
= text
.."\t->"..sym
end
406 ctx
.out(format("%08x %s%s\n", ctx
.addr
+ctx
.start
, hex
, text
))
412 -- Clear all prefix flags.
-- Reset every prefix-related field of the disassembler context to false.
--   ctx: context table; only the fields listed below are written.
-- Returns nothing.
local function clearprefixes(ctx)
  -- Operand-size, segment, lock and repeat prefixes.
  ctx.o16, ctx.seg, ctx.lock, ctx.rep = false, false, false, false
  -- Individual REX bits.
  ctx.rexw, ctx.rexr, ctx.rexx, ctx.rexb = false, false, false, false
  -- "REX prefix seen" marker and address-size prefix.
  ctx.rex, ctx.a32 = false, false
end
419 -- Fallback for incomplete opcodes at the end.
420 local function incomplete(ctx
)
423 return putop(ctx
, "(incomplete)")
426 -- Fallback for unknown opcodes.
427 local function unknown(ctx
)
429 return putop(ctx
, "(unknown)")
432 -- Return an immediate of the specified size.
433 local function getimm(ctx
, pos
, n
)
434 if pos
+n
-1 > ctx
.stop
then return incomplete(ctx
) end
435 local code
= ctx
.code
437 local b1
= byte(code
, pos
, pos
)
440 local b1
, b2
= byte(code
, pos
, pos
+1)
443 local b1
, b2
, b3
, b4
= byte(code
, pos
, pos
+3)
444 local imm
= b1
+b2
*256+b3
*65536+b4
*16777216
450 -- Process pattern string and generate the operands.
451 local function putpat(ctx
, name
, pat
)
452 local operands
, regs
, sz
, mode
, sp
, rm
, sc
, rx
, sdisp
453 local code
, pos
, stop
= ctx
.code
, ctx
.pos
, ctx
.stop
455 -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz
456 for p
in gmatch(pat
, ".") do
458 if p
== "V" or p
== "U" then
459 if ctx
.rexw
then sz
= "Q"; ctx
.rexw
= false
460 elseif ctx
.o16
then sz
= "W"; ctx
.o16
= false
461 elseif p
== "U" and ctx
.x64
then sz
= "Q"
465 if ctx
.rexw
then sz
= "Q"; ctx
.rexw
= false else sz
= "D" end
469 regs
= ctx
.rex
and map_regs
.B64
or map_regs
.B
470 elseif match(p
, "[WDQMXFG]") then
474 sz
= ctx
.o16
and "X" or "M"; ctx
.o16
= false
477 name
= name
..lower(sz
)
479 local imm
= getimm(ctx
, pos
, 1); if not imm
then return end
480 x
= imm
<= 127 and format("+0x%02x", imm
)
481 or format("-0x%02x", 256-imm
)
484 local imm
= getimm(ctx
, pos
, 1); if not imm
then return end
485 x
= format("0x%02x", imm
)
488 local imm
= getimm(ctx
, pos
, 2); if not imm
then return end
489 x
= format("0x%x", imm
)
491 elseif p
== "o" then -- [offset]
493 local imm1
= getimm(ctx
, pos
, 4); if not imm1
then return end
494 local imm2
= getimm(ctx
, pos
+4, 4); if not imm2
then return end
495 x
= format("[0x%08x%08x]", imm2
, imm1
)
498 local imm
= getimm(ctx
, pos
, 4); if not imm
then return end
499 x
= format("[0x%08x]", imm
)
502 elseif p
== "i" or p
== "I" then
503 local n
= map_sz2n
[sz
]
504 if n
== 8 and ctx
.x64
and p
== "I" then
505 local imm1
= getimm(ctx
, pos
, 4); if not imm1
then return end
506 local imm2
= getimm(ctx
, pos
+4, 4); if not imm2
then return end
507 x
= format("0x%08x%08x", imm2
, imm1
)
509 if n
== 8 then n
= 4 end
510 local imm
= getimm(ctx
, pos
, n
); if not imm
then return end
511 if sz
== "Q" and (imm
< 0 or imm
> 0x7fffffff) then
512 imm
= (0xffffffff+1)-imm
513 x
= format(imm
> 65535 and "-0x%08x" or "-0x%x", imm
)
515 x
= format(imm
> 65535 and "0x%08x" or "0x%x", imm
)
520 local n
= map_sz2n
[sz
]
521 if n
== 8 then n
= 4 end
522 local imm
= getimm(ctx
, pos
, n
); if not imm
then return end
523 if sz
== "B" and imm
> 127 then imm
= imm
-256
524 elseif imm
> 2147483647 then imm
= imm
-4294967296 end
526 imm
= imm
+ pos
+ ctx
.addr
527 if imm
> 4294967295 and not ctx
.x64
then imm
= imm
-4294967296 end
530 x
= format("word 0x%04x", imm
%65536)
532 local lo
= imm
% 0x1000000
533 x
= format("0x%02x%06x", (imm
-lo
) / 0x1000000, lo
)
535 x
= format("0x%08x", imm
)
538 local r
= byte(code
, pos
-1, pos
-1)%8
539 if ctx
.rexb
then r
= r
+ 8; ctx
.rexb
= false end
541 elseif p
== "a" then x
= regs
[1]
542 elseif p
== "c" then x
= "cl"
543 elseif p
== "d" then x
= "dx"
544 elseif p
== "1" then x
= "1"
549 if pos
> stop
then return incomplete(ctx
) end
550 mode
= byte(code
, pos
, pos
)
553 rm
= mode
%8; mode
= (mode
-rm
)/8
554 sp
= mode
%8; mode
= (mode
-sp
)/8
558 if pos
> stop
then return incomplete(ctx
) end
559 sc
= byte(code
, pos
, pos
)
561 rm
= sc
%8; sc
= (sc
-rm
)/8
562 rx
= sc
%8; sc
= (sc
-rx
)/8
563 if ctx
.rexx
then rx
= rx
+ 8; ctx
.rexx
= false end
564 if rx
== 4 then rx
= nil end
566 if mode
> 0 or rm
== 5 then
568 if dsz
~= 1 then dsz
= 4 end
569 local disp
= getimm(ctx
, pos
, dsz
); if not disp
then return end
570 if mode
== 0 then rm
= nil end
571 if rm
or rx
or (not sc
and ctx
.x64
and not ctx
.a32
) then
572 if dsz
== 1 and disp
> 127 then
573 sdisp
= format("-0x%x", 256-disp
)
574 elseif disp
>= 0 and disp
<= 0x7fffffff then
575 sdisp
= format("+0x%x", disp
)
577 sdisp
= format("-0x%x", (0xffffffff+1)-disp
)
580 sdisp
= format(ctx
.x64
and not ctx
.a32
and
581 not (disp
>= 0 and disp
<= 0x7fffffff)
582 and "0xffffffff%08x" or "0x%08x", disp
)
587 if rm
and ctx
.rexb
then rm
= rm
+ 8; ctx
.rexb
= false end
588 if ctx
.rexr
then sp
= sp
+ 8; ctx
.rexr
= false end
591 if mode
== 3 then x
= regs
[rm
+1]
593 local aregs
= ctx
.a32
and map_regs
.D
or ctx
.aregs
594 local srm
, srx
= "", ""
595 if rm
then srm
= aregs
[rm
+1]
596 elseif not sc
and ctx
.x64
and not ctx
.a32
then srm
= "rip" end
599 if rm
then srm
= srm
.."+" end
601 if sc
> 0 then srx
= srx
.."*"..(2^sc
) end
603 x
= format("[%s%s%s]", srm
, srx
, sdisp
)
606 (not match(pat
, "[aRrgp]") or match(pat
, "t")) then -- Yuck.
607 x
= map_sz2prefix
[sz
].." "..x
609 elseif p
== "r" then x
= regs
[sp
+1]
610 elseif p
== "g" then x
= map_segregs
[sp
+1]
611 elseif p
== "p" then -- Suppress prefix.
612 elseif p
== "f" then x
= "st"..rm
614 if sp
== 0 and ctx
.lock
and not ctx
.x64
then
615 x
= "CR8"; ctx
.lock
= false
619 elseif p
== "y" then x
= "DR"..sp
620 elseif p
== "z" then x
= "TR"..sp
623 error("bad pattern `"..pat
.."'")
626 if x
then operands
= operands
and operands
..", "..x
or x
end
629 return putop(ctx
, name
, operands
)
632 -- Forward declaration.
635 -- Fetch and cache MRM byte.
636 local function getmrm(ctx
)
640 if pos
> ctx
.stop
then return nil end
641 mrm
= byte(ctx
.code
, pos
, pos
)
648 -- Dispatch to handler depending on pattern.
649 local function dispatch(ctx
, opat
, patgrp
)
650 if not opat
then return unknown(ctx
) end
651 if match(opat
, "%|") then -- MMX/SSE variants depending on prefix.
654 p
= ctx
.rep
=="rep" and "%|([^%|]*)" or "%|[^%|]*%|[^%|]*%|([^%|]*)"
656 elseif ctx
.o16
then p
= "%|[^%|]*%|([^%|]*)"; ctx
.o16
= false
657 else p
= "^[^%|]*" end
658 opat
= match(opat
, p
)
659 if not opat
then return unknown(ctx
) end
660 -- ctx.rep = false; ctx.o16 = false
661 --XXX fails for 66 f2 0f 38 f1 06 crc32 eax,WORD PTR [esi]
662 --XXX remove in branches?
664 if match(opat
, "%$") then -- reg$mem variants.
665 local mrm
= getmrm(ctx
); if not mrm
then return incomplete(ctx
) end
666 opat
= match(opat
, mrm
>= 192 and "^[^%$]*" or "%$(.*)")
667 if opat
== "" then return unknown(ctx
) end
669 if opat
== "" then return unknown(ctx
) end
670 local name
, pat
= match(opat
, "^([a-z0-9 ]*)(.*)")
671 if pat
== "" and patgrp
then pat
= patgrp
end
672 return map_act
[sub(pat
, 1, 1)](ctx
, name
, pat
)
675 -- Get a pattern from an opcode map and dispatch to handler.
676 local function dispatchmap(ctx
, opcmap
)
678 local opat
= opcmap
[byte(ctx
.code
, pos
, pos
)]
681 return dispatch(ctx
, opat
)
684 -- Map for action codes. The key is the first char after the name.
686 -- Simple opcodes without operands.
687 [""] = function(ctx
, name
, pat
)
688 return putop(ctx
, name
)
691 -- Operand size chars fall right through.
692 B
= putpat
, W
= putpat
, D
= putpat
, Q
= putpat
,
693 V
= putpat
, U
= putpat
, T
= putpat
,
694 M
= putpat
, X
= putpat
, P
= putpat
,
695 F
= putpat
, G
= putpat
,
698 [":"] = function(ctx
, name
, pat
)
699 ctx
[pat
== ":" and name
or sub(pat
, 2)] = name
700 if ctx
.pos
- ctx
.start
> 5 then return unknown(ctx
) end -- Limit #prefixes.
703 -- Chain to special handler specified by name.
704 ["*"] = function(ctx
, name
, pat
)
705 return map_act
[name
](ctx
, name
, sub(pat
, 2))
708 -- Use named subtable for opcode group.
709 ["!"] = function(ctx
, name
, pat
)
710 local mrm
= getmrm(ctx
); if not mrm
then return incomplete(ctx
) end
711 return dispatch(ctx
, map_opcgroup
[name
][((mrm
-(mrm
%8))/8)%8+1], sub(pat
, 2))
714 -- o16,o32[,o64] variants.
715 sz
= function(ctx
, name
, pat
)
716 if ctx
.o16
then ctx
.o16
= false
718 pat
= match(pat
, ",(.*)")
720 local p
= match(pat
, ",(.*)")
721 if p
then pat
= p
; ctx
.rexw
= false end
724 pat
= match(pat
, "^[^,]*")
725 return dispatch(ctx
, pat
)
728 -- Two-byte opcode dispatch.
729 opc2
= function(ctx
, name
, pat
)
730 return dispatchmap(ctx
, map_opc2
)
733 -- Three-byte opcode dispatch.
734 opc3
= function(ctx
, name
, pat
)
735 return dispatchmap(ctx
, map_opc3
[pat
])
739 vm
= function(ctx
, name
, pat
)
740 return dispatch(ctx
, map_opcvm
[ctx
.mrm
])
743 -- Floating point opcode dispatch.
744 fp
= function(ctx
, name
, pat
)
745 local mrm
= getmrm(ctx
); if not mrm
then return incomplete(ctx
) end
747 local idx
= pat
*8 + ((mrm
-rm
)/8)%8
748 if mrm
>= 192 then idx
= idx
+ 64 end
749 local opat
= map_opcfp
[idx
]
750 if type(opat
) == "table" then opat
= opat
[rm
+1] end
751 return dispatch(ctx
, opat
)
755 rex
= function(ctx
, name
, pat
)
756 if ctx
.rex
then return unknown(ctx
) end -- Only 1 REX prefix allowed.
757 for p
in gmatch(pat
, ".") do ctx
["rex"..p
] = true end
761 -- Special case for nop with REX prefix.
762 nop
= function(ctx
, name
, pat
)
763 return dispatch(ctx
, ctx
.rex
and pat
or "nop")
767 ------------------------------------------------------------------------------
769 -- Disassemble a block of code.
770 local function disass_block(ctx
, ofs
, len
)
771 if not ofs
then ofs
= 0 end
772 local stop
= len
and ofs
+len
or #ctx
.code
780 while ctx
.pos
<= stop
do dispatchmap(ctx
, ctx
.map1
) end
781 if ctx
.pos
~= ctx
.start
then incomplete(ctx
) end
784 -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
785 local function create_(code
, addr
, out
)
788 ctx
.addr
= (addr
or 0) - 1
789 ctx
.out
= out
or io
.write
791 ctx
.disass
= disass_block
794 ctx
.map1
= map_opc1_32
795 ctx
.aregs
= map_regs
.D
799 local function create64_(code
, addr
, out
)
800 local ctx
= create_(code
, addr
, out
)
802 ctx
.map1
= map_opc1_64
803 ctx
.aregs
= map_regs
.Q
807 -- Simple API: disassemble code (a string) at address and output via out.
-- Simple API (32 bit): build a context with create_() and disassemble the
-- whole code string in one go.
--   code: string holding the machine code.
--   addr: passed through to create_().
--   out:  passed through to create_().
-- Returns nothing.
local function disass_(code, addr, out)
  local ctx = create_(code, addr, out)
  ctx:disass()
end
-- Simple API (64 bit): same as disass_(), but the context comes from
-- create64_().
--   code: string holding the machine code.
--   addr: passed through to create64_().
--   out:  passed through to create64_().
-- Returns nothing.
local function disass64_(code, addr, out)
  local ctx = create64_(code, addr, out)
  ctx:disass()
end
816 -- Return register name for RID.
-- Return the register name for register ID r (32 bit variant).
-- IDs below 8 index map_regs.D (ID 0 is its first entry); any other ID
-- indexes map_regs.X, with ID 8 mapping to its first entry.
-- Returns nil when the ID falls outside the selected table.
local function regname_(r)
  if r < 8 then
    return map_regs.D[r+1]
  else
    return map_regs.X[r-7]
  end
end
-- Return the register name for register ID r (64 bit variant).
-- IDs below 16 index map_regs.Q (ID 0 is its first entry); any other ID
-- indexes map_regs.X, with ID 16 mapping to its first entry.
-- Returns nil when the ID falls outside the selected table.
local function regname64_(r)
  if r < 16 then
    return map_regs.Q[r+1]
  else
    return map_regs.X[r-15]
  end
end
827 -- Public module functions.
835 regname64
= regname64_