Fixed uninitialized structure data.
[nasm/sigaren-mirror.git] / assemble.c
blob843731d038a45c7b4c575824d34c8fd58df39514
1 /* assemble.c code generation for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * the actual codes (C syntax, i.e. octal):
9 * \0 - terminates the code. (Unless it's a literal of course.)
10 * \1, \2, \3 - that many literal bytes follow in the code stream
11 * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS
12 * (POP is never used for CS) depending on operand 0
13 * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending
14 * on operand 0
15 * \10, \11, \12 - a literal byte follows in the code stream, to be added
16 * to the register value of operand 0, 1 or 2
17 * \17 - encodes the literal byte 0. (Some compilers don't take
18 * kindly to a zero byte in the _middle_ of a compile time
19 * string constant, so I had to put this hack in.)
20 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
21 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
22 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
23 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
24 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
25 * assembly mode or the operand-size override on the operand
26 * \37 - a word constant, from the _segment_ part of operand 0
27 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
28 * \44, \45, \46 - select between \3[012] and \4[012] depending on 16/32 bit
29 * assembly mode or the address-size override on the operand
30 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
31 * \54, \55, \56 - a qword immediate operand, from operand 0, 1 or 2
32 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
33 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
34 * assembly mode or the operand-size override on the operand
35 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
36 * \1ab - a ModRM, calculated on EA in operand a, with the spare
37 * field the register value of operand b.
38 * \130,\131,\132 - an immediate word or signed byte for operand 0, 1, or 2
39 * \133,\134,\135 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
40 * is a signed byte rather than a word.
41 * \140,\141,\142 - an immediate dword or signed byte for operand 0, 1, or 2
42 * \143,\144,\145 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
43 * is a signed byte rather than a dword.
44 * \150,\151,\152 - an immediate qword or signed byte for operand 0, 1, or 2
45 * \153,\154,\155 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
46 * is a signed byte rather than a qword.
47 * \2ab - a ModRM, calculated on EA in operand a, with the spare
48 * field equal to digit b.
49 * \30x - might be an 0x67 byte, depending on the address size of
50 * the memory reference in operand x.
51 * \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
52 * \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
53 * \312 - (disassembler only) marker on LOOP, LOOPxx instructions.
54 * \313 - indicates fixed 64-bit address size, no REX required.
55 * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
56 * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
57 * \322 - indicates that this instruction is only valid when the
58 * operand size is the default (instruction to disassembler,
59 * generates no code in the assembler)
60 * \323 - indicates fixed 64-bit operand size, REX on extensions, only.
61 * \324 - indicates 64-bit operand size requiring REX prefix.
62 * \330 - a literal byte follows in the code stream, to be added
63 * to the condition code value of the instruction.
64 * \331 - instruction not valid with REP prefix. Hint for
65 * disassembler only; for SSE instructions.
66 * \332 - disassemble a rep (0xF3 byte) prefix as repe not rep.
67 * \333 - REP prefix (0xF3 byte); for SSE instructions. Not encoded
68 * as a literal byte in order to aid the disassembler.
69 * \340 - reserve <operand 0> bytes of uninitialized storage.
70 * Operand 0 had better be a segmentless constant.
71 * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
72 * 370 is used for Jcc, 371 is used for JMP.
73 * \373 - assemble 0x03 if bits==16, 0x05 if bits==32;
74 * used for conditional jump over longer jump
77 #include <stdio.h>
78 #include <string.h>
79 #include <inttypes.h>
81 #include "nasm.h"
82 #include "nasmlib.h"
83 #include "assemble.h"
84 #include "insns.h"
85 #include "preproc.h"
86 #include "regvals.c"
88 extern struct itemplate *nasm_instructions[];
/*
 * Encoded form of an effective address, as filled in by process_ea().
 */
typedef struct {
    int sib_present;    /* non-zero when a SIB byte is required */
    int bytes;          /* number of displacement (offset) bytes needed */
    int size;           /* convenience total: sib_present + bytes + 1 */
    uint8_t modrm;      /* the ModRM byte itself */
    uint8_t sib;        /* the SIB byte itself */
    uint8_t rex;        /* REX bits contributed by this address */
    uint8_t rip;        /* NOTE(review): presumably a RIP-relative marker - confirm in process_ea() */
} ea;
97 static uint32_t cpu; /* cpu level received from nasm.c */
98 static efunc errfunc;
99 static struct ofmt *outfmt;
100 static ListGen *list;
102 static int32_t calcsize(int32_t, int32_t, int, insn *, const char *);
103 static void gencode(int32_t, int32_t, int, insn *, const char *, int32_t);
104 static int regval(operand * o);
105 // static int regflag(operand * o);
106 static int matches(struct itemplate *, insn *, int bits);
107 static ea *process_ea(operand *, ea *, int, int, int);
108 static int chsize(operand *, int);
111 * This routine wrappers the real output format's output routine,
112 * in order to pass a copy of the data off to the listing file
113 * generator at the same time.
115 static void out(int32_t offset, int32_t segto, const void *data,
116 uint32_t type, int32_t segment, int32_t wrt)
118 static int32_t lineno = 0; /* static!!! */
119 static char *lnfname = NULL;
121 if ((type & OUT_TYPMASK) == OUT_ADDRESS) {
122 if (segment != NO_SEG || wrt != NO_SEG) {
124 * This address is relocated. We must write it as
125 * OUT_ADDRESS, so there's no work to be done here.
127 list->output(offset, data, type);
128 } else {
129 uint8_t p[8], *q = p;
131 * This is a non-relocated address, and we're going to
132 * convert it into RAWDATA format.
134 if ((type & OUT_SIZMASK) == 4) {
135 WRITELONG(q, *(int32_t *)data);
136 list->output(offset, p, OUT_RAWDATA + 4);
137 } else if ((type & OUT_SIZMASK) == 8) {
138 WRITEDLONG(q, *(int64_t *)data);
139 list->output(offset, p, OUT_RAWDATA + 8);
140 } else {
141 WRITESHORT(q, *(int32_t *)data);
142 list->output(offset, p, OUT_RAWDATA + 2);
145 } else if ((type & OUT_TYPMASK) == OUT_RAWDATA) {
146 list->output(offset, data, type);
147 } else if ((type & OUT_TYPMASK) == OUT_RESERVE) {
148 list->output(offset, NULL, type);
149 } else if ((type & OUT_TYPMASK) == OUT_REL2ADR ||
150 (type & OUT_TYPMASK) == OUT_REL4ADR) {
151 list->output(offset, data, type);
155 * this call to src_get determines when we call the
156 * debug-format-specific "linenum" function
157 * it updates lineno and lnfname to the current values
158 * returning 0 if "same as last time", -2 if lnfname
159 * changed, and the amount by which lineno changed,
160 * if it did. thus, these variables must be static
163 if (src_get(&lineno, &lnfname)) {
164 outfmt->current_dfmt->linenum(lnfname, lineno, segto);
167 outfmt->output(segto, data, type, segment, wrt);
170 static int jmp_match(int32_t segment, int32_t offset, int bits,
171 insn * ins, const char *code)
173 int32_t isize;
174 uint8_t c = code[0];
176 if (c != 0370 && c != 0371)
177 return 0;
178 if (ins->oprs[0].opflags & OPFLAG_FORWARD) {
179 if ((optimizing < 0 || (ins->oprs[0].type & STRICT))
180 && c == 0370)
181 return 1;
182 else
183 return (pass0 == 0); /* match a forward reference */
185 isize = calcsize(segment, offset, bits, ins, code);
186 if (ins->oprs[0].segment != segment)
187 return 0;
188 isize = ins->oprs[0].offset - offset - isize; /* isize is now the delta */
189 if (isize >= -128L && isize <= 127L)
190 return 1; /* it is byte size */
192 return 0;
195 int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp,
196 insn * instruction, struct ofmt *output, efunc error,
197 ListGen * listgen)
199 struct itemplate *temp;
200 int j;
201 int size_prob;
202 int32_t insn_end;
203 int32_t itimes;
204 int32_t start = offset;
205 int32_t wsize = 0; /* size for DB etc. */
207 errfunc = error; /* to pass to other functions */
208 cpu = cp;
209 outfmt = output; /* likewise */
210 list = listgen; /* and again */
212 switch (instruction->opcode) {
213 case -1:
214 return 0;
215 case I_DB:
216 wsize = 1;
217 break;
218 case I_DW:
219 wsize = 2;
220 break;
221 case I_DD:
222 wsize = 4;
223 break;
224 case I_DQ:
225 wsize = 8;
226 break;
227 case I_DT:
228 wsize = 10;
229 break;
232 if (wsize) {
233 extop *e;
234 int32_t t = instruction->times;
235 if (t < 0)
236 errfunc(ERR_PANIC,
237 "instruction->times < 0 (%ld) in assemble()", t);
239 while (t--) { /* repeat TIMES times */
240 for (e = instruction->eops; e; e = e->next) {
241 if (e->type == EOT_DB_NUMBER) {
242 if (wsize == 1) {
243 if (e->segment != NO_SEG)
244 errfunc(ERR_NONFATAL,
245 "one-byte relocation attempted");
246 else {
247 uint8_t out_byte = e->offset;
248 out(offset, segment, &out_byte,
249 OUT_RAWDATA + 1, NO_SEG, NO_SEG);
251 } else if (wsize > 5) {
252 errfunc(ERR_NONFATAL, "integer supplied to a D%c"
253 " instruction", wsize == 8 ? 'Q' : 'T');
254 } else
255 out(offset, segment, &e->offset,
256 OUT_ADDRESS + wsize, e->segment, e->wrt);
257 offset += wsize;
258 } else if (e->type == EOT_DB_STRING) {
259 int align;
261 out(offset, segment, e->stringval,
262 OUT_RAWDATA + e->stringlen, NO_SEG, NO_SEG);
263 align = e->stringlen % wsize;
265 if (align) {
266 align = wsize - align;
267 out(offset, segment, "\0\0\0\0\0\0\0\0",
268 OUT_RAWDATA + align, NO_SEG, NO_SEG);
270 offset += e->stringlen + align;
273 if (t > 0 && t == instruction->times - 1) {
275 * Dummy call to list->output to give the offset to the
276 * listing module.
278 list->output(offset, NULL, OUT_RAWDATA);
279 list->uplevel(LIST_TIMES);
282 if (instruction->times > 1)
283 list->downlevel(LIST_TIMES);
284 return offset - start;
287 if (instruction->opcode == I_INCBIN) {
288 static char fname[FILENAME_MAX];
289 FILE *fp;
290 int32_t len;
291 char *prefix = "", *combine;
292 char **pPrevPath = NULL;
294 len = FILENAME_MAX - 1;
295 if (len > instruction->eops->stringlen)
296 len = instruction->eops->stringlen;
297 strncpy(fname, instruction->eops->stringval, len);
298 fname[len] = '\0';
300 while (1) { /* added by alexfru: 'incbin' uses include paths */
301 combine = nasm_malloc(strlen(prefix) + len + 1);
302 strcpy(combine, prefix);
303 strcat(combine, fname);
305 if ((fp = fopen(combine, "rb")) != NULL) {
306 nasm_free(combine);
307 break;
310 nasm_free(combine);
311 pPrevPath = pp_get_include_path_ptr(pPrevPath);
312 if (pPrevPath == NULL)
313 break;
314 prefix = *pPrevPath;
317 if (fp == NULL)
318 error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
319 fname);
320 else if (fseek(fp, 0L, SEEK_END) < 0)
321 error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
322 fname);
323 else {
324 static char buf[2048];
325 int32_t t = instruction->times;
326 int32_t base = 0;
328 len = ftell(fp);
329 if (instruction->eops->next) {
330 base = instruction->eops->next->offset;
331 len -= base;
332 if (instruction->eops->next->next &&
333 len > instruction->eops->next->next->offset)
334 len = instruction->eops->next->next->offset;
337 * Dummy call to list->output to give the offset to the
338 * listing module.
340 list->output(offset, NULL, OUT_RAWDATA);
341 list->uplevel(LIST_INCBIN);
342 while (t--) {
343 int32_t l;
345 fseek(fp, base, SEEK_SET);
346 l = len;
347 while (l > 0) {
348 int32_t m =
349 fread(buf, 1, (l > sizeof(buf) ? sizeof(buf) : l),
350 fp);
351 if (!m) {
353 * This shouldn't happen unless the file
354 * actually changes while we are reading
355 * it.
357 error(ERR_NONFATAL,
358 "`incbin': unexpected EOF while"
359 " reading file `%s'", fname);
360 t = 0; /* Try to exit cleanly */
361 break;
363 out(offset, segment, buf, OUT_RAWDATA + m,
364 NO_SEG, NO_SEG);
365 l -= m;
368 list->downlevel(LIST_INCBIN);
369 if (instruction->times > 1) {
371 * Dummy call to list->output to give the offset to the
372 * listing module.
374 list->output(offset, NULL, OUT_RAWDATA);
375 list->uplevel(LIST_TIMES);
376 list->downlevel(LIST_TIMES);
378 fclose(fp);
379 return instruction->times * len;
381 return 0; /* if we're here, there's an error */
384 size_prob = FALSE;
386 for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
387 int m = matches(temp, instruction, bits);
389 if (m == 99)
390 m += jmp_match(segment, offset, bits, instruction, temp->code);
392 if (m == 100) { /* matches! */
393 const char *codes = temp->code;
394 int32_t insn_size = calcsize(segment, offset, bits,
395 instruction, codes);
396 itimes = instruction->times;
397 if (insn_size < 0) /* shouldn't be, on pass two */
398 error(ERR_PANIC, "errors made it through from pass one");
399 else
400 while (itimes--) {
401 for (j = 0; j < instruction->nprefix; j++) {
402 uint8_t c = 0;
403 switch (instruction->prefixes[j]) {
404 case P_LOCK:
405 c = 0xF0;
406 break;
407 case P_REPNE:
408 case P_REPNZ:
409 c = 0xF2;
410 break;
411 case P_REPE:
412 case P_REPZ:
413 case P_REP:
414 c = 0xF3;
415 break;
416 case R_CS:
417 c = 0x2E;
418 break;
419 case R_DS:
420 c = 0x3E;
421 break;
422 case R_ES:
423 c = 0x26;
424 break;
425 case R_FS:
426 c = 0x64;
427 break;
428 case R_GS:
429 c = 0x65;
430 break;
431 case R_SS:
432 c = 0x36;
433 break;
434 case R_SEGR6:
435 case R_SEGR7:
436 error(ERR_NONFATAL,
437 "segr6 and segr7 cannot be used as prefixes");
438 break;
439 case P_A16:
440 if (bits == 64) {
441 error(ERR_PANIC, "16-bit addressing is depreciated in long mode");
442 break;
444 if (bits != 16)
445 c = 0x67;
446 break;
447 case P_A32:
448 if (bits != 32)
449 c = 0x67;
450 break;
451 case P_O16:
452 if (bits != 16)
453 c = 0x66;
454 break;
455 case P_O32:
456 if (bits == 16)
457 c = 0x66;
458 break;
459 default:
460 error(ERR_PANIC, "invalid instruction prefix");
462 if (c != 0) {
463 out(offset, segment, &c, OUT_RAWDATA + 1,
464 NO_SEG, NO_SEG);
465 offset++;
468 insn_end = offset + insn_size;
469 gencode(segment, offset, bits, instruction, codes,
470 insn_end);
471 offset += insn_size;
472 if (itimes > 0 && itimes == instruction->times - 1) {
474 * Dummy call to list->output to give the offset to the
475 * listing module.
477 list->output(offset, NULL, OUT_RAWDATA);
478 list->uplevel(LIST_TIMES);
481 if (instruction->times > 1)
482 list->downlevel(LIST_TIMES);
483 return offset - start;
484 } else if (m > 0 && m > size_prob) {
485 size_prob = m;
487 // temp++;
490 if (temp->opcode == -1) { /* didn't match any instruction */
491 if (size_prob == 1) /* would have matched, but for size */
492 error(ERR_NONFATAL, "operation size not specified");
493 else if (size_prob == 2)
494 error(ERR_NONFATAL, "mismatch in operand sizes");
495 else if (size_prob == 3)
496 error(ERR_NONFATAL, "no instruction for this cpu level");
497 else if (size_prob == 4)
498 error(ERR_NONFATAL, "instruction depreciated in long mode");
499 else
500 error(ERR_NONFATAL,
501 "invalid combination of opcode and operands");
503 return 0;
506 int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp,
507 insn * instruction, efunc error)
509 struct itemplate *temp;
511 errfunc = error; /* to pass to other functions */
512 cpu = cp;
514 if (instruction->opcode == -1)
515 return 0;
517 if (instruction->opcode == I_DB ||
518 instruction->opcode == I_DW ||
519 instruction->opcode == I_DD ||
520 instruction->opcode == I_DQ || instruction->opcode == I_DT) {
521 extop *e;
522 int32_t isize, osize, wsize = 0; /* placate gcc */
524 isize = 0;
525 switch (instruction->opcode) {
526 case I_DB:
527 wsize = 1;
528 break;
529 case I_DW:
530 wsize = 2;
531 break;
532 case I_DD:
533 wsize = 4;
534 break;
535 case I_DQ:
536 wsize = 8;
537 break;
538 case I_DT:
539 wsize = 10;
540 break;
543 for (e = instruction->eops; e; e = e->next) {
544 int32_t align;
546 osize = 0;
547 if (e->type == EOT_DB_NUMBER)
548 osize = 1;
549 else if (e->type == EOT_DB_STRING)
550 osize = e->stringlen;
552 align = (-osize) % wsize;
553 if (align < 0)
554 align += wsize;
555 isize += osize + align;
557 return isize * instruction->times;
560 if (instruction->opcode == I_INCBIN) {
561 char fname[FILENAME_MAX];
562 FILE *fp;
563 int32_t len;
564 char *prefix = "", *combine;
565 char **pPrevPath = NULL;
567 len = FILENAME_MAX - 1;
568 if (len > instruction->eops->stringlen)
569 len = instruction->eops->stringlen;
570 strncpy(fname, instruction->eops->stringval, len);
571 fname[len] = '\0';
573 while (1) { /* added by alexfru: 'incbin' uses include paths */
574 combine = nasm_malloc(strlen(prefix) + len + 1);
575 strcpy(combine, prefix);
576 strcat(combine, fname);
578 if ((fp = fopen(combine, "rb")) != NULL) {
579 nasm_free(combine);
580 break;
583 nasm_free(combine);
584 pPrevPath = pp_get_include_path_ptr(pPrevPath);
585 if (pPrevPath == NULL)
586 break;
587 prefix = *pPrevPath;
590 if (fp == NULL)
591 error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
592 fname);
593 else if (fseek(fp, 0L, SEEK_END) < 0)
594 error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
595 fname);
596 else {
597 len = ftell(fp);
598 fclose(fp);
599 if (instruction->eops->next) {
600 len -= instruction->eops->next->offset;
601 if (instruction->eops->next->next &&
602 len > instruction->eops->next->next->offset) {
603 len = instruction->eops->next->next->offset;
606 return instruction->times * len;
608 return 0; /* if we're here, there's an error */
611 for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
612 int m = matches(temp, instruction, bits);
613 if (m == 99)
614 m += jmp_match(segment, offset, bits, instruction, temp->code);
616 if (m == 100) {
617 /* we've matched an instruction. */
618 int32_t isize;
619 const char *codes = temp->code;
620 int j;
622 isize = calcsize(segment, offset, bits, instruction, codes);
623 if (isize < 0)
624 return -1;
625 for (j = 0; j < instruction->nprefix; j++) {
626 if ((instruction->prefixes[j] != P_A16 &&
627 instruction->prefixes[j] != P_O16 && bits == 16) ||
628 (instruction->prefixes[j] != P_A32 &&
629 instruction->prefixes[j] != P_O32 && bits == 32) ||
630 (instruction->prefixes[j] == P_A32 &&
631 instruction->prefixes[j] != P_O32 && bits == 64)) {
632 isize++;
635 return isize * instruction->times;
638 return -1; /* didn't match any instruction */
641 /* check that opn[op] is a signed byte of size 16 or 32,
642 and return the signed value*/
643 static int is_sbyte(insn * ins, int op, int size)
645 int32_t v;
646 int ret;
648 ret = !(ins->forw_ref && ins->oprs[op].opflags) && /* dead in the water on forward reference or External */
649 optimizing >= 0 &&
650 !(ins->oprs[op].type & STRICT) &&
651 ins->oprs[op].wrt == NO_SEG && ins->oprs[op].segment == NO_SEG;
653 v = ins->oprs[op].offset;
654 if (size == 16)
655 v = (int16_t)v; /* sign extend if 16 bits */
657 return ret && v >= -128L && v <= 127L;
660 static int32_t calcsize(int32_t segment, int32_t offset, int bits,
661 insn * ins, const char *codes)
663 int32_t length = 0;
664 uint8_t c;
665 int t;
666 ins->rex = 0; /* Ensure REX is reset */
667 int rex_mask = 0xFF;
669 (void)segment; /* Don't warn that this parameter is unused */
670 (void)offset; /* Don't warn that this parameter is unused */
672 while (*codes)
673 switch (c = *codes++) {
674 case 01:
675 case 02:
676 case 03:
677 codes += c, length += c;
678 break;
679 case 04:
680 case 05:
681 case 06:
682 case 07:
683 length++;
684 break;
685 case 010:
686 case 011:
687 case 012:
688 if (bits == 64) {
689 t = regval(&ins->oprs[c - 010]);
690 if (t >= 0400 && t < 0500) { /* Calculate REX.B */
691 if (t < 0410 || (t >= 0440 && t < 0450))
692 ins->rex |= 0xF0; /* Set REX.0 */
693 else
694 ins->rex |= 0xF1; /* Set REX.B */
695 if (t >= 0440)
696 ins->rex |= 0xF8; /* Set REX.W */
699 codes++, length++;
700 break;
701 case 017:
702 length++;
703 break;
704 case 014:
705 case 015:
706 case 016:
707 length++;
708 break;
709 case 020:
710 case 021:
711 case 022:
712 length++;
713 break;
714 case 024:
715 case 025:
716 case 026:
717 length++;
718 break;
719 case 030:
720 case 031:
721 case 032:
722 length += 2;
723 break;
724 case 034:
725 case 035:
726 case 036:
727 if (ins->oprs[c - 034].type & (BITS16 | BITS32 | BITS64))
728 length += (ins->oprs[c - 034].type & BITS16) ? 2 : 4;
729 else
730 length += (bits == 16) ? 2 : 4;
731 break;
732 case 037:
733 length += 2;
734 break;
735 case 040:
736 case 041:
737 case 042:
738 length += 4;
739 break;
740 case 044:
741 case 045:
742 case 046:
743 length += ((ins->oprs[c - 044].addr_size ?
744 ins->oprs[c - 044].addr_size : bits) >> 3);
745 break;
746 case 050:
747 case 051:
748 case 052:
749 length++;
750 break;
751 case 054:
752 case 055:
753 case 056:
754 length += 8; /* MOV reg64/imm */
755 break;
756 case 060:
757 case 061:
758 case 062:
759 length += 2;
760 break;
761 case 064:
762 case 065:
763 case 066:
764 if (ins->oprs[c - 064].type & (BITS16 | BITS32 | BITS64))
765 length += (ins->oprs[c - 064].type & BITS16) ? 2 : 4;
766 else
767 length += (bits == 16) ? 2 : 4;
768 break;
769 case 070:
770 case 071:
771 case 072:
772 length += 4;
773 break;
774 case 0130:
775 case 0131:
776 case 0132:
777 length += is_sbyte(ins, c - 0130, 16) ? 1 : 2;
778 break;
779 case 0133:
780 case 0134:
781 case 0135:
782 codes += 2;
783 length++;
784 break;
785 case 0140:
786 case 0141:
787 case 0142:
788 length += is_sbyte(ins, c - 0140, 32) ? 1 : 4;
789 break;
790 case 0143:
791 case 0144:
792 case 0145:
793 codes += 2;
794 length++;
795 break;
796 case 0300:
797 case 0301:
798 case 0302:
799 if (bits == 64) { /* Calculate REX */
800 t = ins->oprs[c - 0300].basereg;
801 if (t >= EXPR_REG_START && t < REG_ENUM_LIMIT) {
802 t = regvals[t];
803 if ((t >= 0410 && t < 0440) || (t >= 0450 && t < 0500)) {
804 ins->rex |= 0xF1; /* Set REX.B */
808 length += chsize(&ins->oprs[c - 0300], bits);
809 break;
810 case 0310:
811 length += (bits != 16);
812 break;
813 case 0311:
814 length += (bits != 32);
815 break;
816 case 0312:
817 break;
818 case 0313:
819 length -= 1;
820 break;
821 case 0320:
822 length += (bits != 16);
823 break;
824 case 0321:
825 length += (bits == 16);
826 break;
827 case 0322:
828 break;
829 case 0323:
830 rex_mask = 0x07;
831 break;
832 case 0324:
833 length++;
834 break;
835 case 0330:
836 codes++, length++;
837 break;
838 case 0331:
839 case 0332:
840 break;
841 case 0333:
842 length++;
843 break;
844 case 0340:
845 case 0341:
846 case 0342:
847 if (ins->oprs[0].segment != NO_SEG)
848 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
849 " quantity of BSS space");
850 else
851 length += ins->oprs[0].offset << (c - 0340);
852 break;
853 case 0370:
854 case 0371:
855 case 0372:
856 break;
857 case 0373:
858 length++;
859 break;
860 default: /* can't do it by 'case' statements */
861 if (c >= 0100 && c <= 0277) { /* it's an EA */
862 ea ea_data;
863 int rfield;
864 ea_data.rex = 0; /* Ensure ea.REX is initially 0 */
866 if (bits == 64) {
867 if (c <= 0177) /* pick rfield from operand b */
868 rfield = regval(&ins->oprs[c & 7]);
869 else
870 rfield = c & 7;
871 } else
872 rfield = 0;
874 if (!process_ea
875 (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
876 rfield, ins->forw_ref)) {
877 errfunc(ERR_NONFATAL, "invalid effective address");
878 return -1;
879 } else {
880 if (bits == 64)
881 ins->rex |= ea_data.rex;
882 length += ea_data.size;
884 } else
885 errfunc(ERR_PANIC, "internal instruction table corrupt"
886 ": instruction code 0x%02X given", c);
889 if (bits == 64) {
890 ins->rex &= rex_mask;
891 if (ins->rex)
892 length += 1;
895 return length; }
897 static void gencode(int32_t segment, int32_t offset, int bits,
898 insn * ins, const char *codes, int32_t insn_end)
900 static char condval[] = { /* conditional opcodes */
901 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
902 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
903 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
905 uint8_t c;
906 uint8_t bytes[4];
907 int32_t size;
908 int64_t data;
910 while (*codes)
911 switch (c = *codes++) {
912 case 01:
913 case 02:
914 case 03:
915 if(ins->rex && (bits == 64)) { /* REX Supercedes all other Prefixes */
916 ins->rex = (ins->rex&0x0F)+0x40;
917 out(offset, segment, &ins->rex, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
918 ins->rex = 0;
919 offset += 1;
921 out(offset, segment, codes, OUT_RAWDATA + c, NO_SEG, NO_SEG);
922 codes += c;
923 offset += c;
924 break;
926 case 04:
927 case 06:
928 switch (ins->oprs[0].basereg) {
929 case R_CS:
930 bytes[0] = 0x0E + (c == 0x04 ? 1 : 0);
931 break;
932 case R_DS:
933 bytes[0] = 0x1E + (c == 0x04 ? 1 : 0);
934 break;
935 case R_ES:
936 bytes[0] = 0x06 + (c == 0x04 ? 1 : 0);
937 break;
938 case R_SS:
939 bytes[0] = 0x16 + (c == 0x04 ? 1 : 0);
940 break;
941 default:
942 errfunc(ERR_PANIC,
943 "bizarre 8086 segment register received");
945 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
946 offset++;
947 break;
949 case 05:
950 case 07:
951 switch (ins->oprs[0].basereg) {
952 case R_FS:
953 bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0);
954 break;
955 case R_GS:
956 bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0);
957 break;
958 default:
959 errfunc(ERR_PANIC,
960 "bizarre 386 segment register received");
962 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
963 offset++;
964 break;
966 case 010:
967 case 011:
968 case 012:
969 if(ins->rex && (bits == 64)) { /* REX Supercedes all other Prefixes */
970 ins->rex = (ins->rex&0x0F)+0x40;
971 out(offset, segment, &ins->rex, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
972 ins->rex = 0;
973 offset += 1;
975 bytes[0] = *codes++ + ((regval(&ins->oprs[c - 010])) & 7);
976 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
977 offset += 1;
978 break;
980 case 017:
981 bytes[0] = 0;
982 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
983 offset += 1;
984 break;
986 case 014:
987 case 015:
988 case 016:
989 if (ins->oprs[c - 014].offset < -128
990 || ins->oprs[c - 014].offset > 127) {
991 errfunc(ERR_WARNING, "signed byte value exceeds bounds");
994 if (ins->oprs[c - 014].segment != NO_SEG) {
995 data = ins->oprs[c - 014].offset;
996 out(offset, segment, &data, OUT_ADDRESS + 1,
997 ins->oprs[c - 014].segment, ins->oprs[c - 014].wrt);
998 } else {
999 bytes[0] = ins->oprs[c - 014].offset;
1000 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1001 NO_SEG);
1003 offset += 1;
1004 break;
1006 case 020:
1007 case 021:
1008 case 022:
1009 if (ins->oprs[c - 020].offset < -256
1010 || ins->oprs[c - 020].offset > 255) {
1011 errfunc(ERR_WARNING, "byte value exceeds bounds");
1013 if (ins->oprs[c - 020].segment != NO_SEG) {
1014 data = ins->oprs[c - 020].offset;
1015 out(offset, segment, &data, OUT_ADDRESS + 1,
1016 ins->oprs[c - 020].segment, ins->oprs[c - 020].wrt);
1017 } else {
1018 bytes[0] = ins->oprs[c - 020].offset;
1019 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1020 NO_SEG);
1022 offset += 1;
1023 break;
1025 case 024:
1026 case 025:
1027 case 026:
1028 if (ins->oprs[c - 024].offset < 0
1029 || ins->oprs[c - 024].offset > 255)
1030 errfunc(ERR_WARNING, "unsigned byte value exceeds bounds");
1031 if (ins->oprs[c - 024].segment != NO_SEG) {
1032 data = ins->oprs[c - 024].offset;
1033 out(offset, segment, &data, OUT_ADDRESS + 1,
1034 ins->oprs[c - 024].segment, ins->oprs[c - 024].wrt);
1035 } else {
1036 bytes[0] = ins->oprs[c - 024].offset;
1037 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1038 NO_SEG);
1040 offset += 1;
1041 break;
1043 case 030:
1044 case 031:
1045 case 032:
1046 if (ins->oprs[c - 030].segment == NO_SEG &&
1047 ins->oprs[c - 030].wrt == NO_SEG &&
1048 (ins->oprs[c - 030].offset < -65536L ||
1049 ins->oprs[c - 030].offset > 65535L)) {
1050 errfunc(ERR_WARNING, "word value exceeds bounds");
1052 data = ins->oprs[c - 030].offset;
1053 out(offset, segment, &data, OUT_ADDRESS + 2,
1054 ins->oprs[c - 030].segment, ins->oprs[c - 030].wrt);
1055 offset += 2;
1056 break;
1058 case 034:
1059 case 035:
1060 case 036:
1061 if (ins->oprs[c - 034].type & (BITS16 | BITS32))
1062 size = (ins->oprs[c - 034].type & BITS16) ? 2 : 4;
1063 else
1064 size = (bits == 16) ? 2 : 4;
1065 data = ins->oprs[c - 034].offset;
1066 if (size == 2 && (data < -65536L || data > 65535L))
1067 errfunc(ERR_WARNING, "word value exceeds bounds");
1068 out(offset, segment, &data, OUT_ADDRESS + size,
1069 ins->oprs[c - 034].segment, ins->oprs[c - 034].wrt);
1070 offset += size;
1071 break;
1073 case 037:
1074 if (ins->oprs[0].segment == NO_SEG)
1075 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1076 " relocatable");
1077 data = 0L;
1078 out(offset, segment, &data, OUT_ADDRESS + 2,
1079 outfmt->segbase(1 + ins->oprs[0].segment),
1080 ins->oprs[0].wrt);
1081 offset += 2;
1082 break;
1084 case 040:
1085 case 041:
1086 case 042:
1087 data = ins->oprs[c - 040].offset;
1088 out(offset, segment, &data, OUT_ADDRESS + 4,
1089 ins->oprs[c - 040].segment, ins->oprs[c - 040].wrt);
1090 offset += 4;
1091 break;
1093 case 044:
1094 case 045:
1095 case 046:
1096 data = ins->oprs[c - 044].offset;
1097 size = ((ins->oprs[c - 044].addr_size ?
1098 ins->oprs[c - 044].addr_size : bits) >> 3);
1099 if (size == 2 && (data < -65536L || data > 65535L))
1100 errfunc(ERR_WARNING, "word value exceeds bounds");
1101 out(offset, segment, &data, OUT_ADDRESS + size,
1102 ins->oprs[c - 044].segment, ins->oprs[c - 044].wrt);
1103 offset += size;
1104 break;
1106 case 050:
1107 case 051:
1108 case 052:
1109 if (ins->oprs[c - 050].segment != segment)
1110 errfunc(ERR_NONFATAL,
1111 "short relative jump outside segment");
1112 data = ins->oprs[c - 050].offset - insn_end;
1113 if (data > 127 || data < -128)
1114 errfunc(ERR_NONFATAL, "short jump is out of range");
1115 bytes[0] = data;
1116 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1117 offset += 1;
1118 break;
1120 case 054:
1121 case 055:
1122 case 056:
1123 data = (int64_t)ins->oprs[c - 054].offset;
1124 out(offset, segment, &data, OUT_ADDRESS + 8,
1125 ins->oprs[c - 054].segment, ins->oprs[c - 054].wrt);
1126 offset += 8;
1127 break;
1129 case 060:
1130 case 061:
1131 case 062:
1132 if (ins->oprs[c - 060].segment != segment) {
1133 data = ins->oprs[c - 060].offset;
1134 out(offset, segment, &data,
1135 OUT_REL2ADR + insn_end - offset,
1136 ins->oprs[c - 060].segment, ins->oprs[c - 060].wrt);
1137 } else {
1138 data = ins->oprs[c - 060].offset - insn_end;
1139 out(offset, segment, &data,
1140 OUT_ADDRESS + 2, NO_SEG, NO_SEG);
1142 offset += 2;
1143 break;
1145 case 064:
1146 case 065:
1147 case 066:
1148 if (ins->oprs[c - 064].type & (BITS16 | BITS32 | BITS64))
1149 size = (ins->oprs[c - 064].type & BITS16) ? 2 : 4;
1150 else
1151 size = (bits == 16) ? 2 : 4;
1152 if (ins->oprs[c - 064].segment != segment) {
1153 int32_t reltype = (size == 2 ? OUT_REL2ADR : OUT_REL4ADR);
1154 data = ins->oprs[c - 064].offset;
1155 out(offset, segment, &data, reltype + insn_end - offset,
1156 ins->oprs[c - 064].segment, ins->oprs[c - 064].wrt);
1157 } else {
1158 data = ins->oprs[c - 064].offset - insn_end;
1159 out(offset, segment, &data,
1160 OUT_ADDRESS + size, NO_SEG, NO_SEG);
1162 offset += size;
1163 break;
1165 case 070:
1166 case 071:
1167 case 072:
1168 if (ins->oprs[c - 070].segment != segment) {
1169 data = ins->oprs[c - 070].offset;
1170 out(offset, segment, &data,
1171 OUT_REL4ADR + insn_end - offset,
1172 ins->oprs[c - 070].segment, ins->oprs[c - 070].wrt);
1173 } else {
1174 data = ins->oprs[c - 070].offset - insn_end;
1175 out(offset, segment, &data,
1176 OUT_ADDRESS + 4, NO_SEG, NO_SEG);
1178 offset += 4;
1179 break;
1181 case 0130:
1182 case 0131:
1183 case 0132:
1184 data = ins->oprs[c - 0130].offset;
1185 if (is_sbyte(ins, c - 0130, 16)) {
1186 bytes[0] = data;
1187 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1188 NO_SEG);
1189 offset++;
1190 } else {
1191 if (ins->oprs[c - 0130].segment == NO_SEG &&
1192 ins->oprs[c - 0130].wrt == NO_SEG &&
1193 (data < -65536L || data > 65535L)) {
1194 errfunc(ERR_WARNING, "word value exceeds bounds");
1196 out(offset, segment, &data, OUT_ADDRESS + 2,
1197 ins->oprs[c - 0130].segment, ins->oprs[c - 0130].wrt);
1198 offset += 2;
1200 break;
1202 case 0133:
1203 case 0134:
1204 case 0135:
1205 if(ins->rex && (bits == 64)) { /* REX Supercedes all other Prefixes */
1206 ins->rex = (ins->rex&0x0F)+0x40;
1207 out(offset, segment, &ins->rex, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1208 ins->rex = 0;
1209 offset += 1;
1211 codes++;
1212 bytes[0] = *codes++;
1213 if (is_sbyte(ins, c - 0133, 16))
1214 bytes[0] |= 2; /* s-bit */
1215 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1216 offset++;
1217 break;
1219 case 0140:
1220 case 0141:
1221 case 0142:
1222 data = ins->oprs[c - 0140].offset;
1223 if (is_sbyte(ins, c - 0140, 32)) {
1224 bytes[0] = data;
1225 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1226 NO_SEG);
1227 offset++;
1228 } else {
1229 out(offset, segment, &data, OUT_ADDRESS + 4,
1230 ins->oprs[c - 0140].segment, ins->oprs[c - 0140].wrt);
1231 offset += 4;
1233 break;
1235 case 0143:
1236 case 0144:
1237 case 0145:
1238 if(ins->rex && (bits == 64)) { /* REX Supercedes all other Prefixes */
1239 ins->rex = (ins->rex&0x0F)+0x40;
1240 out(offset, segment, &ins->rex, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1241 ins->rex = 0;
1242 offset += 1;
1244 codes++;
1245 bytes[0] = *codes++;
1246 if (is_sbyte(ins, c - 0143, 32))
1247 bytes[0] |= 2; /* s-bit */
1248 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1249 offset++;
1250 break;
1252 case 0300:
1253 case 0301:
1254 case 0302:
1255 if (chsize(&ins->oprs[c - 0300], bits)) {
1256 *bytes = 0x67;
1257 out(offset, segment, bytes,
1258 OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1259 offset += 1;
1260 } else
1261 offset += 0;
1262 break;
1264 case 0310:
1265 if (bits != 16) {
1266 *bytes = 0x67;
1267 out(offset, segment, bytes,
1268 OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1269 offset += 1;
1270 } else
1271 offset += 0;
1272 break;
1274 case 0311:
1275 if (bits != 32) {
1276 *bytes = 0x67;
1277 out(offset, segment, bytes,
1278 OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1279 offset += 1;
1280 } else
1281 offset += 0;
1282 break;
1284 case 0312:
1285 break;
1287 case 0313:
1288 ins->rex = 0;
1289 break;
1291 case 0320:
1292 if (bits != 16) {
1293 *bytes = 0x66;
1294 out(offset, segment, bytes,
1295 OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1296 offset += 1;
1297 } else
1298 offset += 0;
1299 break;
1301 case 0321:
1302 if (bits == 16) {
1303 *bytes = 0x66;
1304 out(offset, segment, bytes,
1305 OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1306 offset += 1;
1307 } else
1308 offset += 0;
1309 break;
1311 case 0322:
1312 case 0323:
1313 break;
1315 case 0324:
1316 ins->rex |= 0xF8;
1317 break;
1319 case 0330:
1320 *bytes = *codes++ ^ condval[ins->condition];
1321 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1322 offset += 1;
1323 break;
1325 case 0331:
1326 case 0332:
1327 break;
1329 case 0333:
1330 *bytes = 0xF3;
1331 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1332 offset += 1;
1333 break;
1335 case 0340:
1336 case 0341:
1337 case 0342:
1338 if (ins->oprs[0].segment != NO_SEG)
1339 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1340 else {
1341 int32_t size = ins->oprs[0].offset << (c - 0340);
1342 if (size > 0)
1343 out(offset, segment, NULL,
1344 OUT_RESERVE + size, NO_SEG, NO_SEG);
1345 offset += size;
1347 break;
1349 case 0370:
1350 case 0371:
1351 case 0372:
1352 break;
1354 case 0373:
1355 *bytes = bits == 16 ? 3 : 5;
1356 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1357 offset += 1;
1358 break;
1360 default: /* can't do it by 'case' statements */
1361 if ( c >= 0100 && c <= 0277) { /* it's an EA */
1362 ea ea_data;
1363 int rfield;
1364 uint8_t *p;
1365 int32_t s;
1367 if (c <= 0177) /* pick rfield from operand b */
1368 rfield = regval(&ins->oprs[c & 7]);
1369 else /* rfield is constant */
1370 rfield = c & 7;
1372 if (!process_ea
1373 (&ins->oprs[(c >> 3) & 7], &ea_data, bits, rfield,
1374 ins->forw_ref)) {
1375 errfunc(ERR_NONFATAL, "invalid effective address");
1378 p = bytes;
1379 *p++ = ea_data.modrm;
1380 if (ea_data.sib_present)
1381 *p++ = ea_data.sib;
1383 s = p - bytes;
1384 out(offset, segment, bytes, OUT_RAWDATA + s,
1385 NO_SEG, NO_SEG);
1387 switch (ea_data.bytes) {
1388 case 0:
1389 break;
1390 case 1:
1391 if (ins->oprs[(c >> 3) & 7].segment != NO_SEG) {
1392 data = ins->oprs[(c >> 3) & 7].offset;
1393 out(offset, segment, &data, OUT_ADDRESS + 1,
1394 ins->oprs[(c >> 3) & 7].segment,
1395 ins->oprs[(c >> 3) & 7].wrt);
1396 } else {
1397 *bytes = ins->oprs[(c >> 3) & 7].offset;
1398 out(offset, segment, bytes, OUT_RAWDATA + 1,
1399 NO_SEG, NO_SEG);
1401 s++;
1402 break;
1403 case 8:
1404 case 2:
1405 case 4:
1406 data = ins->oprs[(c >> 3) & 7].offset;
1407 out(offset, segment, &data, /* RIP = Relative, not Absolute */
1408 (ea_data.rip ? OUT_REL4ADR : OUT_ADDRESS) + ea_data.bytes,
1409 ins->oprs[(c >> 3) & 7].segment,
1410 ins->oprs[(c >> 3) & 7].wrt);
1411 s += ea_data.bytes;
1412 break;
1414 offset += s;
1415 } else
1416 errfunc(ERR_PANIC, "internal instruction table corrupt"
1417 ": instruction code 0x%02X given", c);
1421 static int regval(operand * o)
1423 if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1424 errfunc(ERR_PANIC, "invalid operand passed to regval()");
1426 return regvals[o->basereg];
1429 static int matches(struct itemplate *itemp, insn * instruction, int bits)
1431 int i, b, x, size[3], asize, oprs, ret;
1433 ret = 100;
1436 * Check the opcode
1438 if (itemp->opcode != instruction->opcode)
1439 return 0;
1442 * Count the operands
1444 if (itemp->operands != instruction->operands)
1445 return 0;
1448 * Check that no spurious colons or TOs are present
1450 for (i = 0; i < itemp->operands; i++)
1451 if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1452 return 0;
1455 * Check that the operand flags all match up
1457 for (i = 0; i < itemp->operands; i++)
1458 if (itemp->opd[i] & ~instruction->oprs[i].type ||
1459 ((itemp->opd[i] & SIZE_MASK) &&
1460 ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) {
1461 if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) ||
1462 (instruction->oprs[i].type & SIZE_MASK))
1463 return 0;
1464 else
1465 return 1;
1469 * Check operand sizes
1471 if (itemp->flags & IF_ARMASK) {
1472 size[0] = size[1] = size[2] = 0;
1474 switch (itemp->flags & IF_ARMASK) {
1475 case IF_AR0:
1476 i = 0;
1477 break;
1478 case IF_AR1:
1479 i = 1;
1480 break;
1481 case IF_AR2:
1482 i = 2;
1483 break;
1484 default:
1485 break; /* Shouldn't happen */
1487 if (itemp->flags & IF_SB) {
1488 size[i] = BITS8;
1489 } else if (itemp->flags & IF_SW) {
1490 size[i] = BITS16;
1491 } else if (itemp->flags & IF_SD) {
1492 size[i] = BITS32;
1493 } else if (itemp->flags & IF_SQ) {
1494 if (bits != 64)
1495 return 2;
1496 size[i] = BITS64;
1498 } else {
1499 asize = 0;
1500 if (itemp->flags & IF_SB) {
1501 asize = BITS8;
1502 oprs = itemp->operands;
1503 } else if (itemp->flags & IF_SW) {
1504 asize = BITS16;
1505 oprs = itemp->operands;
1506 } else if (itemp->flags & IF_SD) {
1507 asize = BITS32;
1508 oprs = itemp->operands;
1509 } else if (itemp->flags & IF_SQ) {
1510 if (bits != 64)
1511 return 2;
1512 asize = BITS64;
1513 oprs = itemp->operands;
1515 size[0] = size[1] = size[2] = asize;
1518 if (itemp->flags & (IF_SM | IF_SM2)) {
1519 oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
1520 asize = 0;
1521 for (i = 0; i < oprs; i++) {
1522 if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
1523 int j;
1524 for (j = 0; j < oprs; j++)
1525 size[j] = asize;
1526 break;
1529 } else {
1530 oprs = itemp->operands;
1533 for (i = 0; i < itemp->operands; i++) {
1534 if (!(itemp->opd[i] & SIZE_MASK) &&
1535 (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
1536 return 2;
1538 if ( (((itemp->opd[i] & SIZE_MASK) == BITS64) ||
1539 ((instruction->oprs[i].type & SIZE_MASK) == BITS64))
1540 && bits != 64)
1541 return 2;
1543 x = instruction->oprs[i].indexreg;
1544 b = instruction->oprs[i].basereg;
1546 if (x != -1 && x >= EXPR_REG_START && x < REG_ENUM_LIMIT)
1547 x = regvals[x];
1548 if (b != -1 && b >= EXPR_REG_START && b < REG_ENUM_LIMIT)
1549 b = regvals[b];
1551 if (((b >= 0400 && b <= 0500) || (x >= 0400 && x < 0500)) && bits != 64)
1552 return 2;
1556 * Check template is okay at the set cpu level
1558 if (((itemp->flags & IF_PLEVEL) > cpu))
1559 return 3;
1562 * Check if instruction is available in long mode
1564 if ((itemp->flags & IF_NOLONG) && (bits == 64))
1565 return 4;
1568 * Check if special handling needed for Jumps
1570 if ((uint8_t)(itemp->code[0]) >= 0370)
1571 return 99;
1573 return ret;
1576 static ea *process_ea(operand * input, ea * output, int addrbits,
1577 int rfield, int forw_ref)
1580 int rip = FALSE; /* Used for RIP-relative addressing */
1583 if (!(REGISTER & ~input->type)) { /* register direct */
1584 int i;
1585 if ( input->basereg < EXPR_REG_START /* Verify as Register */
1586 || input->basereg >= REG_ENUM_LIMIT)
1587 return NULL;
1588 i = regvals[input->basereg];
1589 if ( i >= 0100 && i < 0210) /* GPR's, MMX & XMM only */
1590 return NULL;
1592 if (i >= 0400 && i < 0500) { /* Calculate REX.B */
1593 if (i < 0410 || (i >= 0440 && i < 0450))
1594 output->rex |= 0xF0; /* Set REX.0 */
1595 else
1596 output->rex |= 0xF1; /* Set REX.B */
1597 if (i >= 0440)
1598 output->rex |= 0xF8; /* Set REX.W */
1601 if ((rfield >= 0400 && rfield < 0500) || /* Calculate REX.R */
1602 (rfield >= 0120 && rfield < 0200 && /* Include CR/DR/TR... */
1603 !(rfield & 0010))) { /* ... extensions, only */
1604 if ((rfield >= 0400 && rfield < 0410) || (rfield >= 0440 && rfield < 0450))
1605 output->rex |= 0xF0; /* Set REX.0 */
1606 else
1607 output->rex |= 0xF4; /* Set REX.R */
1608 if (rfield >= 0440)
1609 output->rex |= 0xF8; /* Set REX.W */
1612 output->sib_present = FALSE; /* no SIB necessary */
1613 output->bytes = 0; /* no offset necessary either */
1614 output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
1615 } else { /* it's a memory reference */
1617 if (input->basereg == -1
1618 && (input->indexreg == -1 || input->scale == 0)) {
1619 /* it's a pure offset */
1620 if (input->addr_size)
1621 addrbits = input->addr_size;
1623 if (rfield >= 0400 && rfield < 0500) { /* Calculate REX.R */
1624 if (rfield < 0410 || (rfield >= 0440 && rfield < 0450))
1625 output->rex |= 0xF0; /* Set REX.0 */
1626 else
1627 output->rex |= 0xF4; /* Set REX.R */
1628 if (rfield >= 0440)
1629 output->rex |= 0xF8; /* Set REX.W */
1632 output->sib_present = FALSE;
1633 output->bytes = (addrbits != 16 ? 4 : 2);
1634 output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
1635 } else { /* it's an indirection */
1636 int i = input->indexreg, b = input->basereg, s = input->scale;
1637 int32_t o = input->offset, seg = input->segment;
1638 int hb = input->hintbase, ht = input->hinttype;
1639 int t;
1640 int it, bt;
1642 if (s == 0)
1643 i = -1; /* make this easy, at least */
1645 if (i != -1 && i >= EXPR_REG_START
1646 && i < REG_ENUM_LIMIT)
1647 it = regvals[i];
1648 else
1649 it = -1;
1651 if (b != -1 && b >= EXPR_REG_START
1652 && b < REG_ENUM_LIMIT)
1653 bt = regvals[b];
1654 else
1655 bt = -1;
1657 /* check for a 32/64-bit memory reference... */
1658 if ((it >= 0020 && it < 0030) || (it >= 0430 && it < 0460) ||
1659 (bt >= 0020 && bt < 0030) || (bt >= 0430 && bt < 0460) ||
1660 bt == 0500) {
1661 /* it must be a 32/64-bit memory reference. Firstly we have
1662 * to check that all registers involved are type E/Rxx. */
1663 t = 1;
1664 if (it != -1) {
1665 if (it < 0020 || (it >= 0030 && it < 0430) || it >= 0460)
1666 return NULL;
1667 if (it >= 0440)
1668 t = 2;
1669 else
1670 t = 0;
1673 if (bt != -1) {
1674 if (bt < 0020 || (bt >= 0030 && bt < 0430) || (bt >= 0460 && bt < 0500))
1675 return NULL;
1676 if (bt == 0500) {
1677 bt = b = -1;
1678 rip = TRUE;
1679 } else if (bt >= 0440) {
1680 if (t < 1)
1681 return NULL;
1682 } else {
1683 if (t > 1)
1684 return NULL;
1688 /* While we're here, ensure the user didn't specify WORD. */
1689 if (input->addr_size == 16)
1690 return NULL;
1692 /* now reorganize base/index */
1693 if (s == 1 && bt != it && bt != -1 && it != -1 &&
1694 ((hb == bt && ht == EAH_NOTBASE)
1695 || (hb == it && ht == EAH_MAKEBASE)))
1696 t = bt, bt = it, it = t; /* swap if hints say so */
1697 if (bt == it) /* convert EAX+2*EAX to 3*EAX */
1698 bt = -1, s++;
1699 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE))
1700 bt = i, it = -1; /* make single reg base, unless hint */
1701 if (((s == 2 && (it & 7) != (REG_NUM_ESP & 7)
1702 && !(input->eaflags & EAF_TIMESTWO)) || s == 3
1703 || s == 5 || s == 9) && bt == -1)
1704 bt = it, s--; /* convert 3*EAX to EAX+2*EAX */
1705 if (it == -1 && (bt & 7) != (REG_NUM_ESP & 7)
1706 && (input->eaflags & EAF_TIMESTWO))
1707 it = bt, bt = -1, s = 1;
1708 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
1709 if (s == 1 && (it & 7) == (REG_NUM_ESP & 7)) /* swap ESP into base if scale is 1 */
1710 t = it, it = bt, bt = t;
1711 if ((it & 7) == (REG_NUM_ESP & 7)
1712 || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
1713 return NULL; /* wrong, for various reasons */
1715 if (i >= 0400 && i < 0500) { /* Calculate REX.X */
1716 if (i < 0410 || (i >= 0440 && i < 0450))
1717 output->rex |= 0xF0; /* Set REX.0 */
1718 else
1719 output->rex |= 0xF2; /* Set REX.X */
1720 if (i >= 0440)
1721 output->rex |= 0xF8; /* Set REX.W */
1724 if (b >= 0400 && b < 0500) { /* Calculate REX.B */
1725 if (b < 0410 || (b >= 0440 && b < 0450))
1726 output->rex |= 0xF0; /* Set REX.0 */
1727 else
1728 output->rex |= 0xF1; /* Set REX.B */
1729 if (b >= 0440)
1730 output->rex |= 0xF8; /* Set REX.W */
1733 if (rfield >= 0400 && rfield < 0500) { /* Calculate REX.R */
1734 if (rfield < 0410 || (rfield >= 0440 && rfield < 0450))
1735 output->rex |= 0xF0; /* Set REX.0 */
1736 else
1737 output->rex |= 0xF4; /* Set REX.R */
1738 if (rfield >= 0440)
1739 output->rex |= 0xF8; /* Set REX.W */
1742 if (it == -1 && (bt & 7) != (REG_NUM_ESP & 7)) { /* no SIB needed */
1743 int mod, rm;
1745 if (bt == -1) {
1746 rm = 5;
1747 mod = 0;
1748 } else {
1749 rm = (bt & 7);
1750 if (rm != (REG_NUM_EBP & 7) && o == 0 &&
1751 seg == NO_SEG && !forw_ref &&
1752 !(input->eaflags &
1753 (EAF_BYTEOFFS | EAF_WORDOFFS)))
1754 mod = 0;
1755 else if (input->eaflags & EAF_BYTEOFFS ||
1756 (o >= -128 && o <= 127 && seg == NO_SEG
1757 && !forw_ref
1758 && !(input->eaflags & EAF_WORDOFFS)))
1759 mod = 1;
1760 else
1761 mod = 2;
1764 output->sib_present = FALSE;
1765 output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
1766 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
1767 } else { /* we need a SIB */
1768 int mod, scale, index, base;
1770 if (it == -1)
1771 index = 4, s = 1;
1772 else
1773 index = (it & 7);
1775 switch (s) {
1776 case 1:
1777 scale = 0;
1778 break;
1779 case 2:
1780 scale = 1;
1781 break;
1782 case 4:
1783 scale = 2;
1784 break;
1785 case 8:
1786 scale = 3;
1787 break;
1788 default: /* then what the smeg is it? */
1789 return NULL; /* panic */
1792 if (bt == -1) {
1793 base = 5;
1794 mod = 0;
1795 } else {
1796 base = (bt & 7);
1797 if (base != (REG_NUM_EBP & 7) && o == 0 &&
1798 seg == NO_SEG && !forw_ref &&
1799 !(input->eaflags &
1800 (EAF_BYTEOFFS | EAF_WORDOFFS)))
1801 mod = 0;
1802 else if (input->eaflags & EAF_BYTEOFFS ||
1803 (o >= -128 && o <= 127 && seg == NO_SEG
1804 && !forw_ref
1805 && !(input->eaflags & EAF_WORDOFFS)))
1806 mod = 1;
1807 else
1808 mod = 2;
1811 output->sib_present = TRUE;
1812 output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
1813 output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
1814 output->sib = (scale << 6) | (index << 3) | base;
1816 } else { /* it's 16-bit */
1817 int mod, rm;
1819 /* check for 64-bit long mode */
1820 if (addrbits == 64)
1821 return NULL;
1823 /* check all registers are BX, BP, SI or DI */
1824 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
1825 && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
1826 && i != R_SI && i != R_DI))
1827 return NULL;
1829 /* ensure the user didn't specify DWORD/QWORD */
1830 if (input->addr_size == 32 || input->addr_size == 64)
1831 return NULL;
1833 if (s != 1 && i != -1)
1834 return NULL; /* no can do, in 16-bit EA */
1835 if (b == -1 && i != -1) {
1836 int tmp = b;
1837 b = i;
1838 i = tmp;
1839 } /* swap */
1840 if ((b == R_SI || b == R_DI) && i != -1) {
1841 int tmp = b;
1842 b = i;
1843 i = tmp;
1845 /* have BX/BP as base, SI/DI index */
1846 if (b == i)
1847 return NULL; /* shouldn't ever happen, in theory */
1848 if (i != -1 && b != -1 &&
1849 (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
1850 return NULL; /* invalid combinations */
1851 if (b == -1) /* pure offset: handled above */
1852 return NULL; /* so if it gets to here, panic! */
1854 rm = -1;
1855 if (i != -1)
1856 switch (i * 256 + b) {
1857 case R_SI * 256 + R_BX:
1858 rm = 0;
1859 break;
1860 case R_DI * 256 + R_BX:
1861 rm = 1;
1862 break;
1863 case R_SI * 256 + R_BP:
1864 rm = 2;
1865 break;
1866 case R_DI * 256 + R_BP:
1867 rm = 3;
1868 break;
1869 } else
1870 switch (b) {
1871 case R_SI:
1872 rm = 4;
1873 break;
1874 case R_DI:
1875 rm = 5;
1876 break;
1877 case R_BP:
1878 rm = 6;
1879 break;
1880 case R_BX:
1881 rm = 7;
1882 break;
1884 if (rm == -1) /* can't happen, in theory */
1885 return NULL; /* so panic if it does */
1887 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
1888 !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
1889 mod = 0;
1890 else if (input->eaflags & EAF_BYTEOFFS ||
1891 (o >= -128 && o <= 127 && seg == NO_SEG
1892 && !forw_ref
1893 && !(input->eaflags & EAF_WORDOFFS)))
1894 mod = 1;
1895 else
1896 mod = 2;
1898 output->sib_present = FALSE; /* no SIB - it's 16-bit */
1899 output->bytes = mod; /* bytes of offset needed */
1900 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
1905 /* Process RIP-relative Addressing */
1906 if (rip) {
1907 if ((output->modrm & 0xC7) != 0x05)
1908 return NULL;
1909 output->rip = TRUE;
1910 } else {
1911 output->rip = FALSE;
1912 if (globalbits == 64 && /* Actual Disp32 needs blank SIB on x64 */
1913 !(output->sib_present) && ((output->modrm & 0xC7) == 0x05)) {
1914 output->sib_present = TRUE;
1915 output->modrm --; /* RM Field = 4 (forward to Base of SIB) */
1916 output->sib = (4 << 3) | 5; /* Index = 4 (none), Base = 5 */
1919 output->size = 1 + output->sib_present + output->bytes;
1920 return output;
1923 static int chsize(operand * input, int addrbits)
1925 if (!(MEMORY & ~input->type)) {
1926 int i, b;
1928 if ( input->indexreg < EXPR_REG_START /* Verify as Register */
1929 || input->indexreg >= REG_ENUM_LIMIT)
1930 i = -1;
1931 else
1932 i = regvals[input->indexreg];
1934 if ( input->basereg < EXPR_REG_START /* Verify as Register */
1935 || input->basereg >= REG_ENUM_LIMIT)
1936 b = -1;
1937 else
1938 b = regvals[input->basereg];
1940 if (input->scale == 0)
1941 i = -1;
1943 if (i == -1 && b == -1) /* pure offset */
1944 return (input->addr_size != 0 && input->addr_size != addrbits);
1946 if ((i >= 0020 && i < 0030) || (i >= 0430 && i < 0440) ||
1947 (b >= 0020 && b < 0030) || (b >= 0430 && b < 0440))
1948 return (addrbits != 32);
1949 else
1950 return (addrbits == 32);
1951 } else
1952 return 0;