Exhaustive test for 8-bit floating point values
[nasm.git] / assemble.c
blob9ef917327e13d7ebd7cf8397a62566eba300fcdc
1 /* assemble.c code generation for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * the actual codes (C syntax, i.e. octal):
9 * \0 - terminates the code. (Unless it's a literal of course.)
10 * \1, \2, \3 - that many literal bytes follow in the code stream
11 * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS
12 * (POP is never used for CS) depending on operand 0
13 * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending
14 * on operand 0
15 * \10..\13 - a literal byte follows in the code stream, to be added
16 * to the register value of operand 0..3
17 * \14..\17 - a signed byte immediate operand, from operand 0..3
18 * \20..\23 - a byte immediate operand, from operand 0..3
19 * \24..\27 - an unsigned byte immediate operand, from operand 0..3
20 * \30..\33 - a word immediate operand, from operand 0..3
21 * \34..\37 - select between \3[0-3] and \4[0-3] depending on 16/32 bit
22 * assembly mode or the operand-size override on the operand
23 * \40..\43 - a long immediate operand, from operand 0..3
24 * \44..\47 - select between \3[0-3], \4[0-3] and \5[4-7]
25 * depending on the address size of the instruction.
26 * \50..\53 - a byte relative operand, from operand 0..3
27 * \54..\57 - a qword immediate operand, from operand 0..3
28 * \60..\63 - a word relative operand, from operand 0..3
29 * \64..\67 - select between \6[0-3] and \7[0-3] depending on 16/32 bit
30 * assembly mode or the operand-size override on the operand
31 * \70..\73 - a long relative operand, from operand 0..3
32 * \74..\77 - a word constant, from the _segment_ part of operand 0..3
33 * \1ab - a ModRM, calculated on EA in operand a, with the spare
34 * field the register value of operand b.
35 * \140..\143 - an immediate word or signed byte for operand 0..3
36 * \144..\147 - or 2 (s-field) into next opcode byte if operand 0..3
37 * is a signed byte rather than a word.
38 * \150..\153 - an immediate dword or signed byte for operand 0..3
39 * \154..\157 - or 2 (s-field) into next opcode byte if operand 0..3
40 * is a signed byte rather than a dword.
41 * \160..\163 - this instruction uses DREX rather than REX, with the
42 * OC0 field set to 0, and the dest field taken from
43 * operand 0..3.
44 * \164..\167 - this instruction uses DREX rather than REX, with the
45 * OC0 field set to 1, and the dest field taken from
46 * operand 0..3.
47 * \170 - encodes the literal byte 0. (Some compilers don't take
48 * kindly to a zero byte in the _middle_ of a compile time
49 * string constant, so I had to put this hack in.)
50 * \171 - placement of DREX suffix in the absence of an EA
51 * \2ab - a ModRM, calculated on EA in operand a, with the spare
52 * field equal to digit b.
53 * \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
54 * \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
55 * \312 - (disassembler only) marker on LOOP, LOOPxx instructions.
56 * \313 - indicates fixed 64-bit address size, 0x67 invalid.
57 * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
58 * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
59 * \322 - indicates that this instruction is only valid when the
60 * operand size is the default (instruction to disassembler,
61 * generates no code in the assembler)
62 * \323 - indicates fixed 64-bit operand size, REX on extensions only.
63 * \324 - indicates 64-bit operand size requiring REX prefix.
64 * \330 - a literal byte follows in the code stream, to be added
65 * to the condition code value of the instruction.
66 * \331 - instruction not valid with REP prefix. Hint for
67 * disassembler only; for SSE instructions.
68 * \332 - REP prefix (0xF2 byte) used as opcode extension.
69 * \333 - REP prefix (0xF3 byte) used as opcode extension.
70 * \334 - LOCK prefix used instead of REX.R
71 * \335 - disassemble a rep (0xF3 byte) prefix as repe not rep.
72 * \340 - reserve <operand 0> bytes of uninitialized storage.
73 * Operand 0 had better be a segmentless constant.
74 * \364 - operand-size prefix (0x66) not permitted
75 * \365 - address-size prefix (0x67) not permitted
76 * \366 - operand-size prefix (0x66) used as opcode extension
77 * \367 - address-size prefix (0x67) used as opcode extension
78 * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
79 * 370 is used for Jcc, 371 is used for JMP.
80 * \373 - assemble 0x03 if bits==16, 0x05 if bits==32;
81 * used for conditional jump over longer jump
84 #include "compiler.h"
86 #include <stdio.h>
87 #include <string.h>
88 #include <inttypes.h>
90 #include "nasm.h"
91 #include "nasmlib.h"
92 #include "assemble.h"
93 #include "insns.h"
94 #include "preproc.h"
95 #include "regflags.c"
96 #include "regvals.c"
/*
 * Encoded form of an effective address, as filled in by process_ea().
 */
typedef struct {
    int sib_present;            /* is a SIB byte necessary? */
    int bytes;                  /* # of bytes of offset needed */
    int size;                   /* lazy - this is sib+bytes+1 */
    uint8_t modrm;              /* the bytes themselves */
    uint8_t sib;
    uint8_t rex;
    uint8_t rip;
} ea;
105 static uint32_t cpu; /* cpu level received from nasm.c */
106 static efunc errfunc;
107 static struct ofmt *outfmt;
108 static ListGen *list;
110 static int32_t calcsize(int32_t, int32_t, int, insn *, const char *);
111 static void gencode(int32_t, int32_t, int, insn *, const char *, int32_t);
112 static int matches(const struct itemplate *, insn *, int bits);
113 static int32_t regflag(const operand *);
114 static int32_t regval(const operand *);
115 static int rexflags(int, int32_t, int);
116 static int op_rexflags(const operand *, int);
117 static ea *process_ea(operand *, ea *, int, int, int, int32_t, int);
118 static void add_asp(insn *, int);
120 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
122 return ins->prefixes[pos] == prefix;
125 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
127 if (ins->prefixes[pos])
128 errfunc(ERR_NONFATAL, "invalid %s prefix",
129 prefix_name(ins->prefixes[pos]));
/*
 * Map an operand size in bytes to its assembler keyword.
 * Returns "???" for any size that has no keyword.
 */
static const char *size_name(int size)
{
    static const struct {
        int bytes;
        const char *keyword;
    } known[] = {
        {  1, "byte"  },
        {  2, "word"  },
        {  4, "dword" },
        {  8, "qword" },
        { 10, "tword" },
        { 16, "oword" },
    };
    unsigned int i;

    for (i = 0; i < sizeof known / sizeof known[0]; i++) {
        if (known[i].bytes == size)
            return known[i].keyword;
    }
    return "???";
}
152 static void warn_overflow(int size, int64_t data)
154 if (size < 8) {
155 int64_t lim = (1 << (size*8))-1;
157 if (data < ~lim || data > lim)
158 errfunc(ERR_WARNING, "%s data exceeds bounds", size_name(size));
162 * This routine wrappers the real output format's output routine,
163 * in order to pass a copy of the data off to the listing file
164 * generator at the same time.
166 static void out(int32_t offset, int32_t segto, const void *data,
167 uint32_t type, int32_t segment, int32_t wrt)
169 static int32_t lineno = 0; /* static!!! */
170 static char *lnfname = NULL;
172 if ((type & OUT_TYPMASK) == OUT_ADDRESS) {
173 if (segment != NO_SEG || wrt != NO_SEG) {
175 * This address is relocated. We must write it as
176 * OUT_ADDRESS, so there's no work to be done here.
178 list->output(offset, data, type);
179 } else {
180 uint8_t p[8], *q = p;
182 * This is a non-relocated address, and we're going to
183 * convert it into RAWDATA format.
185 if ((type & OUT_SIZMASK) == 4) {
186 WRITELONG(q, *(int32_t *)data);
187 list->output(offset, p, OUT_RAWDATA + 4);
188 } else if ((type & OUT_SIZMASK) == 8) {
189 WRITEDLONG(q, *(int64_t *)data);
190 list->output(offset, p, OUT_RAWDATA + 8);
191 } else {
192 WRITESHORT(q, *(int32_t *)data);
193 list->output(offset, p, OUT_RAWDATA + 2);
196 } else if ((type & OUT_TYPMASK) == OUT_RAWDATA) {
197 list->output(offset, data, type);
198 } else if ((type & OUT_TYPMASK) == OUT_RESERVE) {
199 list->output(offset, NULL, type);
200 } else if ((type & OUT_TYPMASK) == OUT_REL2ADR ||
201 (type & OUT_TYPMASK) == OUT_REL4ADR) {
202 list->output(offset, data, type);
206 * this call to src_get determines when we call the
207 * debug-format-specific "linenum" function
208 * it updates lineno and lnfname to the current values
209 * returning 0 if "same as last time", -2 if lnfname
210 * changed, and the amount by which lineno changed,
211 * if it did. thus, these variables must be static
214 if (src_get(&lineno, &lnfname)) {
215 outfmt->current_dfmt->linenum(lnfname, lineno, segto);
218 outfmt->output(segto, data, type, segment, wrt);
221 static int jmp_match(int32_t segment, int32_t offset, int bits,
222 insn * ins, const char *code)
224 int32_t isize;
225 uint8_t c = code[0];
227 if (c != 0370 && c != 0371)
228 return 0;
229 if (ins->oprs[0].opflags & OPFLAG_FORWARD) {
230 if ((optimizing < 0 || (ins->oprs[0].type & STRICT))
231 && c == 0370)
232 return 1;
233 else
234 return (pass0 == 0); /* match a forward reference */
236 isize = calcsize(segment, offset, bits, ins, code);
237 if (ins->oprs[0].segment != segment)
238 return 0;
239 isize = ins->oprs[0].offset - offset - isize; /* isize is now the delta */
240 if (isize >= -128L && isize <= 127L)
241 return 1; /* it is byte size */
243 return 0;
246 int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp,
247 insn * instruction, struct ofmt *output, efunc error,
248 ListGen * listgen)
250 const struct itemplate *temp;
251 int j;
252 int size_prob;
253 int32_t insn_end;
254 int32_t itimes;
255 int32_t start = offset;
256 int32_t wsize = 0; /* size for DB etc. */
258 errfunc = error; /* to pass to other functions */
259 cpu = cp;
260 outfmt = output; /* likewise */
261 list = listgen; /* and again */
263 switch (instruction->opcode) {
264 case -1:
265 return 0;
266 case I_DB:
267 wsize = 1;
268 break;
269 case I_DW:
270 wsize = 2;
271 break;
272 case I_DD:
273 wsize = 4;
274 break;
275 case I_DQ:
276 wsize = 8;
277 break;
278 case I_DT:
279 wsize = 10;
280 break;
281 case I_DO:
282 wsize = 16;
283 break;
284 default:
285 break;
288 if (wsize) {
289 extop *e;
290 int32_t t = instruction->times;
291 if (t < 0)
292 errfunc(ERR_PANIC,
293 "instruction->times < 0 (%ld) in assemble()", t);
295 while (t--) { /* repeat TIMES times */
296 for (e = instruction->eops; e; e = e->next) {
297 if (e->type == EOT_DB_NUMBER) {
298 if (wsize == 1) {
299 if (e->segment != NO_SEG)
300 errfunc(ERR_NONFATAL,
301 "one-byte relocation attempted");
302 else {
303 uint8_t out_byte = e->offset;
304 out(offset, segment, &out_byte,
305 OUT_RAWDATA + 1, NO_SEG, NO_SEG);
307 } else if (wsize > 8) {
308 errfunc(ERR_NONFATAL, "integer supplied to a DT or DO"
309 " instruction");
310 } else
311 out(offset, segment, &e->offset,
312 OUT_ADDRESS + wsize, e->segment, e->wrt);
313 offset += wsize;
314 } else if (e->type == EOT_DB_STRING) {
315 int align;
317 out(offset, segment, e->stringval,
318 OUT_RAWDATA + e->stringlen, NO_SEG, NO_SEG);
319 align = e->stringlen % wsize;
321 if (align) {
322 align = wsize - align;
323 out(offset, segment, "\0\0\0\0\0\0\0\0",
324 OUT_RAWDATA + align, NO_SEG, NO_SEG);
326 offset += e->stringlen + align;
329 if (t > 0 && t == instruction->times - 1) {
331 * Dummy call to list->output to give the offset to the
332 * listing module.
334 list->output(offset, NULL, OUT_RAWDATA);
335 list->uplevel(LIST_TIMES);
338 if (instruction->times > 1)
339 list->downlevel(LIST_TIMES);
340 return offset - start;
343 if (instruction->opcode == I_INCBIN) {
344 static char fname[FILENAME_MAX];
345 FILE *fp;
346 int32_t len;
347 char *prefix = "", *combine;
348 char **pPrevPath = NULL;
350 len = FILENAME_MAX - 1;
351 if (len > instruction->eops->stringlen)
352 len = instruction->eops->stringlen;
353 strncpy(fname, instruction->eops->stringval, len);
354 fname[len] = '\0';
356 while (1) { /* added by alexfru: 'incbin' uses include paths */
357 combine = nasm_malloc(strlen(prefix) + len + 1);
358 strcpy(combine, prefix);
359 strcat(combine, fname);
361 if ((fp = fopen(combine, "rb")) != NULL) {
362 nasm_free(combine);
363 break;
366 nasm_free(combine);
367 pPrevPath = pp_get_include_path_ptr(pPrevPath);
368 if (pPrevPath == NULL)
369 break;
370 prefix = *pPrevPath;
373 if (fp == NULL)
374 error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
375 fname);
376 else if (fseek(fp, 0L, SEEK_END) < 0)
377 error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
378 fname);
379 else {
380 static char buf[2048];
381 int32_t t = instruction->times;
382 int32_t base = 0;
384 len = ftell(fp);
385 if (instruction->eops->next) {
386 base = instruction->eops->next->offset;
387 len -= base;
388 if (instruction->eops->next->next &&
389 len > instruction->eops->next->next->offset)
390 len = instruction->eops->next->next->offset;
393 * Dummy call to list->output to give the offset to the
394 * listing module.
396 list->output(offset, NULL, OUT_RAWDATA);
397 list->uplevel(LIST_INCBIN);
398 while (t--) {
399 int32_t l;
401 fseek(fp, base, SEEK_SET);
402 l = len;
403 while (l > 0) {
404 int32_t m =
405 fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
406 fp);
407 if (!m) {
409 * This shouldn't happen unless the file
410 * actually changes while we are reading
411 * it.
413 error(ERR_NONFATAL,
414 "`incbin': unexpected EOF while"
415 " reading file `%s'", fname);
416 t = 0; /* Try to exit cleanly */
417 break;
419 out(offset, segment, buf, OUT_RAWDATA + m,
420 NO_SEG, NO_SEG);
421 l -= m;
424 list->downlevel(LIST_INCBIN);
425 if (instruction->times > 1) {
427 * Dummy call to list->output to give the offset to the
428 * listing module.
430 list->output(offset, NULL, OUT_RAWDATA);
431 list->uplevel(LIST_TIMES);
432 list->downlevel(LIST_TIMES);
434 fclose(fp);
435 return instruction->times * len;
437 return 0; /* if we're here, there's an error */
440 /* Check to see if we need an address-size prefix */
441 add_asp(instruction, bits);
443 size_prob = false;
445 for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
446 int m = matches(temp, instruction, bits);
448 if (m == 99)
449 m += jmp_match(segment, offset, bits, instruction, temp->code);
451 if (m == 100) { /* matches! */
452 const char *codes = temp->code;
453 int32_t insn_size = calcsize(segment, offset, bits,
454 instruction, codes);
455 itimes = instruction->times;
456 if (insn_size < 0) /* shouldn't be, on pass two */
457 error(ERR_PANIC, "errors made it through from pass one");
458 else
459 while (itimes--) {
460 for (j = 0; j < MAXPREFIX; j++) {
461 uint8_t c = 0;
462 switch (instruction->prefixes[j]) {
463 case P_LOCK:
464 c = 0xF0;
465 break;
466 case P_REPNE:
467 case P_REPNZ:
468 c = 0xF2;
469 break;
470 case P_REPE:
471 case P_REPZ:
472 case P_REP:
473 c = 0xF3;
474 break;
475 case R_CS:
476 if (bits == 64) {
477 error(ERR_WARNING,
478 "cs segment base ignored in 64-bit mode");
480 c = 0x2E;
481 break;
482 case R_DS:
483 if (bits == 64) {
484 error(ERR_WARNING,
485 "ds segment base ignored in 64-bit mode");
487 c = 0x3E;
488 break;
489 case R_ES:
490 if (bits == 64) {
491 error(ERR_WARNING,
492 "es segment base ignored in 64-bit mode");
494 c = 0x26;
495 break;
496 case R_FS:
497 c = 0x64;
498 break;
499 case R_GS:
500 c = 0x65;
501 break;
502 case R_SS:
503 if (bits == 64) {
504 error(ERR_WARNING,
505 "ss segment base ignored in 64-bit mode");
507 c = 0x36;
508 break;
509 case R_SEGR6:
510 case R_SEGR7:
511 error(ERR_NONFATAL,
512 "segr6 and segr7 cannot be used as prefixes");
513 break;
514 case P_A16:
515 if (bits == 64) {
516 error(ERR_NONFATAL,
517 "16-bit addressing is not supported "
518 "in 64-bit mode");
519 } else if (bits != 16)
520 c = 0x67;
521 break;
522 case P_A32:
523 if (bits != 32)
524 c = 0x67;
525 break;
526 case P_A64:
527 if (bits != 64) {
528 error(ERR_NONFATAL,
529 "64-bit addressing is only supported "
530 "in 64-bit mode");
532 break;
533 case P_ASP:
534 c = 0x67;
535 break;
536 case P_O16:
537 if (bits != 16)
538 c = 0x66;
539 break;
540 case P_O32:
541 if (bits == 16)
542 c = 0x66;
543 break;
544 case P_O64:
545 /* REX.W */
546 break;
547 case P_OSP:
548 c = 0x66;
549 break;
550 case P_none:
551 break;
552 default:
553 error(ERR_PANIC, "invalid instruction prefix");
555 if (c != 0) {
556 out(offset, segment, &c, OUT_RAWDATA + 1,
557 NO_SEG, NO_SEG);
558 offset++;
561 insn_end = offset + insn_size;
562 gencode(segment, offset, bits, instruction, codes,
563 insn_end);
564 offset += insn_size;
565 if (itimes > 0 && itimes == instruction->times - 1) {
567 * Dummy call to list->output to give the offset to the
568 * listing module.
570 list->output(offset, NULL, OUT_RAWDATA);
571 list->uplevel(LIST_TIMES);
574 if (instruction->times > 1)
575 list->downlevel(LIST_TIMES);
576 return offset - start;
577 } else if (m > 0 && m > size_prob) {
578 size_prob = m;
580 // temp++;
583 if (temp->opcode == -1) { /* didn't match any instruction */
584 switch (size_prob) {
585 case 1:
586 error(ERR_NONFATAL, "operation size not specified");
587 break;
588 case 2:
589 error(ERR_NONFATAL, "mismatch in operand sizes");
590 break;
591 case 3:
592 error(ERR_NONFATAL, "no instruction for this cpu level");
593 break;
594 case 4:
595 error(ERR_NONFATAL, "instruction not supported in 64-bit mode");
596 break;
597 default:
598 error(ERR_NONFATAL,
599 "invalid combination of opcode and operands");
600 break;
603 return 0;
606 int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp,
607 insn * instruction, efunc error)
609 const struct itemplate *temp;
611 errfunc = error; /* to pass to other functions */
612 cpu = cp;
614 if (instruction->opcode == -1)
615 return 0;
617 if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
618 instruction->opcode == I_DD || instruction->opcode == I_DQ ||
619 instruction->opcode == I_DT || instruction->opcode == I_DO) {
620 extop *e;
621 int32_t isize, osize, wsize = 0; /* placate gcc */
623 isize = 0;
624 switch (instruction->opcode) {
625 case I_DB:
626 wsize = 1;
627 break;
628 case I_DW:
629 wsize = 2;
630 break;
631 case I_DD:
632 wsize = 4;
633 break;
634 case I_DQ:
635 wsize = 8;
636 break;
637 case I_DT:
638 wsize = 10;
639 break;
640 case I_DO:
641 wsize = 16;
642 break;
643 default:
644 break;
647 for (e = instruction->eops; e; e = e->next) {
648 int32_t align;
650 osize = 0;
651 if (e->type == EOT_DB_NUMBER)
652 osize = 1;
653 else if (e->type == EOT_DB_STRING)
654 osize = e->stringlen;
656 align = (-osize) % wsize;
657 if (align < 0)
658 align += wsize;
659 isize += osize + align;
661 return isize * instruction->times;
664 if (instruction->opcode == I_INCBIN) {
665 char fname[FILENAME_MAX];
666 FILE *fp;
667 int32_t len;
668 char *prefix = "", *combine;
669 char **pPrevPath = NULL;
671 len = FILENAME_MAX - 1;
672 if (len > instruction->eops->stringlen)
673 len = instruction->eops->stringlen;
674 strncpy(fname, instruction->eops->stringval, len);
675 fname[len] = '\0';
677 /* added by alexfru: 'incbin' uses include paths */
678 while (1) {
679 combine = nasm_malloc(strlen(prefix) + len + 1);
680 strcpy(combine, prefix);
681 strcat(combine, fname);
683 if ((fp = fopen(combine, "rb")) != NULL) {
684 nasm_free(combine);
685 break;
688 nasm_free(combine);
689 pPrevPath = pp_get_include_path_ptr(pPrevPath);
690 if (pPrevPath == NULL)
691 break;
692 prefix = *pPrevPath;
695 if (fp == NULL)
696 error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
697 fname);
698 else if (fseek(fp, 0L, SEEK_END) < 0)
699 error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
700 fname);
701 else {
702 len = ftell(fp);
703 fclose(fp);
704 if (instruction->eops->next) {
705 len -= instruction->eops->next->offset;
706 if (instruction->eops->next->next &&
707 len > instruction->eops->next->next->offset) {
708 len = instruction->eops->next->next->offset;
711 return instruction->times * len;
713 return 0; /* if we're here, there's an error */
716 /* Check to see if we need an address-size prefix */
717 add_asp(instruction, bits);
719 for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
720 int m = matches(temp, instruction, bits);
721 if (m == 99)
722 m += jmp_match(segment, offset, bits, instruction, temp->code);
724 if (m == 100) {
725 /* we've matched an instruction. */
726 int32_t isize;
727 const char *codes = temp->code;
728 int j;
730 isize = calcsize(segment, offset, bits, instruction, codes);
731 if (isize < 0)
732 return -1;
733 for (j = 0; j < MAXPREFIX; j++) {
734 switch (instruction->prefixes[j]) {
735 case P_A16:
736 if (bits != 16)
737 isize++;
738 break;
739 case P_A32:
740 if (bits != 32)
741 isize++;
742 break;
743 case P_O16:
744 if (bits != 16)
745 isize++;
746 break;
747 case P_O32:
748 if (bits == 16)
749 isize++;
750 break;
751 case P_A64:
752 case P_O64:
753 case P_none:
754 break;
755 default:
756 isize++;
757 break;
760 return isize * instruction->times;
763 return -1; /* didn't match any instruction */
766 /* check that opn[op] is a signed byte of size 16 or 32,
767 and return the signed value*/
768 static int is_sbyte(insn * ins, int op, int size)
770 int32_t v;
771 int ret;
773 ret = !(ins->forw_ref && ins->oprs[op].opflags) && /* dead in the water on forward reference or External */
774 optimizing >= 0 &&
775 !(ins->oprs[op].type & STRICT) &&
776 ins->oprs[op].wrt == NO_SEG && ins->oprs[op].segment == NO_SEG;
778 v = ins->oprs[op].offset;
779 if (size == 16)
780 v = (int16_t)v; /* sign extend if 16 bits */
782 return ret && v >= -128L && v <= 127L;
785 static int32_t calcsize(int32_t segment, int32_t offset, int bits,
786 insn * ins, const char *codes)
788 int32_t length = 0;
789 uint8_t c;
790 int rex_mask = ~0;
791 ins->rex = 0; /* Ensure REX is reset */
792 struct operand *opx;
794 if (ins->prefixes[PPS_OSIZE] == P_O64)
795 ins->rex |= REX_W;
797 (void)segment; /* Don't warn that this parameter is unused */
798 (void)offset; /* Don't warn that this parameter is unused */
800 while (*codes) {
801 c = *codes++;
802 opx = &ins->oprs[c & 3];
803 switch (c) {
804 case 01:
805 case 02:
806 case 03:
807 codes += c, length += c;
808 break;
809 case 04:
810 case 05:
811 case 06:
812 case 07:
813 length++;
814 break;
815 case 010:
816 case 011:
817 case 012:
818 case 013:
819 ins->rex |=
820 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
821 codes++, length++;
822 break;
823 case 014:
824 case 015:
825 case 016:
826 case 017:
827 length++;
828 break;
829 case 020:
830 case 021:
831 case 022:
832 case 023:
833 length++;
834 break;
835 case 024:
836 case 025:
837 case 026:
838 case 027:
839 length++;
840 break;
841 case 030:
842 case 031:
843 case 032:
844 case 033:
845 length += 2;
846 break;
847 case 034:
848 case 035:
849 case 036:
850 case 037:
851 if (opx->type & (BITS16 | BITS32 | BITS64))
852 length += (opx->type & BITS16) ? 2 : 4;
853 else
854 length += (bits == 16) ? 2 : 4;
855 break;
856 case 040:
857 case 041:
858 case 042:
859 case 043:
860 length += 4;
861 break;
862 case 044:
863 case 045:
864 case 046:
865 case 047:
866 length += ins->addr_size >> 3;
867 break;
868 case 050:
869 case 051:
870 case 052:
871 case 053:
872 length++;
873 break;
874 case 054:
875 case 055:
876 case 056:
877 case 057:
878 length += 8; /* MOV reg64/imm */
879 break;
880 case 060:
881 case 061:
882 case 062:
883 case 063:
884 length += 2;
885 break;
886 case 064:
887 case 065:
888 case 066:
889 case 067:
890 if (opx->type & (BITS16 | BITS32 | BITS64))
891 length += (opx->type & BITS16) ? 2 : 4;
892 else
893 length += (bits == 16) ? 2 : 4;
894 break;
895 case 070:
896 case 071:
897 case 072:
898 case 073:
899 length += 4;
900 break;
901 case 074:
902 case 075:
903 case 076:
904 case 077:
905 length += 2;
906 break;
907 case 0140:
908 case 0141:
909 case 0142:
910 case 0143:
911 length += is_sbyte(ins, c & 3, 16) ? 1 : 2;
912 break;
913 case 0144:
914 case 0145:
915 case 0146:
916 case 0147:
917 codes += 2;
918 length++;
919 break;
920 case 0150:
921 case 0151:
922 case 0152:
923 case 0153:
924 length += is_sbyte(ins, c & 3, 32) ? 1 : 4;
925 break;
926 case 0154:
927 case 0155:
928 case 0156:
929 case 0157:
930 codes += 2;
931 length++;
932 break;
933 case 0160:
934 case 0161:
935 case 0162:
936 case 0163:
937 length++;
938 ins->rex |= REX_D;
939 ins->drexdst = regval(&ins->oprs[c & 3]);
940 break;
941 case 0164:
942 case 0165:
943 case 0166:
944 case 0167:
945 length++;
946 ins->rex |= REX_D|REX_OC;
947 ins->drexdst = regval(&ins->oprs[c & 3]);
948 break;
949 case 0170:
950 length++;
951 break;
952 case 0171:
953 break;
954 case 0300:
955 case 0301:
956 case 0302:
957 case 0303:
958 break;
959 case 0310:
960 if (bits == 64)
961 return -1;
962 length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
963 break;
964 case 0311:
965 length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
966 break;
967 case 0312:
968 break;
969 case 0313:
970 if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
971 has_prefix(ins, PPS_ASIZE, P_A32))
972 return -1;
973 break;
974 case 0320:
975 length += (bits != 16);
976 break;
977 case 0321:
978 length += (bits == 16);
979 break;
980 case 0322:
981 break;
982 case 0323:
983 rex_mask &= ~REX_W;
984 break;
985 case 0324:
986 ins->rex |= REX_W;
987 break;
988 case 0330:
989 codes++, length++;
990 break;
991 case 0331:
992 break;
993 case 0332:
994 case 0333:
995 length++;
996 break;
997 case 0334:
998 ins->rex |= REX_L;
999 break;
1000 case 0335:
1001 break;
1002 case 0340:
1003 case 0341:
1004 case 0342:
1005 if (ins->oprs[0].segment != NO_SEG)
1006 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1007 " quantity of BSS space");
1008 else
1009 length += ins->oprs[0].offset << (c & 3);
1010 break;
1011 case 0364:
1012 case 0365:
1013 break;
1014 case 0366:
1015 case 0367:
1016 length++;
1017 break;
1018 case 0370:
1019 case 0371:
1020 case 0372:
1021 break;
1022 case 0373:
1023 length++;
1024 break;
1025 default: /* can't do it by 'case' statements */
1026 if (c >= 0100 && c <= 0277) { /* it's an EA */
1027 ea ea_data;
1028 int rfield;
1029 int32_t rflags;
1030 ea_data.rex = 0; /* Ensure ea.REX is initially 0 */
1032 if (c <= 0177) {
1033 /* pick rfield from operand b */
1034 rflags = regflag(&ins->oprs[c & 7]);
1035 rfield = regvals[ins->oprs[c & 7].basereg];
1036 } else {
1037 rflags = 0;
1038 rfield = c & 7;
1041 if (!process_ea
1042 (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1043 ins->addr_size, rfield, rflags, ins->forw_ref)) {
1044 errfunc(ERR_NONFATAL, "invalid effective address");
1045 return -1;
1046 } else {
1047 ins->rex |= ea_data.rex;
1048 length += ea_data.size;
1050 } else {
1051 errfunc(ERR_PANIC, "internal instruction table corrupt"
1052 ": instruction code 0x%02X given", c);
1057 ins->rex &= rex_mask;
1059 if (ins->rex & REX_D) {
1060 if (ins->rex & REX_H) {
1061 errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1062 return -1;
1064 if (bits != 64 && ((ins->rex & (REX_W|REX_X|REX_B)) ||
1065 ins->drexdst > 7)) {
1066 errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1067 return -1;
1069 length++;
1070 } else if (ins->rex & REX_REAL) {
1071 if (ins->rex & REX_H) {
1072 errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1073 return -1;
1074 } else if (bits == 64) {
1075 length++;
1076 } else if ((ins->rex & REX_L) &&
1077 !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1078 cpu >= IF_X86_64) {
1079 /* LOCK-as-REX.R */
1080 assert_no_prefix(ins, PPS_LREP);
1081 length++;
1082 } else {
1083 errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1084 return -1;
1088 return length;
/*
 * Emit the pending REX prefix, if any, in front of the next opcode byte.
 * Does nothing for DREX instructions, when no REX_REAL bit is set, or
 * outside 64-bit mode.  ins->rex is cleared afterwards so the prefix is
 * emitted only once.
 *
 * Expects `ins', `bits', `offset' and `segment' in the enclosing scope
 * and advances `offset' by one when a byte is written.  Wrapped in
 * do { } while (0) so that `EMIT_REX();' is a single statement and
 * cannot capture a following `else'.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & REX_D) && (ins->rex & REX_REAL) &&             \
            (bits == 64)) {                                             \
            ins->rex = (ins->rex & REX_REAL) | REX_P;                   \
            out(offset, segment, &ins->rex, OUT_RAWDATA + 1,            \
                NO_SEG, NO_SEG);                                        \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1099 static void gencode(int32_t segment, int32_t offset, int bits,
1100 insn * ins, const char *codes, int32_t insn_end)
1102 static char condval[] = { /* conditional opcodes */
1103 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1104 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1105 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1107 uint8_t c;
1108 uint8_t bytes[4];
1109 int32_t size;
1110 int64_t data;
1111 struct operand *opx;
1113 while (*codes) {
1114 c = *codes++;
1115 opx = &ins->oprs[c & 3];
1116 switch (c) {
1117 case 01:
1118 case 02:
1119 case 03:
1120 EMIT_REX();
1121 out(offset, segment, codes, OUT_RAWDATA + c, NO_SEG, NO_SEG);
1122 codes += c;
1123 offset += c;
1124 break;
1126 case 04:
1127 case 06:
1128 switch (ins->oprs[0].basereg) {
1129 case R_CS:
1130 bytes[0] = 0x0E + (c == 0x04 ? 1 : 0);
1131 break;
1132 case R_DS:
1133 bytes[0] = 0x1E + (c == 0x04 ? 1 : 0);
1134 break;
1135 case R_ES:
1136 bytes[0] = 0x06 + (c == 0x04 ? 1 : 0);
1137 break;
1138 case R_SS:
1139 bytes[0] = 0x16 + (c == 0x04 ? 1 : 0);
1140 break;
1141 default:
1142 errfunc(ERR_PANIC,
1143 "bizarre 8086 segment register received");
1145 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1146 offset++;
1147 break;
1149 case 05:
1150 case 07:
1151 switch (ins->oprs[0].basereg) {
1152 case R_FS:
1153 bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0);
1154 break;
1155 case R_GS:
1156 bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0);
1157 break;
1158 default:
1159 errfunc(ERR_PANIC,
1160 "bizarre 386 segment register received");
1162 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1163 offset++;
1164 break;
1166 case 010:
1167 case 011:
1168 case 012:
1169 case 013:
1170 EMIT_REX();
1171 bytes[0] = *codes++ + ((regval(opx)) & 7);
1172 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1173 offset += 1;
1174 break;
1176 case 014:
1177 case 015:
1178 case 016:
1179 case 017:
1180 if (opx->offset < -128 || opx->offset > 127) {
1181 errfunc(ERR_WARNING, "signed byte value exceeds bounds");
1184 if (opx->segment != NO_SEG) {
1185 data = opx->offset;
1186 out(offset, segment, &data, OUT_ADDRESS + 1,
1187 opx->segment, opx->wrt);
1188 } else {
1189 bytes[0] = opx->offset;
1190 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1191 NO_SEG);
1193 offset += 1;
1194 break;
1196 case 020:
1197 case 021:
1198 case 022:
1199 case 023:
1200 if (opx->offset < -256 || opx->offset > 255) {
1201 errfunc(ERR_WARNING, "byte value exceeds bounds");
1203 if (opx->segment != NO_SEG) {
1204 data = opx->offset;
1205 out(offset, segment, &data, OUT_ADDRESS + 1,
1206 opx->segment, opx->wrt);
1207 } else {
1208 bytes[0] = opx->offset;
1209 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1210 NO_SEG);
1212 offset += 1;
1213 break;
1215 case 024:
1216 case 025:
1217 case 026:
1218 case 027:
1219 if (opx->offset < 0 || opx->offset > 255)
1220 errfunc(ERR_WARNING, "unsigned byte value exceeds bounds");
1221 if (opx->segment != NO_SEG) {
1222 data = opx->offset;
1223 out(offset, segment, &data, OUT_ADDRESS + 1,
1224 opx->segment, opx->wrt);
1225 } else {
1226 bytes[0] = opx->offset;
1227 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1228 NO_SEG);
1230 offset += 1;
1231 break;
1233 case 030:
1234 case 031:
1235 case 032:
1236 case 033:
1237 data = opx->offset;
1238 if (opx->segment == NO_SEG && opx->wrt == NO_SEG)
1239 warn_overflow(2, data);
1240 out(offset, segment, &data, OUT_ADDRESS + 2,
1241 opx->segment, opx->wrt);
1242 offset += 2;
1243 break;
1245 case 034:
1246 case 035:
1247 case 036:
1248 case 037:
1249 if (opx->type & (BITS16 | BITS32))
1250 size = (opx->type & BITS16) ? 2 : 4;
1251 else
1252 size = (bits == 16) ? 2 : 4;
1253 data = opx->offset;
1254 if (opx->segment == NO_SEG && opx->wrt == NO_SEG)
1255 warn_overflow(size, data);
1256 out(offset, segment, &data, OUT_ADDRESS + size,
1257 opx->segment, opx->wrt);
1258 offset += size;
1259 break;
1261 case 040:
1262 case 041:
1263 case 042:
1264 case 043:
1265 data = opx->offset;
1266 out(offset, segment, &data, OUT_ADDRESS + 4,
1267 opx->segment, opx->wrt);
1268 offset += 4;
1269 break;
1271 case 044:
1272 case 045:
1273 case 046:
1274 case 047:
1275 data = opx->offset;
1276 size = ins->addr_size >> 3;
1277 if (opx->segment == NO_SEG &&
1278 opx->wrt == NO_SEG)
1279 warn_overflow(size, data);
1280 out(offset, segment, &data, OUT_ADDRESS + size,
1281 opx->segment, opx->wrt);
1282 offset += size;
1283 break;
1285 case 050:
1286 case 051:
1287 case 052:
1288 case 053:
1289 if (opx->segment != segment)
1290 errfunc(ERR_NONFATAL,
1291 "short relative jump outside segment");
1292 data = opx->offset - insn_end;
1293 if (data > 127 || data < -128)
1294 errfunc(ERR_NONFATAL, "short jump is out of range");
1295 bytes[0] = data;
1296 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1297 offset += 1;
1298 break;
1300 case 054:
1301 case 055:
1302 case 056:
1303 case 057:
1304 data = (int64_t)opx->offset;
1305 out(offset, segment, &data, OUT_ADDRESS + 8,
1306 opx->segment, opx->wrt);
1307 offset += 8;
1308 break;
1310 case 060:
1311 case 061:
1312 case 062:
1313 case 063:
1314 if (opx->segment != segment) {
1315 data = opx->offset;
1316 out(offset, segment, &data,
1317 OUT_REL2ADR + insn_end - offset,
1318 opx->segment, opx->wrt);
1319 } else {
1320 data = opx->offset - insn_end;
1321 out(offset, segment, &data,
1322 OUT_ADDRESS + 2, NO_SEG, NO_SEG);
1324 offset += 2;
1325 break;
1327 case 064:
1328 case 065:
1329 case 066:
1330 case 067:
1331 if (opx->type & (BITS16 | BITS32 | BITS64))
1332 size = (opx->type & BITS16) ? 2 : 4;
1333 else
1334 size = (bits == 16) ? 2 : 4;
1335 if (opx->segment != segment) {
1336 int32_t reltype = (size == 2 ? OUT_REL2ADR : OUT_REL4ADR);
1337 data = opx->offset;
1338 out(offset, segment, &data, reltype + insn_end - offset,
1339 opx->segment, opx->wrt);
1340 } else {
1341 data = opx->offset - insn_end;
1342 out(offset, segment, &data,
1343 OUT_ADDRESS + size, NO_SEG, NO_SEG);
1345 offset += size;
1346 break;
1348 case 070:
1349 case 071:
1350 case 072:
1351 case 073:
1352 if (opx->segment != segment) {
1353 data = opx->offset;
1354 out(offset, segment, &data,
1355 OUT_REL4ADR + insn_end - offset,
1356 opx->segment, opx->wrt);
1357 } else {
1358 data = opx->offset - insn_end;
1359 out(offset, segment, &data,
1360 OUT_ADDRESS + 4, NO_SEG, NO_SEG);
1362 offset += 4;
1363 break;
1365 case 074:
1366 case 075:
1367 case 076:
1368 case 077:
1369 if (opx->segment == NO_SEG)
1370 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1371 " relocatable");
1372 data = 0L;
1373 out(offset, segment, &data, OUT_ADDRESS + 2,
1374 outfmt->segbase(1 + opx->segment),
1375 opx->wrt);
1376 offset += 2;
1377 break;
1379 case 0140:
1380 case 0141:
1381 case 0142:
1382 case 0143:
1383 data = opx->offset;
1384 if (is_sbyte(ins, c & 3, 16)) {
1385 bytes[0] = data;
1386 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1387 NO_SEG);
1388 offset++;
1389 } else {
1390 if (opx->segment == NO_SEG &&
1391 opx->wrt == NO_SEG)
1392 warn_overflow(2, data);
1393 out(offset, segment, &data, OUT_ADDRESS + 2,
1394 opx->segment, opx->wrt);
1395 offset += 2;
1397 break;
1399 case 0144:
1400 case 0145:
1401 case 0146:
1402 case 0147:
1403 EMIT_REX();
1404 codes++;
1405 bytes[0] = *codes++;
1406 if (is_sbyte(ins, c & 3, 16))
1407 bytes[0] |= 2; /* s-bit */
1408 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1409 offset++;
1410 break;
1412 case 0150:
1413 case 0151:
1414 case 0152:
1415 case 0153:
1416 data = opx->offset;
1417 if (is_sbyte(ins, c & 3, 32)) {
1418 bytes[0] = data;
1419 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1420 NO_SEG);
1421 offset++;
1422 } else {
1423 out(offset, segment, &data, OUT_ADDRESS + 4,
1424 opx->segment, opx->wrt);
1425 offset += 4;
1427 break;
1429 case 0154:
1430 case 0155:
1431 case 0156:
1432 case 0157:
1433 EMIT_REX();
1434 codes++;
1435 bytes[0] = *codes++;
1436 if (is_sbyte(ins, c & 3, 32))
1437 bytes[0] |= 2; /* s-bit */
1438 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1439 offset++;
1440 break;
1442 case 0160:
1443 case 0161:
1444 case 0162:
1445 case 0163:
1446 case 0164:
1447 case 0165:
1448 case 0166:
1449 case 0167:
1450 break;
1452 case 0170:
1453 EMIT_REX();
1454 bytes[0] = 0;
1455 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1456 offset += 1;
1457 break;
1459 case 0171:
1460 bytes[0] =
1461 (ins->drexdst << 4) |
1462 (ins->rex & REX_OC ? 0x08 : 0) |
1463 (ins->rex & (REX_R|REX_X|REX_B));
1464 ins->rex = 0;
1465 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1466 offset++;
1467 break;
1469 case 0300:
1470 case 0301:
1471 case 0302:
1472 case 0303:
1473 break;
1475 case 0310:
1476 if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1477 *bytes = 0x67;
1478 out(offset, segment, bytes,
1479 OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1480 offset += 1;
1481 } else
1482 offset += 0;
1483 break;
1485 case 0311:
1486 if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1487 *bytes = 0x67;
1488 out(offset, segment, bytes,
1489 OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1490 offset += 1;
1491 } else
1492 offset += 0;
1493 break;
1495 case 0312:
1496 break;
1498 case 0313:
1499 ins->rex = 0;
1500 break;
1502 case 0320:
1503 if (bits != 16) {
1504 *bytes = 0x66;
1505 out(offset, segment, bytes,
1506 OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1507 offset += 1;
1508 } else
1509 offset += 0;
1510 break;
1512 case 0321:
1513 if (bits == 16) {
1514 *bytes = 0x66;
1515 out(offset, segment, bytes,
1516 OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1517 offset += 1;
1518 } else
1519 offset += 0;
1520 break;
1522 case 0322:
1523 case 0323:
1524 break;
1526 case 0324:
1527 ins->rex |= REX_W;
1528 break;
1530 case 0330:
1531 *bytes = *codes++ ^ condval[ins->condition];
1532 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1533 offset += 1;
1534 break;
1536 case 0331:
1537 break;
1539 case 0332:
1540 case 0333:
1541 *bytes = c - 0332 + 0xF2;
1542 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1543 offset += 1;
1544 break;
1546 case 0334:
1547 if (ins->rex & REX_R) {
1548 *bytes = 0xF0;
1549 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1550 offset += 1;
1552 ins->rex &= ~(REX_L|REX_R);
1553 break;
1555 case 0335:
1556 break;
1558 case 0340:
1559 case 0341:
1560 case 0342:
1561 if (ins->oprs[0].segment != NO_SEG)
1562 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1563 else {
1564 int32_t size = ins->oprs[0].offset << (c & 3);
1565 if (size > 0)
1566 out(offset, segment, NULL,
1567 OUT_RESERVE + size, NO_SEG, NO_SEG);
1568 offset += size;
1570 break;
1572 case 0364:
1573 case 0365:
1574 break;
1576 case 0366:
1577 case 0367:
1578 *bytes = c - 0366 + 0x66;
1579 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1580 offset += 1;
1581 break;
1583 case 0370:
1584 case 0371:
1585 case 0372:
1586 break;
1588 case 0373:
1589 *bytes = bits == 16 ? 3 : 5;
1590 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1591 offset += 1;
1592 break;
1594 default: /* can't do it by 'case' statements */
1595 if (c >= 0100 && c <= 0277) { /* it's an EA */
1596 ea ea_data;
1597 int rfield;
1598 int32_t rflags;
1599 uint8_t *p;
1600 int32_t s;
1602 if (c <= 0177) {
1603 /* pick rfield from operand b */
1604 rflags = regflag(&ins->oprs[c & 7]);
1605 rfield = regvals[ins->oprs[c & 7].basereg];
1606 } else {
1607 /* rfield is constant */
1608 rflags = 0;
1609 rfield = c & 7;
1612 if (!process_ea
1613 (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1614 ins->addr_size, rfield, rflags, ins->forw_ref)) {
1615 errfunc(ERR_NONFATAL, "invalid effective address");
1618 p = bytes;
1619 *p++ = ea_data.modrm;
1620 if (ea_data.sib_present)
1621 *p++ = ea_data.sib;
1623 /* DREX suffixes come between the SIB and the displacement */
1624 if (ins->rex & REX_D) {
1625 *p++ =
1626 (ins->drexdst << 4) |
1627 (ins->rex & REX_OC ? 0x08 : 0) |
1628 (ins->rex & (REX_R|REX_X|REX_B));
1629 ins->rex = 0;
1632 s = p - bytes;
1633 out(offset, segment, bytes, OUT_RAWDATA + s,
1634 NO_SEG, NO_SEG);
1636 switch (ea_data.bytes) {
1637 case 0:
1638 break;
1639 case 1:
1640 if (ins->oprs[(c >> 3) & 7].segment != NO_SEG) {
1641 data = ins->oprs[(c >> 3) & 7].offset;
1642 out(offset, segment, &data, OUT_ADDRESS + 1,
1643 ins->oprs[(c >> 3) & 7].segment,
1644 ins->oprs[(c >> 3) & 7].wrt);
1645 } else {
1646 *bytes = ins->oprs[(c >> 3) & 7].offset;
1647 out(offset, segment, bytes, OUT_RAWDATA + 1,
1648 NO_SEG, NO_SEG);
1650 s++;
1651 break;
1652 case 8:
1653 case 2:
1654 case 4:
1655 data = ins->oprs[(c >> 3) & 7].offset;
1656 out(offset, segment, &data,
1657 (ea_data.rip ? OUT_REL4ADR : OUT_ADDRESS)
1658 + ea_data.bytes,
1659 ins->oprs[(c >> 3) & 7].segment,
1660 ins->oprs[(c >> 3) & 7].wrt);
1661 s += ea_data.bytes;
1662 break;
1664 offset += s;
1665 } else {
1666 errfunc(ERR_PANIC, "internal instruction table corrupt"
1667 ": instruction code 0x%02X given", c);
1673 static int32_t regflag(const operand * o)
1675 if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1676 errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1678 return reg_flags[o->basereg];
1681 static int32_t regval(const operand * o)
1683 if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1684 errfunc(ERR_PANIC, "invalid operand passed to regval()");
1686 return regvals[o->basereg];
1689 static int op_rexflags(const operand * o, int mask)
1691 int32_t flags;
1692 int val;
1694 if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1695 errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1698 flags = reg_flags[o->basereg];
1699 val = regvals[o->basereg];
1701 return rexflags(val, flags, mask);
1704 static int rexflags(int val, int32_t flags, int mask)
1706 int rex = 0;
1708 if (val >= 8)
1709 rex |= REX_B|REX_X|REX_R;
1710 if (flags & BITS64)
1711 rex |= REX_W;
1712 if (!(REG_HIGH & ~flags)) /* AH, CH, DH, BH */
1713 rex |= REX_H;
1714 else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1715 rex |= REX_P;
1717 return rex & mask;
1720 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1722 int i, size[MAX_OPERANDS], asize, oprs, ret;
1724 ret = 100;
1727 * Check the opcode
1729 if (itemp->opcode != instruction->opcode)
1730 return 0;
1733 * Count the operands
1735 if (itemp->operands != instruction->operands)
1736 return 0;
1739 * Check that no spurious colons or TOs are present
1741 for (i = 0; i < itemp->operands; i++)
1742 if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1743 return 0;
1746 * Check that the operand flags all match up
1748 for (i = 0; i < itemp->operands; i++) {
1749 if (itemp->opd[i] & SAME_AS) {
1750 int j = itemp->opd[i] & ~SAME_AS;
1751 if (instruction->oprs[i].type != instruction->oprs[j].type ||
1752 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
1753 return 0;
1754 } else if (itemp->opd[i] & ~instruction->oprs[i].type ||
1755 ((itemp->opd[i] & SIZE_MASK) &&
1756 ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) {
1757 if ((itemp->opd[i] & ~instruction->oprs[i].type & ~SIZE_MASK) ||
1758 (instruction->oprs[i].type & SIZE_MASK))
1759 return 0;
1760 else
1761 return 1;
1766 * Check operand sizes
1768 if (itemp->flags & IF_ARMASK) {
1769 memset(size, 0, sizeof size);
1771 switch (itemp->flags & IF_ARMASK) {
1772 case IF_AR0:
1773 i = 0;
1774 break;
1775 case IF_AR1:
1776 i = 1;
1777 break;
1778 case IF_AR2:
1779 i = 2;
1780 break;
1781 case IF_AR3:
1782 i = 3;
1783 break;
1784 default:
1785 break; /* Shouldn't happen */
1787 switch (itemp->flags & IF_SMASK) {
1788 case IF_SB:
1789 size[i] = BITS8;
1790 break;
1791 case IF_SW:
1792 size[i] = BITS16;
1793 break;
1794 case IF_SD:
1795 size[i] = BITS32;
1796 break;
1797 case IF_SQ:
1798 size[i] = BITS64;
1799 break;
1800 case IF_SO:
1801 size[i] = BITS128;
1802 break;
1803 default:
1804 break;
1806 } else {
1807 asize = 0;
1808 switch (itemp->flags & IF_SMASK) {
1809 case IF_SB:
1810 asize = BITS8;
1811 oprs = itemp->operands;
1812 break;
1813 case IF_SW:
1814 asize = BITS16;
1815 oprs = itemp->operands;
1816 break;
1817 case IF_SD:
1818 asize = BITS32;
1819 oprs = itemp->operands;
1820 break;
1821 case IF_SQ:
1822 asize = BITS64;
1823 oprs = itemp->operands;
1824 break;
1825 case IF_SO:
1826 asize = BITS128;
1827 oprs = itemp->operands;
1828 break;
1829 default:
1830 break;
1832 for (i = 0; i < MAX_OPERANDS; i++)
1833 size[i] = asize;
1836 if (itemp->flags & (IF_SM | IF_SM2)) {
1837 oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
1838 asize = 0;
1839 for (i = 0; i < oprs; i++) {
1840 if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
1841 int j;
1842 for (j = 0; j < oprs; j++)
1843 size[j] = asize;
1844 break;
1847 } else {
1848 oprs = itemp->operands;
1851 for (i = 0; i < itemp->operands; i++) {
1852 if (!(itemp->opd[i] & SIZE_MASK) &&
1853 (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
1854 return 2;
1858 * Check template is okay at the set cpu level
1860 if (((itemp->flags & IF_PLEVEL) > cpu))
1861 return 3;
1864 * Check if instruction is available in long mode
1866 if ((itemp->flags & IF_NOLONG) && (bits == 64))
1867 return 4;
1870 * Check if special handling needed for Jumps
1872 if ((uint8_t)(itemp->code[0]) >= 0370)
1873 return 99;
1875 return ret;
/*
 * process_ea: work out the ModRM/SIB encoding of the operand `input` and
 * store it in `*output`.
 *
 *   input    - operand to encode: a register, a pure offset, or a
 *              base/index/scale memory reference
 *   output   - receives modrm, sib, sib_present, bytes (displacement
 *              length), rip and size.  REX bits are only ever OR-ed into
 *              output->rex; this function never clears that field.
 *              NOTE(review): callers presumably have to initialise
 *              output->rex themselves - confirm at the call sites.
 *   bits     - only compared against 64 in this function
 *   addrbits - address size, compared against 16, 32 and 64
 *   rfield   - value for the ModRM reg field (only the low 3 bits are
 *              placed in the byte); also fed to rexflags()
 *   rflags   - register flags belonging to rfield, used for REX only
 *   forw_ref - when nonzero the displacement is not shrunk according to
 *              the offset value (an explicit EAF_BYTEOFFS still selects
 *              a one-byte displacement)
 *
 * Returns `output` on success, or NULL when the operand cannot be
 * expressed as an effective address.
 */
1878 static ea *process_ea(operand * input, ea * output, int bits,
1879 int addrbits, int rfield, int32_t rflags, int forw_ref)
1881 output->rip = false;
1883 /* REX flags for the rfield operand */
1884 output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
1886 if (!(REGISTER & ~input->type)) { /* register direct */
1887 int i;
1888 int32_t f;
1890 if (input->basereg < EXPR_REG_START /* Verify as Register */
1891 || input->basereg >= REG_ENUM_LIMIT)
1892 return NULL;
1893 f = regflag(input);
1894 i = regvals[input->basereg];
1896 if (REG_EA & ~f)
1897 return NULL; /* Invalid EA register */
1899 output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
1901 output->sib_present = false; /* no SIB necessary */
1902 output->bytes = 0; /* no offset necessary either */
1903 output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
1904 } else { /* it's a memory reference */
1905 if (input->basereg == -1
1906 && (input->indexreg == -1 || input->scale == 0)) {
1907 /* it's a pure offset */
1908 if (bits == 64 && (~input->type & IP_REL)) {
/* bits == 64 and IP_REL not set: ModRM rm=4 plus a SIB byte with
   index=4, base=5 and a 4-byte displacement; rip stays false */
1909 int scale, index, base;
1910 output->sib_present = true;
1911 scale = 0;
1912 index = 4;
1913 base = 5;
1914 output->sib = (scale << 6) | (index << 3) | base;
1915 output->bytes = 4;
1916 output->modrm = 4 | ((rfield & 7) << 3);
1917 output->rip = false;
1918 } else {
/* otherwise: rm=6 with a 2-byte offset for 16-bit addressing, rm=5 with
   a 4-byte offset for anything else; rip is set when bits == 64 */
1919 output->sib_present = false;
1920 output->bytes = (addrbits != 16 ? 4 : 2);
1921 output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
1922 output->rip = bits == 64;
1924 } else { /* it's an indirection */
1925 int i = input->indexreg, b = input->basereg, s = input->scale;
1926 int32_t o = input->offset, seg = input->segment;
1927 int hb = input->hintbase, ht = input->hinttype;
1928 int t;
1929 int it, bt;
1930 int32_t ix, bx; /* register flags */
1932 if (s == 0)
1933 i = -1; /* make this easy, at least */
/* it/bt: regvals[] entries of index/base, -1 when not a valid register;
   ix/bx: the matching reg_flags[] entries, 0 when not a valid register */
1935 if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
1936 it = regvals[i];
1937 ix = reg_flags[i];
1938 } else {
1939 it = -1;
1940 ix = 0;
1943 if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
1944 bt = regvals[b];
1945 bx = reg_flags[b];
1946 } else {
1947 bt = -1;
1948 bx = 0;
1951 /* check for a 32/64-bit memory reference... */
1952 if ((ix|bx) & (BITS32|BITS64)) {
1953 /* it must be a 32/64-bit memory reference. Firstly we have
1954 * to check that all registers involved are type E/Rxx. */
/* sok: set of address sizes still compatible with the registers seen */
1955 int32_t sok = BITS32|BITS64;
1957 if (it != -1) {
1958 if (!(REG64 & ~ix) || !(REG32 & ~ix))
1959 sok &= ix;
1960 else
1961 return NULL;
1964 if (bt != -1) {
1965 if (REG_GPR & ~bx)
1966 return NULL; /* Invalid register */
1967 if (~sok & bx & SIZE_MASK)
1968 return NULL; /* Invalid size */
1969 sok &= bx;
1972 /* While we're here, ensure the user didn't specify
1973 WORD or QWORD. */
1974 if (input->disp_size == 16 || input->disp_size == 64)
1975 return NULL;
1977 if (addrbits == 16 ||
1978 (addrbits == 32 && !(sok & BITS32)) ||
1979 (addrbits == 64 && !(sok & BITS64)))
1980 return NULL;
1982 /* now reorganize base/index */
/* NOTE: the rewrites below are applied in sequence and each one sees the
   result of the previous ones, so their order matters */
1983 if (s == 1 && bt != it && bt != -1 && it != -1 &&
1984 ((hb == b && ht == EAH_NOTBASE)
1985 || (hb == i && ht == EAH_MAKEBASE))) {
1986 /* swap if hints say so */
1987 t = bt, bt = it, it = t;
1988 t = bx, bx = ix, ix = t;
1990 if (bt == it) /* convert EAX+2*EAX to 3*EAX */
1991 bt = -1, bx = 0, s++;
1992 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
1993 /* make single reg base, unless hint */
1994 bt = it, bx = ix, it = -1, ix = 0;
1996 if (((s == 2 && it != REG_NUM_ESP
1997 && !(input->eaflags & EAF_TIMESTWO)) || s == 3
1998 || s == 5 || s == 9) && bt == -1)
1999 bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2000 if (it == -1 && (bt & 7) != REG_NUM_ESP
2001 && (input->eaflags & EAF_TIMESTWO))
2002 it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2003 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2004 if (s == 1 && it == REG_NUM_ESP) {
2005 /* swap ESP into base if scale is 1 */
2006 t = it, it = bt, bt = t;
2007 t = ix, ix = bx, bx = t;
2009 if (it == REG_NUM_ESP
2010 || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2011 return NULL; /* wrong, for various reasons */
2013 output->rex |= rexflags(it, ix, REX_X);
2014 output->rex |= rexflags(bt, bx, REX_B);
2016 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2017 /* no SIB needed */
2018 int mod, rm;
2020 if (bt == -1) {
2021 rm = 5;
2022 mod = 0;
2023 } else {
2024 rm = (bt & 7);
/* mod 0 = no displacement, mod 1 = byte displacement, mod 2 = full one */
2025 if (rm != REG_NUM_EBP && o == 0 &&
2026 seg == NO_SEG && !forw_ref &&
2027 !(input->eaflags &
2028 (EAF_BYTEOFFS | EAF_WORDOFFS)))
2029 mod = 0;
2030 else if (input->eaflags & EAF_BYTEOFFS ||
2031 (o >= -128 && o <= 127 && seg == NO_SEG
2032 && !forw_ref
2033 && !(input->eaflags & EAF_WORDOFFS)))
2034 mod = 1;
2035 else
2036 mod = 2;
2039 output->sib_present = false;
/* no base register or mod 2: 4 displacement bytes; otherwise mod (0 or 1)
   is itself the displacement length */
2040 output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2041 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2042 } else {
2043 /* we need a SIB */
2044 int mod, scale, index, base;
2046 if (it == -1)
2047 index = 4, s = 1;
2048 else
2049 index = (it & 7);
/* translate the scale factor 1/2/4/8 into the 2-bit SIB scale field */
2051 switch (s) {
2052 case 1:
2053 scale = 0;
2054 break;
2055 case 2:
2056 scale = 1;
2057 break;
2058 case 4:
2059 scale = 2;
2060 break;
2061 case 8:
2062 scale = 3;
2063 break;
2064 default: /* then what the smeg is it? */
2065 return NULL; /* panic */
2068 if (bt == -1) {
2069 base = 5;
2070 mod = 0;
2071 } else {
2072 base = (bt & 7);
2073 if (base != REG_NUM_EBP && o == 0 &&
2074 seg == NO_SEG && !forw_ref &&
2075 !(input->eaflags &
2076 (EAF_BYTEOFFS | EAF_WORDOFFS)))
2077 mod = 0;
2078 else if (input->eaflags & EAF_BYTEOFFS ||
2079 (o >= -128 && o <= 127 && seg == NO_SEG
2080 && !forw_ref
2081 && !(input->eaflags & EAF_WORDOFFS)))
2082 mod = 1;
2083 else
2084 mod = 2;
2087 output->sib_present = true;
2088 output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2089 output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2090 output->sib = (scale << 6) | (index << 3) | base;
2092 } else { /* it's 16-bit */
2093 int mod, rm;
2095 /* check for 64-bit long mode */
2096 if (addrbits == 64)
2097 return NULL;
2099 /* check all registers are BX, BP, SI or DI */
2100 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2101 && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2102 && i != R_SI && i != R_DI))
2103 return NULL;
2105 /* ensure the user didn't specify DWORD/QWORD */
2106 if (input->disp_size == 32 || input->disp_size == 64)
2107 return NULL;
2109 if (s != 1 && i != -1)
2110 return NULL; /* no can do, in 16-bit EA */
2111 if (b == -1 && i != -1) {
2112 int tmp = b;
2113 b = i;
2114 i = tmp;
2115 } /* swap */
2116 if ((b == R_SI || b == R_DI) && i != -1) {
2117 int tmp = b;
2118 b = i;
2119 i = tmp;
2121 /* have BX/BP as base, SI/DI index */
2122 if (b == i)
2123 return NULL; /* shouldn't ever happen, in theory */
2124 if (i != -1 && b != -1 &&
2125 (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2126 return NULL; /* invalid combinations */
2127 if (b == -1) /* pure offset: handled above */
2128 return NULL; /* so if it gets to here, panic! */
2130 rm = -1;
/* with an index register, index and base are folded into a single switch
   key so that each accepted pair selects one rm value */
2131 if (i != -1)
2132 switch (i * 256 + b) {
2133 case R_SI * 256 + R_BX:
2134 rm = 0;
2135 break;
2136 case R_DI * 256 + R_BX:
2137 rm = 1;
2138 break;
2139 case R_SI * 256 + R_BP:
2140 rm = 2;
2141 break;
2142 case R_DI * 256 + R_BP:
2143 rm = 3;
2144 break;
2145 } else
2146 switch (b) {
2147 case R_SI:
2148 rm = 4;
2149 break;
2150 case R_DI:
2151 rm = 5;
2152 break;
2153 case R_BP:
2154 rm = 6;
2155 break;
2156 case R_BX:
2157 rm = 7;
2158 break;
2160 if (rm == -1) /* can't happen, in theory */
2161 return NULL; /* so panic if it does */
/* rm == 6 is excluded from the mod 0 (no displacement) form below */
2163 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2164 !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2165 mod = 0;
2166 else if (input->eaflags & EAF_BYTEOFFS ||
2167 (o >= -128 && o <= 127 && seg == NO_SEG
2168 && !forw_ref
2169 && !(input->eaflags & EAF_WORDOFFS)))
2170 mod = 1;
2171 else
2172 mod = 2;
2174 output->sib_present = false; /* no SIB - it's 16-bit */
2175 output->bytes = mod; /* bytes of offset needed */
2176 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
/* total length: ModRM byte + optional SIB byte + displacement bytes */
2181 output->size = 1 + output->sib_present + output->bytes;
2182 return output;
2185 static void add_asp(insn *ins, int addrbits)
2187 int j, valid;
2188 int defdisp;
2190 valid = (addrbits == 64) ? 64|32 : 32|16;
2192 switch (ins->prefixes[PPS_ASIZE]) {
2193 case P_A16:
2194 valid &= 16;
2195 break;
2196 case P_A32:
2197 valid &= 32;
2198 break;
2199 case P_A64:
2200 valid &= 64;
2201 break;
2202 case P_ASP:
2203 valid &= (addrbits == 32) ? 16 : 32;
2204 break;
2205 default:
2206 break;
2209 for (j = 0; j < ins->operands; j++) {
2210 if (!(MEMORY & ~ins->oprs[j].type)) {
2211 int32_t i, b;
2213 /* Verify as Register */
2214 if (ins->oprs[j].indexreg < EXPR_REG_START
2215 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2216 i = 0;
2217 else
2218 i = reg_flags[ins->oprs[j].indexreg];
2220 /* Verify as Register */
2221 if (ins->oprs[j].basereg < EXPR_REG_START
2222 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2223 b = 0;
2224 else
2225 b = reg_flags[ins->oprs[j].basereg];
2227 if (ins->oprs[j].scale == 0)
2228 i = 0;
2230 if (!i && !b) {
2231 int ds = ins->oprs[j].disp_size;
2232 if ((addrbits != 64 && ds > 8) ||
2233 (addrbits == 64 && ds == 16))
2234 valid &= ds;
2235 } else {
2236 if (!(REG16 & ~b))
2237 valid &= 16;
2238 if (!(REG32 & ~b))
2239 valid &= 32;
2240 if (!(REG64 & ~b))
2241 valid &= 64;
2243 if (!(REG16 & ~i))
2244 valid &= 16;
2245 if (!(REG32 & ~i))
2246 valid &= 32;
2247 if (!(REG64 & ~i))
2248 valid &= 64;
2253 if (valid & addrbits) {
2254 ins->addr_size = addrbits;
2255 } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2256 /* Add an address size prefix */
2257 enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2258 ins->prefixes[PPS_ASIZE] = pref;
2259 ins->addr_size = (addrbits == 32) ? 16 : 32;
2260 } else {
2261 /* Impossible... */
2262 errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2263 ins->addr_size = addrbits; /* Error recovery */
2266 defdisp = ins->addr_size == 16 ? 16 : 32;
2268 for (j = 0; j < ins->operands; j++) {
2269 if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2270 (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2271 != ins->addr_size) {
2272 /* mem_offs sizes must match the address size; if not,
2273 strip the MEM_OFFS bit and match only EA instructions */
2274 ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);