/*
 * Source: nasm.git / assemble.c (NASM 0.98p3.5)
 * blob ae1de206fe77fad5569fd25a87bb94c173626817
 */
/* assemble.c   code generation for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * the actual codes (C syntax, i.e. octal):
 * \0            - terminates the code. (Unless it's a literal of course.)
 * \1, \2, \3    - that many literal bytes follow in the code stream
 * \4, \6        - the POP/PUSH (respectively) codes for CS, DS, ES, SS
 *                 (POP is never used for CS) depending on operand 0
 * \5, \7        - the second byte of POP/PUSH codes for FS, GS, depending
 *                 on operand 0
 * \10, \11, \12 - a literal byte follows in the code stream, to be added
 *                 to the register value of operand 0, 1 or 2
 * \17           - encodes the literal byte 0. (Some compilers don't take
 *                 kindly to a zero byte in the _middle_ of a compile time
 *                 string constant, so I had to put this hack in.)
 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
 *                 assembly mode or the address-size override on the operand
 * \37           - a word constant, from the _segment_ part of operand 0
 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
 *                 assembly mode or the address-size override on the operand
 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
 * \1ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field the register value of operand b.
 * \2ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field equal to digit b.
 * \30x          - might be an 0x67 byte, depending on the address size of
 *                 the memory reference in operand x.
 * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 * \312          - generates no code in the assembler.
 * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 * \322          - indicates that this instruction is only valid when the
 *                 operand size is the default (instruction to disassembler,
 *                 generates no code in the assembler)
 * \330          - a literal byte follows in the code stream, to be added
 *                 to the condition code value of the instruction.
 * \340          - reserve <operand 0> bytes of uninitialised storage.
 *                 Operand 0 had better be a segmentless constant.
 * \341, \342    - as \340, but operand 0 is shifted left by 1 or 2 bits
 *                 first (i.e. it counts 2- or 4-byte units).
 */
52 #include <stdio.h>
53 #include <string.h>
55 #include "nasm.h"
56 #include "nasmlib.h"
57 #include "assemble.h"
58 #include "insns.h"
60 extern struct itemplate *nasm_instructions[];
62 typedef struct {
63 int sib_present; /* is a SIB byte necessary? */
64 int bytes; /* # of bytes of offset needed */
65 int size; /* lazy - this is sib+bytes+1 */
66 unsigned char modrm, sib; /* the bytes themselves */
67 } ea;
69 static efunc errfunc;
70 static struct ofmt *outfmt;
71 static ListGen *list;
73 static long calcsize (long, long, int, insn *, char *);
74 static void gencode (long, long, int, insn *, char *, long);
75 static int regval (operand *o);
76 static int matches (struct itemplate *, insn *);
77 static ea * process_ea (operand *, ea *, int, int, int);
78 static int chsize (operand *, int);
81 * This routine wrappers the real output format's output routine,
82 * in order to pass a copy of the data off to the listing file
83 * generator at the same time.
85 static void out (long offset, long segto, void *data, unsigned long type,
86 long segment, long wrt)
88 static long lineno;
89 static char *lnfname;
91 if ((type & OUT_TYPMASK) == OUT_ADDRESS) {
92 if (segment != NO_SEG || wrt != NO_SEG) {
94 * This address is relocated. We must write it as
95 * OUT_ADDRESS, so there's no work to be done here.
97 list->output (offset, data, type);
99 else {
100 unsigned char p[4], *q = p;
102 * This is a non-relocated address, and we're going to
103 * convert it into RAWDATA format.
105 if ((type & OUT_SIZMASK) == 4) {
106 WRITELONG (q, * (long *) data);
107 list->output (offset, p, OUT_RAWDATA+4);
109 else {
110 WRITESHORT (q, * (long *) data);
111 list->output (offset, p, OUT_RAWDATA+2);
115 else if ((type & OUT_TYPMASK) == OUT_RAWDATA) {
116 list->output (offset, data, type);
118 else if ((type & OUT_TYPMASK) == OUT_RESERVE) {
119 list->output (offset, NULL, type);
121 else if ((type & OUT_TYPMASK) == OUT_REL2ADR ||
122 (type & OUT_TYPMASK) == OUT_REL4ADR) {
123 list->output (offset, data, type);
126 if (src_get(&lineno,&lnfname))
127 outfmt->current_dfmt->linenum(lnfname,lineno,segto);
129 outfmt->output (segto, data, type, segment, wrt);
132 long assemble (long segment, long offset, int bits,
133 insn *instruction, struct ofmt *output, efunc error,
134 ListGen *listgen)
136 struct itemplate *temp;
137 int j;
138 int size_prob;
139 long insn_end;
140 long itimes;
141 long start = offset;
142 long wsize = 0; /* size for DB etc. */
144 errfunc = error; /* to pass to other functions */
145 outfmt = output; /* likewise */
146 list = listgen; /* and again */
148 switch (instruction->opcode)
150 case -1: return 0;
151 case I_DB: wsize = 1; break;
152 case I_DW: wsize = 2; break;
153 case I_DD: wsize = 4; break;
154 case I_DQ: wsize = 8; break;
155 case I_DT: wsize = 10; break;
158 if (wsize) {
159 extop * e;
160 long t = instruction->times;
161 if (t < 0)
162 errfunc(ERR_PANIC, "instruction->times < 0 (%ld) in assemble()",t);
164 while (t--) /* repeat TIMES times */
166 for (e = instruction->eops; e; e = e->next)
168 if (e->type == EOT_DB_NUMBER)
170 if (wsize == 1) {
171 if (e->segment != NO_SEG)
172 errfunc (ERR_NONFATAL,
173 "one-byte relocation attempted");
174 else {
175 out (offset, segment, &e->offset, OUT_RAWDATA+1,
176 NO_SEG, NO_SEG);
179 else if (wsize > 5) {
180 errfunc (ERR_NONFATAL, "integer supplied to a D%c"
181 " instruction", wsize==8 ? 'Q' : 'T');
183 else
184 out (offset, segment, &e->offset,
185 OUT_ADDRESS+wsize, e->segment,
186 e->wrt);
187 offset += wsize;
189 else if (e->type == EOT_DB_STRING)
191 int align;
193 out (offset, segment, e->stringval,
194 OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
195 align = e->stringlen % wsize;
197 if (align) {
198 align = wsize - align;
199 out (offset, segment, "\0\0\0\0\0\0\0\0",
200 OUT_RAWDATA+align, NO_SEG, NO_SEG);
202 offset += e->stringlen + align;
205 if (t > 0 && t == instruction->times-1)
208 * Dummy call to list->output to give the offset to the
209 * listing module.
211 list->output (offset, NULL, OUT_RAWDATA);
212 list->uplevel (LIST_TIMES);
215 if (instruction->times > 1)
216 list->downlevel (LIST_TIMES);
217 return offset - start;
220 if (instruction->opcode == I_INCBIN)
222 static char fname[FILENAME_MAX];
223 FILE * fp;
224 long len;
226 len = FILENAME_MAX-1;
227 if (len > instruction->eops->stringlen)
228 len = instruction->eops->stringlen;
229 strncpy (fname, instruction->eops->stringval, len);
230 fname[len] = '\0';
232 if ( (fp = fopen(fname, "rb")) == NULL)
233 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
234 else if (fseek(fp, 0L, SEEK_END) < 0)
235 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
236 fname);
237 else
239 static char buf[2048];
240 long t = instruction->times;
241 long base = 0;
243 len = ftell (fp);
244 if (instruction->eops->next) {
245 base = instruction->eops->next->offset;
246 len -= base;
247 if (instruction->eops->next->next &&
248 len > instruction->eops->next->next->offset)
249 len = instruction->eops->next->next->offset;
252 * Dummy call to list->output to give the offset to the
253 * listing module.
255 list->output (offset, NULL, OUT_RAWDATA);
256 list->uplevel(LIST_INCBIN);
257 while (t--)
259 long l;
261 fseek (fp, base, SEEK_SET);
262 l = len;
263 while (l > 0) {
264 long m = fread (buf, 1, (l>sizeof(buf)?sizeof(buf):l),
265 fp);
266 if (!m) {
268 * This shouldn't happen unless the file
269 * actually changes while we are reading
270 * it.
272 error (ERR_NONFATAL, "`incbin': unexpected EOF while"
273 " reading file `%s'", fname);
274 t=0; /* Try to exit cleanly */
275 break;
277 out (offset, segment, buf, OUT_RAWDATA+m,
278 NO_SEG, NO_SEG);
279 l -= m;
282 list->downlevel(LIST_INCBIN);
283 if (instruction->times > 1) {
285 * Dummy call to list->output to give the offset to the
286 * listing module.
288 list->output (offset, NULL, OUT_RAWDATA);
289 list->uplevel(LIST_TIMES);
290 list->downlevel(LIST_TIMES);
292 fclose (fp);
293 return instruction->times * len;
295 return 0; /* if we're here, there's an error */
298 size_prob = FALSE;
299 temp = nasm_instructions[instruction->opcode];
300 while (temp->opcode != -1) {
301 int m = matches (temp, instruction);
303 if (m == 100) /* matches! */
305 char *codes = temp->code;
306 long insn_size = calcsize(segment, offset, bits,
307 instruction, codes);
308 itimes = instruction->times;
309 if (insn_size < 0) /* shouldn't be, on pass two */
310 error (ERR_PANIC, "errors made it through from pass one");
311 else while (itimes--) {
312 insn_end = offset + insn_size;
313 for (j=0; j<instruction->nprefix; j++) {
314 unsigned char c=0;
315 switch (instruction->prefixes[j]) {
316 case P_LOCK:
317 c = 0xF0; break;
318 case P_REPNE: case P_REPNZ:
319 c = 0xF2; break;
320 case P_REPE: case P_REPZ: case P_REP:
321 c = 0xF3; break;
322 case R_CS: c = 0x2E; break;
323 case R_DS: c = 0x3E; break;
324 case R_ES: c = 0x26; break;
325 case R_FS: c = 0x64; break;
326 case R_GS: c = 0x65; break;
327 case R_SS: c = 0x36; break;
328 case P_A16:
329 if (bits != 16)
330 c = 0x67;
331 break;
332 case P_A32:
333 if (bits != 32)
334 c = 0x67;
335 break;
336 case P_O16:
337 if (bits != 16)
338 c = 0x66;
339 break;
340 case P_O32:
341 if (bits != 32)
342 c = 0x66;
343 break;
344 default:
345 error (ERR_PANIC,
346 "invalid instruction prefix");
348 if (c != 0) {
349 out (offset, segment, &c, OUT_RAWDATA+1,
350 NO_SEG, NO_SEG);
351 offset++;
354 gencode (segment, offset, bits, instruction, codes, insn_end);
355 offset += insn_size;
356 if (itimes > 0 && itimes == instruction->times-1) {
358 * Dummy call to list->output to give the offset to the
359 * listing module.
361 list->output (offset, NULL, OUT_RAWDATA);
362 list->uplevel (LIST_TIMES);
365 if (instruction->times > 1)
366 list->downlevel (LIST_TIMES);
367 return offset - start;
368 } else if (m > 0) {
369 size_prob = m;
371 temp++;
374 if (temp->opcode == -1) { /* didn't match any instruction */
375 if (size_prob == 1) /* would have matched, but for size */
376 error (ERR_NONFATAL, "operation size not specified");
377 else if (size_prob == 2)
378 error (ERR_NONFATAL, "mismatch in operand sizes");
379 else
380 error (ERR_NONFATAL,
381 "invalid combination of opcode and operands");
383 return 0;
386 long insn_size (long segment, long offset, int bits,
387 insn *instruction, efunc error)
389 struct itemplate *temp;
391 errfunc = error; /* to pass to other functions */
393 if (instruction->opcode == -1)
394 return 0;
396 if (instruction->opcode == I_DB ||
397 instruction->opcode == I_DW ||
398 instruction->opcode == I_DD ||
399 instruction->opcode == I_DQ ||
400 instruction->opcode == I_DT)
402 extop *e;
403 long isize, osize, wsize = 0; /* placate gcc */
405 isize = 0;
406 switch (instruction->opcode)
408 case I_DB: wsize = 1; break;
409 case I_DW: wsize = 2; break;
410 case I_DD: wsize = 4; break;
411 case I_DQ: wsize = 8; break;
412 case I_DT: wsize = 10; break;
415 for (e = instruction->eops; e; e = e->next)
417 long align;
419 osize = 0;
420 if (e->type == EOT_DB_NUMBER)
421 osize = 1;
422 else if (e->type == EOT_DB_STRING)
423 osize = e->stringlen;
425 align = (-osize) % wsize;
426 if (align < 0)
427 align += wsize;
428 isize += osize + align;
430 return isize * instruction->times;
433 if (instruction->opcode == I_INCBIN)
435 char fname[FILENAME_MAX];
436 FILE * fp;
437 long len;
439 len = FILENAME_MAX-1;
440 if (len > instruction->eops->stringlen)
441 len = instruction->eops->stringlen;
442 strncpy (fname, instruction->eops->stringval, len);
443 fname[len] = '\0';
444 if ( (fp = fopen(fname, "rb")) == NULL )
445 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
446 else if (fseek(fp, 0L, SEEK_END) < 0)
447 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
448 fname);
449 else
451 len = ftell (fp);
452 fclose (fp);
453 if (instruction->eops->next)
455 len -= instruction->eops->next->offset;
456 if (instruction->eops->next->next &&
457 len > instruction->eops->next->next->offset)
459 len = instruction->eops->next->next->offset;
462 return instruction->times * len;
464 return 0; /* if we're here, there's an error */
467 temp = nasm_instructions[instruction->opcode];
468 while (temp->opcode != -1) {
469 if (matches(temp, instruction) == 100) {
470 /* we've matched an instruction. */
471 long isize;
472 char * codes = temp->code;
473 int j;
475 isize = calcsize(segment, offset, bits, instruction, codes);
476 if (isize < 0)
477 return -1;
478 for (j = 0; j < instruction->nprefix; j++)
480 if ((instruction->prefixes[j] != P_A16 &&
481 instruction->prefixes[j] != P_O16 && bits==16) ||
482 (instruction->prefixes[j] != P_A32 &&
483 instruction->prefixes[j] != P_O32 && bits==32))
485 isize++;
488 return isize * instruction->times;
490 temp++;
492 return -1; /* didn't match any instruction */
495 static long calcsize (long segment, long offset, int bits,
496 insn *ins, char *codes)
498 long length = 0;
499 unsigned char c;
501 (void) segment; /* Don't warn that this parameter is unused */
502 (void) offset; /* Don't warn that this parameter is unused */
504 while (*codes) switch (c = *codes++) {
505 case 01: case 02: case 03:
506 codes += c, length += c; break;
507 case 04: case 05: case 06: case 07:
508 length++; break;
509 case 010: case 011: case 012:
510 codes++, length++; break;
511 case 017:
512 length++; break;
513 case 014: case 015: case 016:
514 length++; break;
515 case 020: case 021: case 022:
516 length++; break;
517 case 024: case 025: case 026:
518 length++; break;
519 case 030: case 031: case 032:
520 length += 2; break;
521 case 034: case 035: case 036:
522 length += ((ins->oprs[c-034].addr_size ?
523 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); break;
524 case 037:
525 length += 2; break;
526 case 040: case 041: case 042:
527 length += 4; break;
528 case 050: case 051: case 052:
529 length++; break;
530 case 060: case 061: case 062:
531 length += 2; break;
532 case 064: case 065: case 066:
533 length += ((ins->oprs[c-064].addr_size ?
534 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4); break;
535 case 070: case 071: case 072:
536 length += 4; break;
537 case 0300: case 0301: case 0302:
538 length += chsize (&ins->oprs[c-0300], bits);
539 break;
540 case 0310:
541 length += (bits==32);
542 break;
543 case 0311:
544 length += (bits==16);
545 break;
546 case 0312:
547 break;
548 case 0320:
549 length += (bits==32);
550 break;
551 case 0321:
552 length += (bits==16);
553 break;
554 case 0322:
555 break;
556 case 0330:
557 codes++, length++; break;
558 case 0340: case 0341: case 0342:
559 if (ins->oprs[0].segment != NO_SEG)
560 errfunc (ERR_NONFATAL, "attempt to reserve non-constant"
561 " quantity of BSS space");
562 else
563 length += ins->oprs[0].offset << (c-0340);
564 break;
565 default: /* can't do it by 'case' statements */
566 if (c>=0100 && c<=0277) { /* it's an EA */
567 ea ea_data;
568 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, 0,
569 ins->forw_ref)) {
570 errfunc (ERR_NONFATAL, "invalid effective address");
571 return -1;
572 } else
573 length += ea_data.size;
574 } else
575 errfunc (ERR_PANIC, "internal instruction table corrupt"
576 ": instruction code 0x%02X given", c);
578 return length;
581 static void gencode (long segment, long offset, int bits,
582 insn *ins, char *codes, long insn_end)
584 static char condval[] = { /* conditional opcodes */
585 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
586 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
587 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
589 unsigned char c;
590 unsigned char bytes[4];
591 long data, size;
593 while (*codes)
594 switch (c = *codes++)
596 case 01: case 02: case 03:
597 out (offset, segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG);
598 codes += c;
599 offset += c;
600 break;
602 case 04: case 06:
603 switch (ins->oprs[0].basereg)
605 case R_CS:
606 bytes[0] = 0x0E + (c == 0x04 ? 1 : 0); break;
607 case R_DS:
608 bytes[0] = 0x1E + (c == 0x04 ? 1 : 0); break;
609 case R_ES:
610 bytes[0] = 0x06 + (c == 0x04 ? 1 : 0); break;
611 case R_SS:
612 bytes[0] = 0x16 + (c == 0x04 ? 1 : 0); break;
613 default:
614 errfunc (ERR_PANIC, "bizarre 8086 segment register received");
616 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
617 offset++;
618 break;
620 case 05: case 07:
621 switch (ins->oprs[0].basereg) {
622 case R_FS: bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0); break;
623 case R_GS: bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0); break;
624 default:
625 errfunc (ERR_PANIC, "bizarre 386 segment register received");
627 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
628 offset++;
629 break;
631 case 010: case 011: case 012:
632 bytes[0] = *codes++ + regval(&ins->oprs[c-010]);
633 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
634 offset += 1;
635 break;
637 case 017:
638 bytes[0] = 0;
639 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
640 offset += 1;
641 break;
643 case 014: case 015: case 016:
644 if (ins->oprs[c-014].offset < -128
645 || ins->oprs[c-014].offset > 127)
647 errfunc (ERR_WARNING, "signed byte value exceeds bounds");
650 if (ins->oprs[c-014].segment != NO_SEG)
652 data = ins->oprs[c-014].offset;
653 out (offset, segment, &data, OUT_ADDRESS+1,
654 ins->oprs[c-014].segment, ins->oprs[c-014].wrt);
656 else {
657 bytes[0] = ins->oprs[c-014].offset;
658 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
660 offset += 1;
661 break;
663 case 020: case 021: case 022:
664 if (ins->oprs[c-020].offset < -256
665 || ins->oprs[c-020].offset > 255)
667 errfunc (ERR_WARNING, "byte value exceeds bounds");
669 if (ins->oprs[c-020].segment != NO_SEG) {
670 data = ins->oprs[c-020].offset;
671 out (offset, segment, &data, OUT_ADDRESS+1,
672 ins->oprs[c-020].segment, ins->oprs[c-020].wrt);
674 else {
675 bytes[0] = ins->oprs[c-020].offset;
676 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
678 offset += 1;
679 break;
681 case 024: case 025: case 026:
682 if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255)
683 errfunc (ERR_WARNING, "unsigned byte value exceeds bounds");
684 if (ins->oprs[c-024].segment != NO_SEG) {
685 data = ins->oprs[c-024].offset;
686 out (offset, segment, &data, OUT_ADDRESS+1,
687 ins->oprs[c-024].segment, ins->oprs[c-024].wrt);
689 else {
690 bytes[0] = ins->oprs[c-024].offset;
691 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
693 offset += 1;
694 break;
696 case 030: case 031: case 032:
697 if (ins->oprs[c-030].segment == NO_SEG &&
698 ins->oprs[c-030].wrt == NO_SEG &&
699 (ins->oprs[c-030].offset < -65536L ||
700 ins->oprs[c-030].offset > 65535L))
702 errfunc (ERR_WARNING, "word value exceeds bounds");
704 data = ins->oprs[c-030].offset;
705 out (offset, segment, &data, OUT_ADDRESS+2,
706 ins->oprs[c-030].segment, ins->oprs[c-030].wrt);
707 offset += 2;
708 break;
710 case 034: case 035: case 036:
711 data = ins->oprs[c-034].offset;
712 size = ((ins->oprs[c-034].addr_size ?
713 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4);
714 if (size==16 && (data < -65536L || data > 65535L))
715 errfunc (ERR_WARNING, "word value exceeds bounds");
716 out (offset, segment, &data, OUT_ADDRESS+size,
717 ins->oprs[c-034].segment, ins->oprs[c-034].wrt);
718 offset += size;
719 break;
721 case 037:
722 if (ins->oprs[0].segment == NO_SEG)
723 errfunc (ERR_NONFATAL, "value referenced by FAR is not"
724 " relocatable");
725 data = 0L;
726 out (offset, segment, &data, OUT_ADDRESS+2,
727 outfmt->segbase(1+ins->oprs[0].segment),
728 ins->oprs[0].wrt);
729 offset += 2;
730 break;
732 case 040: case 041: case 042:
733 data = ins->oprs[c-040].offset;
734 out (offset, segment, &data, OUT_ADDRESS+4,
735 ins->oprs[c-040].segment, ins->oprs[c-040].wrt);
736 offset += 4;
737 break;
739 case 050: case 051: case 052:
740 if (ins->oprs[c-050].segment != segment)
741 errfunc (ERR_NONFATAL, "short relative jump outside segment");
742 data = ins->oprs[c-050].offset - insn_end;
743 if (data > 127 || data < -128)
744 errfunc (ERR_NONFATAL, "short jump is out of range");
745 bytes[0] = data;
746 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
747 offset += 1;
748 break;
750 case 060: case 061: case 062:
751 if (ins->oprs[c-060].segment != segment) {
752 data = ins->oprs[c-060].offset;
753 out (offset, segment, &data, OUT_REL2ADR+insn_end-offset,
754 ins->oprs[c-060].segment, ins->oprs[c-060].wrt);
755 } else {
756 data = ins->oprs[c-060].offset - insn_end;
757 out (offset, segment, &data,
758 OUT_ADDRESS+2, NO_SEG, NO_SEG);
760 offset += 2;
761 break;
763 case 064: case 065: case 066:
764 size = ((ins->oprs[c-064].addr_size ?
765 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4);
766 if (ins->oprs[c-064].segment != segment) {
767 data = ins->oprs[c-064].offset;
768 size = (bits == 16 ? OUT_REL2ADR : OUT_REL4ADR);
769 out (offset, segment, &data, size+insn_end-offset,
770 ins->oprs[c-064].segment, ins->oprs[c-064].wrt);
771 size = (bits == 16 ? 2 : 4);
772 } else {
773 data = ins->oprs[c-064].offset - insn_end;
774 out (offset, segment, &data,
775 OUT_ADDRESS+size, NO_SEG, NO_SEG);
777 offset += size;
778 break;
780 case 070: case 071: case 072:
781 if (ins->oprs[c-070].segment != segment) {
782 data = ins->oprs[c-070].offset;
783 out (offset, segment, &data, OUT_REL4ADR+insn_end-offset,
784 ins->oprs[c-070].segment, ins->oprs[c-070].wrt);
785 } else {
786 data = ins->oprs[c-070].offset - insn_end;
787 out (offset, segment, &data,
788 OUT_ADDRESS+4, NO_SEG, NO_SEG);
790 offset += 4;
791 break;
793 case 0300: case 0301: case 0302:
794 if (chsize (&ins->oprs[c-0300], bits)) {
795 *bytes = 0x67;
796 out (offset, segment, bytes,
797 OUT_RAWDATA+1, NO_SEG, NO_SEG);
798 offset += 1;
799 } else
800 offset += 0;
801 break;
803 case 0310:
804 if (bits==32) {
805 *bytes = 0x67;
806 out (offset, segment, bytes,
807 OUT_RAWDATA+1, NO_SEG, NO_SEG);
808 offset += 1;
809 } else
810 offset += 0;
811 break;
813 case 0311:
814 if (bits==16) {
815 *bytes = 0x67;
816 out (offset, segment, bytes,
817 OUT_RAWDATA+1, NO_SEG, NO_SEG);
818 offset += 1;
819 } else
820 offset += 0;
821 break;
823 case 0312:
824 break;
826 case 0320:
827 if (bits==32) {
828 *bytes = 0x66;
829 out (offset, segment, bytes,
830 OUT_RAWDATA+1, NO_SEG, NO_SEG);
831 offset += 1;
832 } else
833 offset += 0;
834 break;
836 case 0321:
837 if (bits==16) {
838 *bytes = 0x66;
839 out (offset, segment, bytes,
840 OUT_RAWDATA+1, NO_SEG, NO_SEG);
841 offset += 1;
842 } else
843 offset += 0;
844 break;
846 case 0322:
847 break;
849 case 0330:
850 *bytes = *codes++ + condval[ins->condition];
851 out (offset, segment, bytes,
852 OUT_RAWDATA+1, NO_SEG, NO_SEG);
853 offset += 1;
854 break;
856 case 0340: case 0341: case 0342:
857 if (ins->oprs[0].segment != NO_SEG)
858 errfunc (ERR_PANIC, "non-constant BSS size in pass two");
859 else {
860 long size = ins->oprs[0].offset << (c-0340);
861 if (size > 0)
862 out (offset, segment, NULL,
863 OUT_RESERVE+size, NO_SEG, NO_SEG);
864 offset += size;
866 break;
868 default: /* can't do it by 'case' statements */
869 if (c>=0100 && c<=0277) { /* it's an EA */
870 ea ea_data;
871 int rfield;
872 unsigned char *p;
873 long s;
875 if (c<=0177) /* pick rfield from operand b */
876 rfield = regval (&ins->oprs[c&7]);
877 else /* rfield is constant */
878 rfield = c & 7;
880 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, rfield,
881 ins->forw_ref))
883 errfunc (ERR_NONFATAL, "invalid effective address");
886 p = bytes;
887 *p++ = ea_data.modrm;
888 if (ea_data.sib_present)
889 *p++ = ea_data.sib;
891 s = p-bytes;
892 out (offset, segment, bytes, OUT_RAWDATA + s,
893 NO_SEG, NO_SEG);
895 switch (ea_data.bytes) {
896 case 0:
897 break;
898 case 1:
899 if (ins->oprs[(c>>3)&7].segment != NO_SEG) {
900 data = ins->oprs[(c>>3)&7].offset;
901 out (offset, segment, &data, OUT_ADDRESS+1,
902 ins->oprs[(c>>3)&7].segment,
903 ins->oprs[(c>>3)&7].wrt);
904 } else {
905 *bytes = ins->oprs[(c>>3)&7].offset;
906 out (offset, segment, bytes, OUT_RAWDATA+1,
907 NO_SEG, NO_SEG);
909 s++;
910 break;
911 case 2:
912 case 4:
913 data = ins->oprs[(c>>3)&7].offset;
914 out (offset, segment, &data,
915 OUT_ADDRESS+ea_data.bytes,
916 ins->oprs[(c>>3)&7].segment, ins->oprs[(c>>3)&7].wrt);
917 s += ea_data.bytes;
918 break;
920 offset += s;
921 } else
922 errfunc (ERR_PANIC, "internal instruction table corrupt"
923 ": instruction code 0x%02X given", c);
927 static int regval (operand *o)
929 switch (o->basereg) {
930 case R_EAX: case R_AX: case R_AL: case R_ES: case R_CR0: case R_DR0:
931 case R_ST0: case R_MM0: case R_XMM0:
932 return 0;
933 case R_ECX: case R_CX: case R_CL: case R_CS: case R_DR1: case R_ST1:
934 case R_MM1: case R_XMM1:
935 return 1;
936 case R_EDX: case R_DX: case R_DL: case R_SS: case R_CR2: case R_DR2:
937 case R_ST2: case R_MM2: case R_XMM2:
938 return 2;
939 case R_EBX: case R_BX: case R_BL: case R_DS: case R_CR3: case R_DR3:
940 case R_TR3: case R_ST3: case R_MM3: case R_XMM3:
941 return 3;
942 case R_ESP: case R_SP: case R_AH: case R_FS: case R_CR4: case R_TR4:
943 case R_ST4: case R_MM4: case R_XMM4:
944 return 4;
945 case R_EBP: case R_BP: case R_CH: case R_GS: case R_TR5: case R_ST5:
946 case R_MM5: case R_XMM5:
947 return 5;
948 case R_ESI: case R_SI: case R_DH: case R_DR6: case R_TR6: case R_ST6:
949 case R_MM6: case R_XMM6:
950 return 6;
951 case R_EDI: case R_DI: case R_BH: case R_DR7: case R_TR7: case R_ST7:
952 case R_MM7: case R_XMM7:
953 return 7;
954 default: /* panic */
955 errfunc (ERR_PANIC, "invalid register operand given to regval()");
956 return 0;
960 static int matches (struct itemplate *itemp, insn *instruction)
962 int i, size, oprs, ret;
964 ret = 100;
967 * Check the opcode
969 if (itemp->opcode != instruction->opcode) return 0;
972 * Count the operands
974 if (itemp->operands != instruction->operands) return 0;
977 * Check that no spurious colons or TOs are present
979 for (i=0; i<itemp->operands; i++)
980 if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON|TO))
981 return 0;
984 * Check that the operand flags all match up
986 for (i=0; i<itemp->operands; i++)
987 if (itemp->opd[i] & ~instruction->oprs[i].type ||
988 ((itemp->opd[i] & SIZE_MASK) &&
989 ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK)))
991 if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) ||
992 (instruction->oprs[i].type & SIZE_MASK))
993 return 0;
994 else
995 ret = 1;
999 * Check operand sizes
1001 if (itemp->flags & IF_SB) {
1002 size = BITS8;
1003 oprs = itemp->operands;
1004 } else if (itemp->flags & IF_SW) {
1005 size = BITS16;
1006 oprs = itemp->operands;
1007 } else if (itemp->flags & IF_SD) {
1008 size = BITS32;
1009 oprs = itemp->operands;
1010 } else if (itemp->flags & (IF_SM | IF_SM2)) {
1011 oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
1012 size = 0; /* placate gcc */
1013 for (i=0; i<oprs; i++)
1014 if ( (size = itemp->opd[i] & SIZE_MASK) != 0)
1015 break;
1016 } else {
1017 size = 0;
1018 oprs = itemp->operands;
1021 for (i=0; i<itemp->operands; i++)
1022 if (!(itemp->opd[i] & SIZE_MASK) &&
1023 (instruction->oprs[i].type & SIZE_MASK & ~size))
1024 ret = 2;
1026 return ret;
1029 static ea *process_ea (operand *input, ea *output, int addrbits, int rfield,
1030 int forw_ref)
1032 if (!(REGISTER & ~input->type)) { /* it's a single register */
1033 static int regs[] = {
1034 R_AL, R_CL, R_DL, R_BL, R_AH, R_CH, R_DH, R_BH,
1035 R_AX, R_CX, R_DX, R_BX, R_SP, R_BP, R_SI, R_DI,
1036 R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI,
1037 R_MM0, R_MM1, R_MM2, R_MM3, R_MM4, R_MM5, R_MM6, R_MM7,
1038 R_XMM0, R_XMM1, R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7
1040 int i;
1042 for (i=0; i<elements(regs); i++)
1043 if (input->basereg == regs[i]) break;
1044 if (i<elements(regs)) {
1045 output->sib_present = FALSE;/* no SIB necessary */
1046 output->bytes = 0; /* no offset necessary either */
1047 output->modrm = 0xC0 | (rfield << 3) | (i & 7);
1049 else
1050 return NULL;
1051 } else { /* it's a memory reference */
1052 if (input->basereg==-1 && (input->indexreg==-1 || input->scale==0)) {
1053 /* it's a pure offset */
1054 if (input->addr_size)
1055 addrbits = input->addr_size;
1056 output->sib_present = FALSE;
1057 output->bytes = (addrbits==32 ? 4 : 2);
1058 output->modrm = (addrbits==32 ? 5 : 6) | (rfield << 3);
1060 else { /* it's an indirection */
1061 int i=input->indexreg, b=input->basereg, s=input->scale;
1062 long o=input->offset, seg=input->segment;
1063 int hb=input->hintbase, ht=input->hinttype;
1064 int t;
1066 if (s==0) i = -1; /* make this easy, at least */
1068 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1069 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1070 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1071 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) {
1072 /* it must be a 32-bit memory reference. Firstly we have
1073 * to check that all registers involved are type Exx. */
1074 if (i!=-1 && i!=R_EAX && i!=R_EBX && i!=R_ECX && i!=R_EDX
1075 && i!=R_EBP && i!=R_ESP && i!=R_ESI && i!=R_EDI)
1076 return NULL;
1077 if (b!=-1 && b!=R_EAX && b!=R_EBX && b!=R_ECX && b!=R_EDX
1078 && b!=R_EBP && b!=R_ESP && b!=R_ESI && b!=R_EDI)
1079 return NULL;
1081 /* While we're here, ensure the user didn't specify WORD. */
1082 if (input->addr_size == 16)
1083 return NULL;
1085 /* now reorganise base/index */
1086 if (s == 1 && b != i && b != -1 && i != -1 &&
1087 ((hb==b&&ht==EAH_NOTBASE) || (hb==i&&ht==EAH_MAKEBASE)))
1088 t = b, b = i, i = t; /* swap if hints say so */
1089 if (b==i) /* convert EAX+2*EAX to 3*EAX */
1090 b = -1, s++;
1091 if (b==-1 && s==1 && !(hb == i && ht == EAH_NOTBASE))
1092 b = i, i = -1; /* make single reg base, unless hint */
1093 if (((s==2 && i!=R_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
1094 s==3 || s==5 || s==9) && b==-1)
1095 b = i, s--; /* convert 3*EAX to EAX+2*EAX */
1096 if (s==1 && i==R_ESP) /* swap ESP into base if scale is 1 */
1097 i = b, b = R_ESP;
1098 if (i==R_ESP || (s!=1 && s!=2 && s!=4 && s!=8 && i!=-1))
1099 return NULL; /* wrong, for various reasons */
1101 if (i==-1 && b!=R_ESP) {/* no SIB needed */
1102 int mod, rm;
1103 switch(b) {
1104 case R_EAX: rm = 0; break;
1105 case R_ECX: rm = 1; break;
1106 case R_EDX: rm = 2; break;
1107 case R_EBX: rm = 3; break;
1108 case R_EBP: rm = 5; break;
1109 case R_ESI: rm = 6; break;
1110 case R_EDI: rm = 7; break;
1111 case -1: rm = 5; break;
1112 default: /* should never happen */
1113 return NULL;
1115 if (b==-1 || (b!=R_EBP && o==0 &&
1116 seg==NO_SEG && !forw_ref &&
1117 !(input->eaflags &
1118 (EAF_BYTEOFFS|EAF_WORDOFFS))))
1119 mod = 0;
1120 else if (input->eaflags & EAF_BYTEOFFS ||
1121 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1122 !(input->eaflags & EAF_WORDOFFS))) {
1123 mod = 1;
1125 else
1126 mod = 2;
1128 output->sib_present = FALSE;
1129 output->bytes = (b==-1 || mod==2 ? 4 : mod);
1130 output->modrm = (mod<<6) | (rfield<<3) | rm;
1132 else { /* we need a SIB */
1133 int mod, scale, index, base;
1135 switch (b) {
1136 case R_EAX: base = 0; break;
1137 case R_ECX: base = 1; break;
1138 case R_EDX: base = 2; break;
1139 case R_EBX: base = 3; break;
1140 case R_ESP: base = 4; break;
1141 case R_EBP: case -1: base = 5; break;
1142 case R_ESI: base = 6; break;
1143 case R_EDI: base = 7; break;
1144 default: /* then what the smeg is it? */
1145 return NULL; /* panic */
1148 switch (i) {
1149 case R_EAX: index = 0; break;
1150 case R_ECX: index = 1; break;
1151 case R_EDX: index = 2; break;
1152 case R_EBX: index = 3; break;
1153 case -1: index = 4; break;
1154 case R_EBP: index = 5; break;
1155 case R_ESI: index = 6; break;
1156 case R_EDI: index = 7; break;
1157 default: /* then what the smeg is it? */
1158 return NULL; /* panic */
1161 if (i==-1) s = 1;
1162 switch (s) {
1163 case 1: scale = 0; break;
1164 case 2: scale = 1; break;
1165 case 4: scale = 2; break;
1166 case 8: scale = 3; break;
1167 default: /* then what the smeg is it? */
1168 return NULL; /* panic */
1171 if (b==-1 || (b!=R_EBP && o==0 &&
1172 seg==NO_SEG && !forw_ref &&
1173 !(input->eaflags &
1174 (EAF_BYTEOFFS|EAF_WORDOFFS))))
1175 mod = 0;
1176 else if (input->eaflags & EAF_BYTEOFFS ||
1177 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1178 !(input->eaflags & EAF_WORDOFFS)))
1179 mod = 1;
1180 else
1181 mod = 2;
1183 output->sib_present = TRUE;
1184 output->bytes = (b==-1 || mod==2 ? 4 : mod);
1185 output->modrm = (mod<<6) | (rfield<<3) | 4;
1186 output->sib = (scale<<6) | (index<<3) | base;
1189 else { /* it's 16-bit */
1190 int mod, rm;
1192 /* check all registers are BX, BP, SI or DI */
1193 if ((b!=-1 && b!=R_BP && b!=R_BX && b!=R_SI && b!=R_DI) ||
1194 (i!=-1 && i!=R_BP && i!=R_BX && i!=R_SI && i!=R_DI))
1195 return NULL;
1197 /* ensure the user didn't specify DWORD */
1198 if (input->addr_size == 32)
1199 return NULL;
1201 if (s!=1 && i!=-1) return NULL;/* no can do, in 16-bit EA */
1202 if (b==-1 && i!=-1) b ^= i ^= b ^= i; /* swap them round */
1203 if ((b==R_SI || b==R_DI) && i!=-1)
1204 b ^= i ^= b ^= i; /* have BX/BP as base, SI/DI index */
1205 if (b==i) return NULL;/* shouldn't ever happen, in theory */
1206 if (i!=-1 && b!=-1 &&
1207 (i==R_BP || i==R_BX || b==R_SI || b==R_DI))
1208 return NULL; /* invalid combinations */
1209 if (b==-1) /* pure offset: handled above */
1210 return NULL; /* so if it gets to here, panic! */
1212 rm = -1;
1213 if (i!=-1)
1214 switch (i*256 + b) {
1215 case R_SI*256+R_BX: rm=0; break;
1216 case R_DI*256+R_BX: rm=1; break;
1217 case R_SI*256+R_BP: rm=2; break;
1218 case R_DI*256+R_BP: rm=3; break;
1220 else
1221 switch (b) {
1222 case R_SI: rm=4; break;
1223 case R_DI: rm=5; break;
1224 case R_BP: rm=6; break;
1225 case R_BX: rm=7; break;
1227 if (rm==-1) /* can't happen, in theory */
1228 return NULL; /* so panic if it does */
1230 if (o==0 && seg==NO_SEG && !forw_ref && rm!=6 &&
1231 !(input->eaflags & (EAF_BYTEOFFS|EAF_WORDOFFS)))
1232 mod = 0;
1233 else if (input->eaflags & EAF_BYTEOFFS ||
1234 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1235 !(input->eaflags & EAF_WORDOFFS)))
1236 mod = 1;
1237 else
1238 mod = 2;
1240 output->sib_present = FALSE; /* no SIB - it's 16-bit */
1241 output->bytes = mod; /* bytes of offset needed */
1242 output->modrm = (mod<<6) | (rfield<<3) | rm;
1246 output->size = 1 + output->sib_present + output->bytes;
1247 return output;
1250 static int chsize (operand *input, int addrbits)
1252 if (!(MEMORY & ~input->type)) {
1253 int i=input->indexreg, b=input->basereg;
1255 if (input->scale==0) i = -1;
1257 if (i == -1 && b == -1) /* pure offset */
1258 return (input->addr_size != 0 && input->addr_size != addrbits);
1260 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1261 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1262 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1263 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI)
1264 return (addrbits==16);
1265 else
1266 return (addrbits==32);
1268 else
1269 return 0;