NASM 0.94
[nasm.git] / assemble.c
blob3e4bc1f19dbafb4100a7b54d5f58502acf127134
1 /* assemble.c code generation for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * the actual codes (C syntax, i.e. octal):
9 * \0 - terminates the code. (Unless it's a literal of course.)
10 * \1, \2, \3 - that many literal bytes follow in the code stream
11 * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS
12 * (POP is never used for CS) depending on operand 0
13 * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending
14 * on operand 0
15 * \10, \11, \12 - a literal byte follows in the code stream, to be added
16 * to the register value of operand 0, 1 or 2
17 * \17 - encodes the literal byte 0. (Some compilers don't take
18 * kindly to a zero byte in the _middle_ of a compile time
19 * string constant, so I had to put this hack in.)
20 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
21 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
22 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
23 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
24 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
25 * assembly mode or the address-size override on the operand
26 * \37 - a word constant, from the _segment_ part of operand 0
27 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
28 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
29 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
30 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
31 * assembly mode or the address-size override on the operand
32 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
33 * \1ab - a ModRM, calculated on EA in operand a, with the spare
34 * field the register value of operand b.
35 * \2ab - a ModRM, calculated on EA in operand a, with the spare
36 * field equal to digit b.
37 * \30x - might be an 0x67 byte, depending on the address size of
38 * the memory reference in operand x.
39 * \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
40 * \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
41 * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
42 * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
43 * \322 - indicates that this instruction is only valid when the
44 * operand size is the default (instruction to disassembler,
45 * generates no code in the assembler)
46 * \330 - a literal byte follows in the code stream, to be added
47 * to the condition code value of the instruction.
48 * \340, \341, \342 - reserve <operand 0> bytes, words or doublewords of uninitialised storage.
49 * Operand 0 had better be a segmentless constant.
52 #include <stdio.h>
53 #include <string.h>
55 #include "nasm.h"
56 #include "assemble.h"
57 #include "insns.h"
59 extern struct itemplate *nasm_instructions[]; /* indexed by opcode; each template list ends with an entry whose opcode is -1 */
61 typedef struct {
62 int sib_present; /* is a SIB byte necessary? */
63 int bytes; /* # of bytes of offset needed */
64 int size; /* total EA length: 1 (ModRM) + sib_present + bytes; filled in by process_ea() */
65 unsigned char modrm, sib; /* the bytes themselves; sib is only written when sib_present is set */
66 } ea;
68 static efunc errfunc; /* error callback, saved by assemble() and insn_size() for the helpers below */
69 static struct ofmt *outfmt; /* output driver, saved by assemble() for the helpers below */
71 static long calcsize (long, long, int, insn *, char *);
72 static void gencode (long, long, int, insn *, char *, long);
73 static int regval (operand *o);
74 static int matches (struct itemplate *, insn *);
75 static ea *process_ea (operand *, ea *, int, int, int);
76 static int chsize (operand *, int);
78 long assemble (long segment, long offset, int bits,
79 insn *instruction, struct ofmt *output, efunc error) {
80 int j, size_prob;
81 long insn_end, itimes;
82 long start = offset;
83 struct itemplate *temp;
85 errfunc = error; /* to pass to other functions */
86 outfmt = output; /* likewise */
88 if (instruction->opcode == -1)
89 return 0;
91 if (instruction->opcode == I_DB ||
92 instruction->opcode == I_DW ||
93 instruction->opcode == I_DD ||
94 instruction->opcode == I_DQ ||
95 instruction->opcode == I_DT) {
96 extop *e;
97 long wsize = 0; /* placate gcc */
98 long t = instruction->times;
100 switch (instruction->opcode) {
101 case I_DB: wsize = 1; break;
102 case I_DW: wsize = 2; break;
103 case I_DD: wsize = 4; break;
104 case I_DQ: wsize = 8; break;
105 case I_DT: wsize = 10; break;
108 while (t--) {
109 for (e = instruction->eops; e; e = e->next) {
110 if (e->type == EOT_DB_NUMBER) {
111 if (wsize == 1) {
112 if (e->segment != NO_SEG)
113 errfunc (ERR_NONFATAL,
114 "one-byte relocation attempted");
115 else {
116 unsigned char c = e->offset;
117 outfmt->output (segment, &c, OUT_RAWDATA+1,
118 NO_SEG, NO_SEG);
120 } else if (wsize > 5) {
121 errfunc (ERR_NONFATAL, "integer supplied to a D%c"
122 " instruction", wsize==8 ? 'Q' : 'T');
123 } else
124 outfmt->output (segment, &e->offset,
125 OUT_ADDRESS+wsize, e->segment,
126 e->wrt);
127 offset += wsize;
128 } else if (e->type == EOT_DB_STRING) {
129 int align;
131 align = (-e->stringlen) % wsize;
132 if (align < 0)
133 align += wsize;
134 outfmt->output (segment, e->stringval,
135 OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
136 if (align)
137 outfmt->output (segment, "\0\0\0\0",
138 OUT_RAWDATA+align, NO_SEG, NO_SEG);
139 offset += e->stringlen + align;
143 return offset - start;
146 if (instruction->opcode == I_INCBIN) {
147 static char fname[FILENAME_MAX];
148 FILE *fp;
149 long len;
151 len = FILENAME_MAX-1;
152 if (len > instruction->eops->stringlen)
153 len = instruction->eops->stringlen;
154 strncpy (fname, instruction->eops->stringval, len);
155 fname[len] = '\0';
156 if (!(fp = fopen(fname, "rb")))
157 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
158 else if (fseek(fp, 0L, SEEK_END) < 0)
159 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
160 fname);
161 else {
162 static char buf[2048];
163 long t = instruction->times;
164 long l;
166 len = ftell (fp);
167 if (instruction->eops->next) {
168 len -= instruction->eops->next->offset;
169 if (instruction->eops->next->next &&
170 len > instruction->eops->next->next->offset)
171 len = instruction->eops->next->next->offset;
173 while (t--) {
174 fseek (fp,
175 (instruction->eops->next ?
176 instruction->eops->next->offset : 0),
177 SEEK_SET);
178 l = len;
179 while (l > 0) {
180 long m = fread (buf, 1, (l>sizeof(buf)?sizeof(buf):l),
181 fp);
182 if (!m) {
184 * This shouldn't happen unless the file
185 * actually changes while we are reading
186 * it.
188 error (ERR_NONFATAL, "`incbin': unexpected EOF while"
189 " reading file `%s'", fname);
190 return 0; /* it doesn't much matter... */
192 outfmt->output (segment, buf, OUT_RAWDATA+m,
193 NO_SEG, NO_SEG);
194 l -= m;
197 fclose (fp);
198 return instruction->times * len;
200 return 0; /* if we're here, there's an error */
203 size_prob = FALSE;
204 temp = nasm_instructions[instruction->opcode];
205 while (temp->opcode != -1) {
206 int m = matches (temp, instruction);
207 if (m == 100) { /* matches! */
208 char *codes = temp->code;
209 long insn_size = calcsize(segment, offset, bits,
210 instruction, codes);
211 itimes = instruction->times;
212 if (insn_size < 0) /* shouldn't be, on pass two */
213 error (ERR_PANIC, "errors made it through from pass one");
214 else while (itimes--) {
215 insn_end = offset + insn_size;
216 for (j=0; j<instruction->nprefix; j++) {
217 unsigned char c;
218 switch (instruction->prefixes[j]) {
219 case P_LOCK:
220 c = 0xF0; break;
221 case P_REPNE: case P_REPNZ:
222 c = 0xF2; break;
223 case P_REPE: case P_REPZ: case P_REP:
224 c = 0xF3; break;
225 case R_CS: c = 0x2E; break;
226 case R_DS: c = 0x3E; break;
227 case R_ES: c = 0x26; break;
228 case R_FS: c = 0x64; break;
229 case R_GS: c = 0x65; break;
230 case R_SS: c = 0x36; break;
231 case P_A16:
232 if (bits == 16)
233 c = 0; /* no prefix */
234 else
235 c = 0x67;
236 break;
237 case P_A32:
238 if (bits == 32)
239 c = 0; /* no prefix */
240 else
241 c = 0x67;
242 break;
243 case P_O16:
244 if (bits == 16)
245 c = 0; /* no prefix */
246 else
247 c = 0x66;
248 break;
249 case P_O32:
250 if (bits == 32)
251 c = 0; /* no prefix */
252 else
253 c = 0x66;
254 break;
255 default:
256 error (ERR_PANIC,
257 "invalid instruction prefix");
259 if (c != 0)
260 outfmt->output (segment, &c, OUT_RAWDATA+1,
261 NO_SEG, NO_SEG);
262 offset++;
264 gencode (segment, offset, bits, instruction, codes, insn_end);
265 offset += insn_size;
267 return offset - start;
268 } else if (m > 0) {
269 size_prob = m;
271 temp++;
273 if (temp->opcode == -1) { /* didn't match any instruction */
274 if (size_prob == 1) /* would have matched, but for size */
275 error (ERR_NONFATAL, "operation size not specified");
276 else if (size_prob == 2)
277 error (ERR_NONFATAL, "mismatch in operand sizes");
278 else
279 error (ERR_NONFATAL,
280 "invalid combination of opcode and operands");
282 return 0;
285 long insn_size (long segment, long offset, int bits,
286 insn *instruction, efunc error) {
287 struct itemplate *temp;
289 errfunc = error; /* to pass to other functions */
291 if (instruction->opcode == -1)
292 return 0;
294 if (instruction->opcode == I_DB ||
295 instruction->opcode == I_DW ||
296 instruction->opcode == I_DD ||
297 instruction->opcode == I_DQ ||
298 instruction->opcode == I_DT) {
299 extop *e;
300 long isize, osize, wsize = 0; /* placate gcc */
302 isize = 0;
303 switch (instruction->opcode) {
304 case I_DB: wsize = 1; break;
305 case I_DW: wsize = 2; break;
306 case I_DD: wsize = 4; break;
307 case I_DQ: wsize = 8; break;
308 case I_DT: wsize = 10; break;
311 for (e = instruction->eops; e; e = e->next) {
312 long align;
314 osize = 0;
315 if (e->type == EOT_DB_NUMBER)
316 osize = 1;
317 else if (e->type == EOT_DB_STRING)
318 osize = e->stringlen;
320 align = (-osize) % wsize;
321 if (align < 0)
322 align += wsize;
323 isize += osize + align;
325 return isize * instruction->times;
328 if (instruction->opcode == I_INCBIN) {
329 char fname[FILENAME_MAX];
330 FILE *fp;
331 long len;
333 len = FILENAME_MAX-1;
334 if (len > instruction->eops->stringlen)
335 len = instruction->eops->stringlen;
336 strncpy (fname, instruction->eops->stringval, len);
337 fname[len] = '\0';
338 if (!(fp = fopen(fname, "rb")))
339 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
340 else if (fseek(fp, 0L, SEEK_END) < 0)
341 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
342 fname);
343 else {
344 len = ftell (fp);
345 fclose (fp);
346 if (instruction->eops->next) {
347 len -= instruction->eops->next->offset;
348 if (instruction->eops->next->next &&
349 len > instruction->eops->next->next->offset)
350 len = instruction->eops->next->next->offset;
352 return instruction->times * len;
354 return 0; /* if we're here, there's an error */
357 temp = nasm_instructions[instruction->opcode];
358 while (temp->opcode != -1) {
359 if (matches(temp, instruction) == 100) {
360 /* we've matched an instruction. */
361 long isize;
362 char *codes = temp->code;
363 int j;
365 isize = calcsize(segment, offset, bits, instruction, codes);
366 if (isize < 0)
367 return -1;
368 for (j = 0; j < instruction->nprefix; j++) {
369 if ((instruction->prefixes[j] != P_A16 &&
370 instruction->prefixes[j] != P_O16 && bits==16) ||
371 (instruction->prefixes[j] != P_A32 &&
372 instruction->prefixes[j] != P_O32 && bits==32))
373 isize++;
375 return isize * instruction->times;
377 temp++;
379 return -1; /* didn't match any instruction */
382 static long calcsize (long segment, long offset, int bits,
383 insn *ins, char *codes) {
384 long length = 0;
385 unsigned char c;
387 while (*codes) switch (c = *codes++) {
388 case 01: case 02: case 03:
389 codes += c, length += c; break;
390 case 04: case 05: case 06: case 07:
391 length++; break;
392 case 010: case 011: case 012:
393 codes++, length++; break;
394 case 017:
395 length++; break;
396 case 014: case 015: case 016:
397 length++; break;
398 case 020: case 021: case 022:
399 length++; break;
400 case 024: case 025: case 026:
401 length++; break;
402 case 030: case 031: case 032:
403 length += 2; break;
404 case 034: case 035: case 036:
405 length += ((ins->oprs[c-034].addr_size ?
406 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); break;
407 case 037:
408 length += 2; break;
409 case 040: case 041: case 042:
410 length += 4; break;
411 case 050: case 051: case 052:
412 length++; break;
413 case 060: case 061: case 062:
414 length += 2; break;
415 case 064: case 065: case 066:
416 length += ((ins->oprs[c-064].addr_size ?
417 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4); break;
418 case 070: case 071: case 072:
419 length += 4; break;
420 case 0300: case 0301: case 0302:
421 length += chsize (&ins->oprs[c-0300], bits);
422 break;
423 case 0310:
424 length += (bits==32);
425 break;
426 case 0311:
427 length += (bits==16);
428 break;
429 case 0312:
430 break;
431 case 0320:
432 length += (bits==32);
433 break;
434 case 0321:
435 length += (bits==16);
436 break;
437 case 0322:
438 break;
439 case 0330:
440 codes++, length++; break;
441 case 0340: case 0341: case 0342:
442 if (ins->oprs[0].segment != NO_SEG)
443 errfunc (ERR_NONFATAL, "attempt to reserve non-constant"
444 " quantity of BSS space");
445 else
446 length += ins->oprs[0].offset << (c-0340);
447 break;
448 default: /* can't do it by 'case' statements */
449 if (c>=0100 && c<=0277) { /* it's an EA */
450 ea ea_data;
451 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, 0,
452 ins->forw_ref)) {
453 errfunc (ERR_NONFATAL, "invalid effective address");
454 return -1;
455 } else
456 length += ea_data.size;
457 } else
458 errfunc (ERR_PANIC, "internal instruction table corrupt"
459 ": instruction code 0x%02X given", c);
461 return length;
464 static void gencode (long segment, long offset, int bits,
465 insn *ins, char *codes, long insn_end) {
466 static char condval[] = { /* conditional opcodes */
467 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
468 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
469 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
471 unsigned char c, bytes[4];
472 long data, size;
474 while (*codes) switch (c = *codes++) {
475 case 01: case 02: case 03:
476 outfmt->output (segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG);
477 codes += c;
478 offset += c;
479 break;
480 case 04: case 06:
481 switch (ins->oprs[0].basereg) {
482 case R_CS: bytes[0] = 0x0E + (c == 0x04 ? 1 : 0); break;
483 case R_DS: bytes[0] = 0x1E + (c == 0x04 ? 1 : 0); break;
484 case R_ES: bytes[0] = 0x06 + (c == 0x04 ? 1 : 0); break;
485 case R_SS: bytes[0] = 0x16 + (c == 0x04 ? 1 : 0); break;
486 default:
487 errfunc (ERR_PANIC, "bizarre 8086 segment register received");
489 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
490 offset++;
491 break;
492 case 05: case 07:
493 switch (ins->oprs[0].basereg) {
494 case R_FS: bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0); break;
495 case R_GS: bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0); break;
496 default:
497 errfunc (ERR_PANIC, "bizarre 386 segment register received");
499 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
500 offset++;
501 break;
502 case 010: case 011: case 012:
503 bytes[0] = *codes++ + regval(&ins->oprs[c-010]);
504 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
505 offset += 1;
506 break;
507 case 017:
508 bytes[0] = 0;
509 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
510 offset += 1;
511 break;
512 case 014: case 015: case 016:
513 if (ins->oprs[c-014].offset < -128 || ins->oprs[c-014].offset > 127)
514 errfunc (ERR_WARNING, "signed byte value exceeds bounds");
515 bytes[0] = ins->oprs[c-014].offset;
516 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
517 offset += 1;
518 break;
519 case 020: case 021: case 022:
520 if (ins->oprs[c-020].offset < -128 || ins->oprs[c-020].offset > 255)
521 errfunc (ERR_WARNING, "byte value exceeds bounds");
522 bytes[0] = ins->oprs[c-020].offset;
523 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
524 offset += 1;
525 break;
526 case 024: case 025: case 026:
527 if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255)
528 errfunc (ERR_WARNING, "unsigned byte value exceeds bounds");
529 bytes[0] = ins->oprs[c-024].offset;
530 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
531 offset += 1;
532 break;
533 case 030: case 031: case 032:
534 if (ins->oprs[c-030].segment == NO_SEG &&
535 ins->oprs[c-030].wrt == NO_SEG &&
536 (ins->oprs[c-030].offset < -32768L ||
537 ins->oprs[c-030].offset > 65535L))
538 errfunc (ERR_WARNING, "word value exceeds bounds");
539 data = ins->oprs[c-030].offset;
540 outfmt->output (segment, &data, OUT_ADDRESS+2,
541 ins->oprs[c-030].segment, ins->oprs[c-030].wrt);
542 offset += 2;
543 break;
544 case 034: case 035: case 036:
545 data = ins->oprs[c-034].offset;
546 size = ((ins->oprs[c-034].addr_size ?
547 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4);
548 if (size==16 && (data < -32768L || data > 65535L))
549 errfunc (ERR_WARNING, "word value exceeds bounds");
550 outfmt->output (segment, &data, OUT_ADDRESS+size,
551 ins->oprs[c-034].segment, ins->oprs[c-034].wrt);
552 offset += size;
553 break;
554 case 037:
555 if (ins->oprs[0].segment == NO_SEG)
556 errfunc (ERR_NONFATAL, "value referenced by FAR is not"
557 " relocatable");
558 data = 0L;
559 outfmt->output (segment, &data, OUT_ADDRESS+2,
560 outfmt->segbase(1+ins->oprs[0].segment),
561 ins->oprs[0].wrt);
562 offset += 2;
563 break;
564 case 040: case 041: case 042:
565 data = ins->oprs[c-040].offset;
566 outfmt->output (segment, &data, OUT_ADDRESS+4,
567 ins->oprs[c-040].segment, ins->oprs[c-040].wrt);
568 offset += 4;
569 break;
570 case 050: case 051: case 052:
571 if (ins->oprs[c-050].segment != segment)
572 errfunc (ERR_NONFATAL, "short relative jump outside segment");
573 data = ins->oprs[c-050].offset - insn_end;
574 if (data > 127 || data < -128)
575 errfunc (ERR_NONFATAL, "short jump is out of range");
576 bytes[0] = data;
577 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
578 offset += 1;
579 break;
580 case 060: case 061: case 062:
581 if (ins->oprs[c-060].segment != segment) {
582 data = ins->oprs[c-060].offset;
583 outfmt->output (segment, &data, OUT_REL2ADR+insn_end-offset,
584 ins->oprs[c-060].segment, ins->oprs[c-060].wrt);
585 } else {
586 data = ins->oprs[c-060].offset - insn_end;
587 outfmt->output (segment, &data, OUT_ADDRESS+2, NO_SEG, NO_SEG);
589 offset += 2;
590 break;
591 case 064: case 065: case 066:
592 size = ((ins->oprs[c-064].addr_size ?
593 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4);
594 if (ins->oprs[c-064].segment != segment) {
595 data = ins->oprs[c-064].offset;
596 size = (bits == 16 ? OUT_REL2ADR : OUT_REL4ADR);
597 outfmt->output (segment, &data, size+insn_end-offset,
598 ins->oprs[c-064].segment, ins->oprs[c-064].wrt);
599 size = (bits == 16 ? 2 : 4);
600 } else {
601 data = ins->oprs[c-064].offset - insn_end;
602 outfmt->output (segment, &data, OUT_ADDRESS+size, NO_SEG, NO_SEG);
604 offset += size;
605 break;
606 case 070: case 071: case 072:
607 if (ins->oprs[c-070].segment != segment) {
608 data = ins->oprs[c-070].offset;
609 outfmt->output (segment, &data, OUT_REL4ADR+insn_end-offset,
610 ins->oprs[c-070].segment, ins->oprs[c-070].wrt);
611 } else {
612 data = ins->oprs[c-070].offset - insn_end;
613 outfmt->output (segment, &data, OUT_ADDRESS+4, NO_SEG, NO_SEG);
615 offset += 4;
616 break;
617 case 0300: case 0301: case 0302:
618 if (chsize (&ins->oprs[c-0300], bits)) {
619 *bytes = 0x67;
620 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
621 offset += 1;
622 } else
623 offset += 0;
624 break;
625 case 0310:
626 if (bits==32) {
627 *bytes = 0x67;
628 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
629 offset += 1;
630 } else
631 offset += 0;
632 break;
633 case 0311:
634 if (bits==16) {
635 *bytes = 0x67;
636 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
637 offset += 1;
638 } else
639 offset += 0;
640 break;
641 case 0312:
642 break;
643 case 0320:
644 if (bits==32) {
645 *bytes = 0x66;
646 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
647 offset += 1;
648 } else
649 offset += 0;
650 break;
651 case 0321:
652 if (bits==16) {
653 *bytes = 0x66;
654 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
655 offset += 1;
656 } else
657 offset += 0;
658 break;
659 case 0322:
660 break;
661 case 0330:
662 *bytes = *codes++ + condval[ins->condition];
663 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
664 offset += 1;
665 break;
666 case 0340: case 0341: case 0342:
667 if (ins->oprs[0].segment != NO_SEG)
668 errfunc (ERR_PANIC, "non-constant BSS size in pass two");
669 else {
670 long size = ins->oprs[0].offset << (c-0340);
671 outfmt->output (segment, NULL, OUT_RESERVE+size, NO_SEG, NO_SEG);
672 offset += size;
674 break;
675 default: /* can't do it by 'case' statements */
676 if (c>=0100 && c<=0277) { /* it's an EA */
677 ea ea_data;
678 int rfield;
679 unsigned char *p;
680 long s;
682 if (c<=0177) /* pick rfield from operand b */
683 rfield = regval (&ins->oprs[c&7]);
684 else /* rfield is constant */
685 rfield = c & 7;
686 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, rfield,
687 ins->forw_ref))
688 errfunc (ERR_NONFATAL, "invalid effective address");
690 p = bytes;
691 *p++ = ea_data.modrm;
692 if (ea_data.sib_present)
693 *p++ = ea_data.sib;
695 * the cast in the next line is to placate MS C...
697 outfmt->output (segment, bytes, OUT_RAWDATA+(long)(p-bytes),
698 NO_SEG, NO_SEG);
699 s = p-bytes;
701 switch (ea_data.bytes) {
702 case 0:
703 break;
704 case 1:
705 *bytes = ins->oprs[(c>>3)&7].offset;
706 outfmt->output (segment, bytes, OUT_RAWDATA+1,
707 NO_SEG, NO_SEG);
708 s++;
709 break;
710 case 2:
711 case 4:
712 data = ins->oprs[(c>>3)&7].offset;
713 outfmt->output (segment, &data, OUT_ADDRESS+ea_data.bytes,
714 ins->oprs[(c>>3)&7].segment,
715 ins->oprs[(c>>3)&7].wrt);
716 s += ea_data.bytes;
717 break;
719 offset += s;
720 } else
721 errfunc (ERR_PANIC, "internal instruction table corrupt"
722 ": instruction code 0x%02X given", c);
726 static int regval (operand *o) {
727 switch (o->basereg) {
728 case R_EAX: case R_AX: case R_AL: case R_ES: case R_CR0: case R_DR0:
729 case R_ST0: case R_MM0:
730 return 0;
731 case R_ECX: case R_CX: case R_CL: case R_CS: case R_DR1: case R_ST1:
732 case R_MM1:
733 return 1;
734 case R_EDX: case R_DX: case R_DL: case R_SS: case R_CR2: case R_DR2:
735 case R_ST2: case R_MM2:
736 return 2;
737 case R_EBX: case R_BX: case R_BL: case R_DS: case R_CR3: case R_DR3:
738 case R_TR3: case R_ST3: case R_MM3:
739 return 3;
740 case R_ESP: case R_SP: case R_AH: case R_FS: case R_CR4: case R_TR4:
741 case R_ST4: case R_MM4:
742 return 4;
743 case R_EBP: case R_BP: case R_CH: case R_GS: case R_TR5: case R_ST5:
744 case R_MM5:
745 return 5;
746 case R_ESI: case R_SI: case R_DH: case R_DR6: case R_TR6: case R_ST6:
747 case R_MM6:
748 return 6;
749 case R_EDI: case R_DI: case R_BH: case R_DR7: case R_TR7: case R_ST7:
750 case R_MM7:
751 return 7;
752 default: /* panic */
753 errfunc (ERR_PANIC, "invalid register operand given to regval()");
754 return 0;
758 static int matches (struct itemplate *itemp, insn *instruction) {
759 int i, size, oprs, ret;
761 ret = 100;
764 * Check the opcode
766 if (itemp->opcode != instruction->opcode) return 0;
769 * Count the operands
771 if (itemp->operands != instruction->operands) return 0;
774 * Check that no spurious colons or TOs are present
776 for (i=0; i<itemp->operands; i++)
777 if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON|TO))
778 return 0;
781 * Check that the operand flags all match up
783 for (i=0; i<itemp->operands; i++)
784 if (itemp->opd[i] & ~instruction->oprs[i].type ||
785 ((itemp->opd[i] & SIZE_MASK) &&
786 ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) {
787 if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) ||
788 (instruction->oprs[i].type & SIZE_MASK))
789 return 0;
790 else
791 ret = 1;
795 * Check operand sizes
797 if (itemp->flags & IF_SB) {
798 size = BITS8;
799 oprs = itemp->operands;
800 } else if (itemp->flags & IF_SD) {
801 size = BITS32;
802 oprs = itemp->operands;
803 } else if (itemp->flags & (IF_SM | IF_SM2)) {
804 oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
805 size = 0; /* placate gcc */
806 for (i=0; i<oprs; i++)
807 if ( (size = itemp->opd[i] & SIZE_MASK) != 0)
808 break;
809 } else {
810 size = 0;
811 oprs = itemp->operands;
814 for (i=0; i<itemp->operands; i++)
815 if (!(itemp->opd[i] & SIZE_MASK) &&
816 (instruction->oprs[i].type & SIZE_MASK & ~size))
817 ret = 2;
819 return ret;
822 static ea *process_ea (operand *input, ea *output, int addrbits, int rfield,
823 int forw_ref) {
824 if (!(REGISTER & ~input->type)) { /* it's a single register */
825 static int regs[] = {
826 R_MM0, R_EAX, R_AX, R_AL, R_MM1, R_ECX, R_CX, R_CL,
827 R_MM2, R_EDX, R_DX, R_DL, R_MM3, R_EBX, R_BX, R_BL,
828 R_MM4, R_ESP, R_SP, R_AH, R_MM5, R_EBP, R_BP, R_CH,
829 R_MM6, R_ESI, R_SI, R_DH, R_MM7, R_EDI, R_DI, R_BH
831 int i;
833 for (i=0; i<elements(regs); i++)
834 if (input->basereg == regs[i]) break;
835 if (i<elements(regs)) {
836 output->sib_present = FALSE;/* no SIB necessary */
837 output->bytes = 0; /* no offset necessary either */
838 output->modrm = 0xC0 | (rfield << 3) | (i/4);
839 } else
840 return NULL;
841 } else { /* it's a memory reference */
842 if (input->basereg==-1 && (input->indexreg==-1 || input->scale==0)) {
843 /* it's a pure offset */
844 if (input->addr_size)
845 addrbits = input->addr_size;
846 output->sib_present = FALSE;
847 output->bytes = (addrbits==32 ? 4 : 2);
848 output->modrm = (addrbits==32 ? 5 : 6) | (rfield << 3);
849 } else { /* it's an indirection */
850 int i=input->indexreg, b=input->basereg, s=input->scale;
851 long o=input->offset, seg=input->segment;
853 if (s==0) i = -1; /* make this easy, at least */
855 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
856 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
857 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
858 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) {
859 /* it must be a 32-bit memory reference. Firstly we have
860 * to check that all registers involved are type Exx. */
861 if (i!=-1 && i!=R_EAX && i!=R_EBX && i!=R_ECX && i!=R_EDX
862 && i!=R_EBP && i!=R_ESP && i!=R_ESI && i!=R_EDI)
863 return NULL;
864 if (b!=-1 && b!=R_EAX && b!=R_EBX && b!=R_ECX && b!=R_EDX
865 && b!=R_EBP && b!=R_ESP && b!=R_ESI && b!=R_EDI)
866 return NULL;
868 /* While we're here, ensure the user didn't specify WORD. */
869 if (input->addr_size == 16)
870 return NULL;
872 /* now reorganise base/index */
873 if (b==i) /* convert EAX+2*EAX to 3*EAX */
874 b = -1, s++;
875 if (b==-1 && s==1) /* single register should be base */
876 b = i, i = -1;
877 if (((s==2 && i!=R_ESP) || s==3 || s==5 || s==9) && b==-1)
878 b = i, s--; /* convert 3*EAX to EAX+2*EAX */
879 if (s==1 && i==R_ESP) /* swap ESP into base if scale is 1 */
880 i = b, b = R_ESP;
881 if (i==R_ESP || (s!=1 && s!=2 && s!=4 && s!=8 && i!=-1))
882 return NULL; /* wrong, for various reasons */
884 if (i==-1 && b!=R_ESP) {/* no SIB needed */
885 int mod, rm;
886 switch(b) {
887 case R_EAX: rm = 0; break;
888 case R_ECX: rm = 1; break;
889 case R_EDX: rm = 2; break;
890 case R_EBX: rm = 3; break;
891 case R_EBP: rm = 5; break;
892 case R_ESI: rm = 6; break;
893 case R_EDI: rm = 7; break;
894 case -1: rm = 5; break;
895 default: /* should never happen */
896 return NULL;
898 if (b==-1 || (b!=R_EBP && o==0 &&
899 seg==NO_SEG && !forw_ref))
900 mod = 0;
901 else if (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref)
902 mod = 1;
903 else
904 mod = 2;
906 output->sib_present = FALSE;
907 output->bytes = (b==-1 || mod==2 ? 4 : mod);
908 output->modrm = (mod<<6) | (rfield<<3) | rm;
909 } else { /* we need a SIB */
910 int mod, scale, index, base;
912 switch (b) {
913 case R_EAX: base = 0; break;
914 case R_ECX: base = 1; break;
915 case R_EDX: base = 2; break;
916 case R_EBX: base = 3; break;
917 case R_ESP: base = 4; break;
918 case R_EBP: case -1: base = 5; break;
919 case R_ESI: base = 6; break;
920 case R_EDI: base = 7; break;
921 default: /* then what the smeg is it? */
922 return NULL; /* panic */
925 switch (i) {
926 case R_EAX: index = 0; break;
927 case R_ECX: index = 1; break;
928 case R_EDX: index = 2; break;
929 case R_EBX: index = 3; break;
930 case -1: index = 4; break;
931 case R_EBP: index = 5; break;
932 case R_ESI: index = 6; break;
933 case R_EDI: index = 7; break;
934 default: /* then what the smeg is it? */
935 return NULL; /* panic */
938 if (i==-1) s = 1;
939 switch (s) {
940 case 1: scale = 0; break;
941 case 2: scale = 1; break;
942 case 4: scale = 2; break;
943 case 8: scale = 3; break;
944 default: /* then what the smeg is it? */
945 return NULL; /* panic */
948 if (b==-1 || (b!=R_EBP && o==0 &&
949 seg==NO_SEG && !forw_ref))
950 mod = 0;
951 else if (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref)
952 mod = 1;
953 else
954 mod = 2;
956 output->sib_present = TRUE;
957 output->bytes = (b==-1 || mod==2 ? 4 : mod);
958 output->modrm = (mod<<6) | (rfield<<3) | 4;
959 output->sib = (scale<<6) | (index<<3) | base;
961 } else { /* it's 16-bit */
962 int mod, rm;
964 /* check all registers are BX, BP, SI or DI */
965 if ((b!=-1 && b!=R_BP && b!=R_BX && b!=R_SI && b!=R_DI) ||
966 (i!=-1 && i!=R_BP && i!=R_BX && i!=R_SI && i!=R_DI))
967 return NULL;
969 /* ensure the user didn't specify DWORD */
970 if (input->addr_size == 32)
971 return NULL;
973 if (s!=1 && i!=-1) return NULL;/* no can do, in 16-bit EA */
974 if (b==-1 && i!=-1) b ^= i ^= b ^= i; /* swap them round */
975 if ((b==R_SI || b==R_DI) && i!=-1)
976 b ^= i ^= b ^= i; /* have BX/BP as base, SI/DI index */
977 if (b==i) return NULL;/* shouldn't ever happen, in theory */
978 if (i!=-1 && b!=-1 &&
979 (i==R_BP || i==R_BX || b==R_SI || b==R_DI))
980 return NULL; /* invalid combinations */
981 if (b==-1) /* pure offset: handled above */
982 return NULL; /* so if it gets to here, panic! */
984 rm = -1;
985 if (i!=-1)
986 switch (i*256 + b) {
987 case R_SI*256+R_BX: rm=0; break;
988 case R_DI*256+R_BX: rm=1; break;
989 case R_SI*256+R_BP: rm=2; break;
990 case R_DI*256+R_BP: rm=3; break;
992 else
993 switch (b) {
994 case R_SI: rm=4; break;
995 case R_DI: rm=5; break;
996 case R_BP: rm=6; break;
997 case R_BX: rm=7; break;
999 if (rm==-1) /* can't happen, in theory */
1000 return NULL; /* so panic if it does */
1002 if (o==0 && seg==NO_SEG && !forw_ref && rm!=6)
1003 mod = 0;
1004 else if (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref)
1005 mod = 1;
1006 else
1007 mod = 2;
1009 output->sib_present = FALSE; /* no SIB - it's 16-bit */
1010 output->bytes = mod; /* bytes of offset needed */
1011 output->modrm = (mod<<6) | (rfield<<3) | rm;
1015 output->size = 1 + output->sib_present + output->bytes;
1016 return output;
1019 static int chsize (operand *input, int addrbits) {
1020 if (!(MEMORY & ~input->type)) {
1021 int i=input->indexreg, b=input->basereg;
1023 if (input->scale==0) i = -1;
1025 if (i == -1 && b == -1) /* pure offset */
1026 return (input->addr_size != 0 && input->addr_size != addrbits);
1028 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1029 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1030 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1031 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI)
1032 return (addrbits==16);
1033 else
1034 return (addrbits==32);
1035 } else
1036 return 0;