NASM 0.93
[nasm.git] / assemble.c
blobb4aed4c5d83a00b5d680b65fe7cc630052e965c0
1 /* assemble.c code generation for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * the actual codes (C syntax, i.e. octal):
9 * \0 - terminates the code. (Unless it's a literal of course.)
10 * \1, \2, \3 - that many literal bytes follow in the code stream
11 * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS
12 * (POP is never used for CS) depending on operand 0
13 * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending
14 * on operand 0
15 * \10, \11, \12 - a literal byte follows in the code stream, to be added
16 * to the register value of operand 0, 1 or 2
17 * \17 - encodes the literal byte 0. (Some compilers don't take
18 * kindly to a zero byte in the _middle_ of a compile time
19 * string constant, so I had to put this hack in.)
20 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
21 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
22 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
23 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
24 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
25 * assembly mode or the address-size override on the operand
26 * \37 - a word constant, from the _segment_ part of operand 0
27 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
28 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
29 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
30 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
31 * assembly mode or the address-size override on the operand
32 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
33 * \1ab - a ModRM, calculated on EA in operand a, with the spare
34 * field the register value of operand b.
35 * \2ab - a ModRM, calculated on EA in operand a, with the spare
36 * field equal to digit b.
37 * \30x - might be an 0x67 byte, depending on the address size of
38 * the memory reference in operand x.
39 * \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
40 * \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
41 * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
42 * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
43 * \322 - indicates that this instruction is only valid when the
44 * operand size is the default (instruction to disassembler,
45 * generates no code in the assembler)
46 * \330 - a literal byte follows in the code stream, to be added
47 * to the condition code value of the instruction.
48 * \340 - reserve <operand 0> bytes of uninitialised storage.
49 * Operand 0 had better be a segmentless constant.
52 #include <stdio.h>
53 #include <string.h>
55 #include "nasm.h"
56 #include "assemble.h"
57 #include "insns.h"
59 extern struct itemplate *nasm_instructions[];
61 typedef struct {
62 int sib_present; /* is a SIB byte necessary? */
63 int bytes; /* # of bytes of offset needed */
64 int size; /* lazy - this is sib+bytes+1 */
65 unsigned char modrm, sib; /* the bytes themselves */
66 } ea;
68 static efunc errfunc;
69 static struct ofmt *outfmt;
71 static long calcsize (long, long, int, insn *, char *);
72 static void gencode (long, long, int, insn *, char *, long);
73 static int regval (operand *o);
74 static int matches (struct itemplate *, insn *);
75 static ea *process_ea (operand *, ea *, int, int, int);
76 static int chsize (operand *, int);
78 long assemble (long segment, long offset, int bits,
79 insn *instruction, struct ofmt *output, efunc error) {
80 int j, itimes, size_prob;
81 long insn_end;
82 long start = offset;
83 struct itemplate *temp;
85 errfunc = error; /* to pass to other functions */
86 outfmt = output; /* likewise */
88 if (instruction->opcode == -1)
89 return 0;
91 if (instruction->opcode == I_DB ||
92 instruction->opcode == I_DW ||
93 instruction->opcode == I_DD ||
94 instruction->opcode == I_DQ ||
95 instruction->opcode == I_DT) {
96 extop *e;
97 long osize, wsize = 0; /* placate gcc */
98 int t = instruction->times;
100 switch (instruction->opcode) {
101 case I_DB: wsize = 1; break;
102 case I_DW: wsize = 2; break;
103 case I_DD: wsize = 4; break;
104 case I_DQ: wsize = 8; break;
105 case I_DT: wsize = 10; break;
108 while (t--) {
109 for (e = instruction->eops; e; e = e->next) {
110 osize = 0;
111 if (e->type == EOT_DB_NUMBER) {
112 if (wsize == 1) {
113 if (e->segment != NO_SEG)
114 errfunc (ERR_NONFATAL,
115 "one-byte relocation attempted");
116 else {
117 unsigned char c = e->offset;
118 outfmt->output (segment, &c, OUT_RAWDATA+1,
119 NO_SEG, NO_SEG);
121 } else if (wsize > 5) {
122 errfunc (ERR_NONFATAL, "integer supplied to a D%c"
123 " instruction", wsize==8 ? 'Q' : 'T');
124 } else
125 outfmt->output (segment, &e->offset,
126 OUT_ADDRESS+wsize, e->segment,
127 e->wrt);
128 offset += wsize;
129 } else if (e->type == EOT_DB_STRING) {
130 int align;
132 align = (-e->stringlen) % wsize;
133 if (align < 0)
134 align += wsize;
135 outfmt->output (segment, e->stringval,
136 OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
137 if (align)
138 outfmt->output (segment, "\0\0\0\0",
139 OUT_RAWDATA+align, NO_SEG, NO_SEG);
140 offset += e->stringlen + align;
144 return offset - start;
147 size_prob = FALSE;
148 temp = nasm_instructions[instruction->opcode];
149 while (temp->opcode != -1) {
150 int m = matches (temp, instruction);
151 if (m == 100) { /* matches! */
152 char *codes = temp->code;
153 long insn_size = calcsize(segment, offset, bits,
154 instruction, codes);
155 itimes = instruction->times;
156 if (insn_size < 0) /* shouldn't be, on pass two */
157 error (ERR_PANIC, "errors made it through from pass one");
158 else while (itimes--) {
159 insn_end = offset + insn_size;
160 for (j=0; j<instruction->nprefix; j++) {
161 unsigned char c;
162 switch (instruction->prefixes[j]) {
163 case P_LOCK:
164 c = 0xF0; break;
165 case P_REPNE: case P_REPNZ:
166 c = 0xF2; break;
167 case P_REPE: case P_REPZ: case P_REP:
168 c = 0xF3; break;
169 case R_CS: c = 0x2E; break;
170 case R_DS: c = 0x3E; break;
171 case R_ES: c = 0x26; break;
172 case R_FS: c = 0x64; break;
173 case R_GS: c = 0x65; break;
174 case R_SS: c = 0x36; break;
175 case P_A16:
176 if (bits == 16)
177 c = 0; /* no prefix */
178 else
179 c = 0x67;
180 break;
181 case P_A32:
182 if (bits == 32)
183 c = 0; /* no prefix */
184 else
185 c = 0x67;
186 break;
187 case P_O16:
188 if (bits == 16)
189 c = 0; /* no prefix */
190 else
191 c = 0x66;
192 break;
193 case P_O32:
194 if (bits == 32)
195 c = 0; /* no prefix */
196 else
197 c = 0x66;
198 break;
199 default:
200 error (ERR_PANIC,
201 "invalid instruction prefix");
203 if (c != 0)
204 outfmt->output (segment, &c, OUT_RAWDATA+1,
205 NO_SEG, NO_SEG);
206 offset++;
208 gencode (segment, offset, bits, instruction, codes, insn_end);
209 offset += insn_size;
211 return offset - start;
212 } else if (m > 0) {
213 size_prob = m;
215 temp++;
217 if (temp->opcode == -1) { /* didn't match any instruction */
218 if (size_prob == 1) /* would have matched, but for size */
219 error (ERR_NONFATAL, "operation size not specified");
220 else if (size_prob == 2)
221 error (ERR_NONFATAL, "mismatch in operand sizes");
222 else
223 error (ERR_NONFATAL,
224 "invalid combination of opcode and operands");
226 return 0;
229 long insn_size (long segment, long offset, int bits,
230 insn *instruction, efunc error) {
231 struct itemplate *temp;
233 errfunc = error; /* to pass to other functions */
235 if (instruction->opcode == -1)
236 return 0;
238 if (instruction->opcode == I_DB ||
239 instruction->opcode == I_DW ||
240 instruction->opcode == I_DD ||
241 instruction->opcode == I_DQ ||
242 instruction->opcode == I_DT) {
243 extop *e;
244 long isize, osize, wsize = 0; /* placate gcc */
246 isize = 0;
247 switch (instruction->opcode) {
248 case I_DB: wsize = 1; break;
249 case I_DW: wsize = 2; break;
250 case I_DD: wsize = 4; break;
251 case I_DQ: wsize = 8; break;
252 case I_DT: wsize = 10; break;
255 for (e = instruction->eops; e; e = e->next) {
256 long align;
258 osize = 0;
259 if (e->type == EOT_DB_NUMBER)
260 osize = 1;
261 else if (e->type == EOT_DB_STRING)
262 osize = e->stringlen;
264 align = (-osize) % wsize;
265 if (align < 0)
266 align += wsize;
267 isize += osize + align;
269 return isize * instruction->times;
272 temp = nasm_instructions[instruction->opcode];
273 while (temp->opcode != -1) {
274 if (matches(temp, instruction) == 100) {
275 /* we've matched an instruction. */
276 long isize;
277 char *codes = temp->code;
278 int j;
280 isize = calcsize(segment, offset, bits, instruction, codes);
281 if (isize < 0)
282 return -1;
283 for (j = 0; j < instruction->nprefix; j++) {
284 if ((instruction->prefixes[j] != P_A16 &&
285 instruction->prefixes[j] != P_O16 && bits==16) ||
286 (instruction->prefixes[j] != P_A32 &&
287 instruction->prefixes[j] != P_O32 && bits==32))
288 isize++;
290 return isize * instruction->times;
292 temp++;
294 return -1; /* didn't match any instruction */
297 static long calcsize (long segment, long offset, int bits,
298 insn *ins, char *codes) {
299 long length = 0;
300 unsigned char c;
302 while (*codes) switch (c = *codes++) {
303 case 01: case 02: case 03:
304 codes += c, length += c; break;
305 case 04: case 05: case 06: case 07:
306 length++; break;
307 case 010: case 011: case 012:
308 codes++, length++; break;
309 case 017:
310 length++; break;
311 case 014: case 015: case 016:
312 length++; break;
313 case 020: case 021: case 022:
314 length++; break;
315 case 024: case 025: case 026:
316 length++; break;
317 case 030: case 031: case 032:
318 length += 2; break;
319 case 034: case 035: case 036:
320 length += ((ins->oprs[c-034].addr_size ?
321 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); break;
322 case 037:
323 length += 2; break;
324 case 040: case 041: case 042:
325 length += 4; break;
326 case 050: case 051: case 052:
327 length++; break;
328 case 060: case 061: case 062:
329 length += 2; break;
330 case 064: case 065: case 066:
331 length += ((ins->oprs[c-064].addr_size ?
332 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4); break;
333 case 070: case 071: case 072:
334 length += 4; break;
335 case 0300: case 0301: case 0302:
336 length += chsize (&ins->oprs[c-0300], bits);
337 break;
338 case 0310:
339 length += (bits==32);
340 break;
341 case 0311:
342 length += (bits==16);
343 break;
344 case 0312:
345 break;
346 case 0320:
347 length += (bits==32);
348 break;
349 case 0321:
350 length += (bits==16);
351 break;
352 case 0322:
353 break;
354 case 0330:
355 codes++, length++; break;
356 case 0340: case 0341: case 0342:
357 if (ins->oprs[0].segment != NO_SEG)
358 errfunc (ERR_NONFATAL, "attempt to reserve non-constant"
359 " quantity of BSS space");
360 else
361 length += ins->oprs[0].offset << (c-0340);
362 break;
363 default: /* can't do it by 'case' statements */
364 if (c>=0100 && c<=0277) { /* it's an EA */
365 ea ea_data;
366 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, 0,
367 ins->forw_ref)) {
368 errfunc (ERR_NONFATAL, "invalid effective address");
369 return -1;
370 } else
371 length += ea_data.size;
372 } else
373 errfunc (ERR_PANIC, "internal instruction table corrupt"
374 ": instruction code 0x%02X given", c);
376 return length;
379 static void gencode (long segment, long offset, int bits,
380 insn *ins, char *codes, long insn_end) {
381 static char condval[] = { /* conditional opcodes */
382 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
383 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
384 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
386 unsigned char c, bytes[4];
387 long data, size;
389 while (*codes) switch (c = *codes++) {
390 case 01: case 02: case 03:
391 outfmt->output (segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG);
392 codes += c;
393 offset += c;
394 break;
395 case 04: case 06:
396 switch (ins->oprs[0].basereg) {
397 case R_CS: bytes[0] = 0x0E + (c == 0x04 ? 1 : 0); break;
398 case R_DS: bytes[0] = 0x1E + (c == 0x04 ? 1 : 0); break;
399 case R_ES: bytes[0] = 0x06 + (c == 0x04 ? 1 : 0); break;
400 case R_SS: bytes[0] = 0x16 + (c == 0x04 ? 1 : 0); break;
401 default:
402 errfunc (ERR_PANIC, "bizarre 8086 segment register received");
404 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
405 offset++;
406 break;
407 case 05: case 07:
408 switch (ins->oprs[0].basereg) {
409 case R_FS: bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0); break;
410 case R_GS: bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0); break;
411 default:
412 errfunc (ERR_PANIC, "bizarre 386 segment register received");
414 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
415 offset++;
416 break;
417 case 010: case 011: case 012:
418 bytes[0] = *codes++ + regval(&ins->oprs[c-010]);
419 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
420 offset += 1;
421 break;
422 case 017:
423 bytes[0] = 0;
424 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
425 offset += 1;
426 break;
427 case 014: case 015: case 016:
428 if (ins->oprs[c-014].offset < -128 || ins->oprs[c-014].offset > 127)
429 errfunc (ERR_WARNING, "signed byte value exceeds bounds");
430 bytes[0] = ins->oprs[c-014].offset;
431 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
432 offset += 1;
433 break;
434 case 020: case 021: case 022:
435 if (ins->oprs[c-020].offset < -128 || ins->oprs[c-020].offset > 255)
436 errfunc (ERR_WARNING, "byte value exceeds bounds");
437 bytes[0] = ins->oprs[c-020].offset;
438 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
439 offset += 1;
440 break;
441 case 024: case 025: case 026:
442 if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255)
443 errfunc (ERR_WARNING, "unsigned byte value exceeds bounds");
444 bytes[0] = ins->oprs[c-024].offset;
445 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
446 offset += 1;
447 break;
448 case 030: case 031: case 032:
449 if (ins->oprs[c-030].segment == NO_SEG &&
450 ins->oprs[c-030].wrt == NO_SEG &&
451 (ins->oprs[c-030].offset < -32768 ||
452 ins->oprs[c-030].offset > 65535))
453 errfunc (ERR_WARNING, "word value exceeds bounds");
454 data = ins->oprs[c-030].offset;
455 outfmt->output (segment, &data, OUT_ADDRESS+2,
456 ins->oprs[c-030].segment, ins->oprs[c-030].wrt);
457 offset += 2;
458 break;
459 case 034: case 035: case 036:
460 data = ins->oprs[c-034].offset;
461 size = ((ins->oprs[c-034].addr_size ?
462 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4);
463 if (size==16 && (data < -32768 || data > 65535))
464 errfunc (ERR_WARNING, "word value exceeds bounds");
465 outfmt->output (segment, &data, OUT_ADDRESS+size,
466 ins->oprs[c-034].segment, ins->oprs[c-034].wrt);
467 offset += size;
468 break;
469 case 037:
470 if (ins->oprs[0].segment == NO_SEG)
471 errfunc (ERR_NONFATAL, "value referenced by FAR is not"
472 " relocatable");
473 data = 0L;
474 outfmt->output (segment, &data, OUT_ADDRESS+2,
475 outfmt->segbase(1+ins->oprs[0].segment),
476 ins->oprs[0].wrt);
477 offset += 2;
478 break;
479 case 040: case 041: case 042:
480 data = ins->oprs[c-040].offset;
481 outfmt->output (segment, &data, OUT_ADDRESS+4,
482 ins->oprs[c-040].segment, ins->oprs[c-040].wrt);
483 offset += 4;
484 break;
485 case 050: case 051: case 052:
486 if (ins->oprs[c-050].segment != segment)
487 errfunc (ERR_NONFATAL, "short relative jump outside segment");
488 data = ins->oprs[c-050].offset - insn_end;
489 if (data > 127 || data < -128)
490 errfunc (ERR_NONFATAL, "short jump is out of range");
491 bytes[0] = data;
492 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
493 offset += 1;
494 break;
495 case 060: case 061: case 062:
496 if (ins->oprs[c-060].segment != segment) {
497 data = ins->oprs[c-060].offset;
498 outfmt->output (segment, &data, OUT_REL2ADR+insn_end-offset,
499 ins->oprs[c-060].segment, ins->oprs[c-060].wrt);
500 } else {
501 data = ins->oprs[c-060].offset - insn_end;
502 outfmt->output (segment, &data, OUT_ADDRESS+2, NO_SEG, NO_SEG);
504 offset += 2;
505 break;
506 case 064: case 065: case 066:
507 size = ((ins->oprs[c-064].addr_size ?
508 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4);
509 if (ins->oprs[c-064].segment != segment) {
510 data = ins->oprs[c-064].offset;
511 size = (bits == 16 ? OUT_REL2ADR : OUT_REL4ADR);
512 outfmt->output (segment, &data, size+insn_end-offset,
513 ins->oprs[c-064].segment, ins->oprs[c-064].wrt);
514 size = (bits == 16 ? 2 : 4);
515 } else {
516 data = ins->oprs[c-064].offset - insn_end;
517 outfmt->output (segment, &data, OUT_ADDRESS+size, NO_SEG, NO_SEG);
519 offset += size;
520 break;
521 case 070: case 071: case 072:
522 if (ins->oprs[c-070].segment != segment) {
523 data = ins->oprs[c-070].offset;
524 outfmt->output (segment, &data, OUT_REL4ADR+insn_end-offset,
525 ins->oprs[c-070].segment, ins->oprs[c-070].wrt);
526 } else {
527 data = ins->oprs[c-070].offset - insn_end;
528 outfmt->output (segment, &data, OUT_ADDRESS+4, NO_SEG, NO_SEG);
530 offset += 4;
531 break;
532 case 0300: case 0301: case 0302:
533 if (chsize (&ins->oprs[c-0300], bits)) {
534 *bytes = 0x67;
535 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
536 offset += 1;
537 } else
538 offset += 0;
539 break;
540 case 0310:
541 if (bits==32) {
542 *bytes = 0x67;
543 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
544 offset += 1;
545 } else
546 offset += 0;
547 break;
548 case 0311:
549 if (bits==16) {
550 *bytes = 0x67;
551 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
552 offset += 1;
553 } else
554 offset += 0;
555 break;
556 case 0312:
557 break;
558 case 0320:
559 if (bits==32) {
560 *bytes = 0x66;
561 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
562 offset += 1;
563 } else
564 offset += 0;
565 break;
566 case 0321:
567 if (bits==16) {
568 *bytes = 0x66;
569 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
570 offset += 1;
571 } else
572 offset += 0;
573 break;
574 case 0322:
575 break;
576 case 0330:
577 *bytes = *codes++ + condval[ins->condition];
578 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
579 offset += 1;
580 break;
581 case 0340: case 0341: case 0342:
582 if (ins->oprs[0].segment != NO_SEG)
583 errfunc (ERR_PANIC, "non-constant BSS size in pass two");
584 else {
585 long size = ins->oprs[0].offset << (c-0340);
586 outfmt->output (segment, NULL, OUT_RESERVE+size, NO_SEG, NO_SEG);
587 offset += size;
589 break;
590 default: /* can't do it by 'case' statements */
591 if (c>=0100 && c<=0277) { /* it's an EA */
592 ea ea_data;
593 int rfield;
594 unsigned char *p;
595 long s;
597 if (c<=0177) /* pick rfield from operand b */
598 rfield = regval (&ins->oprs[c&7]);
599 else /* rfield is constant */
600 rfield = c & 7;
601 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, rfield,
602 ins->forw_ref))
603 errfunc (ERR_NONFATAL, "invalid effective address");
605 p = bytes;
606 *p++ = ea_data.modrm;
607 if (ea_data.sib_present)
608 *p++ = ea_data.sib;
610 * the cast in the next line is to placate MS C...
612 outfmt->output (segment, bytes, OUT_RAWDATA+(long)(p-bytes),
613 NO_SEG, NO_SEG);
614 s = p-bytes;
616 switch (ea_data.bytes) {
617 case 0:
618 break;
619 case 1:
620 *bytes = ins->oprs[(c>>3)&7].offset;
621 outfmt->output (segment, bytes, OUT_RAWDATA+1,
622 NO_SEG, NO_SEG);
623 s++;
624 break;
625 case 2:
626 case 4:
627 data = ins->oprs[(c>>3)&7].offset;
628 outfmt->output (segment, &data, OUT_ADDRESS+ea_data.bytes,
629 ins->oprs[(c>>3)&7].segment,
630 ins->oprs[(c>>3)&7].wrt);
631 s += ea_data.bytes;
632 break;
634 offset += s;
635 } else
636 errfunc (ERR_PANIC, "internal instruction table corrupt"
637 ": instruction code 0x%02X given", c);
641 static int regval (operand *o) {
642 switch (o->basereg) {
643 case R_EAX: case R_AX: case R_AL: case R_ES: case R_CR0: case R_DR0:
644 case R_ST0: case R_MM0:
645 return 0;
646 case R_ECX: case R_CX: case R_CL: case R_CS: case R_DR1: case R_ST1:
647 case R_MM1:
648 return 1;
649 case R_EDX: case R_DX: case R_DL: case R_SS: case R_CR2: case R_DR2:
650 case R_ST2: case R_MM2:
651 return 2;
652 case R_EBX: case R_BX: case R_BL: case R_DS: case R_CR3: case R_DR3:
653 case R_TR3: case R_ST3: case R_MM3:
654 return 3;
655 case R_ESP: case R_SP: case R_AH: case R_FS: case R_CR4: case R_TR4:
656 case R_ST4: case R_MM4:
657 return 4;
658 case R_EBP: case R_BP: case R_CH: case R_GS: case R_TR5: case R_ST5:
659 case R_MM5:
660 return 5;
661 case R_ESI: case R_SI: case R_DH: case R_DR6: case R_TR6: case R_ST6:
662 case R_MM6:
663 return 6;
664 case R_EDI: case R_DI: case R_BH: case R_DR7: case R_TR7: case R_ST7:
665 case R_MM7:
666 return 7;
667 default: /* panic */
668 errfunc (ERR_PANIC, "invalid register operand given to regval()");
669 return 0;
673 static int matches (struct itemplate *itemp, insn *instruction) {
674 int i, size, oprs, ret;
676 ret = 100;
679 * Check the opcode
681 if (itemp->opcode != instruction->opcode) return 0;
684 * Count the operands
686 if (itemp->operands != instruction->operands) return 0;
689 * Check that no spurious colons or TOs are present
691 for (i=0; i<itemp->operands; i++)
692 if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON|TO))
693 return 0;
696 * Check that the operand flags all match up
698 for (i=0; i<itemp->operands; i++)
699 if (itemp->opd[i] & ~instruction->oprs[i].type ||
700 ((itemp->opd[i] & SIZE_MASK) &&
701 ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) {
702 if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) ||
703 (instruction->oprs[i].type & SIZE_MASK))
704 return 0;
705 else
706 ret = 1;
710 * Check operand sizes
712 if (itemp->flags & IF_SB) {
713 size = BITS8;
714 oprs = itemp->operands;
715 } else if (itemp->flags & IF_SD) {
716 size = BITS32;
717 oprs = itemp->operands;
718 } else if (itemp->flags & (IF_SM | IF_SM2)) {
719 oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
720 size = 0; /* placate gcc */
721 for (i=0; i<oprs; i++)
722 if ( (size = itemp->opd[i] & SIZE_MASK) != 0)
723 break;
724 } else {
725 size = 0;
726 oprs = itemp->operands;
729 for (i=0; i<itemp->operands; i++)
730 if (!(itemp->opd[i] & SIZE_MASK) &&
731 (instruction->oprs[i].type & SIZE_MASK & ~size))
732 ret = 2;
734 return ret;
737 static ea *process_ea (operand *input, ea *output, int addrbits, int rfield,
738 int forw_ref) {
739 if (!(REGISTER & ~input->type)) { /* it's a single register */
740 static int regs[] = {
741 R_MM0, R_EAX, R_AX, R_AL, R_MM1, R_ECX, R_CX, R_CL,
742 R_MM2, R_EDX, R_DX, R_DL, R_MM3, R_EBX, R_BX, R_BL,
743 R_MM4, R_ESP, R_SP, R_AH, R_MM5, R_EBP, R_BP, R_CH,
744 R_MM6, R_ESI, R_SI, R_DH, R_MM7, R_EDI, R_DI, R_BH
746 int i;
748 for (i=0; i<elements(regs); i++)
749 if (input->basereg == regs[i]) break;
750 if (i<elements(regs)) {
751 output->sib_present = FALSE;/* no SIB necessary */
752 output->bytes = 0; /* no offset necessary either */
753 output->modrm = 0xC0 | (rfield << 3) | (i/4);
754 } else
755 return NULL;
756 } else { /* it's a memory reference */
757 if (input->basereg==-1 && (input->indexreg==-1 || input->scale==0)) {
758 /* it's a pure offset */
759 if (input->addr_size)
760 addrbits = input->addr_size;
761 output->sib_present = FALSE;
762 output->bytes = (addrbits==32 ? 4 : 2);
763 output->modrm = (addrbits==32 ? 5 : 6) | (rfield << 3);
764 } else { /* it's an indirection */
765 int i=input->indexreg, b=input->basereg, s=input->scale;
766 long o=input->offset, seg=input->segment;
768 if (s==0) i = -1; /* make this easy, at least */
770 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
771 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
772 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
773 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) {
774 /* it must be a 32-bit memory reference. Firstly we have
775 * to check that all registers involved are type Exx. */
776 if (i!=-1 && i!=R_EAX && i!=R_EBX && i!=R_ECX && i!=R_EDX
777 && i!=R_EBP && i!=R_ESP && i!=R_ESI && i!=R_EDI)
778 return NULL;
779 if (b!=-1 && b!=R_EAX && b!=R_EBX && b!=R_ECX && b!=R_EDX
780 && b!=R_EBP && b!=R_ESP && b!=R_ESI && b!=R_EDI)
781 return NULL;
783 /* While we're here, ensure the user didn't specify WORD. */
784 if (input->addr_size == 16)
785 return NULL;
787 /* now reorganise base/index */
788 if (b==i) /* convert EAX+2*EAX to 3*EAX */
789 b = -1, s++;
790 if (b==-1 && s==1) /* single register should be base */
791 b = i, i = -1;
792 if (((s==2 && i!=R_ESP) || s==3 || s==5 || s==9) && b==-1)
793 b = i, s--; /* convert 3*EAX to EAX+2*EAX */
794 if (s==1 && i==R_ESP) /* swap ESP into base if scale is 1 */
795 i = b, b = R_ESP;
796 if (i==R_ESP || (s!=1 && s!=2 && s!=4 && s!=8 && i!=-1))
797 return NULL; /* wrong, for various reasons */
799 if (i==-1 && b!=R_ESP) {/* no SIB needed */
800 int mod, rm;
801 switch(b) {
802 case R_EAX: rm = 0; break;
803 case R_ECX: rm = 1; break;
804 case R_EDX: rm = 2; break;
805 case R_EBX: rm = 3; break;
806 case R_EBP: rm = 5; break;
807 case R_ESI: rm = 6; break;
808 case R_EDI: rm = 7; break;
809 case -1: rm = 5; break;
810 default: /* should never happen */
811 return NULL;
813 if (b==-1 || (b!=R_EBP && o==0 &&
814 seg==NO_SEG && !forw_ref))
815 mod = 0;
816 else if (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref)
817 mod = 1;
818 else
819 mod = 2;
821 output->sib_present = FALSE;
822 output->bytes = (b==-1 || mod==2 ? 4 : mod);
823 output->modrm = (mod<<6) | (rfield<<3) | rm;
824 } else { /* we need a SIB */
825 int mod, scale, index, base;
827 switch (b) {
828 case R_EAX: base = 0; break;
829 case R_ECX: base = 1; break;
830 case R_EDX: base = 2; break;
831 case R_EBX: base = 3; break;
832 case R_ESP: base = 4; break;
833 case R_EBP: case -1: base = 5; break;
834 case R_ESI: base = 6; break;
835 case R_EDI: base = 7; break;
836 default: /* then what the smeg is it? */
837 return NULL; /* panic */
840 switch (i) {
841 case R_EAX: index = 0; break;
842 case R_ECX: index = 1; break;
843 case R_EDX: index = 2; break;
844 case R_EBX: index = 3; break;
845 case -1: index = 4; break;
846 case R_EBP: index = 5; break;
847 case R_ESI: index = 6; break;
848 case R_EDI: index = 7; break;
849 default: /* then what the smeg is it? */
850 return NULL; /* panic */
853 if (i==-1) s = 1;
854 switch (s) {
855 case 1: scale = 0; break;
856 case 2: scale = 1; break;
857 case 4: scale = 2; break;
858 case 8: scale = 3; break;
859 default: /* then what the smeg is it? */
860 return NULL; /* panic */
863 if (b==-1 || (b!=R_EBP && o==0 &&
864 seg==NO_SEG && !forw_ref))
865 mod = 0;
866 else if (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref)
867 mod = 1;
868 else
869 mod = 2;
871 output->sib_present = TRUE;
872 output->bytes = (b==-1 || mod==2 ? 4 : mod);
873 output->modrm = (mod<<6) | (rfield<<3) | 4;
874 output->sib = (scale<<6) | (index<<3) | base;
876 } else { /* it's 16-bit */
877 int mod, rm;
879 /* check all registers are BX, BP, SI or DI */
880 if ((b!=-1 && b!=R_BP && b!=R_BX && b!=R_SI && b!=R_DI) ||
881 (i!=-1 && i!=R_BP && i!=R_BX && i!=R_SI && i!=R_DI))
882 return NULL;
884 /* ensure the user didn't specify DWORD */
885 if (input->addr_size == 32)
886 return NULL;
888 if (s!=1 && i!=-1) return NULL;/* no can do, in 16-bit EA */
889 if (b==-1 && i!=-1) b ^= i ^= b ^= i; /* swap them round */
890 if ((b==R_SI || b==R_DI) && i!=-1)
891 b ^= i ^= b ^= i; /* have BX/BP as base, SI/DI index */
892 if (b==i) return NULL;/* shouldn't ever happen, in theory */
893 if (i!=-1 && b!=-1 &&
894 (i==R_BP || i==R_BX || b==R_SI || b==R_DI))
895 return NULL; /* invalid combinations */
896 if (b==-1) /* pure offset: handled above */
897 return NULL; /* so if it gets to here, panic! */
899 rm = -1;
900 if (i!=-1)
901 switch (i*256 + b) {
902 case R_SI*256+R_BX: rm=0; break;
903 case R_DI*256+R_BX: rm=1; break;
904 case R_SI*256+R_BP: rm=2; break;
905 case R_DI*256+R_BP: rm=3; break;
907 else
908 switch (b) {
909 case R_SI: rm=4; break;
910 case R_DI: rm=5; break;
911 case R_BP: rm=6; break;
912 case R_BX: rm=7; break;
914 if (rm==-1) /* can't happen, in theory */
915 return NULL; /* so panic if it does */
917 if (o==0 && seg==NO_SEG && !forw_ref && rm!=6)
918 mod = 0;
919 else if (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref)
920 mod = 1;
921 else
922 mod = 2;
924 output->sib_present = FALSE; /* no SIB - it's 16-bit */
925 output->bytes = mod; /* bytes of offset needed */
926 output->modrm = (mod<<6) | (rfield<<3) | rm;
930 output->size = 1 + output->sib_present + output->bytes;
931 return output;
934 static int chsize (operand *input, int addrbits) {
935 if (!(MEMORY & ~input->type)) {
936 int i=input->indexreg, b=input->basereg;
938 if (input->scale==0) i = -1;
940 if (i == -1 && b == -1) /* pure offset */
941 return (input->addr_size != 0 && input->addr_size != addrbits);
943 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
944 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
945 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
946 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI)
947 return (addrbits==16);
948 else
949 return (addrbits==32);
950 } else
951 return 0;