NASM 0.94
[nasm.git] / parser.c
blob2cd7f1d0df629b2d3e33797227d64b11cb1808c9
/* parser.c   source line parser for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * initial version 27/iii/95 by Simon Tatham
 */
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <stddef.h>
14 #include <string.h>
15 #include <ctype.h>
17 #include "nasm.h"
18 #include "nasmlib.h"
19 #include "parser.h"
20 #include "float.h"
22 #include "names.c"
24 static long reg_flags[] = { /* sizes and special flags */
25 0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
26 REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
27 REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
28 REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
29 REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
30 MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
31 REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
32 FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
33 REG_TREG
/*
 * Values carried in tokval.t_integer by a TOKEN_SPECIAL. Each one is
 * the index of the matching keyword in special_names[], so the two
 * lists must stay in the same order.
 */
enum {
    S_BYTE  = 0,
    S_DWORD = 1,
    S_FAR   = 2,
    S_LONG  = 3,
    S_NEAR  = 4,
    S_QWORD = 5,
    S_SHORT = 6,
    S_TO    = 7,
    S_TWORD = 8,
    S_WORD  = 9
};
/*
 * Text of the size/distance keywords. Kept in ascending strcmp()
 * order because nexttoken() looks names up with the binary search
 * bsi(); the index found becomes the S_xxx value.
 */
static char *special_names[] = {
    "byte",
    "dword",
    "far",
    "long",
    "near",
    "qword",
    "short",
    "to",
    "tword",
    "word"
};
/*
 * Instruction prefix keywords, in ascending strcmp() order for the
 * binary search in bsi(). nexttoken() adds PREFIX_ENUM_START to the
 * index found to obtain the prefix code.
 */
static char *prefix_names[] = {
    "a16",
    "a32",
    "lock",
    "o16",
    "o32",
    "rep",
    "repe",
    "repne",
    "repnz",
    "repz",
    "times"
};
/*
 * Evaluator datatype. Inside the evaluator an expression is an array
 * of these records, ended by a record whose type is 0. It is mostly
 * a vector: `type' names a component (a register, the plain scalar
 * part, a segment base) and `value' says how many multiples of that
 * component the expression contains.
 *
 * The one exception is the WRT record, whose `value' is the segment
 * the expression is taken relative to. Such segments are always
 * segment-base types (odd segment numbers or SEG_ABS types), so it
 * remains valid to treat any record with a zero `value' as
 * insignificant.
 */
typedef struct {
    long type;                         /* a register, or EXPR_xxx */
    long value;                        /* must be >= 32 bits */
} expr;
68 static void eval_reset(void);
69 static expr *evaluate(int);
/*
 * ASSUMPTION MADE HERE: register numbers share the `type' field of
 * an expr record with the codes below, so there must be no more than
 * 126 distinct register names.
 */
#define EXPR_SIMPLE 126                /* plain scalar component */
#define EXPR_WRT 127                   /* WRT record: value is a segment */
#define EXPR_SEGBASE 128               /* EXPR_SEGBASE+n: base of segment n */
80 static int is_reloc(expr *);
81 static int is_simple(expr *);
82 static int is_really_simple (expr *);
83 static long reloc_value(expr *);
84 static long reloc_seg(expr *);
85 static long reloc_wrt(expr *);
/*
 * Token codes returned by nexttoken() for everything that is not a
 * single ordinary character. They start at 256 so they can never
 * collide with a character value.
 */
enum {
    TOKEN_ID = 256,
    TOKEN_NUM,
    TOKEN_REG,
    TOKEN_INSN,
    TOKEN_ERRNUM,
    TOKEN_HERE,
    TOKEN_BASE,
    TOKEN_SPECIAL,
    TOKEN_PREFIX,
    TOKEN_SHL,
    TOKEN_SHR,
    TOKEN_SDIV,
    TOKEN_SMOD,
    TOKEN_SEG,
    TOKEN_WRT,
    TOKEN_FLOAT
};
/*
 * Extra information nexttoken() leaves behind for the token it has
 * just returned.
 */
struct tokenval {
    long t_integer;                    /* number, register, opcode,
                                        * prefix or S_xxx index */
    long t_inttwo;                     /* condition-code index, or length
                                        * of a quoted string */
    char *t_charptr;                   /* identifier/float/string text;
                                        * NULL for a plain number */
};
99 static char tempstorage[1024], *q;
100 static int bsi (char *string, char **array, int size);/* binary search */
102 static int nexttoken (void);
103 static int is_comma_next (void);
105 static char *bufptr;
106 static int i;
107 static struct tokenval tokval;
108 static lfunc labelfunc;
109 static efunc error;
110 static char *label;
111 static struct ofmt *outfmt;
113 static long seg, ofs;
115 static int forward;
117 insn *parse_line (long segment, long offset, lfunc lookup_label, int pass,
118 char *buffer, insn *result, struct ofmt *output,
119 efunc errfunc) {
120 int operand;
121 int critical;
123 forward = result->forw_ref = FALSE;
124 q = tempstorage;
125 bufptr = buffer;
126 labelfunc = lookup_label;
127 outfmt = output;
128 error = errfunc;
129 seg = segment;
130 ofs = offset;
131 label = "";
133 i = nexttoken();
135 result->eops = NULL; /* must do this, whatever happens */
137 if (i==0) { /* blank line - ignore */
138 result->label = NULL; /* so, no label on it */
139 result->opcode = -1; /* and no instruction either */
140 return result;
142 if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX &&
143 (i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) {
144 error (ERR_NONFATAL, "label or instruction expected"
145 " at start of line");
146 result->label = NULL;
147 result->opcode = -1;
148 return result;
151 if (i == TOKEN_ID) { /* there's a label here */
152 label = result->label = tokval.t_charptr;
153 i = nexttoken();
154 if (i == ':') { /* skip over the optional colon */
155 i = nexttoken();
157 } else /* no label; so, moving swiftly on */
158 result->label = NULL;
160 if (i==0) {
161 result->opcode = -1; /* this line contains just a label */
162 return result;
165 result->nprefix = 0;
166 result->times = 1L;
168 while (i == TOKEN_PREFIX ||
169 (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) {
171 * Handle special case: the TIMES prefix.
173 if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
174 expr *value;
176 i = nexttoken();
177 eval_reset();
178 value = evaluate (pass);
179 if (!value) { /* but, error in evaluator */
180 result->opcode = -1; /* unrecoverable parse error: */
181 return result; /* ignore this instruction */
183 if (!is_simple (value)) {
184 error (ERR_NONFATAL,
185 "non-constant argument supplied to TIMES");
186 result->times = 1L;
187 } else {
188 result->times = value->value;
189 if (value->value < 0)
190 error(ERR_WARNING, "TIMES value %d is negative",
191 value->value);
193 } else {
194 if (result->nprefix == MAXPREFIX)
195 error (ERR_NONFATAL,
196 "instruction has more than %d prefixes", MAXPREFIX);
197 else
198 result->prefixes[result->nprefix++] = tokval.t_integer;
199 i = nexttoken();
203 if (i != TOKEN_INSN) {
204 error (ERR_NONFATAL, "parser: instruction expected");
205 result->opcode = -1;
206 return result;
209 result->opcode = tokval.t_integer;
210 result->condition = tokval.t_inttwo;
213 * RESB, RESW and RESD cannot be satisfied with incorrectly
214 * evaluated operands, since the correct values _must_ be known
215 * on the first pass. Hence, even in pass one, we set the
216 * `critical' flag on calling evaluate(), so that it will bomb
217 * out on undefined symbols. Nasty, but there's nothing we can
218 * do about it.
220 * For the moment, EQU has the same difficulty, so we'll
221 * include that.
223 if (result->opcode == I_RESB ||
224 result->opcode == I_RESW ||
225 result->opcode == I_RESD ||
226 result->opcode == I_RESQ ||
227 result->opcode == I_REST ||
228 result->opcode == I_EQU)
229 critical = pass;
230 else
231 critical = (pass==2 ? 2 : 0);
233 if (result->opcode == I_DB ||
234 result->opcode == I_DW ||
235 result->opcode == I_DD ||
236 result->opcode == I_DQ ||
237 result->opcode == I_DT ||
238 result->opcode == I_INCBIN) {
239 extop *eop, **tail = &result->eops;
240 int oper_num = 0;
243 * Begin to read the DB/DW/DD/DQ/DT operands.
245 while (1) {
246 i = nexttoken();
247 if (i == 0)
248 break;
249 eop = *tail = nasm_malloc(sizeof(extop));
250 tail = &eop->next;
251 eop->next = NULL;
252 eop->type = EOT_NOTHING;
253 oper_num++;
255 if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) {
256 eop->type = EOT_DB_STRING;
257 eop->stringval = tokval.t_charptr;
258 eop->stringlen = tokval.t_inttwo;
259 i = nexttoken(); /* eat the comma */
260 continue;
263 if (i == TOKEN_FLOAT || i == '-') {
264 long sign = +1L;
266 if (i == '-') {
267 char *save = bufptr;
268 i = nexttoken();
269 sign = -1L;
270 if (i != TOKEN_FLOAT) {
271 bufptr = save;
272 i = '-';
276 if (i == TOKEN_FLOAT) {
277 eop->type = EOT_DB_STRING;
278 eop->stringval = q;
279 if (result->opcode == I_DD)
280 eop->stringlen = 4;
281 else if (result->opcode == I_DQ)
282 eop->stringlen = 8;
283 else if (result->opcode == I_DT)
284 eop->stringlen = 10;
285 else {
286 error(ERR_NONFATAL, "floating-point constant"
287 " encountered in `D%c' instruction",
288 result->opcode == I_DW ? 'W' : 'B');
289 eop->type = EOT_NOTHING;
291 q += eop->stringlen;
292 if (!float_const (tokval.t_charptr, sign,
293 (unsigned char *)eop->stringval,
294 eop->stringlen, error))
295 eop->type = EOT_NOTHING;
296 i = nexttoken(); /* eat the comma */
297 continue;
301 /* anything else */ {
302 expr *value;
303 eval_reset();
304 value = evaluate (critical);
305 if (!value) { /* but, error in evaluator */
306 result->opcode = -1;/* unrecoverable parse error: */
307 return result; /* ignore this instruction */
309 if (is_reloc(value)) {
310 eop->type = EOT_DB_NUMBER;
311 eop->offset = reloc_value(value);
312 eop->segment = reloc_seg(value);
313 eop->wrt = reloc_wrt(value);
314 } else {
315 error (ERR_NONFATAL,
316 "`%s' operand %d: expression is not simple"
317 " or relocatable",
318 insn_names[result->opcode], oper_num);
323 if (result->opcode == I_INCBIN) {
325 * Correct syntax for INCBIN is that there should be
326 * one string operand, followed by one or two numeric
327 * operands.
329 if (!result->eops || result->eops->type != EOT_DB_STRING)
330 error (ERR_NONFATAL, "`incbin' expects a file name");
331 else if (result->eops->next &&
332 result->eops->next->type != EOT_DB_NUMBER)
333 error (ERR_NONFATAL, "`incbin': second parameter is",
334 " non-numeric");
335 else if (result->eops->next && result->eops->next->next &&
336 result->eops->next->next->type != EOT_DB_NUMBER)
337 error (ERR_NONFATAL, "`incbin': third parameter is",
338 " non-numeric");
339 else if (result->eops->next && result->eops->next->next &&
340 result->eops->next->next->next)
341 error (ERR_NONFATAL, "`incbin': more than three parameters");
342 else
343 return result;
345 * If we reach here, one of the above errors happened.
346 * Throw the instruction away.
348 result->opcode = -1;
349 return result;
352 return result;
355 /* right. Now we begin to parse the operands. There may be up to three
356 * of these, separated by commas, and terminated by a zero token. */
358 for (operand = 0; operand < 3; operand++) {
359 expr *seg, *value; /* used most of the time */
360 int mref; /* is this going to be a memory ref? */
362 result->oprs[operand].addr_size = 0;/* have to zero this whatever */
363 i = nexttoken();
364 if (i == 0) break; /* end of operands: get out of here */
365 result->oprs[operand].type = 0; /* so far, no override */
366 while (i == TOKEN_SPECIAL) {/* size specifiers */
367 switch ((int)tokval.t_integer) {
368 case S_BYTE:
369 result->oprs[operand].type |= BITS8;
370 break;
371 case S_WORD:
372 result->oprs[operand].type |= BITS16;
373 break;
374 case S_DWORD:
375 case S_LONG:
376 result->oprs[operand].type |= BITS32;
377 break;
378 case S_QWORD:
379 result->oprs[operand].type |= BITS64;
380 break;
381 case S_TWORD:
382 result->oprs[operand].type |= BITS80;
383 break;
384 case S_TO:
385 result->oprs[operand].type |= TO;
386 break;
387 case S_FAR:
388 result->oprs[operand].type |= FAR;
389 break;
390 case S_NEAR:
391 result->oprs[operand].type |= NEAR;
392 break;
393 case S_SHORT:
394 result->oprs[operand].type |= SHORT;
395 break;
397 i = nexttoken();
400 if (i == '[') { /* memory reference */
401 i = nexttoken();
402 mref = TRUE;
403 if (i == TOKEN_SPECIAL) { /* check for address size override */
404 switch ((int)tokval.t_integer) {
405 case S_WORD:
406 result->oprs[operand].addr_size = 16;
407 break;
408 case S_DWORD:
409 case S_LONG:
410 result->oprs[operand].addr_size = 32;
411 break;
412 default:
413 error (ERR_NONFATAL, "invalid size specification in"
414 " effective address");
416 i = nexttoken();
418 } else /* immediate operand, or register */
419 mref = FALSE;
421 eval_reset();
423 value = evaluate (critical);
424 if (forward)
425 result->forw_ref = TRUE;
426 if (!value) { /* error in evaluator */
427 result->opcode = -1; /* unrecoverable parse error: */
428 return result; /* ignore this instruction */
430 if (i == ':' && mref) { /* it was seg:offset */
431 seg = value; /* so shift this into the segment */
432 i = nexttoken(); /* then skip the colon */
433 if (i == TOKEN_SPECIAL) { /* another check for size override */
434 switch ((int)tokval.t_integer) {
435 case S_WORD:
436 result->oprs[operand].addr_size = 16;
437 break;
438 case S_DWORD:
439 case S_LONG:
440 result->oprs[operand].addr_size = 32;
441 break;
442 default:
443 error (ERR_NONFATAL, "invalid size specification in"
444 " effective address");
446 i = nexttoken();
448 value = evaluate (critical);
449 if (forward)
450 result->forw_ref = TRUE;
451 /* and get the offset */
452 if (!value) { /* but, error in evaluator */
453 result->opcode = -1; /* unrecoverable parse error: */
454 return result; /* ignore this instruction */
456 } else seg = NULL;
457 if (mref) { /* find ] at the end */
458 if (i != ']') {
459 error (ERR_NONFATAL, "parser: expecting ]");
460 do { /* error recovery again */
461 i = nexttoken();
462 } while (i != 0 && i != ',');
463 } else /* we got the required ] */
464 i = nexttoken();
465 } else { /* immediate operand */
466 if (i != 0 && i != ',' && i != ':') {
467 error (ERR_NONFATAL, "comma or end of line expected");
468 do { /* error recovery */
469 i = nexttoken();
470 } while (i != 0 && i != ',');
471 } else if (i == ':') {
472 result->oprs[operand].type |= COLON;
476 /* now convert the exprs returned from evaluate() into operand
477 * descriptions... */
479 if (mref) { /* it's a memory reference */
480 expr *e = value;
481 int b, i, s; /* basereg, indexreg, scale */
482 long o; /* offset */
484 if (seg) { /* segment override */
485 if (seg[1].type!=0 || seg->value!=1 ||
486 REG_SREG & ~reg_flags[seg->type])
487 error (ERR_NONFATAL, "invalid segment override");
488 else if (result->nprefix == MAXPREFIX)
489 error (ERR_NONFATAL,
490 "instruction has more than %d prefixes",
491 MAXPREFIX);
492 else
493 result->prefixes[result->nprefix++] = seg->type;
496 b = i = -1, o = s = 0;
498 if (e->type < EXPR_SIMPLE) { /* this bit's a register */
499 if (e->value == 1) /* in fact it can be basereg */
500 b = e->type;
501 else /* no, it has to be indexreg */
502 i = e->type, s = e->value;
503 e++;
505 if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */
506 if (e->value != 1) { /* it has to be indexreg */
507 if (i != -1) { /* but it can't be */
508 error(ERR_NONFATAL, "invalid effective address");
509 result->opcode = -1;
510 return result;
511 } else
512 i = e->type, s = e->value;
513 } else { /* it can be basereg */
514 if (b != -1) /* or can it? */
515 i = e->type, s = 1;
516 else
517 b = e->type;
519 e++;
521 if (e->type != 0) { /* is there an offset? */
522 if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */
523 error (ERR_NONFATAL, "invalid effective address");
524 result->opcode = -1;
525 return result;
526 } else {
527 if (e->type == EXPR_SIMPLE) {
528 o = e->value;
529 e++;
531 if (e->type == EXPR_WRT) {
532 result->oprs[operand].wrt = e->value;
533 e++;
534 } else
535 result->oprs[operand].wrt = NO_SEG;
537 * Look for a segment base type.
539 if (e->type && e->type < EXPR_SEGBASE) {
540 error (ERR_NONFATAL, "invalid effective address");
541 result->opcode = -1;
542 return result;
544 while (e->type && e->value == 0)
545 e++;
546 if (e->type && e->value != 1) {
547 error (ERR_NONFATAL, "invalid effective address");
548 result->opcode = -1;
549 return result;
551 if (e->type) {
552 result->oprs[operand].segment = e->type-EXPR_SEGBASE;
553 e++;
554 } else
555 result->oprs[operand].segment = NO_SEG;
556 while (e->type && e->value == 0)
557 e++;
558 if (e->type) {
559 error (ERR_NONFATAL, "invalid effective address");
560 result->opcode = -1;
561 return result;
564 } else {
565 o = 0;
566 result->oprs[operand].wrt = NO_SEG;
567 result->oprs[operand].segment = NO_SEG;
570 if (e->type != 0) { /* there'd better be nothing left! */
571 error (ERR_NONFATAL, "invalid effective address");
572 result->opcode = -1;
573 return result;
576 result->oprs[operand].type |= MEMORY;
577 if (b==-1 && (i==-1 || s==0))
578 result->oprs[operand].type |= MEM_OFFS;
579 result->oprs[operand].basereg = b;
580 result->oprs[operand].indexreg = i;
581 result->oprs[operand].scale = s;
582 result->oprs[operand].offset = o;
583 } else { /* it's not a memory reference */
584 if (is_reloc(value)) { /* it's immediate */
585 result->oprs[operand].type |= IMMEDIATE;
586 result->oprs[operand].offset = reloc_value(value);
587 result->oprs[operand].segment = reloc_seg(value);
588 result->oprs[operand].wrt = reloc_wrt(value);
589 if (is_simple(value) && reloc_value(value)==1)
590 result->oprs[operand].type |= UNITY;
591 } else { /* it's a register */
592 if (value->type>=EXPR_SIMPLE || value->value!=1) {
593 error (ERR_NONFATAL, "invalid operand type");
594 result->opcode = -1;
595 return result;
597 /* clear overrides, except TO which applies to FPU regs */
598 result->oprs[operand].type &= TO;
599 result->oprs[operand].type |= REGISTER;
600 result->oprs[operand].type |= reg_flags[value->type];
601 result->oprs[operand].basereg = value->type;
606 result->operands = operand; /* set operand count */
608 while (operand<3) /* clear remaining operands */
609 result->oprs[operand++].type = 0;
612 * Transform RESW, RESD, RESQ, REST into RESB.
614 switch (result->opcode) {
615 case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break;
616 case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break;
617 case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break;
618 case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break;
621 return result;
624 static int is_comma_next (void) {
625 char *p;
627 p = bufptr;
628 while (isspace(*p)) p++;
629 return (*p == ',' || *p == ';' || !*p);
633 * This tokeniser routine has only one side effect, that of
634 * updating `bufptr'. Hence by saving `bufptr', lookahead may be
635 * performed.
638 static int nexttoken (void) {
639 char ourcopy[256], *r, *s;
641 while (isspace(*bufptr)) bufptr++;
642 if (!*bufptr) return 0;
644 /* we have a token; either an id, a number or a char */
645 if (isidstart(*bufptr) ||
646 (*bufptr == '$' && isidstart(bufptr[1]))) {
647 /* now we've got an identifier */
648 int i;
649 int is_sym = FALSE;
651 if (*bufptr == '$') {
652 is_sym = TRUE;
653 bufptr++;
656 tokval.t_charptr = q;
657 *q++ = *bufptr++;
658 while (isidchar(*bufptr)) *q++ = *bufptr++;
659 *q++ = '\0';
660 for (s=tokval.t_charptr, r=ourcopy; *s; s++)
661 *r++ = tolower (*s);
662 *r = '\0';
663 if (is_sym)
664 return TOKEN_ID; /* bypass all other checks */
665 /* right, so we have an identifier sitting in temp storage. now,
666 * is it actually a register or instruction name, or what? */
667 if ((tokval.t_integer=bsi(ourcopy, reg_names,
668 elements(reg_names)))>=0)
669 return TOKEN_REG;
670 if ((tokval.t_integer=bsi(ourcopy, insn_names,
671 elements(insn_names)))>=0)
672 return TOKEN_INSN;
673 for (i=0; i<elements(icn); i++)
674 if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) {
675 char *p = ourcopy + strlen(icn[i]);
676 tokval.t_integer = ico[i];
677 if ((tokval.t_inttwo=bsi(p, conditions,
678 elements(conditions)))>=0)
679 return TOKEN_INSN;
681 if ((tokval.t_integer=bsi(ourcopy, prefix_names,
682 elements(prefix_names)))>=0) {
683 tokval.t_integer += PREFIX_ENUM_START;
684 return TOKEN_PREFIX;
686 if ((tokval.t_integer=bsi(ourcopy, special_names,
687 elements(special_names)))>=0)
688 return TOKEN_SPECIAL;
689 if (!strcmp(ourcopy, "seg"))
690 return TOKEN_SEG;
691 if (!strcmp(ourcopy, "wrt"))
692 return TOKEN_WRT;
693 return TOKEN_ID;
694 } else if (*bufptr == '$' && !isnumchar(bufptr[1])) {
696 * It's a $ sign with no following hex number; this must
697 * mean it's a Here token ($), evaluating to the current
698 * assembly location, or a Base token ($$), evaluating to
699 * the base of the current segment.
701 bufptr++;
702 if (*bufptr == '$') {
703 bufptr++;
704 return TOKEN_BASE;
706 return TOKEN_HERE;
707 } else if (isnumstart(*bufptr)) { /* now we've got a number */
708 char *r = q;
709 int rn_error;
711 *q++ = *bufptr++;
712 while (isnumchar(*bufptr)) {
713 *q++ = *bufptr++;
715 if (*bufptr == '.') {
717 * a floating point constant
719 *q++ = *bufptr++;
720 while (isnumchar(*bufptr)) {
721 *q++ = *bufptr++;
723 *q++ = '\0';
724 tokval.t_charptr = r;
725 return TOKEN_FLOAT;
727 *q++ = '\0';
728 tokval.t_integer = readnum(r, &rn_error);
729 if (rn_error)
730 return TOKEN_ERRNUM; /* some malformation occurred */
731 tokval.t_charptr = NULL;
732 return TOKEN_NUM;
733 } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */
734 char quote = *bufptr++, *r;
735 r = tokval.t_charptr = bufptr;
736 while (*bufptr && *bufptr != quote) bufptr++;
737 tokval.t_inttwo = bufptr - r; /* store full version */
738 if (!*bufptr)
739 return TOKEN_ERRNUM; /* unmatched quotes */
740 tokval.t_integer = 0;
741 r = bufptr++; /* skip over final quote */
742 while (quote != *--r) {
743 tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r;
745 return TOKEN_NUM;
746 } else if (*bufptr == ';') { /* a comment has happened - stay */
747 return 0;
748 } else if ((*bufptr == '>' || *bufptr == '<' ||
749 *bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) {
750 bufptr += 2;
751 return (bufptr[-2] == '>' ? TOKEN_SHR :
752 bufptr[-2] == '<' ? TOKEN_SHL :
753 bufptr[-2] == '/' ? TOKEN_SDIV :
754 TOKEN_SMOD);
755 } else /* just an ordinary char */
756 return (unsigned char) (*bufptr++);
/*
 * Binary search for `string' in `array', which holds `size' strings
 * in ascending strcmp() order. Returns the index of the match, or -1
 * if the string is not present.
 */
static int bsi (char *string, char **array, int size) {
    int below = -1;                    /* highest index known to be too low */
    int above = size;                  /* lowest index known to be too high */

    for (;;) {
        int mid, cmp;

        if (above - below < 2)         /* nothing left between the bounds */
            return -1;
        mid = (below + above) / 2;
        cmp = strcmp(string, array[mid]);
        if (cmp == 0)
            return mid;
        if (cmp < 0)
            above = mid;
        else
            below = mid;
    }
}
775 void cleanup_insn (insn *i) {
776 extop *e;
778 while (i->eops) {
779 e = i->eops;
780 i->eops = i->eops->next;
781 nasm_free (e);
785 /* ------------- Evaluator begins here ------------------ */
787 static expr exprtempstorage[1024], *tempptr; /* store exprs in here */
790 * Add two vector datatypes. We have some bizarre behaviour on far-
791 * absolute segment types: we preserve them during addition _only_
792 * if one of the segments is a truly pure scalar.
794 static expr *add_vectors(expr *p, expr *q) {
795 expr *r = tempptr;
796 int preserve;
798 preserve = is_really_simple(p) || is_really_simple(q);
800 while (p->type && q->type &&
801 p->type < EXPR_SEGBASE+SEG_ABS &&
802 q->type < EXPR_SEGBASE+SEG_ABS)
803 if (p->type > q->type) {
804 tempptr->type = q->type;
805 tempptr->value = q->value;
806 tempptr++, q++;
807 } else if (p->type < q->type) {
808 tempptr->type = p->type;
809 tempptr->value = p->value;
810 tempptr++, p++;
811 } else { /* *p and *q have same type */
812 tempptr->type = p->type;
813 tempptr->value = p->value + q->value;
814 tempptr++, p++, q++;
816 while (p->type &&
817 (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) {
818 tempptr->type = p->type;
819 tempptr->value = p->value;
820 tempptr++, p++;
822 while (q->type &&
823 (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) {
824 tempptr->type = q->type;
825 tempptr->value = q->value;
826 tempptr++, q++;
828 (tempptr++)->type = 0;
830 return r;
834 * Multiply a vector by a scalar. Strip far-absolute segment part
835 * if present.
837 static expr *scalar_mult(expr *vect, long scalar) {
838 expr *p = vect;
840 while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) {
841 p->value = scalar * (p->value);
842 p++;
844 p->type = 0;
846 return vect;
849 static expr *scalarvect (long scalar) {
850 expr *p = tempptr;
851 tempptr->type = EXPR_SIMPLE;
852 tempptr->value = scalar;
853 tempptr++;
854 tempptr->type = 0;
855 tempptr++;
856 return p;
860 * Return TRUE if the argument is a simple scalar. (Or a far-
861 * absolute, which counts.)
863 static int is_simple (expr *vect) {
864 while (vect->type && !vect->value)
865 vect++;
866 if (!vect->type)
867 return 1;
868 if (vect->type != EXPR_SIMPLE)
869 return 0;
870 do {
871 vect++;
872 } while (vect->type && !vect->value);
873 if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0;
874 return 1;
878 * Return TRUE if the argument is a simple scalar, _NOT_ a far-
879 * absolute.
881 static int is_really_simple (expr *vect) {
882 while (vect->type && !vect->value)
883 vect++;
884 if (!vect->type)
885 return 1;
886 if (vect->type != EXPR_SIMPLE)
887 return 0;
888 do {
889 vect++;
890 } while (vect->type && !vect->value);
891 if (vect->type) return 0;
892 return 1;
896 * Return TRUE if the argument is relocatable (i.e. a simple
897 * scalar, plus at most one segment-base, plus possibly a WRT).
899 static int is_reloc (expr *vect) {
900 while (vect->type && !vect->value)
901 vect++;
902 if (!vect->type)
903 return 1;
904 if (vect->type < EXPR_SIMPLE)
905 return 0;
906 if (vect->type == EXPR_SIMPLE) {
907 do {
908 vect++;
909 } while (vect->type && !vect->value);
910 if (!vect->type)
911 return 1;
913 do {
914 vect++;
915 } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
916 if (!vect->type)
917 return 1;
918 return 1;
922 * Return the scalar part of a relocatable vector. (Including
923 * simple scalar vectors - those qualify as relocatable.)
925 static long reloc_value (expr *vect) {
926 while (vect->type && !vect->value)
927 vect++;
928 if (!vect->type) return 0;
929 if (vect->type == EXPR_SIMPLE)
930 return vect->value;
931 else
932 return 0;
936 * Return the segment number of a relocatable vector, or NO_SEG for
937 * simple scalars.
939 static long reloc_seg (expr *vect) {
940 while (vect->type && (vect->type == EXPR_WRT || !vect->value))
941 vect++;
942 if (vect->type == EXPR_SIMPLE) {
943 do {
944 vect++;
945 } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
947 if (!vect->type)
948 return NO_SEG;
949 else
950 return vect->type - EXPR_SEGBASE;
954 * Return the WRT segment number of a relocatable vector, or NO_SEG
955 * if no WRT part is present.
957 static long reloc_wrt (expr *vect) {
958 while (vect->type && vect->type < EXPR_WRT)
959 vect++;
960 if (vect->type == EXPR_WRT) {
961 return vect->value;
962 } else
963 return NO_SEG;
966 static void eval_reset(void) {
967 tempptr = exprtempstorage; /* initialise temporary storage */
971 * The SEG operator: calculate the segment part of a relocatable
972 * value. Return NULL, as usual, if an error occurs. Report the
973 * error too.
975 static expr *segment_part (expr *e) {
976 long seg;
978 if (!is_reloc(e)) {
979 error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
980 return NULL;
983 seg = reloc_seg(e);
984 if (seg == NO_SEG) {
985 error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
986 return NULL;
987 } else if (seg & SEG_ABS)
988 return scalarvect(seg & ~SEG_ABS);
989 else {
990 expr *f = tempptr++;
991 tempptr++->type = 0;
992 f->type = EXPR_SEGBASE+outfmt->segbase(seg+1);
993 f->value = 1;
994 return f;
/*
 * Recursive-descent parser. Called with a single `critical' operand,
 * which is non-zero if the evaluation is critical (i.e. unresolved
 * symbols are an error condition). Must update the global `i' to
 * reflect the token after the parsed string. May return NULL.
 *
 * evaluate() should report its own errors: on return it is assumed
 * that if NULL has been returned, the error has already been
 * reported.
 */

/*
 * Grammar parsed is:
 *
 * expr  : expr0 [ WRT expr6 ]
 * expr0 : expr1 [ {|} expr1]
 * expr1 : expr2 [ {^} expr2]
 * expr2 : expr3 [ {&} expr3]
 * expr3 : expr4 [ {<<,>>} expr4...]
 * expr4 : expr5 [ {+,-} expr5...]
 * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
 * expr6 : { ~,+,-,SEG } expr6
 *       | (expr0)
 *       | symbol
 *       | $
 *       | number
 */
1026 static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int);
1027 static expr *expr4(int), *expr5(int), *expr6(int);
1029 static expr *expr0(int critical) {
1030 expr *e, *f;
1032 e = expr1(critical);
1033 if (!e)
1034 return NULL;
1035 while (i == '|') {
1036 i = nexttoken();
1037 f = expr1(critical);
1038 if (!f)
1039 return NULL;
1040 if (!is_simple(e) || !is_simple(f)) {
1041 error(ERR_NONFATAL, "`|' operator may only be applied to"
1042 " scalar values");
1044 e = scalarvect (reloc_value(e) | reloc_value(f));
1046 return e;
1049 static expr *expr1(int critical) {
1050 expr *e, *f;
1052 e = expr2(critical);
1053 if (!e)
1054 return NULL;
1055 while (i == '^') {
1056 i = nexttoken();
1057 f = expr2(critical);
1058 if (!f)
1059 return NULL;
1060 if (!is_simple(e) || !is_simple(f)) {
1061 error(ERR_NONFATAL, "`^' operator may only be applied to"
1062 " scalar values");
1064 e = scalarvect (reloc_value(e) ^ reloc_value(f));
1066 return e;
1069 static expr *expr2(int critical) {
1070 expr *e, *f;
1072 e = expr3(critical);
1073 if (!e)
1074 return NULL;
1075 while (i == '&') {
1076 i = nexttoken();
1077 f = expr3(critical);
1078 if (!f)
1079 return NULL;
1080 if (!is_simple(e) || !is_simple(f)) {
1081 error(ERR_NONFATAL, "`&' operator may only be applied to"
1082 " scalar values");
1084 e = scalarvect (reloc_value(e) & reloc_value(f));
1086 return e;
1089 static expr *expr3(int critical) {
1090 expr *e, *f;
1092 e = expr4(critical);
1093 if (!e)
1094 return NULL;
1095 while (i == TOKEN_SHL || i == TOKEN_SHR) {
1096 int j = i;
1097 i = nexttoken();
1098 f = expr4(critical);
1099 if (!f)
1100 return NULL;
1101 if (!is_simple(e) || !is_simple(f)) {
1102 error(ERR_NONFATAL, "shift operator may only be applied to"
1103 " scalar values");
1105 switch (j) {
1106 case TOKEN_SHL:
1107 e = scalarvect (reloc_value(e) << reloc_value(f));
1108 break;
1109 case TOKEN_SHR:
1110 e = scalarvect (((unsigned long)reloc_value(e)) >>
1111 reloc_value(f));
1112 break;
1115 return e;
1118 static expr *expr4(int critical) {
1119 expr *e, *f;
1121 e = expr5(critical);
1122 if (!e)
1123 return NULL;
1124 while (i == '+' || i == '-') {
1125 int j = i;
1126 i = nexttoken();
1127 f = expr5(critical);
1128 if (!f)
1129 return NULL;
1130 switch (j) {
1131 case '+':
1132 e = add_vectors (e, f);
1133 break;
1134 case '-':
1135 e = add_vectors (e, scalar_mult(f, -1L));
1136 break;
1139 return e;
1142 static expr *expr5(int critical) {
1143 expr *e, *f;
1145 e = expr6(critical);
1146 if (!e)
1147 return NULL;
1148 while (i == '*' || i == '/' || i == '*' ||
1149 i == TOKEN_SDIV || i == TOKEN_SMOD) {
1150 int j = i;
1151 i = nexttoken();
1152 f = expr6(critical);
1153 if (!f)
1154 return NULL;
1155 if (j != '*' && (!is_simple(e) || !is_simple(f))) {
1156 error(ERR_NONFATAL, "division operator may only be applied to"
1157 " scalar values");
1158 return NULL;
1160 if (j != '*' && reloc_value(f) == 0) {
1161 error(ERR_NONFATAL, "division by zero");
1162 return NULL;
1164 switch (j) {
1165 case '*':
1166 if (is_simple(e))
1167 e = scalar_mult (f, reloc_value(e));
1168 else if (is_simple(f))
1169 e = scalar_mult (e, reloc_value(f));
1170 else {
1171 error(ERR_NONFATAL, "unable to multiply two "
1172 "non-scalar objects");
1173 return NULL;
1175 break;
1176 case '/':
1177 e = scalarvect (((unsigned long)reloc_value(e)) /
1178 ((unsigned long)reloc_value(f)));
1179 break;
1180 case '%':
1181 e = scalarvect (((unsigned long)reloc_value(e)) %
1182 ((unsigned long)reloc_value(f)));
1183 break;
1184 case TOKEN_SDIV:
1185 e = scalarvect (((signed long)reloc_value(e)) /
1186 ((signed long)reloc_value(f)));
1187 break;
1188 case TOKEN_SMOD:
1189 e = scalarvect (((signed long)reloc_value(e)) %
1190 ((signed long)reloc_value(f)));
1191 break;
1194 return e;
1197 static expr *expr6(int critical) {
1198 expr *e;
1199 long label_seg, label_ofs;
1201 if (i == '-') {
1202 i = nexttoken();
1203 e = expr6(critical);
1204 if (!e)
1205 return NULL;
1206 return scalar_mult (e, -1L);
1207 } else if (i == '+') {
1208 i = nexttoken();
1209 return expr6(critical);
1210 } else if (i == '~') {
1211 i = nexttoken();
1212 e = expr6(critical);
1213 if (!e)
1214 return NULL;
1215 if (!is_simple(e)) {
1216 error(ERR_NONFATAL, "`~' operator may only be applied to"
1217 " scalar values");
1218 return NULL;
1220 return scalarvect(~reloc_value(e));
1221 } else if (i == TOKEN_SEG) {
1222 i = nexttoken();
1223 e = expr6(critical);
1224 if (!e)
1225 return NULL;
1226 return segment_part(e);
1227 } else if (i == '(') {
1228 i = nexttoken();
1229 e = expr0(critical);
1230 if (!e)
1231 return NULL;
1232 if (i != ')') {
1233 error(ERR_NONFATAL, "expecting `)'");
1234 return NULL;
1236 i = nexttoken();
1237 return e;
1238 } else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID ||
1239 i == TOKEN_HERE || i == TOKEN_BASE) {
1240 e = tempptr;
1241 switch (i) {
1242 case TOKEN_NUM:
1243 e->type = EXPR_SIMPLE;
1244 e->value = tokval.t_integer;
1245 break;
1246 case TOKEN_REG:
1247 e->type = tokval.t_integer;
1248 e->value = 1;
1249 break;
1250 case TOKEN_ID:
1251 case TOKEN_HERE:
1252 case TOKEN_BASE:
1254 * Since the whole line is parsed before the label it
1255 * defines is given to the label manager, we have
1256 * problems with lines such as
1258 * end: TIMES 512-(end-start) DB 0
1260 * where `end' is not known on pass one, despite not
1261 * really being a forward reference, and due to
1262 * criticality it is _needed_. Hence we check our label
1263 * against the currently defined one, and do our own
1264 * resolution of it if we have to.
1266 if (i == TOKEN_BASE) {
1267 label_seg = seg;
1268 label_ofs = 0;
1269 } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) {
1270 label_seg = seg;
1271 label_ofs = ofs;
1272 } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) {
1273 if (critical == 2) {
1274 error (ERR_NONFATAL, "symbol `%s' undefined",
1275 tokval.t_charptr);
1276 return NULL;
1277 } else if (critical == 1) {
1278 error (ERR_NONFATAL, "symbol `%s' not defined before use",
1279 tokval.t_charptr);
1280 return NULL;
1281 } else {
1282 forward = TRUE;
1283 label_seg = seg;
1284 label_ofs = ofs;
1287 e->type = EXPR_SIMPLE;
1288 e->value = label_ofs;
1289 if (label_seg!=NO_SEG) {
1290 tempptr++;
1291 tempptr->type = EXPR_SEGBASE + label_seg;
1292 tempptr->value = 1;
1294 break;
1296 tempptr++;
1297 tempptr->type = 0;
1298 tempptr++;
1299 i = nexttoken();
1300 return e;
1301 } else {
1302 error(ERR_NONFATAL, "expression syntax error");
1303 return NULL;
1307 static expr *evaluate (int critical) {
1308 expr *e;
1309 expr *f = NULL;
1311 e = expr0 (critical);
1312 if (!e)
1313 return NULL;
1315 if (i == TOKEN_WRT) {
1316 if (!is_reloc(e)) {
1317 error(ERR_NONFATAL, "invalid left-hand operand to WRT");
1318 return NULL;
1320 i = nexttoken(); /* eat the WRT */
1321 f = expr6 (critical);
1322 if (!f)
1323 return NULL;
1325 e = scalar_mult (e, 1L); /* strip far-absolute segment part */
1326 if (f) {
1327 expr *g = tempptr++;
1328 tempptr++->type = 0;
1329 g->type = EXPR_WRT;
1330 if (!is_reloc(f)) {
1331 error(ERR_NONFATAL, "invalid right-hand operand to WRT");
1332 return NULL;
1334 g->value = reloc_seg(f);
1335 if (g->value == NO_SEG)
1336 g->value = reloc_value(f) | SEG_ABS;
1337 else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) {
1338 error(ERR_NONFATAL, "invalid right-hand operand to WRT");
1339 return NULL;
1341 e = add_vectors (e, g);
1343 return e;