/*
 * Provenance: NASM 0.91, [nasm/sigaren-mirror.git] / parser.c,
 * blob 14c7a5ba710c8e40d11ce2478336f170055d077a
 */

/* parser.c   source line parser for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * initial version 27/iii/95 by Simon Tatham
 */
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "nasm.h"
#include "nasmlib.h"
#include "parser.h"
#include "float.h"

#include "names.c"
25 static long reg_flags[] = { /* sizes and special flags */
26 0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
27 REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
28 REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
29 REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
30 REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
31 MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
32 REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
33 FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
34 REG_TREG
/*
 * Special tokens (size and distance specifiers). The enum values are
 * the indices into special_names[], which the tokeniser searches with
 * bsi(); the two lists must therefore stay in the same, strcmp-sorted,
 * order.
 */
enum {                                 /* special tokens */
    S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_QWORD, S_SHORT, S_TO,
    S_TWORD, S_WORD
};

static char *special_names[] = {       /* and the actual text */
    "byte", "dword", "far", "long", "near", "qword", "short", "to",
    "tword", "word"
};
/*
 * Instruction prefix keywords, in strcmp-sorted order because the
 * tokeniser looks them up with bsi(). The tokeniser adds
 * PREFIX_ENUM_START to the index found here to form the prefix code.
 */
static char *prefix_names[] = {
    "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
    "repnz", "repz", "times"
};
/*
 * Evaluator datatype. Expressions, within the evaluator, are
 * stored as an array of these beasts, terminated by a record with
 * type==0. Mostly, it's a vector type: each type denotes some kind
 * of a component, and the value denotes the multiple of that
 * component present in the expression. The exception is the WRT
 * type, whose `value' field denotes the segment to which the
 * expression is relative. These segments will be segment-base
 * types, i.e. either odd segment values or SEG_ABS types. So it is
 * still valid to assume that anything with a `value' field of zero
 * is insignificant.
 */
typedef struct {
    long type;                         /* a register, or EXPR_xxx */
    long value;                        /* must be >= 32 bits */
} expr;
69 static void eval_reset(void);
70 static expr *evaluate(int);
73 * ASSUMPTION MADE HERE. The number of distinct register names
74 * (i.e. possible "type" fields for an expr structure) does not
75 * exceed 126.
77 #define EXPR_SIMPLE 126
78 #define EXPR_WRT 127
79 #define EXPR_SEGBASE 128
81 static int is_reloc(expr *);
82 static int is_simple(expr *);
83 static int is_really_simple (expr *);
84 static long reloc_value(expr *);
85 static long reloc_seg(expr *);
86 static long reloc_wrt(expr *);
/*
 * Token codes returned by nexttoken(). Values start at 256 so that
 * they never collide with single characters, which nexttoken()
 * returns as their own (unsigned char) value.
 */
enum {                                 /* token types, other than chars */
    TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, TOKEN_ERRNUM,
    TOKEN_HERE, TOKEN_BASE, TOKEN_SPECIAL, TOKEN_PREFIX, TOKEN_SHL,
    TOKEN_SHR, TOKEN_SDIV, TOKEN_SMOD, TOKEN_SEG, TOKEN_WRT,
    TOKEN_FLOAT
};
/*
 * Attribute values of the most recently scanned token. Which fields
 * are meaningful depends on the token type: e.g. the tokeniser stores
 * a table index in t_integer for registers, instructions, prefixes and
 * specials, the condition index in t_inttwo for conditional
 * instructions, and the text pointer (plus length in t_inttwo for
 * quoted strings) in t_charptr.
 */
struct tokenval {
    long t_integer, t_inttwo;
    char *t_charptr;
};
100 static char tempstorage[1024], *q;
101 static int bsi (char *string, char **array, int size);/* binary search */
103 static int nexttoken (void);
104 static int is_comma_next (void);
106 static char *bufptr;
107 static int i;
108 static struct tokenval tokval;
109 static lfunc labelfunc;
110 static efunc error;
111 static char *label;
112 static struct ofmt *outfmt;
114 static long seg, ofs;
116 insn *parse_line (long segment, long offset, lfunc lookup_label, int pass,
117 char *buffer, insn *result, struct ofmt *output,
118 efunc errfunc) {
119 int operand;
120 int critical;
122 q = tempstorage;
123 bufptr = buffer;
124 labelfunc = lookup_label;
125 outfmt = output;
126 error = errfunc;
127 seg = segment;
128 ofs = offset;
129 label = "";
131 i = nexttoken();
133 result->eops = NULL; /* must do this, whatever happens */
135 if (i==0) { /* blank line - ignore */
136 result->label = NULL; /* so, no label on it */
137 result->opcode = -1; /* and no instruction either */
138 return result;
140 if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX &&
141 (i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) {
142 error (ERR_NONFATAL, "label or instruction expected"
143 " at start of line");
144 result->label = NULL;
145 result->opcode = -1;
146 return result;
149 if (i == TOKEN_ID) { /* there's a label here */
150 label = result->label = tokval.t_charptr;
151 i = nexttoken();
152 if (i == ':') { /* skip over the optional colon */
153 i = nexttoken();
155 } else /* no label; so, moving swiftly on */
156 result->label = NULL;
158 if (i==0) {
159 result->opcode = -1; /* this line contains just a label */
160 return result;
163 result->nprefix = 0;
164 result->times = 1;
166 while (i == TOKEN_PREFIX ||
167 (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) {
169 * Handle special case: the TIMES prefix.
171 if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
172 expr *value;
174 i = nexttoken();
175 eval_reset();
176 value = evaluate (pass);
177 if (!value) { /* but, error in evaluator */
178 result->opcode = -1; /* unrecoverable parse error: */
179 return result; /* ignore this instruction */
181 if (!is_simple (value)) {
182 error (ERR_NONFATAL,
183 "non-constant argument supplied to TIMES");
184 result->times = 1;
185 } else
186 result->times = value->value;
187 } else {
188 if (result->nprefix == MAXPREFIX)
189 error (ERR_NONFATAL,
190 "instruction has more than %d prefixes", MAXPREFIX);
191 else
192 result->prefixes[result->nprefix++] = tokval.t_integer;
193 i = nexttoken();
197 if (i != TOKEN_INSN) {
198 error (ERR_NONFATAL, "parser: instruction expected");
199 result->opcode = -1;
200 return result;
203 result->opcode = tokval.t_integer;
204 result->condition = tokval.t_inttwo;
207 * RESB, RESW and RESD cannot be satisfied with incorrectly
208 * evaluated operands, since the correct values _must_ be known
209 * on the first pass. Hence, even in pass one, we set the
210 * `critical' flag on calling evaluate(), so that it will bomb
211 * out on undefined symbols. Nasty, but there's nothing we can
212 * do about it.
214 * For the moment, EQU has the same difficulty, so we'll
215 * include that.
217 if (result->opcode == I_RESB ||
218 result->opcode == I_RESW ||
219 result->opcode == I_RESD ||
220 result->opcode == I_RESQ ||
221 result->opcode == I_REST ||
222 result->opcode == I_EQU)
223 critical = pass;
224 else
225 critical = (pass==2 ? 2 : 0);
227 if (result->opcode == I_DB ||
228 result->opcode == I_DW ||
229 result->opcode == I_DD ||
230 result->opcode == I_DQ ||
231 result->opcode == I_DT) {
232 extop *eop, **tail = &result->eops;
233 int oper_num = 0;
236 * Begin to read the DB/DW/DD/DQ/DT operands.
238 while (1) {
239 i = nexttoken();
240 if (i == 0)
241 break;
242 eop = *tail = nasm_malloc(sizeof(extop));
243 tail = &eop->next;
244 eop->next = NULL;
245 eop->type = EOT_NOTHING;
246 oper_num++;
248 if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) {
249 eop->type = EOT_DB_STRING;
250 eop->stringval = tokval.t_charptr;
251 eop->stringlen = tokval.t_inttwo;
252 i = nexttoken(); /* eat the comma */
253 continue;
256 if (i == TOKEN_FLOAT || i == '-') {
257 long sign = +1L;
259 if (i == '-') {
260 char *save = bufptr;
261 i = nexttoken();
262 sign = -1L;
263 if (i != TOKEN_FLOAT) {
264 bufptr = save;
265 i = '-';
269 if (i == TOKEN_FLOAT) {
270 eop->type = EOT_DB_STRING;
271 eop->stringval = q;
272 if (result->opcode == I_DD)
273 eop->stringlen = 4;
274 else if (result->opcode == I_DQ)
275 eop->stringlen = 8;
276 else if (result->opcode == I_DT)
277 eop->stringlen = 10;
278 else {
279 error(ERR_NONFATAL, "floating-point constant"
280 " encountered in `D%c' instruction",
281 result->opcode == I_DW ? 'W' : 'B');
282 eop->type = EOT_NOTHING;
284 q += eop->stringlen;
285 if (!float_const (tokval.t_charptr, sign,
286 (unsigned char *)eop->stringval,
287 eop->stringlen, error))
288 eop->type = EOT_NOTHING;
289 i = nexttoken(); /* eat the comma */
290 continue;
294 /* anything else */ {
295 expr *value;
296 eval_reset();
297 value = evaluate (critical);
298 if (!value) { /* but, error in evaluator */
299 result->opcode = -1;/* unrecoverable parse error: */
300 return result; /* ignore this instruction */
302 if (is_reloc(value)) {
303 eop->type = EOT_DB_NUMBER;
304 eop->offset = reloc_value(value);
305 eop->segment = reloc_seg(value);
306 eop->wrt = reloc_wrt(value);
307 } else {
308 error (ERR_NONFATAL,
309 "`%s' operand %d: expression is not simple"
310 " or relocatable",
311 insn_names[result->opcode], oper_num);
315 return result;
318 /* right. Now we begin to parse the operands. There may be up to three
319 * of these, separated by commas, and terminated by a zero token. */
321 for (operand = 0; operand < 3; operand++) {
322 expr *seg, *value; /* used most of the time */
323 int mref; /* is this going to be a memory ref? */
325 result->oprs[operand].addr_size = 0;/* have to zero this whatever */
326 i = nexttoken();
327 if (i == 0) break; /* end of operands: get out of here */
328 result->oprs[operand].type = 0; /* so far, no override */
329 while (i == TOKEN_SPECIAL) {/* size specifiers */
330 switch ((int)tokval.t_integer) {
331 case S_BYTE:
332 result->oprs[operand].type |= BITS8;
333 break;
334 case S_WORD:
335 result->oprs[operand].type |= BITS16;
336 break;
337 case S_DWORD:
338 case S_LONG:
339 result->oprs[operand].type |= BITS32;
340 break;
341 case S_QWORD:
342 result->oprs[operand].type |= BITS64;
343 break;
344 case S_TWORD:
345 result->oprs[operand].type |= BITS80;
346 break;
347 case S_TO:
348 result->oprs[operand].type |= TO;
349 break;
350 case S_FAR:
351 result->oprs[operand].type |= FAR;
352 break;
353 case S_NEAR:
354 result->oprs[operand].type |= NEAR;
355 break;
356 case S_SHORT:
357 result->oprs[operand].type |= SHORT;
358 break;
360 i = nexttoken();
363 if (i == '[') { /* memory reference */
364 i = nexttoken();
365 mref = TRUE;
366 if (i == TOKEN_SPECIAL) { /* check for address size override */
367 switch ((int)tokval.t_integer) {
368 case S_WORD:
369 result->oprs[operand].addr_size = 16;
370 break;
371 case S_DWORD:
372 case S_LONG:
373 result->oprs[operand].addr_size = 32;
374 break;
375 default:
376 error (ERR_NONFATAL, "invalid size specification in"
377 " effective address");
379 i = nexttoken();
381 } else /* immediate operand, or register */
382 mref = FALSE;
384 eval_reset();
386 value = evaluate (critical);
387 if (!value) { /* error in evaluator */
388 result->opcode = -1; /* unrecoverable parse error: */
389 return result; /* ignore this instruction */
391 if (i == ':' && mref) { /* it was seg:offset */
392 seg = value; /* so shift this into the segment */
393 i = nexttoken(); /* then skip the colon */
394 if (i == TOKEN_SPECIAL) { /* another check for size override */
395 switch ((int)tokval.t_integer) {
396 case S_WORD:
397 result->oprs[operand].addr_size = 16;
398 break;
399 case S_DWORD:
400 case S_LONG:
401 result->oprs[operand].addr_size = 32;
402 break;
403 default:
404 error (ERR_NONFATAL, "invalid size specification in"
405 " effective address");
407 i = nexttoken();
409 value = evaluate (critical);
410 /* and get the offset */
411 if (!value) { /* but, error in evaluator */
412 result->opcode = -1; /* unrecoverable parse error: */
413 return result; /* ignore this instruction */
415 } else seg = NULL;
416 if (mref) { /* find ] at the end */
417 if (i != ']') {
418 error (ERR_NONFATAL, "parser: expecting ]");
419 do { /* error recovery again */
420 i = nexttoken();
421 } while (i != 0 && i != ',');
422 } else /* we got the required ] */
423 i = nexttoken();
424 } else { /* immediate operand */
425 if (i != 0 && i != ',' && i != ':') {
426 error (ERR_NONFATAL, "comma or end of line expected");
427 do { /* error recovery */
428 i = nexttoken();
429 } while (i != 0 && i != ',');
430 } else if (i == ':') {
431 result->oprs[operand].type |= COLON;
435 /* now convert the exprs returned from evaluate() into operand
436 * descriptions... */
438 if (mref) { /* it's a memory reference */
439 expr *e = value;
440 int b, i, s; /* basereg, indexreg, scale */
441 long o; /* offset */
443 if (seg) { /* segment override */
444 if (seg[1].type!=0 || seg->value!=1 ||
445 REG_SREG & ~reg_flags[seg->type])
446 error (ERR_NONFATAL, "invalid segment override");
447 else if (result->nprefix == MAXPREFIX)
448 error (ERR_NONFATAL,
449 "instruction has more than %d prefixes",
450 MAXPREFIX);
451 else
452 result->prefixes[result->nprefix++] = seg->type;
455 b = i = -1, o = s = 0;
457 if (e->type < EXPR_SIMPLE) { /* this bit's a register */
458 if (e->value == 1) /* in fact it can be basereg */
459 b = e->type;
460 else /* no, it has to be indexreg */
461 i = e->type, s = e->value;
462 e++;
464 if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */
465 if (e->value != 1) { /* it has to be indexreg */
466 if (i != -1) { /* but it can't be */
467 error(ERR_NONFATAL, "invalid effective address");
468 result->opcode = -1;
469 return result;
470 } else
471 i = e->type, s = e->value;
472 } else { /* it can be basereg */
473 if (b != -1) /* or can it? */
474 i = e->type, s = 1;
475 else
476 b = e->type;
478 e++;
480 if (e->type != 0) { /* is there an offset? */
481 if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */
482 error (ERR_NONFATAL, "invalid effective address");
483 result->opcode = -1;
484 return result;
485 } else {
486 if (e->type == EXPR_SIMPLE) {
487 o = e->value;
488 e++;
490 if (e->type == EXPR_WRT) {
491 result->oprs[operand].wrt = e->value;
492 e++;
493 } else
494 result->oprs[operand].wrt = NO_SEG;
495 if (e->type != 0) { /* is there a segment id? */
496 if (e->type < EXPR_SEGBASE) {
497 error (ERR_NONFATAL,
498 "invalid effective address");
499 result->opcode = -1;
500 return result;
501 } else
502 result->oprs[operand].segment = (e->type -
503 EXPR_SEGBASE);
504 e++;
505 } else
506 result->oprs[operand].segment = NO_SEG;
508 } else {
509 o = 0;
510 result->oprs[operand].wrt = NO_SEG;
511 result->oprs[operand].segment = NO_SEG;
514 if (e->type != 0) { /* there'd better be nothing left! */
515 error (ERR_NONFATAL, "invalid effective address");
516 result->opcode = -1;
517 return result;
520 result->oprs[operand].type |= MEMORY;
521 if (b==-1 && (i==-1 || s==0))
522 result->oprs[operand].type |= MEM_OFFS;
523 result->oprs[operand].basereg = b;
524 result->oprs[operand].indexreg = i;
525 result->oprs[operand].scale = s;
526 result->oprs[operand].offset = o;
527 } else { /* it's not a memory reference */
528 if (is_reloc(value)) { /* it's immediate */
529 result->oprs[operand].type |= IMMEDIATE;
530 result->oprs[operand].offset = reloc_value(value);
531 result->oprs[operand].segment = reloc_seg(value);
532 result->oprs[operand].wrt = reloc_wrt(value);
533 if (is_simple(value) && reloc_value(value)==1)
534 result->oprs[operand].type |= UNITY;
535 } else { /* it's a register */
536 if (value->type>=EXPR_SIMPLE || value->value!=1) {
537 error (ERR_NONFATAL, "invalid operand type");
538 result->opcode = -1;
539 return result;
541 /* clear overrides, except TO which applies to FPU regs */
542 result->oprs[operand].type &= TO;
543 result->oprs[operand].type |= REGISTER;
544 result->oprs[operand].type |= reg_flags[value->type];
545 result->oprs[operand].basereg = value->type;
550 result->operands = operand; /* set operand count */
552 while (operand<3) /* clear remaining operands */
553 result->oprs[operand++].type = 0;
556 * Transform RESW, RESD, RESQ, REST into RESB.
558 switch (result->opcode) {
559 case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break;
560 case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break;
561 case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break;
562 case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break;
565 return result;
568 static int is_comma_next (void) {
569 char *p;
571 p = bufptr;
572 while (isspace(*p)) p++;
573 return (*p == ',' || *p == ';' || !*p);
/* isidstart matches any character that may start an identifier, and isidchar
 * matches any character that may appear at places other than the start of an
 * identifier. E.g. a period may only appear at the start of an identifier
 * (for local labels), whereas a number may appear anywhere *but* at the
 * start.
 *
 * The argument is converted to unsigned char before being handed to the
 * <ctype.h> functions, so these macros are safe to use directly on plain
 * `char' data. Arguments are evaluated more than once: do not pass an
 * expression with side effects. */

#define isidstart(c) ( isalpha((unsigned char)(c)) || (c)=='_' || \
                       (c)=='.' || (c)=='?' )
#define isidchar(c)  ( isidstart(c) || isdigit((unsigned char)(c)) || \
                       (c)=='$' || (c)=='#' || (c)=='@' || (c)=='~' )

/* Ditto for numeric constants. */

#define isnumstart(c) ( isdigit((unsigned char)(c)) || (c)=='$' )
#define isnumchar(c)  ( isalnum((unsigned char)(c)) )

/* This returns the numeric value of a given 'digit'. */

#define numvalue(c)  ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
596 * This tokeniser routine has only one side effect, that of
597 * updating `bufptr'. Hence by saving `bufptr', lookahead may be
598 * performed.
601 static int nexttoken (void) {
602 char ourcopy[256], *r, *s;
604 while (isspace(*bufptr)) bufptr++;
605 if (!*bufptr) return 0;
607 /* we have a token; either an id, a number or a char */
608 if (isidstart(*bufptr) ||
609 (*bufptr == '$' && isidstart(bufptr[1]))) {
610 /* now we've got an identifier */
611 int i;
612 int is_sym = FALSE;
614 if (*bufptr == '$') {
615 is_sym = TRUE;
616 bufptr++;
619 tokval.t_charptr = q;
620 *q++ = *bufptr++;
621 while (isidchar(*bufptr)) *q++ = *bufptr++;
622 *q++ = '\0';
623 for (s=tokval.t_charptr, r=ourcopy; *s; s++)
624 *r++ = tolower (*s);
625 *r = '\0';
626 if (is_sym)
627 return TOKEN_ID; /* bypass all other checks */
628 /* right, so we have an identifier sitting in temp storage. now,
629 * is it actually a register or instruction name, or what? */
630 if ((tokval.t_integer=bsi(ourcopy, reg_names,
631 elements(reg_names)))>=0)
632 return TOKEN_REG;
633 if ((tokval.t_integer=bsi(ourcopy, insn_names,
634 elements(insn_names)))>=0)
635 return TOKEN_INSN;
636 for (i=0; i<elements(icn); i++)
637 if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) {
638 char *p = ourcopy + strlen(icn[i]);
639 tokval.t_integer = ico[i];
640 if ((tokval.t_inttwo=bsi(p, conditions,
641 elements(conditions)))>=0)
642 return TOKEN_INSN;
644 if ((tokval.t_integer=bsi(ourcopy, prefix_names,
645 elements(prefix_names)))>=0) {
646 tokval.t_integer += PREFIX_ENUM_START;
647 return TOKEN_PREFIX;
649 if ((tokval.t_integer=bsi(ourcopy, special_names,
650 elements(special_names)))>=0)
651 return TOKEN_SPECIAL;
652 if (!strcmp(ourcopy, "seg"))
653 return TOKEN_SEG;
654 if (!strcmp(ourcopy, "wrt"))
655 return TOKEN_WRT;
656 return TOKEN_ID;
657 } else if (*bufptr == '$' && !isnumchar(bufptr[1])) {
659 * It's a $ sign with no following hex number; this must
660 * mean it's a Here token ($), evaluating to the current
661 * assembly location, or a Base token ($$), evaluating to
662 * the base of the current segment.
664 bufptr++;
665 if (*bufptr == '$') {
666 bufptr++;
667 return TOKEN_BASE;
669 return TOKEN_HERE;
670 } else if (isnumstart(*bufptr)) { /* now we've got a number */
671 char *r = q;
672 int rn_error;
674 *q++ = *bufptr++;
675 while (isnumchar(*bufptr)) {
676 *q++ = *bufptr++;
678 if (*bufptr == '.') {
680 * a floating point constant
682 *q++ = *bufptr++;
683 while (isnumchar(*bufptr)) {
684 *q++ = *bufptr++;
686 *q++ = '\0';
687 tokval.t_charptr = r;
688 return TOKEN_FLOAT;
690 *q++ = '\0';
691 tokval.t_integer = readnum(r, &rn_error);
692 if (rn_error)
693 return TOKEN_ERRNUM; /* some malformation occurred */
694 tokval.t_charptr = NULL;
695 return TOKEN_NUM;
696 } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */
697 char quote = *bufptr++, *r;
698 r = tokval.t_charptr = bufptr;
699 while (*bufptr && *bufptr != quote) bufptr++;
700 tokval.t_inttwo = bufptr - r; /* store full version */
701 if (!*bufptr)
702 return TOKEN_ERRNUM; /* unmatched quotes */
703 tokval.t_integer = 0;
704 r = bufptr++; /* skip over final quote */
705 while (quote != *--r) {
706 tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r;
708 return TOKEN_NUM;
709 } else if (*bufptr == ';') { /* a comment has happened - stay */
710 return 0;
711 } else if ((*bufptr == '>' || *bufptr == '<' ||
712 *bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) {
713 bufptr += 2;
714 return (bufptr[-2] == '>' ? TOKEN_SHR :
715 bufptr[-2] == '<' ? TOKEN_SHL :
716 bufptr[-2] == '/' ? TOKEN_SDIV :
717 TOKEN_SMOD);
718 } else /* just an ordinary char */
719 return (unsigned char) (*bufptr++);
/*
 * Binary search: return index of "string" in "array", or -1 if no
 * match. "array" must hold "size" strings in ascending strcmp order;
 * the comparison is case-sensitive.
 */
static int bsi (char *string, char **array, int size) {
    int lo = -1, hi = size;            /* always, lo < index < hi */

    while (hi - lo >= 2) {
        int mid = (lo + hi) / 2;
        int cmp = strcmp(string, array[mid]);

        if (cmp < 0)                   /* it's in the first half */
            hi = mid;
        else if (cmp > 0)              /* it's in the second half */
            lo = mid;
        else                           /* we've got it :) */
            return mid;
    }
    return -1;                         /* we haven't got it :( */
}
738 void cleanup_insn (insn *i) {
739 extop *e;
741 while (i->eops) {
742 e = i->eops;
743 i->eops = i->eops->next;
744 nasm_free (e);
748 /* ------------- Evaluator begins here ------------------ */
750 static expr exprtempstorage[1024], *tempptr; /* store exprs in here */
753 * Add two vector datatypes. We have some bizarre behaviour on far-
754 * absolute segment types: we preserve them during addition _only_
755 * if one of the segments is a truly pure scalar.
757 static expr *add_vectors(expr *p, expr *q) {
758 expr *r = tempptr;
759 int preserve;
761 preserve = is_really_simple(p) || is_really_simple(q);
763 while (p->type && q->type &&
764 p->type < EXPR_SEGBASE+SEG_ABS &&
765 q->type < EXPR_SEGBASE+SEG_ABS)
766 if (p->type > q->type) {
767 tempptr->type = q->type;
768 tempptr->value = q->value;
769 tempptr++, q++;
770 } else if (p->type < q->type) {
771 tempptr->type = p->type;
772 tempptr->value = p->value;
773 tempptr++, p++;
774 } else { /* *p and *q have same type */
775 tempptr->type = p->type;
776 tempptr->value = p->value + q->value;
777 tempptr++, p++, q++;
779 while (p->type &&
780 (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) {
781 tempptr->type = p->type;
782 tempptr->value = p->value;
783 tempptr++, p++;
785 while (q->type &&
786 (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) {
787 tempptr->type = q->type;
788 tempptr->value = q->value;
789 tempptr++, q++;
791 (tempptr++)->type = 0;
793 return r;
797 * Multiply a vector by a scalar. Strip far-absolute segment part
798 * if present.
800 static expr *scalar_mult(expr *vect, long scalar) {
801 expr *p = vect;
803 while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) {
804 p->value = scalar * (p->value);
805 p++;
807 p->type = 0;
809 return vect;
812 static expr *scalarvect (long scalar) {
813 expr *p = tempptr;
814 tempptr->type = EXPR_SIMPLE;
815 tempptr->value = scalar;
816 tempptr++;
817 tempptr->type = 0;
818 tempptr++;
819 return p;
823 * Return TRUE if the argument is a simple scalar. (Or a far-
824 * absolute, which counts.)
826 static int is_simple (expr *vect) {
827 while (vect->type && !vect->value)
828 vect++;
829 if (!vect->type)
830 return 1;
831 if (vect->type != EXPR_SIMPLE)
832 return 0;
833 do {
834 vect++;
835 } while (vect->type && !vect->value);
836 if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0;
837 return 1;
841 * Return TRUE if the argument is a simple scalar, _NOT_ a far-
842 * absolute.
844 static int is_really_simple (expr *vect) {
845 while (vect->type && !vect->value)
846 vect++;
847 if (!vect->type)
848 return 1;
849 if (vect->type != EXPR_SIMPLE)
850 return 0;
851 do {
852 vect++;
853 } while (vect->type && !vect->value);
854 if (vect->type) return 0;
855 return 1;
859 * Return TRUE if the argument is relocatable (i.e. a simple
860 * scalar, plus at most one segment-base, plus possibly a WRT).
862 static int is_reloc (expr *vect) {
863 while (vect->type && !vect->value)
864 vect++;
865 if (!vect->type)
866 return 1;
867 if (vect->type < EXPR_SIMPLE)
868 return 0;
869 if (vect->type == EXPR_SIMPLE) {
870 do {
871 vect++;
872 } while (vect->type && !vect->value);
873 if (!vect->type)
874 return 1;
876 do {
877 vect++;
878 } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
879 if (!vect->type)
880 return 1;
881 return 1;
885 * Return the scalar part of a relocatable vector. (Including
886 * simple scalar vectors - those qualify as relocatable.)
888 static long reloc_value (expr *vect) {
889 while (vect->type && !vect->value)
890 vect++;
891 if (!vect->type) return 0;
892 if (vect->type == EXPR_SIMPLE)
893 return vect->value;
894 else
895 return 0;
899 * Return the segment number of a relocatable vector, or NO_SEG for
900 * simple scalars.
902 static long reloc_seg (expr *vect) {
903 while (vect->type && (vect->type == EXPR_WRT || !vect->value))
904 vect++;
905 if (vect->type == EXPR_SIMPLE) {
906 do {
907 vect++;
908 } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
910 if (!vect->type)
911 return NO_SEG;
912 else
913 return vect->type - EXPR_SEGBASE;
917 * Return the WRT segment number of a relocatable vector, or NO_SEG
918 * if no WRT part is present.
920 static long reloc_wrt (expr *vect) {
921 while (vect->type && vect->type < EXPR_WRT)
922 vect++;
923 if (vect->type == EXPR_WRT) {
924 return vect->value;
925 } else
926 return NO_SEG;
929 static void eval_reset(void) {
930 tempptr = exprtempstorage; /* initialise temporary storage */
934 * The SEG operator: calculate the segment part of a relocatable
935 * value. Return NULL, as usual, if an error occurs. Report the
936 * error too.
938 static expr *segment_part (expr *e) {
939 long seg;
941 if (!is_reloc(e)) {
942 error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
943 return NULL;
946 seg = reloc_seg(e);
947 if (seg == NO_SEG) {
948 error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
949 return NULL;
950 } else if (seg & SEG_ABS)
951 return scalarvect(seg & ~SEG_ABS);
952 else {
953 expr *f = tempptr++;
954 tempptr++->type = 0;
955 f->type = EXPR_SEGBASE+outfmt->segbase(seg+1);
956 f->value = 1;
957 return f;
962 * Recursive-descent parser. Called with a single boolean operand,
963 * which is TRUE if the evaluation is critical (i.e. unresolved
964 * symbols are an error condition). Must update the global `i' to
965 * reflect the token after the parsed string. May return NULL.
967 * evaluate() should report its own errors: on return it is assumed
968 * that if NULL has been returned, the error has already been
969 * reported.
973 * Grammar parsed is:
975 * expr : expr0 [ WRT expr6 ]
976 * expr0 : expr1 [ {|} expr1]
977 * expr1 : expr2 [ {^} expr2]
978 * expr2 : expr3 [ {&} expr3]
979 * expr3 : expr4 [ {<<,>>} expr4...]
980 * expr4 : expr5 [ {+,-} expr5...]
981 * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
982 * expr6 : { ~,+,-,SEG } expr6
983 * | (expr0)
984 * | symbol
985 * | $
986 * | number
989 static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int);
990 static expr *expr4(int), *expr5(int), *expr6(int);
992 static expr *expr0(int critical) {
993 expr *e, *f;
995 e = expr1(critical);
996 if (!e)
997 return NULL;
998 while (i == '|') {
999 i = nexttoken();
1000 f = expr1(critical);
1001 if (!f)
1002 return NULL;
1003 if (!is_simple(e) || !is_simple(f)) {
1004 error(ERR_NONFATAL, "`|' operator may only be applied to"
1005 " scalar values");
1007 e = scalarvect (reloc_value(e) | reloc_value(f));
1009 return e;
1012 static expr *expr1(int critical) {
1013 expr *e, *f;
1015 e = expr2(critical);
1016 if (!e)
1017 return NULL;
1018 while (i == '^') {
1019 i = nexttoken();
1020 f = expr2(critical);
1021 if (!f)
1022 return NULL;
1023 if (!is_simple(e) || !is_simple(f)) {
1024 error(ERR_NONFATAL, "`^' operator may only be applied to"
1025 " scalar values");
1027 e = scalarvect (reloc_value(e) ^ reloc_value(f));
1029 return e;
1032 static expr *expr2(int critical) {
1033 expr *e, *f;
1035 e = expr3(critical);
1036 if (!e)
1037 return NULL;
1038 while (i == '&') {
1039 i = nexttoken();
1040 f = expr3(critical);
1041 if (!f)
1042 return NULL;
1043 if (!is_simple(e) || !is_simple(f)) {
1044 error(ERR_NONFATAL, "`&' operator may only be applied to"
1045 " scalar values");
1047 e = scalarvect (reloc_value(e) & reloc_value(f));
1049 return e;
1052 static expr *expr3(int critical) {
1053 expr *e, *f;
1055 e = expr4(critical);
1056 if (!e)
1057 return NULL;
1058 while (i == TOKEN_SHL || i == TOKEN_SHR) {
1059 int j = i;
1060 i = nexttoken();
1061 f = expr4(critical);
1062 if (!f)
1063 return NULL;
1064 if (!is_simple(e) || !is_simple(f)) {
1065 error(ERR_NONFATAL, "shift operator may only be applied to"
1066 " scalar values");
1068 switch (j) {
1069 case TOKEN_SHL:
1070 e = scalarvect (reloc_value(e) << reloc_value(f));
1071 break;
1072 case TOKEN_SHR:
1073 e = scalarvect (((unsigned long)reloc_value(e)) >>
1074 reloc_value(f));
1075 break;
1078 return e;
1081 static expr *expr4(int critical) {
1082 expr *e, *f;
1084 e = expr5(critical);
1085 if (!e)
1086 return NULL;
1087 while (i == '+' || i == '-') {
1088 int j = i;
1089 i = nexttoken();
1090 f = expr5(critical);
1091 if (!f)
1092 return NULL;
1093 switch (j) {
1094 case '+':
1095 e = add_vectors (e, f);
1096 break;
1097 case '-':
1098 e = add_vectors (e, scalar_mult(f, -1L));
1099 break;
1102 return e;
1105 static expr *expr5(int critical) {
1106 expr *e, *f;
1108 e = expr6(critical);
1109 if (!e)
1110 return NULL;
1111 while (i == '*' || i == '/' || i == '*' ||
1112 i == TOKEN_SDIV || i == TOKEN_SMOD) {
1113 int j = i;
1114 i = nexttoken();
1115 f = expr6(critical);
1116 if (!f)
1117 return NULL;
1118 if (j != '*' && (!is_simple(e) || !is_simple(f))) {
1119 error(ERR_NONFATAL, "division operator may only be applied to"
1120 " scalar values");
1121 return NULL;
1123 if (j != '*' && reloc_value(f) == 0) {
1124 error(ERR_NONFATAL, "division by zero");
1125 return NULL;
1127 switch (j) {
1128 case '*':
1129 if (is_simple(e))
1130 e = scalar_mult (f, reloc_value(e));
1131 else if (is_simple(f))
1132 e = scalar_mult (e, reloc_value(f));
1133 else {
1134 error(ERR_NONFATAL, "unable to multiply two "
1135 "non-scalar objects");
1136 return NULL;
1138 break;
1139 case '/':
1140 e = scalarvect (((unsigned long)reloc_value(e)) /
1141 ((unsigned long)reloc_value(f)));
1142 break;
1143 case '%':
1144 e = scalarvect (((unsigned long)reloc_value(e)) %
1145 ((unsigned long)reloc_value(f)));
1146 break;
1147 case TOKEN_SDIV:
1148 e = scalarvect (((signed long)reloc_value(e)) /
1149 ((signed long)reloc_value(f)));
1150 break;
1151 case TOKEN_SMOD:
1152 e = scalarvect (((signed long)reloc_value(e)) %
1153 ((signed long)reloc_value(f)));
1154 break;
1157 return e;
1160 static expr *expr6(int critical) {
1161 expr *e;
1162 long label_seg, label_ofs;
1164 if (i == '-') {
1165 i = nexttoken();
1166 e = expr6(critical);
1167 if (!e)
1168 return NULL;
1169 return scalar_mult (e, -1L);
1170 } else if (i == '+') {
1171 i = nexttoken();
1172 return expr6(critical);
1173 } else if (i == '~') {
1174 i = nexttoken();
1175 e = expr6(critical);
1176 if (!e)
1177 return NULL;
1178 if (!is_simple(e)) {
1179 error(ERR_NONFATAL, "`~' operator may only be applied to"
1180 " scalar values");
1181 return NULL;
1183 return scalarvect(~reloc_value(e));
1184 } else if (i == TOKEN_SEG) {
1185 i = nexttoken();
1186 e = expr6(critical);
1187 if (!e)
1188 return NULL;
1189 return segment_part(e);
1190 } else if (i == '(') {
1191 i = nexttoken();
1192 e = expr0(critical);
1193 if (!e)
1194 return NULL;
1195 if (i != ')') {
1196 error(ERR_NONFATAL, "expecting `)'");
1197 return NULL;
1199 i = nexttoken();
1200 return e;
1201 } else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID ||
1202 i == TOKEN_HERE || i == TOKEN_BASE) {
1203 e = tempptr;
1204 switch (i) {
1205 case TOKEN_NUM:
1206 e->type = EXPR_SIMPLE;
1207 e->value = tokval.t_integer;
1208 break;
1209 case TOKEN_REG:
1210 e->type = tokval.t_integer;
1211 e->value = 1;
1212 break;
1213 case TOKEN_ID:
1214 case TOKEN_HERE:
1215 case TOKEN_BASE:
1217 * Since the whole line is parsed before the label it
1218 * defines is given to the label manager, we have
1219 * problems with lines such as
1221 * end: TIMES 512-(end-start) DB 0
1223 * where `end' is not known on pass one, despite not
1224 * really being a forward reference, and due to
1225 * criticality it is _needed_. Hence we check our label
1226 * against the currently defined one, and do our own
1227 * resolution of it if we have to.
1229 if (i == TOKEN_BASE) {
1230 label_seg = seg;
1231 label_ofs = 0;
1232 } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) {
1233 label_seg = seg;
1234 label_ofs = ofs;
1235 } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) {
1236 if (critical == 2) {
1237 error (ERR_NONFATAL, "symbol `%s' undefined",
1238 tokval.t_charptr);
1239 return NULL;
1240 } else if (critical == 1) {
1241 error (ERR_NONFATAL, "symbol `%s' not defined before use",
1242 tokval.t_charptr);
1243 return NULL;
1244 } else {
1245 label_seg = seg;
1246 label_ofs = ofs;
1249 e->type = EXPR_SIMPLE;
1250 e->value = label_ofs;
1251 if (label_seg!=NO_SEG) {
1252 tempptr++;
1253 tempptr->type = EXPR_SEGBASE + label_seg;
1254 tempptr->value = 1;
1256 break;
1258 tempptr++;
1259 tempptr->type = 0;
1260 tempptr++;
1261 i = nexttoken();
1262 return e;
1263 } else {
1264 error(ERR_NONFATAL, "expression syntax error");
1265 return NULL;
1269 static expr *evaluate (int critical) {
1270 expr *e;
1271 expr *f = NULL;
1273 e = expr0 (critical);
1274 if (!e)
1275 return NULL;
1277 if (i == TOKEN_WRT) {
1278 if (!is_reloc(e)) {
1279 error(ERR_NONFATAL, "invalid left-hand operand to WRT");
1280 return NULL;
1282 i = nexttoken(); /* eat the WRT */
1283 f = expr6 (critical);
1284 if (!f)
1285 return NULL;
1287 e = scalar_mult (e, 1L); /* strip far-absolute segment part */
1288 if (f) {
1289 expr *g = tempptr++;
1290 tempptr++->type = 0;
1291 g->type = EXPR_WRT;
1292 if (!is_reloc(f)) {
1293 error(ERR_NONFATAL, "invalid right-hand operand to WRT");
1294 return NULL;
1296 g->value = reloc_seg(f);
1297 if (g->value == NO_SEG)
1298 g->value = reloc_value(f) | SEG_ABS;
1299 else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) {
1300 error(ERR_NONFATAL, "invalid right-hand operand to WRT");
1301 return NULL;
1303 e = add_vectors (e, g);
1305 return e;