1 /* parser.c source line parser for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * initial version 27/iii/95 by Simon Tatham
25 static long reg_flags
[] = { /* sizes and special flags */
26 0, REG8
, REG_AL
, REG_AX
, REG8
, REG8
, REG16
, REG16
, REG8
, REG_CL
,
27 REG_CREG
, REG_CREG
, REG_CREG
, REG_CR4
, REG_CS
, REG_CX
, REG8
,
28 REG16
, REG8
, REG_DREG
, REG_DREG
, REG_DREG
, REG_DREG
, REG_DREG
,
29 REG_DREG
, REG_DESS
, REG_DX
, REG_EAX
, REG32
, REG32
, REG_ECX
,
30 REG32
, REG32
, REG_DESS
, REG32
, REG32
, REG_FSGS
, REG_FSGS
,
31 MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
,
32 REG16
, REG16
, REG_DESS
, FPU0
, FPUREG
, FPUREG
, FPUREG
, FPUREG
,
33 FPUREG
, FPUREG
, FPUREG
, REG_TREG
, REG_TREG
, REG_TREG
, REG_TREG
,
37 enum { /* special tokens */
38 S_BYTE
, S_DWORD
, S_FAR
, S_LONG
, S_NEAR
, S_QWORD
, S_SHORT
, S_TO
,
42 static char *special_names
[] = { /* and the actual text */
43 "byte", "dword", "far", "long", "near", "qword", "short", "to",
47 static char *prefix_names
[] = {
48 "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
49 "repnz", "repz", "times"
53 * Evaluator datatype. Expressions, within the evaluator, are
54 * stored as an array of these beasts, terminated by a record with
55 * type==0. Mostly, it's a vector type: each type denotes some kind
56 * of a component, and the value denotes the multiple of that
57 * component present in the expression. The exception is the WRT
58 * type, whose `value' field denotes the segment to which the
59 * expression is relative. These segments will be segment-base
60 * types, i.e. either odd segment values or SEG_ABS types. So it is
61 * still valid to assume that anything with a `value' field of zero
65 long type
; /* a register, or EXPR_xxx */
66 long value
; /* must be >= 32 bits */
69 static void eval_reset(void);
70 static expr
*evaluate(int);
73 * ASSUMPTION MADE HERE. The number of distinct register names
74 * (i.e. possible "type" fields for an expr structure) does not
77 #define EXPR_SIMPLE 126
79 #define EXPR_SEGBASE 128
81 static int is_reloc(expr
*);
82 static int is_simple(expr
*);
83 static int is_really_simple (expr
*);
84 static long reloc_value(expr
*);
85 static long reloc_seg(expr
*);
86 static long reloc_wrt(expr
*);
88 enum { /* token types, other than chars */
89 TOKEN_ID
= 256, TOKEN_NUM
, TOKEN_REG
, TOKEN_INSN
, TOKEN_ERRNUM
,
90 TOKEN_HERE
, TOKEN_BASE
, TOKEN_SPECIAL
, TOKEN_PREFIX
, TOKEN_SHL
,
91 TOKEN_SHR
, TOKEN_SDIV
, TOKEN_SMOD
, TOKEN_SEG
, TOKEN_WRT
,
96 long t_integer
, t_inttwo
;
100 static char tempstorage
[1024], *q
;
101 static int bsi (char *string
, char **array
, int size
);/* binary search */
103 static int nexttoken (void);
104 static int is_comma_next (void);
108 static struct tokenval tokval
;
109 static lfunc labelfunc
;
112 static struct ofmt
*outfmt
;
114 static long seg
, ofs
;
116 insn
*parse_line (long segment
, long offset
, lfunc lookup_label
, int pass
,
117 char *buffer
, insn
*result
, struct ofmt
*output
,
124 labelfunc
= lookup_label
;
133 result
->eops
= NULL
; /* must do this, whatever happens */
135 if (i
==0) { /* blank line - ignore */
136 result
->label
= NULL
; /* so, no label on it */
137 result
->opcode
= -1; /* and no instruction either */
140 if (i
!= TOKEN_ID
&& i
!= TOKEN_INSN
&& i
!= TOKEN_PREFIX
&&
141 (i
!=TOKEN_REG
|| (REG_SREG
& ~reg_flags
[tokval
.t_integer
]))) {
142 error (ERR_NONFATAL
, "label or instruction expected"
143 " at start of line");
144 result
->label
= NULL
;
149 if (i
== TOKEN_ID
) { /* there's a label here */
150 label
= result
->label
= tokval
.t_charptr
;
152 if (i
== ':') { /* skip over the optional colon */
155 } else /* no label; so, moving swiftly on */
156 result
->label
= NULL
;
159 result
->opcode
= -1; /* this line contains just a label */
166 while (i
== TOKEN_PREFIX
||
167 (i
==TOKEN_REG
&& !(REG_SREG
& ~reg_flags
[tokval
.t_integer
]))) {
169 * Handle special case: the TIMES prefix.
171 if (i
== TOKEN_PREFIX
&& tokval
.t_integer
== P_TIMES
) {
176 value
= evaluate (pass
);
177 if (!value
) { /* but, error in evaluator */
178 result
->opcode
= -1; /* unrecoverable parse error: */
179 return result
; /* ignore this instruction */
181 if (!is_simple (value
)) {
183 "non-constant argument supplied to TIMES");
186 result
->times
= value
->value
;
188 if (result
->nprefix
== MAXPREFIX
)
190 "instruction has more than %d prefixes", MAXPREFIX
);
192 result
->prefixes
[result
->nprefix
++] = tokval
.t_integer
;
197 if (i
!= TOKEN_INSN
) {
198 error (ERR_NONFATAL
, "parser: instruction expected");
203 result
->opcode
= tokval
.t_integer
;
204 result
->condition
= tokval
.t_inttwo
;
207 * RESB, RESW and RESD cannot be satisfied with incorrectly
208 * evaluated operands, since the correct values _must_ be known
209 * on the first pass. Hence, even in pass one, we set the
210 * `critical' flag on calling evaluate(), so that it will bomb
211 * out on undefined symbols. Nasty, but there's nothing we can
214 * For the moment, EQU has the same difficulty, so we'll
217 if (result
->opcode
== I_RESB
||
218 result
->opcode
== I_RESW
||
219 result
->opcode
== I_RESD
||
220 result
->opcode
== I_RESQ
||
221 result
->opcode
== I_REST
||
222 result
->opcode
== I_EQU
)
225 critical
= (pass
==2 ? 2 : 0);
227 if (result
->opcode
== I_DB
||
228 result
->opcode
== I_DW
||
229 result
->opcode
== I_DD
||
230 result
->opcode
== I_DQ
||
231 result
->opcode
== I_DT
) {
232 extop
*eop
, **tail
= &result
->eops
;
236 * Begin to read the DB/DW/DD/DQ/DT operands.
242 eop
= *tail
= nasm_malloc(sizeof(extop
));
245 eop
->type
= EOT_NOTHING
;
248 if (i
== TOKEN_NUM
&& tokval
.t_charptr
&& is_comma_next()) {
249 eop
->type
= EOT_DB_STRING
;
250 eop
->stringval
= tokval
.t_charptr
;
251 eop
->stringlen
= tokval
.t_inttwo
;
252 i
= nexttoken(); /* eat the comma */
256 if (i
== TOKEN_FLOAT
|| i
== '-') {
263 if (i
!= TOKEN_FLOAT
) {
269 if (i
== TOKEN_FLOAT
) {
270 eop
->type
= EOT_DB_STRING
;
272 if (result
->opcode
== I_DD
)
274 else if (result
->opcode
== I_DQ
)
276 else if (result
->opcode
== I_DT
)
279 error(ERR_NONFATAL
, "floating-point constant"
280 " encountered in `D%c' instruction",
281 result
->opcode
== I_DW
? 'W' : 'B');
282 eop
->type
= EOT_NOTHING
;
285 if (!float_const (tokval
.t_charptr
, sign
,
286 (unsigned char *)eop
->stringval
,
287 eop
->stringlen
, error
))
288 eop
->type
= EOT_NOTHING
;
289 i
= nexttoken(); /* eat the comma */
294 /* anything else */ {
297 value
= evaluate (critical
);
298 if (!value
) { /* but, error in evaluator */
299 result
->opcode
= -1;/* unrecoverable parse error: */
300 return result
; /* ignore this instruction */
302 if (is_reloc(value
)) {
303 eop
->type
= EOT_DB_NUMBER
;
304 eop
->offset
= reloc_value(value
);
305 eop
->segment
= reloc_seg(value
);
306 eop
->wrt
= reloc_wrt(value
);
309 "`%s' operand %d: expression is not simple"
311 insn_names
[result
->opcode
], oper_num
);
318 /* right. Now we begin to parse the operands. There may be up to three
319 * of these, separated by commas, and terminated by a zero token. */
321 for (operand
= 0; operand
< 3; operand
++) {
322 expr
*seg
, *value
; /* used most of the time */
323 int mref
; /* is this going to be a memory ref? */
325 result
->oprs
[operand
].addr_size
= 0;/* have to zero this whatever */
327 if (i
== 0) break; /* end of operands: get out of here */
328 result
->oprs
[operand
].type
= 0; /* so far, no override */
329 while (i
== TOKEN_SPECIAL
) {/* size specifiers */
330 switch ((int)tokval
.t_integer
) {
332 result
->oprs
[operand
].type
|= BITS8
;
335 result
->oprs
[operand
].type
|= BITS16
;
339 result
->oprs
[operand
].type
|= BITS32
;
342 result
->oprs
[operand
].type
|= BITS64
;
345 result
->oprs
[operand
].type
|= BITS80
;
348 result
->oprs
[operand
].type
|= TO
;
351 result
->oprs
[operand
].type
|= FAR
;
354 result
->oprs
[operand
].type
|= NEAR
;
357 result
->oprs
[operand
].type
|= SHORT
;
363 if (i
== '[') { /* memory reference */
366 if (i
== TOKEN_SPECIAL
) { /* check for address size override */
367 switch ((int)tokval
.t_integer
) {
369 result
->oprs
[operand
].addr_size
= 16;
373 result
->oprs
[operand
].addr_size
= 32;
376 error (ERR_NONFATAL
, "invalid size specification in"
377 " effective address");
381 } else /* immediate operand, or register */
386 value
= evaluate (critical
);
387 if (!value
) { /* error in evaluator */
388 result
->opcode
= -1; /* unrecoverable parse error: */
389 return result
; /* ignore this instruction */
391 if (i
== ':' && mref
) { /* it was seg:offset */
392 seg
= value
; /* so shift this into the segment */
393 i
= nexttoken(); /* then skip the colon */
394 if (i
== TOKEN_SPECIAL
) { /* another check for size override */
395 switch ((int)tokval
.t_integer
) {
397 result
->oprs
[operand
].addr_size
= 16;
401 result
->oprs
[operand
].addr_size
= 32;
404 error (ERR_NONFATAL
, "invalid size specification in"
405 " effective address");
409 value
= evaluate (critical
);
410 /* and get the offset */
411 if (!value
) { /* but, error in evaluator */
412 result
->opcode
= -1; /* unrecoverable parse error: */
413 return result
; /* ignore this instruction */
416 if (mref
) { /* find ] at the end */
418 error (ERR_NONFATAL
, "parser: expecting ]");
419 do { /* error recovery again */
421 } while (i
!= 0 && i
!= ',');
422 } else /* we got the required ] */
424 } else { /* immediate operand */
425 if (i
!= 0 && i
!= ',' && i
!= ':') {
426 error (ERR_NONFATAL
, "comma or end of line expected");
427 do { /* error recovery */
429 } while (i
!= 0 && i
!= ',');
430 } else if (i
== ':') {
431 result
->oprs
[operand
].type
|= COLON
;
435 /* now convert the exprs returned from evaluate() into operand
438 if (mref
) { /* it's a memory reference */
440 int b
, i
, s
; /* basereg, indexreg, scale */
443 if (seg
) { /* segment override */
444 if (seg
[1].type
!=0 || seg
->value
!=1 ||
445 REG_SREG
& ~reg_flags
[seg
->type
])
446 error (ERR_NONFATAL
, "invalid segment override");
447 else if (result
->nprefix
== MAXPREFIX
)
449 "instruction has more than %d prefixes",
452 result
->prefixes
[result
->nprefix
++] = seg
->type
;
455 b
= i
= -1, o
= s
= 0;
457 if (e
->type
< EXPR_SIMPLE
) { /* this bit's a register */
458 if (e
->value
== 1) /* in fact it can be basereg */
460 else /* no, it has to be indexreg */
461 i
= e
->type
, s
= e
->value
;
464 if (e
->type
&& e
->type
< EXPR_SIMPLE
) {/* it's a second register */
465 if (e
->value
!= 1) { /* it has to be indexreg */
466 if (i
!= -1) { /* but it can't be */
467 error(ERR_NONFATAL
, "invalid effective address");
471 i
= e
->type
, s
= e
->value
;
472 } else { /* it can be basereg */
473 if (b
!= -1) /* or can it? */
480 if (e
->type
!= 0) { /* is there an offset? */
481 if (e
->type
< EXPR_SIMPLE
) {/* in fact, is there an error? */
482 error (ERR_NONFATAL
, "invalid effective address");
486 if (e
->type
== EXPR_SIMPLE
) {
490 if (e
->type
== EXPR_WRT
) {
491 result
->oprs
[operand
].wrt
= e
->value
;
494 result
->oprs
[operand
].wrt
= NO_SEG
;
495 if (e
->type
!= 0) { /* is there a segment id? */
496 if (e
->type
< EXPR_SEGBASE
) {
498 "invalid effective address");
502 result
->oprs
[operand
].segment
= (e
->type
-
506 result
->oprs
[operand
].segment
= NO_SEG
;
510 result
->oprs
[operand
].wrt
= NO_SEG
;
511 result
->oprs
[operand
].segment
= NO_SEG
;
514 if (e
->type
!= 0) { /* there'd better be nothing left! */
515 error (ERR_NONFATAL
, "invalid effective address");
520 result
->oprs
[operand
].type
|= MEMORY
;
521 if (b
==-1 && (i
==-1 || s
==0))
522 result
->oprs
[operand
].type
|= MEM_OFFS
;
523 result
->oprs
[operand
].basereg
= b
;
524 result
->oprs
[operand
].indexreg
= i
;
525 result
->oprs
[operand
].scale
= s
;
526 result
->oprs
[operand
].offset
= o
;
527 } else { /* it's not a memory reference */
528 if (is_reloc(value
)) { /* it's immediate */
529 result
->oprs
[operand
].type
|= IMMEDIATE
;
530 result
->oprs
[operand
].offset
= reloc_value(value
);
531 result
->oprs
[operand
].segment
= reloc_seg(value
);
532 result
->oprs
[operand
].wrt
= reloc_wrt(value
);
533 if (is_simple(value
) && reloc_value(value
)==1)
534 result
->oprs
[operand
].type
|= UNITY
;
535 } else { /* it's a register */
536 if (value
->type
>=EXPR_SIMPLE
|| value
->value
!=1) {
537 error (ERR_NONFATAL
, "invalid operand type");
541 /* clear overrides, except TO which applies to FPU regs */
542 result
->oprs
[operand
].type
&= TO
;
543 result
->oprs
[operand
].type
|= REGISTER
;
544 result
->oprs
[operand
].type
|= reg_flags
[value
->type
];
545 result
->oprs
[operand
].basereg
= value
->type
;
550 result
->operands
= operand
; /* set operand count */
552 while (operand
<3) /* clear remaining operands */
553 result
->oprs
[operand
++].type
= 0;
556 * Transform RESW, RESD, RESQ, REST into RESB.
558 switch (result
->opcode
) {
559 case I_RESW
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=2; break;
560 case I_RESD
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=4; break;
561 case I_RESQ
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=8; break;
562 case I_REST
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=10; break;
568 static int is_comma_next (void) {
572 while (isspace(*p
)) p
++;
573 return (*p
== ',' || *p
== ';' || !*p
);
576 /* isidstart matches any character that may start an identifier, and isidchar
577 * matches any character that may appear at places other than the start of an
578 * identifier. E.g. a period may only appear at the start of an identifier
579 * (for local labels), whereas a number may appear anywhere *but* at the
/*
 * Identifier character classes.
 *   isidstart(c) - c may begin an identifier: a letter, '_', '.' or '?'.
 *   isidchar(c)  - c may continue an identifier: anything isidstart
 *                  accepts, plus a digit, '$', '#', '@' or '~'.
 *
 * The argument is converted to unsigned char before it is handed to
 * the <ctype.h> classifiers: they are only defined for values
 * representable as unsigned char (or EOF), and a plain `char' holding
 * a high-bit byte may be negative.
 *
 * Both macros evaluate their argument more than once, so do not pass
 * an expression with side effects.
 */
#define isidstart(c) ( isalpha((unsigned char)(c)) || (c)=='_' || \
                       (c)=='.' || (c)=='?' )
#define isidchar(c)  ( isidstart(c) || isdigit((unsigned char)(c)) || \
                       (c)=='$' || (c)=='#' || (c)=='@' || (c)=='~' )
/*
 * Character classes for numeric constants.
 *   isnumstart(c) - c may begin a number: a decimal digit or '$'.
 *   isnumchar(c)  - c may appear inside a number: any alphanumeric.
 *
 * The argument is converted to unsigned char before reaching the
 * <ctype.h> classifiers, which are undefined for negative values
 * other than EOF. isnumstart evaluates its argument twice.
 */
#define isnumstart(c) ( isdigit((unsigned char)(c)) || (c)=='$' )
#define isnumchar(c)  ( isalnum((unsigned char)(c)) )
/*
 * numvalue(c): numeric value of the digit character c.
 * Characters at or above 'a' map to 10 + (c - 'a'), characters at or
 * above 'A' (but below 'a') map to 10 + (c - 'A'), and everything else
 * maps to c - '0'. No validation is done, and the argument is
 * evaluated more than once.
 */
#define numvalue(c) ( (c) < 'a' \
                      ? ( (c) < 'A' ? (c) - '0' : (c) - 'A' + 10 ) \
                      : (c) - 'a' + 10 )
596 * This tokeniser routine has only one side effect, that of
597 * updating `bufptr'. Hence by saving `bufptr', lookahead may be
601 static int nexttoken (void) {
602 char ourcopy
[256], *r
, *s
;
604 while (isspace(*bufptr
)) bufptr
++;
605 if (!*bufptr
) return 0;
607 /* we have a token; either an id, a number or a char */
608 if (isidstart(*bufptr
) ||
609 (*bufptr
== '$' && isidstart(bufptr
[1]))) {
610 /* now we've got an identifier */
614 if (*bufptr
== '$') {
619 tokval
.t_charptr
= q
;
621 while (isidchar(*bufptr
)) *q
++ = *bufptr
++;
623 for (s
=tokval
.t_charptr
, r
=ourcopy
; *s
; s
++)
627 return TOKEN_ID
; /* bypass all other checks */
628 /* right, so we have an identifier sitting in temp storage. now,
629 * is it actually a register or instruction name, or what? */
630 if ((tokval
.t_integer
=bsi(ourcopy
, reg_names
,
631 elements(reg_names
)))>=0)
633 if ((tokval
.t_integer
=bsi(ourcopy
, insn_names
,
634 elements(insn_names
)))>=0)
636 for (i
=0; i
<elements(icn
); i
++)
637 if (!strncmp(ourcopy
, icn
[i
], strlen(icn
[i
]))) {
638 char *p
= ourcopy
+ strlen(icn
[i
]);
639 tokval
.t_integer
= ico
[i
];
640 if ((tokval
.t_inttwo
=bsi(p
, conditions
,
641 elements(conditions
)))>=0)
644 if ((tokval
.t_integer
=bsi(ourcopy
, prefix_names
,
645 elements(prefix_names
)))>=0) {
646 tokval
.t_integer
+= PREFIX_ENUM_START
;
649 if ((tokval
.t_integer
=bsi(ourcopy
, special_names
,
650 elements(special_names
)))>=0)
651 return TOKEN_SPECIAL
;
652 if (!strcmp(ourcopy
, "seg"))
654 if (!strcmp(ourcopy
, "wrt"))
657 } else if (*bufptr
== '$' && !isnumchar(bufptr
[1])) {
659 * It's a $ sign with no following hex number; this must
660 * mean it's a Here token ($), evaluating to the current
661 * assembly location, or a Base token ($$), evaluating to
662 * the base of the current segment.
665 if (*bufptr
== '$') {
670 } else if (isnumstart(*bufptr
)) { /* now we've got a number */
675 while (isnumchar(*bufptr
)) {
678 if (*bufptr
== '.') {
680 * a floating point constant
683 while (isnumchar(*bufptr
)) {
687 tokval
.t_charptr
= r
;
691 tokval
.t_integer
= readnum(r
, &rn_error
);
693 return TOKEN_ERRNUM
; /* some malformation occurred */
694 tokval
.t_charptr
= NULL
;
696 } else if (*bufptr
== '\'' || *bufptr
== '"') {/* a char constant */
697 char quote
= *bufptr
++, *r
;
698 r
= tokval
.t_charptr
= bufptr
;
699 while (*bufptr
&& *bufptr
!= quote
) bufptr
++;
700 tokval
.t_inttwo
= bufptr
- r
; /* store full version */
702 return TOKEN_ERRNUM
; /* unmatched quotes */
703 tokval
.t_integer
= 0;
704 r
= bufptr
++; /* skip over final quote */
705 while (quote
!= *--r
) {
706 tokval
.t_integer
= (tokval
.t_integer
<<8) + (unsigned char) *r
;
709 } else if (*bufptr
== ';') { /* a comment has happened - stay */
711 } else if ((*bufptr
== '>' || *bufptr
== '<' ||
712 *bufptr
== '/' || *bufptr
== '%') && bufptr
[1] == *bufptr
) {
714 return (bufptr
[-2] == '>' ? TOKEN_SHR
:
715 bufptr
[-2] == '<' ? TOKEN_SHL
:
716 bufptr
[-2] == '/' ? TOKEN_SDIV
:
718 } else /* just an ordinary char */
719 return (unsigned char) (*bufptr
++);
722 /* return index of "string" in "array", or -1 if no match. */
723 static int bsi (char *string
, char **array
, int size
) {
724 int i
= -1, j
= size
; /* always, i < index < j */
727 int l
= strcmp(string
, array
[k
]);
728 if (l
<0) /* it's in the first half */
730 else if (l
>0) /* it's in the second half */
732 else /* we've got it :) */
735 return -1; /* we haven't got it :( */
738 void cleanup_insn (insn
*i
) {
743 i
->eops
= i
->eops
->next
;
748 /* ------------- Evaluator begins here ------------------ */
750 static expr exprtempstorage
[1024], *tempptr
; /* store exprs in here */
753 * Add two vector datatypes. We have some bizarre behaviour on far-
754 * absolute segment types: we preserve them during addition _only_
755 * if one of the segments is a truly pure scalar.
757 static expr
*add_vectors(expr
*p
, expr
*q
) {
761 preserve
= is_really_simple(p
) || is_really_simple(q
);
763 while (p
->type
&& q
->type
&&
764 p
->type
< EXPR_SEGBASE
+SEG_ABS
&&
765 q
->type
< EXPR_SEGBASE
+SEG_ABS
)
766 if (p
->type
> q
->type
) {
767 tempptr
->type
= q
->type
;
768 tempptr
->value
= q
->value
;
770 } else if (p
->type
< q
->type
) {
771 tempptr
->type
= p
->type
;
772 tempptr
->value
= p
->value
;
774 } else { /* *p and *q have same type */
775 tempptr
->type
= p
->type
;
776 tempptr
->value
= p
->value
+ q
->value
;
780 (preserve
|| p
->type
< EXPR_SEGBASE
+SEG_ABS
)) {
781 tempptr
->type
= p
->type
;
782 tempptr
->value
= p
->value
;
786 (preserve
|| q
->type
< EXPR_SEGBASE
+SEG_ABS
)) {
787 tempptr
->type
= q
->type
;
788 tempptr
->value
= q
->value
;
791 (tempptr
++)->type
= 0;
797 * Multiply a vector by a scalar. Strip far-absolute segment part
800 static expr
*scalar_mult(expr
*vect
, long scalar
) {
803 while (p
->type
&& p
->type
< EXPR_SEGBASE
+SEG_ABS
) {
804 p
->value
= scalar
* (p
->value
);
812 static expr
*scalarvect (long scalar
) {
814 tempptr
->type
= EXPR_SIMPLE
;
815 tempptr
->value
= scalar
;
823 * Return TRUE if the argument is a simple scalar. (Or a far-
824 * absolute, which counts.)
826 static int is_simple (expr
*vect
) {
827 while (vect
->type
&& !vect
->value
)
831 if (vect
->type
!= EXPR_SIMPLE
)
835 } while (vect
->type
&& !vect
->value
);
836 if (vect
->type
&& vect
->type
< EXPR_SEGBASE
+SEG_ABS
) return 0;
841 * Return TRUE if the argument is a simple scalar, _NOT_ a far-
844 static int is_really_simple (expr
*vect
) {
845 while (vect
->type
&& !vect
->value
)
849 if (vect
->type
!= EXPR_SIMPLE
)
853 } while (vect
->type
&& !vect
->value
);
854 if (vect
->type
) return 0;
859 * Return TRUE if the argument is relocatable (i.e. a simple
860 * scalar, plus at most one segment-base, plus possibly a WRT).
862 static int is_reloc (expr
*vect
) {
863 while (vect
->type
&& !vect
->value
)
867 if (vect
->type
< EXPR_SIMPLE
)
869 if (vect
->type
== EXPR_SIMPLE
) {
872 } while (vect
->type
&& !vect
->value
);
878 } while (vect
->type
&& (vect
->type
== EXPR_WRT
|| !vect
->value
));
885 * Return the scalar part of a relocatable vector. (Including
886 * simple scalar vectors - those qualify as relocatable.)
888 static long reloc_value (expr
*vect
) {
889 while (vect
->type
&& !vect
->value
)
891 if (!vect
->type
) return 0;
892 if (vect
->type
== EXPR_SIMPLE
)
899 * Return the segment number of a relocatable vector, or NO_SEG for
902 static long reloc_seg (expr
*vect
) {
903 while (vect
->type
&& (vect
->type
== EXPR_WRT
|| !vect
->value
))
905 if (vect
->type
== EXPR_SIMPLE
) {
908 } while (vect
->type
&& (vect
->type
== EXPR_WRT
|| !vect
->value
));
913 return vect
->type
- EXPR_SEGBASE
;
917 * Return the WRT segment number of a relocatable vector, or NO_SEG
918 * if no WRT part is present.
920 static long reloc_wrt (expr
*vect
) {
921 while (vect
->type
&& vect
->type
< EXPR_WRT
)
923 if (vect
->type
== EXPR_WRT
) {
929 static void eval_reset(void) {
930 tempptr
= exprtempstorage
; /* initialise temporary storage */
934 * The SEG operator: calculate the segment part of a relocatable
935 * value. Return NULL, as usual, if an error occurs. Report the
938 static expr
*segment_part (expr
*e
) {
942 error(ERR_NONFATAL
, "cannot apply SEG to a non-relocatable value");
948 error(ERR_NONFATAL
, "cannot apply SEG to a non-relocatable value");
950 } else if (seg
& SEG_ABS
)
951 return scalarvect(seg
& ~SEG_ABS
);
955 f
->type
= EXPR_SEGBASE
+outfmt
->segbase(seg
+1);
962 * Recursive-descent parser. Called with a single boolean operand,
963 * which is TRUE if the evaluation is critical (i.e. unresolved
964 * symbols are an error condition). Must update the global `i' to
965 * reflect the token after the parsed string. May return NULL.
967 * evaluate() should report its own errors: on return it is assumed
968 * that if NULL has been returned, the error has already been
975 * expr : expr0 [ WRT expr6 ]
976 * expr0 : expr1 [ {|} expr1]
977 * expr1 : expr2 [ {^} expr2]
978 * expr2 : expr3 [ {&} expr3]
979 * expr3 : expr4 [ {<<,>>} expr4...]
980 * expr4 : expr5 [ {+,-} expr5...]
981 * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
982 * expr6 : { ~,+,-,SEG } expr6
989 static expr
*expr0(int), *expr1(int), *expr2(int), *expr3(int);
990 static expr
*expr4(int), *expr5(int), *expr6(int);
992 static expr
*expr0(int critical
) {
1000 f
= expr1(critical
);
1003 if (!is_simple(e
) || !is_simple(f
)) {
1004 error(ERR_NONFATAL
, "`|' operator may only be applied to"
1007 e
= scalarvect (reloc_value(e
) | reloc_value(f
));
1012 static expr
*expr1(int critical
) {
1015 e
= expr2(critical
);
1020 f
= expr2(critical
);
1023 if (!is_simple(e
) || !is_simple(f
)) {
1024 error(ERR_NONFATAL
, "`^' operator may only be applied to"
1027 e
= scalarvect (reloc_value(e
) ^ reloc_value(f
));
1032 static expr
*expr2(int critical
) {
1035 e
= expr3(critical
);
1040 f
= expr3(critical
);
1043 if (!is_simple(e
) || !is_simple(f
)) {
1044 error(ERR_NONFATAL
, "`&' operator may only be applied to"
1047 e
= scalarvect (reloc_value(e
) & reloc_value(f
));
1052 static expr
*expr3(int critical
) {
1055 e
= expr4(critical
);
1058 while (i
== TOKEN_SHL
|| i
== TOKEN_SHR
) {
1061 f
= expr4(critical
);
1064 if (!is_simple(e
) || !is_simple(f
)) {
1065 error(ERR_NONFATAL
, "shift operator may only be applied to"
1070 e
= scalarvect (reloc_value(e
) << reloc_value(f
));
1073 e
= scalarvect (((unsigned long)reloc_value(e
)) >>
1081 static expr
*expr4(int critical
) {
1084 e
= expr5(critical
);
1087 while (i
== '+' || i
== '-') {
1090 f
= expr5(critical
);
1095 e
= add_vectors (e
, f
);
1098 e
= add_vectors (e
, scalar_mult(f
, -1L));
1105 static expr
*expr5(int critical
) {
1108 e
= expr6(critical
);
1111 while (i
== '*' || i
== '/' || i
== '*' ||
1112 i
== TOKEN_SDIV
|| i
== TOKEN_SMOD
) {
1115 f
= expr6(critical
);
1118 if (j
!= '*' && (!is_simple(e
) || !is_simple(f
))) {
1119 error(ERR_NONFATAL
, "division operator may only be applied to"
1123 if (j
!= '*' && reloc_value(f
) == 0) {
1124 error(ERR_NONFATAL
, "division by zero");
1130 e
= scalar_mult (f
, reloc_value(e
));
1131 else if (is_simple(f
))
1132 e
= scalar_mult (e
, reloc_value(f
));
1134 error(ERR_NONFATAL
, "unable to multiply two "
1135 "non-scalar objects");
1140 e
= scalarvect (((unsigned long)reloc_value(e
)) /
1141 ((unsigned long)reloc_value(f
)));
1144 e
= scalarvect (((unsigned long)reloc_value(e
)) %
1145 ((unsigned long)reloc_value(f
)));
1148 e
= scalarvect (((signed long)reloc_value(e
)) /
1149 ((signed long)reloc_value(f
)));
1152 e
= scalarvect (((signed long)reloc_value(e
)) %
1153 ((signed long)reloc_value(f
)));
1160 static expr
*expr6(int critical
) {
1162 long label_seg
, label_ofs
;
1166 e
= expr6(critical
);
1169 return scalar_mult (e
, -1L);
1170 } else if (i
== '+') {
1172 return expr6(critical
);
1173 } else if (i
== '~') {
1175 e
= expr6(critical
);
1178 if (!is_simple(e
)) {
1179 error(ERR_NONFATAL
, "`~' operator may only be applied to"
1183 return scalarvect(~reloc_value(e
));
1184 } else if (i
== TOKEN_SEG
) {
1186 e
= expr6(critical
);
1189 return segment_part(e
);
1190 } else if (i
== '(') {
1192 e
= expr0(critical
);
1196 error(ERR_NONFATAL
, "expecting `)'");
1201 } else if (i
== TOKEN_NUM
|| i
== TOKEN_REG
|| i
== TOKEN_ID
||
1202 i
== TOKEN_HERE
|| i
== TOKEN_BASE
) {
1206 e
->type
= EXPR_SIMPLE
;
1207 e
->value
= tokval
.t_integer
;
1210 e
->type
= tokval
.t_integer
;
1217 * Since the whole line is parsed before the label it
1218 * defines is given to the label manager, we have
1219 * problems with lines such as
1221 * end: TIMES 512-(end-start) DB 0
1223 * where `end' is not known on pass one, despite not
1224 * really being a forward reference, and due to
1225 * criticality it is _needed_. Hence we check our label
1226 * against the currently defined one, and do our own
1227 * resolution of it if we have to.
1229 if (i
== TOKEN_BASE
) {
1232 } else if (i
== TOKEN_HERE
|| !strcmp(tokval
.t_charptr
, label
)) {
1235 } else if (!labelfunc(tokval
.t_charptr
, &label_seg
, &label_ofs
)) {
1236 if (critical
== 2) {
1237 error (ERR_NONFATAL
, "symbol `%s' undefined",
1240 } else if (critical
== 1) {
1241 error (ERR_NONFATAL
, "symbol `%s' not defined before use",
1249 e
->type
= EXPR_SIMPLE
;
1250 e
->value
= label_ofs
;
1251 if (label_seg
!=NO_SEG
) {
1253 tempptr
->type
= EXPR_SEGBASE
+ label_seg
;
1264 error(ERR_NONFATAL
, "expression syntax error");
1269 static expr
*evaluate (int critical
) {
1273 e
= expr0 (critical
);
1277 if (i
== TOKEN_WRT
) {
1279 error(ERR_NONFATAL
, "invalid left-hand operand to WRT");
1282 i
= nexttoken(); /* eat the WRT */
1283 f
= expr6 (critical
);
1287 e
= scalar_mult (e
, 1L); /* strip far-absolute segment part */
1289 expr
*g
= tempptr
++;
1290 tempptr
++->type
= 0;
1293 error(ERR_NONFATAL
, "invalid right-hand operand to WRT");
1296 g
->value
= reloc_seg(f
);
1297 if (g
->value
== NO_SEG
)
1298 g
->value
= reloc_value(f
) | SEG_ABS
;
1299 else if (!(g
->value
& SEG_ABS
) && !(g
->value
% 2) && critical
) {
1300 error(ERR_NONFATAL
, "invalid right-hand operand to WRT");
1303 e
= add_vectors (e
, g
);