/* parser.c   source line parser for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * initial version 27/iii/95 by Simon Tatham
 */
/*
 * State of the ABSOLUTE segment. These objects are only declared here
 * (extern); their definitions live in another translation unit.
 */
extern int in_abs_seg;      /* ABSOLUTE segment flag */
extern long abs_seg;        /* ABSOLUTE segment */
extern long abs_offset;     /* ABSOLUTE segment offset */
26 #include "regflags.c" /* List of register flags */
/*
 * Special tokens. The enumerators keep their original order, so their
 * implicit values (S_BYTE == 0 ... S_WORD == 11) are unchanged.
 * NOTE(review): presumably these are the values found in
 * tokval.t_integer when the scanner returns TOKEN_SPECIAL - confirm
 * against the scanner tables.
 */
enum {                                 /* special tokens */
    S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_NOSPLIT, S_QWORD,
    S_SHORT, S_STRICT, S_TO, S_TWORD, S_WORD
};
/* Forward declaration: is_comma_next() is defined later in this file. */
33 static int is_comma_next (void);
/* Token record whose address parse_line() passes to stdscan() and evaluate(). */
36 static struct tokenval tokval
;
38 static struct ofmt
*outfmt
; /* Structure of addresses of output routines */
39 static loc_t
*location
; /* Pointer to current line's segment,offset */
/*
 * parser_global_info(): receives the output-format descriptor (output)
 * and a pointer to the current location (locp).
 * NOTE(review): the body is not present in this listing; presumably it
 * stores the two arguments in the file-scope outfmt and location
 * pointers - confirm against the full source.
 */
41 void parser_global_info (struct ofmt
*output
, loc_t
*locp
)
/*
 * parse_line(): turn the source line in buffer into the insn pointed to
 * by result, and return result.
 *
 * NOTE(review): this listing is incomplete - each line carries its
 * original file line number and many original lines (braces, case
 * labels, local declarations) are absent. The summary below covers only
 * the statements that are visible here.
 *
 * Visible flow:
 *  - Points stdscan_bufptr at buffer and reads tokens with
 *    stdscan(NULL, &tokval).
 *  - Resets result->forw_ref, label, eops and operands; a blank line
 *    (token 0) yields opcode -1.
 *  - A leading TOKEN_ID is taken as a label; a following colon is
 *    skipped. Unless the next token is the EQU instruction, ldef() is
 *    called for the label with segment (in_abs_seg ? abs_seg :
 *    location->segment) and offset location->offset.
 *  - Prefix tokens and segment-register tokens are collected into
 *    result->prefixes, checked against MAXPREFIX. The TIMES prefix is
 *    evaluated separately and its value stored in result->times.
 *  - Prefixes followed by end of line become RESB with one immediate
 *    operand of 0.
 *  - result->opcode and result->condition are taken from
 *    tokval.t_integer and tokval.t_inttwo.
 *  - DB/DW/DD/DQ/DT/INCBIN: operands are read into extop nodes reached
 *    from result->eops (strings, floating-point constants and numeric
 *    expressions); INCBIN operand types are then checked.
 *  - Other instructions: up to three operands are parsed, each with
 *    optional size specifiers, then either a memory reference opened by
 *    [ or & (with optional segment override) or an immediate/register
 *    expression obtained from evaluate().
 *  - RESW, RESD, RESQ and REST are rewritten as RESB with
 *    oprs[0].offset multiplied by 2, 4, 8 and 10 respectively.
 *
 * Error paths visible here set result->opcode to -1 and return result.
 */
47 insn
*parse_line (int pass
, char *buffer
, insn
*result
,
48 efunc errfunc
, evalfunc evaluate
, ldfunc ldef
)
52 struct eval_hints hints
;
54 result
->forw_ref
= FALSE
;
58 stdscan_bufptr
= buffer
;
59 i
= stdscan(NULL
, &tokval
);
61 result
->label
= NULL
; /* Assume no label */
62 result
->eops
= NULL
; /* must do this, whatever happens */
63 result
->operands
= 0; /* must initialise this */
65 if (i
==0) { /* blank line - ignore */
66 result
->opcode
= -1; /* and no instruction either */
69 if (i
!= TOKEN_ID
&& i
!= TOKEN_INSN
&& i
!= TOKEN_PREFIX
&&
70 (i
!=TOKEN_REG
|| (REG_SREG
& ~reg_flags
[tokval
.t_integer
]))) {
71 error (ERR_NONFATAL
, "label or instruction expected"
77 if (i
== TOKEN_ID
) { /* there's a label here */
78 result
->label
= tokval
.t_charptr
;
79 i
= stdscan(NULL
, &tokval
);
80 if (i
== ':') { /* skip over the optional colon */
81 i
= stdscan(NULL
, &tokval
);
83 error (ERR_WARNING
|ERR_WARN_OL
|ERR_PASS1
,
84 "label alone on a line without a colon might be in error");
86 if (i
!= TOKEN_INSN
|| tokval
.t_integer
!= I_EQU
)
89 * FIXME: location->segment could be NO_SEG, in which case
90 * it is possible we should be passing 'abs_seg'. Look into this.
91 * Work out whether that is *really* what we should be doing.
92 * Generally fix things. I think this is right as it is, but
93 * am still not certain.
95 ldef (result
->label
, in_abs_seg
?abs_seg
:location
->segment
,
96 location
->offset
, NULL
, TRUE
, FALSE
, outfmt
, errfunc
);
101 result
->opcode
= -1; /* this line contains just a label */
108 while (i
== TOKEN_PREFIX
||
109 (i
==TOKEN_REG
&& !(REG_SREG
& ~reg_flags
[tokval
.t_integer
])))
112 * Handle special case: the TIMES prefix.
114 if (i
== TOKEN_PREFIX
&& tokval
.t_integer
== P_TIMES
) {
117 i
= stdscan(NULL
, &tokval
);
118 value
= evaluate (stdscan
, NULL
, &tokval
, NULL
, pass0
, error
, NULL
);
120 if (!value
) { /* but, error in evaluator */
121 result
->opcode
= -1; /* unrecoverable parse error: */
122 return result
; /* ignore this instruction */
124 if (!is_simple (value
)) {
126 "non-constant argument supplied to TIMES");
129 result
->times
= value
->value
;
130 if (value
->value
< 0) {
131 error(ERR_NONFATAL
, "TIMES value %d is negative",
137 if (result
->nprefix
== MAXPREFIX
)
139 "instruction has more than %d prefixes", MAXPREFIX
);
141 result
->prefixes
[result
->nprefix
++] = tokval
.t_integer
;
142 i
= stdscan(NULL
, &tokval
);
146 if (i
!= TOKEN_INSN
) {
147 if (result
->nprefix
> 0 && i
== 0) {
149 * Instruction prefixes are present, but no actual
150 * instruction. This is allowed: at this point we
151 * invent a notional instruction of RESB 0.
153 result
->opcode
= I_RESB
;
154 result
->operands
= 1;
155 result
->oprs
[0].type
= IMMEDIATE
;
156 result
->oprs
[0].offset
= 0L;
157 result
->oprs
[0].segment
= result
->oprs
[0].wrt
= NO_SEG
;
160 error (ERR_NONFATAL
, "parser: instruction expected");
166 result
->opcode
= tokval
.t_integer
;
167 result
->condition
= tokval
.t_inttwo
;
170 * RESB, RESW and RESD cannot be satisfied with incorrectly
171 * evaluated operands, since the correct values _must_ be known
172 * on the first pass. Hence, even in pass one, we set the
173 * `critical' flag on calling evaluate(), so that it will bomb
174 * out on undefined symbols. Nasty, but there's nothing we can
177 * For the moment, EQU has the same difficulty, so we'll
180 if (result
->opcode
== I_RESB
||
181 result
->opcode
== I_RESW
||
182 result
->opcode
== I_RESD
||
183 result
->opcode
== I_RESQ
||
184 result
->opcode
== I_REST
||
185 result
->opcode
== I_EQU
||
186 result
->opcode
== I_INCBIN
) /* fbk */
191 critical
= (pass
==2 ? 2 : 0);
193 if (result
->opcode
== I_DB
||
194 result
->opcode
== I_DW
||
195 result
->opcode
== I_DD
||
196 result
->opcode
== I_DQ
||
197 result
->opcode
== I_DT
||
198 result
->opcode
== I_INCBIN
)
200 extop
*eop
, **tail
= &result
->eops
, **fixptr
;
203 result
->eops_float
= FALSE
;
206 * Begin to read the DB/DW/DD/DQ/DT/INCBIN operands.
209 i
= stdscan(NULL
, &tokval
);
213 eop
= *tail
= nasm_malloc(sizeof(extop
));
216 eop
->type
= EOT_NOTHING
;
219 if (i
== TOKEN_NUM
&& tokval
.t_charptr
&& is_comma_next()) {
220 eop
->type
= EOT_DB_STRING
;
221 eop
->stringval
= tokval
.t_charptr
;
222 eop
->stringlen
= tokval
.t_inttwo
;
223 i
= stdscan(NULL
, &tokval
); /* eat the comma */
227 if ((i
== TOKEN_FLOAT
&& is_comma_next()) || i
== '-') {
231 char *save
= stdscan_bufptr
;
232 i
= stdscan(NULL
, &tokval
);
234 if (i
!= TOKEN_FLOAT
|| !is_comma_next()) {
235 stdscan_bufptr
= save
;
236 i
= tokval
.t_type
= '-';
240 if (i
== TOKEN_FLOAT
) {
241 eop
->type
= EOT_DB_STRING
;
242 result
->eops_float
= TRUE
;
243 if (result
->opcode
== I_DD
)
245 else if (result
->opcode
== I_DQ
)
247 else if (result
->opcode
== I_DT
)
250 error(ERR_NONFATAL
, "floating-point constant"
251 " encountered in `D%c' instruction",
252 result
->opcode
== I_DW
? 'W' : 'B');
254 * fix suggested by Pedro Gimeno... original line
256 * eop->type = EOT_NOTHING;
260 eop
= nasm_realloc(eop
, sizeof(extop
)+eop
->stringlen
);
263 eop
->stringval
= (char *)eop
+ sizeof(extop
);
264 if (eop
->stringlen
< 4 ||
265 !float_const (tokval
.t_charptr
, sign
,
266 (unsigned char *)eop
->stringval
,
267 eop
->stringlen
, error
))
268 eop
->type
= EOT_NOTHING
;
269 i
= stdscan(NULL
, &tokval
); /* eat the comma */
277 value
= evaluate (stdscan
, NULL
, &tokval
, NULL
,
278 critical
, error
, NULL
);
280 if (!value
) { /* error in evaluator */
281 result
->opcode
= -1;/* unrecoverable parse error: */
282 return result
; /* ignore this instruction */
284 if (is_unknown(value
)) {
285 eop
->type
= EOT_DB_NUMBER
;
286 eop
->offset
= 0; /* doesn't matter what we put */
287 eop
->segment
= eop
->wrt
= NO_SEG
; /* likewise */
288 } else if (is_reloc(value
)) {
289 eop
->type
= EOT_DB_NUMBER
;
290 eop
->offset
= reloc_value(value
);
291 eop
->segment
= reloc_seg(value
);
292 eop
->wrt
= reloc_wrt(value
);
295 "operand %d: expression is not simple"
296 " or relocatable", oper_num
);
301 * We're about to call stdscan(), which will eat the
302 * comma that we're currently sitting on between
303 * arguments. However, we'd better check first that it
306 if (i
== 0) /* also could be EOL */
309 error (ERR_NONFATAL
, "comma expected after operand %d",
311 result
->opcode
= -1;/* unrecoverable parse error: */
312 return result
; /* ignore this instruction */
316 if (result
->opcode
== I_INCBIN
) {
318 * Correct syntax for INCBIN is that there should be
319 * one string operand, followed by one or two numeric
322 if (!result
->eops
|| result
->eops
->type
!= EOT_DB_STRING
)
323 error (ERR_NONFATAL
, "`incbin' expects a file name");
324 else if (result
->eops
->next
&&
325 result
->eops
->next
->type
!= EOT_DB_NUMBER
)
326 error (ERR_NONFATAL
, "`incbin': second parameter is",
328 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
329 result
->eops
->next
->next
->type
!= EOT_DB_NUMBER
)
330 error (ERR_NONFATAL
, "`incbin': third parameter is",
332 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
333 result
->eops
->next
->next
->next
)
334 error (ERR_NONFATAL
, "`incbin': more than three parameters");
338 * If we reach here, one of the above errors happened.
339 * Throw the instruction away.
345 error (ERR_WARNING
|ERR_PASS1
,
346 "no operand for data declaration");
348 result
->operands
= oper_num
;
353 /* right. Now we begin to parse the operands. There may be up to three
354 * of these, separated by commas, and terminated by a zero token. */
356 for (operand
= 0; operand
< 3; operand
++) {
357 expr
*value
; /* used most of the time */
358 int mref
; /* is this going to be a memory ref? */
359 int bracket
; /* is it a [] mref, or a & mref? */
362 result
->oprs
[operand
].addr_size
= 0;/* have to zero this whatever */
363 result
->oprs
[operand
].eaflags
= 0; /* and this */
364 result
->oprs
[operand
].opflags
= 0;
366 i
= stdscan(NULL
, &tokval
);
367 if (i
== 0) break; /* end of operands: get out of here */
368 result
->oprs
[operand
].type
= 0; /* so far, no override */
369 while (i
== TOKEN_SPECIAL
) {/* size specifiers */
370 switch ((int)tokval
.t_integer
) {
372 if (!setsize
) /* we want to use only the first */
373 result
->oprs
[operand
].type
|= BITS8
;
378 result
->oprs
[operand
].type
|= BITS16
;
384 result
->oprs
[operand
].type
|= BITS32
;
389 result
->oprs
[operand
].type
|= BITS64
;
394 result
->oprs
[operand
].type
|= BITS80
;
398 result
->oprs
[operand
].type
|= TO
;
401 result
->oprs
[operand
].type
|= STRICT
;
404 result
->oprs
[operand
].type
|= FAR
;
407 result
->oprs
[operand
].type
|= NEAR
;
410 result
->oprs
[operand
].type
|= SHORT
;
413 error (ERR_NONFATAL
, "invalid operand size specification");
415 i
= stdscan(NULL
, &tokval
);
418 if (i
== '[' || i
== '&') { /* memory reference */
420 bracket
= (i
== '[');
421 i
= stdscan(NULL
, &tokval
);
422 if (i
== TOKEN_SPECIAL
) { /* check for address size override */
423 if (tasm_compatible_mode
) {
424 switch ((int)tokval
.t_integer
) {
425 /* For TASM compatibility a size override inside the
426 * brackets changes the size of the operand, not the
427 * address type of the operand as it does in standard
428 * NASM syntax. Hence:
430 * mov eax,[DWORD val]
432 * is valid syntax in TASM compatibility mode. Note that
433 * you lose the ability to override the default address
434 * type for the instruction, but we never use anything
435 * but 32-bit flat model addressing in our code.
438 result
->oprs
[operand
].type
|= BITS8
;
441 result
->oprs
[operand
].type
|= BITS16
;
445 result
->oprs
[operand
].type
|= BITS32
;
448 result
->oprs
[operand
].type
|= BITS64
;
451 result
->oprs
[operand
].type
|= BITS80
;
454 error (ERR_NONFATAL
, "invalid operand size specification");
457 /* Standard NASM compatible syntax */
458 switch ((int)tokval
.t_integer
) {
460 result
->oprs
[operand
].eaflags
|= EAF_TIMESTWO
;
463 result
->oprs
[operand
].eaflags
|= EAF_BYTEOFFS
;
466 result
->oprs
[operand
].addr_size
= 16;
467 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
471 result
->oprs
[operand
].addr_size
= 32;
472 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
475 error (ERR_NONFATAL
, "invalid size specification in"
476 " effective address");
479 i
= stdscan(NULL
, &tokval
);
481 } else { /* immediate operand, or register */
483 bracket
= FALSE
; /* placate optimisers */
486 if((result
->oprs
[operand
].type
& FAR
) && !mref
)
488 error (ERR_NONFATAL
, "invalid use of FAR operand specifier");
491 value
= evaluate (stdscan
, NULL
, &tokval
,
492 &result
->oprs
[operand
].opflags
,
493 critical
, error
, &hints
);
495 if (result
->oprs
[operand
].opflags
& OPFLAG_FORWARD
) {
496 result
->forw_ref
= TRUE
;
498 if (!value
) { /* error in evaluator */
499 result
->opcode
= -1; /* unrecoverable parse error: */
500 return result
; /* ignore this instruction */
502 if (i
== ':' && mref
) { /* it was seg:offset */
504 * Process the segment override.
506 if (value
[1].type
!=0 || value
->value
!=1 ||
507 REG_SREG
& ~reg_flags
[value
->type
])
508 error (ERR_NONFATAL
, "invalid segment override");
509 else if (result
->nprefix
== MAXPREFIX
)
511 "instruction has more than %d prefixes",
514 result
->prefixes
[result
->nprefix
++] = value
->type
;
516 i
= stdscan(NULL
, &tokval
); /* then skip the colon */
517 if (i
== TOKEN_SPECIAL
) { /* another check for size override */
518 switch ((int)tokval
.t_integer
) {
520 result
->oprs
[operand
].addr_size
= 16;
524 result
->oprs
[operand
].addr_size
= 32;
527 error (ERR_NONFATAL
, "invalid size specification in"
528 " effective address");
530 i
= stdscan(NULL
, &tokval
);
532 value
= evaluate (stdscan
, NULL
, &tokval
,
533 &result
->oprs
[operand
].opflags
,
534 critical
, error
, &hints
);
536 if (result
->oprs
[operand
].opflags
& OPFLAG_FORWARD
) {
537 result
->forw_ref
= TRUE
;
539 /* and get the offset */
540 if (!value
) { /* but, error in evaluator */
541 result
->opcode
= -1; /* unrecoverable parse error: */
542 return result
; /* ignore this instruction */
545 if (mref
&& bracket
) { /* find ] at the end */
547 error (ERR_NONFATAL
, "parser: expecting ]");
548 do { /* error recovery again */
549 i
= stdscan(NULL
, &tokval
);
550 } while (i
!= 0 && i
!= ',');
551 } else /* we got the required ] */
552 i
= stdscan(NULL
, &tokval
);
553 } else { /* immediate operand */
554 if (i
!= 0 && i
!= ',' && i
!= ':') {
555 error (ERR_NONFATAL
, "comma or end of line expected");
556 do { /* error recovery */
557 i
= stdscan(NULL
, &tokval
);
558 } while (i
!= 0 && i
!= ',');
559 } else if (i
== ':') {
560 result
->oprs
[operand
].type
|= COLON
;
564 /* now convert the exprs returned from evaluate() into operand
567 if (mref
) { /* it's a memory reference */
569 int b
, i
, s
; /* basereg, indexreg, scale */
572 b
= i
= -1, o
= s
= 0;
573 result
->oprs
[operand
].hintbase
= hints
.base
;
574 result
->oprs
[operand
].hinttype
= hints
.type
;
576 if (e
->type
<= EXPR_REG_END
) { /* this bit's a register */
577 if (e
->value
== 1) /* in fact it can be basereg */
579 else /* no, it has to be indexreg */
580 i
= e
->type
, s
= e
->value
;
583 if (e
->type
&& e
->type
<= EXPR_REG_END
) /* it's a 2nd register */
585 if (b
!= -1) /* If the first was the base, ... */
586 i
= e
->type
, s
= e
->value
; /* second has to be indexreg */
588 else if (e
->value
!= 1) /* If both want to be index */
590 error(ERR_NONFATAL
, "beroset-p-592-invalid effective address");
598 if (e
->type
!= 0) { /* is there an offset? */
599 if (e
->type
<= EXPR_REG_END
) /* in fact, is there an error? */
601 error (ERR_NONFATAL
, "beroset-p-603-invalid effective address");
607 if (e
->type
== EXPR_UNKNOWN
) {
608 o
= 0; /* doesn't matter what */
609 result
->oprs
[operand
].wrt
= NO_SEG
; /* nor this */
610 result
->oprs
[operand
].segment
= NO_SEG
; /* or this */
611 while (e
->type
) e
++; /* go to the end of the line */
615 if (e
->type
== EXPR_SIMPLE
) {
619 if (e
->type
== EXPR_WRT
) {
620 result
->oprs
[operand
].wrt
= e
->value
;
623 result
->oprs
[operand
].wrt
= NO_SEG
;
625 * Look for a segment base type.
627 if (e
->type
&& e
->type
< EXPR_SEGBASE
) {
628 error (ERR_NONFATAL
, "beroset-p-630-invalid effective address");
632 while (e
->type
&& e
->value
== 0)
634 if (e
->type
&& e
->value
!= 1) {
635 error (ERR_NONFATAL
, "beroset-p-637-invalid effective address");
640 result
->oprs
[operand
].segment
=
641 e
->type
- EXPR_SEGBASE
;
644 result
->oprs
[operand
].segment
= NO_SEG
;
645 while (e
->type
&& e
->value
== 0)
648 error (ERR_NONFATAL
, "beroset-p-650-invalid effective address");
656 result
->oprs
[operand
].wrt
= NO_SEG
;
657 result
->oprs
[operand
].segment
= NO_SEG
;
660 if (e
->type
!= 0) { /* there'd better be nothing left! */
661 error (ERR_NONFATAL
, "beroset-p-663-invalid effective address");
666 result
->oprs
[operand
].type
|= MEMORY
;
667 if (b
==-1 && (i
==-1 || s
==0))
668 result
->oprs
[operand
].type
|= MEM_OFFS
;
669 result
->oprs
[operand
].basereg
= b
;
670 result
->oprs
[operand
].indexreg
= i
;
671 result
->oprs
[operand
].scale
= s
;
672 result
->oprs
[operand
].offset
= o
;
674 else /* it's not a memory reference */
676 if (is_just_unknown(value
)) { /* it's immediate but unknown */
677 result
->oprs
[operand
].type
|= IMMEDIATE
;
678 result
->oprs
[operand
].offset
= 0; /* don't care */
679 result
->oprs
[operand
].segment
= NO_SEG
; /* don't care again */
680 result
->oprs
[operand
].wrt
= NO_SEG
;/* still don't care */
682 else if (is_reloc(value
)) /* it's immediate */
684 result
->oprs
[operand
].type
|= IMMEDIATE
;
685 result
->oprs
[operand
].offset
= reloc_value(value
);
686 result
->oprs
[operand
].segment
= reloc_seg(value
);
687 result
->oprs
[operand
].wrt
= reloc_wrt(value
);
688 if (is_simple(value
)) {
689 if (reloc_value(value
)==1)
690 result
->oprs
[operand
].type
|= UNITY
;
692 !(result
->oprs
[operand
].type
& STRICT
)) {
693 if (reloc_value(value
) >= -128 &&
694 reloc_value(value
) <= 127)
695 result
->oprs
[operand
].type
|= SBYTE
;
699 else /* it's a register */
701 if (value
->type
>=EXPR_SIMPLE
|| value
->value
!=1) {
702 error (ERR_NONFATAL
, "invalid operand type");
708 * check that its only 1 register, not an expression...
710 for (i
= 1; value
[i
].type
; i
++)
711 if (value
[i
].value
) {
712 error (ERR_NONFATAL
, "invalid operand type");
717 /* clear overrides, except TO which applies to FPU regs */
718 if (result
->oprs
[operand
].type
& ~TO
) {
720 * we want to produce a warning iff the specified size
721 * is different from the register size
723 i
= result
->oprs
[operand
].type
& SIZE_MASK
;
728 result
->oprs
[operand
].type
&= TO
;
729 result
->oprs
[operand
].type
|= REGISTER
;
730 result
->oprs
[operand
].type
|= reg_flags
[value
->type
];
731 result
->oprs
[operand
].basereg
= value
->type
;
733 if (i
&& (result
->oprs
[operand
].type
& SIZE_MASK
) != i
)
734 error (ERR_WARNING
|ERR_PASS1
,
735 "register size specification ignored");
740 result
->operands
= operand
; /* set operand count */
742 while (operand
<3) /* clear remaining operands */
743 result
->oprs
[operand
++].type
= 0;
746 * Transform RESW, RESD, RESQ, REST into RESB.
748 switch (result
->opcode
) {
749 case I_RESW
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=2; break;
750 case I_RESD
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=4; break;
751 case I_RESQ
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=8; break;
752 case I_REST
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=10; break;
/*
 * is_comma_next(): reads one token with stdscan(NULL, &tv) and returns
 * non-zero when that token is ',', ';' or 0, and zero otherwise.
 * NOTE(review): the declarations of i and tv, and any statements around
 * the stdscan() call, are not present in this listing - confirm against
 * the full source whether the scanner position is saved and restored.
 */
758 static int is_comma_next (void)
765 i
= stdscan (NULL
, &tv
);
767 return (i
== ',' || i
== ';' || !i
);
/*
 * cleanup_insn(): operates on the eops list of the insn pointed to by i.
 * The one visible statement advances i->eops to the next list element.
 * NOTE(review): the enclosing loop and any release of the unlinked node
 * are not present in this listing; presumably this walks and frees the
 * whole eops chain - confirm against the full source.
 */
770 void cleanup_insn (insn
*i
)
776 i
->eops
= i
->eops
->next
;