1 /* parser.c source line parser for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the license given in the file "LICENSE"
6 * distributed in the NASM archive.
8 * initial version 27/iii/95 by Simon Tatham
27 extern int in_abs_seg
; /* ABSOLUTE segment flag */
28 extern int32_t abs_seg
; /* ABSOLUTE segment */
29 extern int32_t abs_offset
; /* ABSOLUTE segment offset */
31 #include "regflags.c" /* List of register flags */
33 static int is_comma_next(void);
36 static struct tokenval tokval
;
38 static struct ofmt
*outfmt
; /* Structure of addresses of output routines */
39 static struct location
*location
; /* Pointer to current line's segment,offset */
41 void parser_global_info(struct ofmt
*output
, struct location
* locp
)
47 static int prefix_slot(enum prefixes prefix
)
75 error(ERR_PANIC
, "Invalid value %d passed to prefix_slot()", prefix
);
80 static void process_size_override(insn
* result
, int operand
)
82 if (tasm_compatible_mode
) {
83 switch ((int)tokval
.t_integer
) {
84 /* For TASM compatibility a size override inside the
85 * brackets changes the size of the operand, not the
86 * address type of the operand as it does in standard
91 * is valid syntax in TASM compatibility mode. Note that
92 * you lose the ability to override the default address
93 * type for the instruction, but we never use anything
94 * but 32-bit flat model addressing in our code.
97 result
->oprs
[operand
].type
|= BITS8
;
100 result
->oprs
[operand
].type
|= BITS16
;
104 result
->oprs
[operand
].type
|= BITS32
;
107 result
->oprs
[operand
].type
|= BITS64
;
110 result
->oprs
[operand
].type
|= BITS80
;
113 result
->oprs
[operand
].type
|= BITS128
;
117 "invalid operand size specification");
121 /* Standard NASM compatible syntax */
122 switch ((int)tokval
.t_integer
) {
124 result
->oprs
[operand
].eaflags
|= EAF_TIMESTWO
;
127 result
->oprs
[operand
].eaflags
|= EAF_REL
;
130 result
->oprs
[operand
].eaflags
|= EAF_ABS
;
133 result
->oprs
[operand
].disp_size
= 8;
134 result
->oprs
[operand
].eaflags
|= EAF_BYTEOFFS
;
139 if (result
->prefixes
[PPS_ASIZE
] &&
140 result
->prefixes
[PPS_ASIZE
] != tokval
.t_integer
)
142 "conflicting address size specifications");
144 result
->prefixes
[PPS_ASIZE
] = tokval
.t_integer
;
147 result
->oprs
[operand
].disp_size
= 16;
148 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
152 result
->oprs
[operand
].disp_size
= 32;
153 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
156 result
->oprs
[operand
].disp_size
= 64;
157 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
160 error(ERR_NONFATAL
, "invalid size specification in"
161 " effective address");
167 insn
*parse_line(int pass
, char *buffer
, insn
* result
,
168 efunc errfunc
, evalfunc evaluate
, ldfunc ldef
)
172 struct eval_hints hints
;
175 bool insn_is_label
= false;
179 result
->forw_ref
= false;
183 stdscan_bufptr
= buffer
;
184 i
= stdscan(NULL
, &tokval
);
186 result
->label
= NULL
; /* Assume no label */
187 result
->eops
= NULL
; /* must do this, whatever happens */
188 result
->operands
= 0; /* must initialize this */
190 if (i
== 0) { /* blank line - ignore */
191 result
->opcode
= -1; /* and no instruction either */
194 if (i
!= TOKEN_ID
&& i
!= TOKEN_INSN
&& i
!= TOKEN_PREFIX
&&
195 (i
!= TOKEN_REG
|| (REG_SREG
& ~reg_flags
[tokval
.t_integer
]))) {
196 error(ERR_NONFATAL
, "label or instruction expected"
197 " at start of line");
202 if (i
== TOKEN_ID
|| (insn_is_label
&& i
== TOKEN_INSN
)) {
203 /* there's a label here */
205 result
->label
= tokval
.t_charptr
;
206 i
= stdscan(NULL
, &tokval
);
207 if (i
== ':') { /* skip over the optional colon */
208 i
= stdscan(NULL
, &tokval
);
210 error(ERR_WARNING
| ERR_WARN_OL
| ERR_PASS1
,
211 "label alone on a line without a colon might be in error");
213 if (i
!= TOKEN_INSN
|| tokval
.t_integer
!= I_EQU
) {
215 * FIXME: location->segment could be NO_SEG, in which case
216 * it is possible we should be passing 'abs_seg'. Look into this.
217 * Work out whether that is *really* what we should be doing.
218 * Generally fix things. I think this is right as it is, but
219 * am still not certain.
221 ldef(result
->label
, in_abs_seg
? abs_seg
: location
->segment
,
222 location
->offset
, NULL
, true, false, outfmt
, errfunc
);
227 result
->opcode
= -1; /* this line contains just a label */
231 for (j
= 0; j
< MAXPREFIX
; j
++)
232 result
->prefixes
[j
] = P_none
;
235 while (i
== TOKEN_PREFIX
||
236 (i
== TOKEN_REG
&& !(REG_SREG
& ~reg_flags
[tokval
.t_integer
])))
241 * Handle special case: the TIMES prefix.
243 if (i
== TOKEN_PREFIX
&& tokval
.t_integer
== P_TIMES
) {
246 i
= stdscan(NULL
, &tokval
);
248 evaluate(stdscan
, NULL
, &tokval
, NULL
, pass0
, error
, NULL
);
250 if (!value
) { /* but, error in evaluator */
251 result
->opcode
= -1; /* unrecoverable parse error: */
252 return result
; /* ignore this instruction */
254 if (!is_simple(value
)) {
256 "non-constant argument supplied to TIMES");
259 result
->times
= value
->value
;
260 if (value
->value
< 0) {
261 error(ERR_NONFATAL
, "TIMES value %d is negative",
267 int slot
= prefix_slot(tokval
.t_integer
);
268 if (result
->prefixes
[slot
]) {
269 if (result
->prefixes
[slot
] == tokval
.t_integer
)
271 "instruction has redundant prefixes");
274 "instruction has conflicting prefixes");
276 result
->prefixes
[slot
] = tokval
.t_integer
;
277 i
= stdscan(NULL
, &tokval
);
281 if (i
!= TOKEN_INSN
) {
285 for (j
= 0; j
< MAXPREFIX
; j
++)
286 if ((pfx
= result
->prefixes
[j
]) != P_none
)
289 if (i
== 0 && pfx
!= P_none
) {
291 * Instruction prefixes are present, but no actual
292 * instruction. This is allowed: at this point we
293 * invent a notional instruction of RESB 0.
295 result
->opcode
= I_RESB
;
296 result
->operands
= 1;
297 result
->oprs
[0].type
= IMMEDIATE
;
298 result
->oprs
[0].offset
= 0L;
299 result
->oprs
[0].segment
= result
->oprs
[0].wrt
= NO_SEG
;
302 error(ERR_NONFATAL
, "parser: instruction expected");
308 result
->opcode
= tokval
.t_integer
;
309 result
->condition
= tokval
.t_inttwo
;
312 * RESB, RESW and RESD cannot be satisfied with incorrectly
313 * evaluated operands, since the correct values _must_ be known
314 * on the first pass. Hence, even in pass one, we set the
315 * `critical' flag on calling evaluate(), so that it will bomb
316 * out on undefined symbols. Nasty, but there's nothing we can
319 * For the moment, EQU has the same difficulty, so we'll
322 if (result
->opcode
== I_RESB
|| result
->opcode
== I_RESW
||
323 result
->opcode
== I_RESD
|| result
->opcode
== I_RESQ
||
324 result
->opcode
== I_REST
|| result
->opcode
== I_RESO
||
325 result
->opcode
== I_EQU
|| result
->opcode
== I_INCBIN
) {
328 critical
= (pass
== 2 ? 2 : 0);
330 if (result
->opcode
== I_DB
|| result
->opcode
== I_DW
||
331 result
->opcode
== I_DD
|| result
->opcode
== I_DQ
||
332 result
->opcode
== I_DT
|| result
->opcode
== I_DO
||
333 result
->opcode
== I_INCBIN
) {
334 extop
*eop
, **tail
= &result
->eops
, **fixptr
;
337 result
->eops_float
= false;
340 * Begin to read the DB/DW/DD/DQ/DT/DO/INCBIN operands.
343 i
= stdscan(NULL
, &tokval
);
346 else if (first
&& i
== ':') {
347 insn_is_label
= true;
352 eop
= *tail
= nasm_malloc(sizeof(extop
));
355 eop
->type
= EOT_NOTHING
;
358 if (i
== TOKEN_NUM
&& tokval
.t_charptr
&& is_comma_next()) {
359 eop
->type
= EOT_DB_STRING
;
360 eop
->stringval
= tokval
.t_charptr
;
361 eop
->stringlen
= tokval
.t_inttwo
;
362 i
= stdscan(NULL
, &tokval
); /* eat the comma */
366 if ((i
== TOKEN_FLOAT
&& is_comma_next())
367 || i
== '-' || i
== '+') {
370 if (i
== '+' || i
== '-') {
371 char *save
= stdscan_bufptr
;
373 sign
= (i
== '-') ? -1 : 1;
374 i
= stdscan(NULL
, &tokval
);
375 if (i
!= TOKEN_FLOAT
|| !is_comma_next()) {
376 stdscan_bufptr
= save
;
377 i
= tokval
.t_type
= token
;
381 if (i
== TOKEN_FLOAT
) {
382 eop
->type
= EOT_DB_STRING
;
383 result
->eops_float
= true;
384 switch (result
->opcode
) {
404 error(ERR_NONFATAL
, "floating-point constant"
405 " encountered in unknown instruction");
407 * fix suggested by Pedro Gimeno... original line
409 * eop->type = EOT_NOTHING;
414 eop
= nasm_realloc(eop
, sizeof(extop
) + eop
->stringlen
);
417 eop
->stringval
= (char *)eop
+ sizeof(extop
);
418 if (!eop
->stringlen
||
419 !float_const(tokval
.t_charptr
, sign
,
420 (uint8_t *)eop
->stringval
,
421 eop
->stringlen
, error
))
422 eop
->type
= EOT_NOTHING
;
423 i
= stdscan(NULL
, &tokval
); /* eat the comma */
431 value
= evaluate(stdscan
, NULL
, &tokval
, NULL
,
432 critical
, error
, NULL
);
434 if (!value
) { /* error in evaluator */
435 result
->opcode
= -1; /* unrecoverable parse error: */
436 return result
; /* ignore this instruction */
438 if (is_unknown(value
)) {
439 eop
->type
= EOT_DB_NUMBER
;
440 eop
->offset
= 0; /* doesn't matter what we put */
441 eop
->segment
= eop
->wrt
= NO_SEG
; /* likewise */
442 } else if (is_reloc(value
)) {
443 eop
->type
= EOT_DB_NUMBER
;
444 eop
->offset
= reloc_value(value
);
445 eop
->segment
= reloc_seg(value
);
446 eop
->wrt
= reloc_wrt(value
);
449 "operand %d: expression is not simple"
450 " or relocatable", oper_num
);
455 * We're about to call stdscan(), which will eat the
456 * comma that we're currently sitting on between
457 * arguments. However, we'd better check first that it
460 if (i
== 0) /* also could be EOL */
463 error(ERR_NONFATAL
, "comma expected after operand %d",
465 result
->opcode
= -1; /* unrecoverable parse error: */
466 return result
; /* ignore this instruction */
470 if (result
->opcode
== I_INCBIN
) {
472 * Correct syntax for INCBIN is that there should be
473 * one string operand, followed by one or two numeric
476 if (!result
->eops
|| result
->eops
->type
!= EOT_DB_STRING
)
477 error(ERR_NONFATAL
, "`incbin' expects a file name");
478 else if (result
->eops
->next
&&
479 result
->eops
->next
->type
!= EOT_DB_NUMBER
)
480 error(ERR_NONFATAL
, "`incbin': second parameter is",
482 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
483 result
->eops
->next
->next
->type
!= EOT_DB_NUMBER
)
484 error(ERR_NONFATAL
, "`incbin': third parameter is",
486 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
487 result
->eops
->next
->next
->next
)
489 "`incbin': more than three parameters");
493 * If we reach here, one of the above errors happened.
494 * Throw the instruction away.
498 } else /* DB ... */ if (oper_num
== 0)
499 error(ERR_WARNING
| ERR_PASS1
,
500 "no operand for data declaration");
502 result
->operands
= oper_num
;
507 /* right. Now we begin to parse the operands. There may be up to four
508 * of these, separated by commas, and terminated by a zero token. */
510 for (operand
= 0; operand
< MAX_OPERANDS
; operand
++) {
511 expr
*value
; /* used most of the time */
512 int mref
; /* is this going to be a memory ref? */
513 int bracket
; /* is it a [] mref, or a & mref? */
516 result
->oprs
[operand
].disp_size
= 0; /* have to zero this whatever */
517 result
->oprs
[operand
].eaflags
= 0; /* and this */
518 result
->oprs
[operand
].opflags
= 0;
520 i
= stdscan(NULL
, &tokval
);
522 break; /* end of operands: get out of here */
523 else if (first
&& i
== ':') {
524 insn_is_label
= true;
528 result
->oprs
[operand
].type
= 0; /* so far, no override */
529 while (i
== TOKEN_SPECIAL
) { /* size specifiers */
530 switch ((int)tokval
.t_integer
) {
532 if (!setsize
) /* we want to use only the first */
533 result
->oprs
[operand
].type
|= BITS8
;
538 result
->oprs
[operand
].type
|= BITS16
;
544 result
->oprs
[operand
].type
|= BITS32
;
549 result
->oprs
[operand
].type
|= BITS64
;
554 result
->oprs
[operand
].type
|= BITS80
;
559 result
->oprs
[operand
].type
|= BITS128
;
563 result
->oprs
[operand
].type
|= TO
;
566 result
->oprs
[operand
].type
|= STRICT
;
569 result
->oprs
[operand
].type
|= FAR
;
572 result
->oprs
[operand
].type
|= NEAR
;
575 result
->oprs
[operand
].type
|= SHORT
;
578 error(ERR_NONFATAL
, "invalid operand size specification");
580 i
= stdscan(NULL
, &tokval
);
583 if (i
== '[' || i
== '&') { /* memory reference */
585 bracket
= (i
== '[');
586 i
= stdscan(NULL
, &tokval
); /* then skip the colon */
587 while (i
== TOKEN_SPECIAL
|| i
== TOKEN_PREFIX
) {
588 process_size_override(result
, operand
);
589 i
= stdscan(NULL
, &tokval
);
591 } else { /* immediate operand, or register */
593 bracket
= false; /* placate optimisers */
596 if ((result
->oprs
[operand
].type
& FAR
) && !mref
&&
597 result
->opcode
!= I_JMP
&& result
->opcode
!= I_CALL
) {
598 error(ERR_NONFATAL
, "invalid use of FAR operand specifier");
601 value
= evaluate(stdscan
, NULL
, &tokval
,
602 &result
->oprs
[operand
].opflags
,
603 critical
, error
, &hints
);
605 if (result
->oprs
[operand
].opflags
& OPFLAG_FORWARD
) {
606 result
->forw_ref
= true;
608 if (!value
) { /* error in evaluator */
609 result
->opcode
= -1; /* unrecoverable parse error: */
610 return result
; /* ignore this instruction */
612 if (i
== ':' && mref
) { /* it was seg:offset */
614 * Process the segment override.
616 if (value
[1].type
!= 0 || value
->value
!= 1 ||
617 REG_SREG
& ~reg_flags
[value
->type
])
618 error(ERR_NONFATAL
, "invalid segment override");
619 else if (result
->prefixes
[PPS_SEG
])
621 "instruction has conflicting segment overrides");
623 result
->prefixes
[PPS_SEG
] = value
->type
;
624 if (!(REG_FSGS
& ~reg_flags
[value
->type
]))
625 result
->oprs
[operand
].eaflags
|= EAF_FSGS
;
628 i
= stdscan(NULL
, &tokval
); /* then skip the colon */
629 while (i
== TOKEN_SPECIAL
|| i
== TOKEN_PREFIX
) {
630 process_size_override(result
, operand
);
631 i
= stdscan(NULL
, &tokval
);
633 value
= evaluate(stdscan
, NULL
, &tokval
,
634 &result
->oprs
[operand
].opflags
,
635 critical
, error
, &hints
);
637 if (result
->oprs
[operand
].opflags
& OPFLAG_FORWARD
) {
638 result
->forw_ref
= true;
640 /* and get the offset */
641 if (!value
) { /* but, error in evaluator */
642 result
->opcode
= -1; /* unrecoverable parse error: */
643 return result
; /* ignore this instruction */
646 if (mref
&& bracket
) { /* find ] at the end */
648 error(ERR_NONFATAL
, "parser: expecting ]");
649 do { /* error recovery again */
650 i
= stdscan(NULL
, &tokval
);
651 } while (i
!= 0 && i
!= ',');
652 } else /* we got the required ] */
653 i
= stdscan(NULL
, &tokval
);
654 } else { /* immediate operand */
655 if (i
!= 0 && i
!= ',' && i
!= ':') {
656 error(ERR_NONFATAL
, "comma or end of line expected");
657 do { /* error recovery */
658 i
= stdscan(NULL
, &tokval
);
659 } while (i
!= 0 && i
!= ',');
660 } else if (i
== ':') {
661 result
->oprs
[operand
].type
|= COLON
;
665 /* now convert the exprs returned from evaluate() into operand
668 if (mref
) { /* it's a memory reference */
670 int b
, i
, s
; /* basereg, indexreg, scale */
671 int64_t o
; /* offset */
673 b
= i
= -1, o
= s
= 0;
674 result
->oprs
[operand
].hintbase
= hints
.base
;
675 result
->oprs
[operand
].hinttype
= hints
.type
;
677 if (e
->type
&& e
->type
<= EXPR_REG_END
) { /* this bit's a register */
678 if (e
->value
== 1) /* in fact it can be basereg */
680 else /* no, it has to be indexreg */
681 i
= e
->type
, s
= e
->value
;
684 if (e
->type
&& e
->type
<= EXPR_REG_END
) { /* it's a 2nd register */
685 if (b
!= -1) /* If the first was the base, ... */
686 i
= e
->type
, s
= e
->value
; /* second has to be indexreg */
688 else if (e
->value
!= 1) { /* If both want to be index */
690 "beroset-p-592-invalid effective address");
697 if (e
->type
!= 0) { /* is there an offset? */
698 if (e
->type
<= EXPR_REG_END
) { /* in fact, is there an error? */
700 "beroset-p-603-invalid effective address");
704 if (e
->type
== EXPR_UNKNOWN
) {
705 o
= 0; /* doesn't matter what */
706 result
->oprs
[operand
].wrt
= NO_SEG
; /* nor this */
707 result
->oprs
[operand
].segment
= NO_SEG
; /* or this */
709 e
++; /* go to the end of the line */
711 if (e
->type
== EXPR_SIMPLE
) {
715 if (e
->type
== EXPR_WRT
) {
716 result
->oprs
[operand
].wrt
= e
->value
;
719 result
->oprs
[operand
].wrt
= NO_SEG
;
721 * Look for a segment base type.
723 if (e
->type
&& e
->type
< EXPR_SEGBASE
) {
725 "beroset-p-630-invalid effective address");
729 while (e
->type
&& e
->value
== 0)
731 if (e
->type
&& e
->value
!= 1) {
733 "beroset-p-637-invalid effective address");
738 result
->oprs
[operand
].segment
=
739 e
->type
- EXPR_SEGBASE
;
742 result
->oprs
[operand
].segment
= NO_SEG
;
743 while (e
->type
&& e
->value
== 0)
747 "beroset-p-650-invalid effective address");
755 result
->oprs
[operand
].wrt
= NO_SEG
;
756 result
->oprs
[operand
].segment
= NO_SEG
;
759 if (e
->type
!= 0) { /* there'd better be nothing left! */
761 "beroset-p-663-invalid effective address");
766 /* It is memory, but it can match any r/m operand */
767 result
->oprs
[operand
].type
|= MEMORY_ANY
;
769 if (b
== -1 && (i
== -1 || s
== 0)) {
770 int is_rel
= globalbits
== 64 &&
771 !(result
->oprs
[operand
].eaflags
& EAF_ABS
) &&
773 !(result
->oprs
[operand
].eaflags
& EAF_FSGS
)) ||
774 (result
->oprs
[operand
].eaflags
& EAF_REL
));
776 result
->oprs
[operand
].type
|= is_rel
? IP_REL
: MEM_OFFS
;
778 result
->oprs
[operand
].basereg
= b
;
779 result
->oprs
[operand
].indexreg
= i
;
780 result
->oprs
[operand
].scale
= s
;
781 result
->oprs
[operand
].offset
= o
;
782 } else { /* it's not a memory reference */
784 if (is_just_unknown(value
)) { /* it's immediate but unknown */
785 result
->oprs
[operand
].type
|= IMMEDIATE
;
786 result
->oprs
[operand
].offset
= 0; /* don't care */
787 result
->oprs
[operand
].segment
= NO_SEG
; /* don't care again */
788 result
->oprs
[operand
].wrt
= NO_SEG
; /* still don't care */
789 } else if (is_reloc(value
)) { /* it's immediate */
790 result
->oprs
[operand
].type
|= IMMEDIATE
;
791 result
->oprs
[operand
].offset
= reloc_value(value
);
792 result
->oprs
[operand
].segment
= reloc_seg(value
);
793 result
->oprs
[operand
].wrt
= reloc_wrt(value
);
794 if (is_simple(value
)) {
795 if (reloc_value(value
) == 1)
796 result
->oprs
[operand
].type
|= UNITY
;
797 if (optimizing
>= 0 &&
798 !(result
->oprs
[operand
].type
& STRICT
)) {
799 if (reloc_value(value
) >= -128 &&
800 reloc_value(value
) <= 127)
801 result
->oprs
[operand
].type
|= SBYTE
;
804 } else { /* it's a register */
807 if (value
->type
>= EXPR_SIMPLE
|| value
->value
!= 1) {
808 error(ERR_NONFATAL
, "invalid operand type");
814 * check that it's only 1 register, not an expression...
816 for (i
= 1; value
[i
].type
; i
++)
817 if (value
[i
].value
) {
818 error(ERR_NONFATAL
, "invalid operand type");
823 /* clear overrides, except TO which applies to FPU regs */
824 if (result
->oprs
[operand
].type
& ~TO
) {
826 * we want to produce a warning iff the specified size
827 * is different from the register size
829 rs
= result
->oprs
[operand
].type
& SIZE_MASK
;
833 result
->oprs
[operand
].type
&= TO
;
834 result
->oprs
[operand
].type
|= REGISTER
;
835 result
->oprs
[operand
].type
|= reg_flags
[value
->type
];
836 result
->oprs
[operand
].basereg
= value
->type
;
838 if (rs
&& (result
->oprs
[operand
].type
& SIZE_MASK
) != rs
)
839 error(ERR_WARNING
| ERR_PASS1
,
840 "register size specification ignored");
845 result
->operands
= operand
; /* set operand count */
847 /* clear remaining operands */
848 while (operand
< MAX_OPERANDS
)
849 result
->oprs
[operand
++].type
= 0;
852 * Transform RESW, RESD, RESQ, REST, RESO into RESB.
854 switch (result
->opcode
) {
856 result
->opcode
= I_RESB
;
857 result
->oprs
[0].offset
*= 2;
860 result
->opcode
= I_RESB
;
861 result
->oprs
[0].offset
*= 4;
864 result
->opcode
= I_RESB
;
865 result
->oprs
[0].offset
*= 8;
868 result
->opcode
= I_RESB
;
869 result
->oprs
[0].offset
*= 10;
872 result
->opcode
= I_RESB
;
873 result
->oprs
[0].offset
*= 16;
882 static int is_comma_next(void)
889 i
= stdscan(NULL
, &tv
);
891 return (i
== ',' || i
== ';' || !i
);
894 void cleanup_insn(insn
* i
)
900 i
->eops
= i
->eops
->next
;