2 * neatas - a small arm assembler
4 * Copyright (C) 2011 Ali Gholami Rudi
6 * This program is released under GNU GPL version 2.
14 #include <sys/types.h>
18 #define BUFSIZE (1 << 14)
21 #define DELIMS ",:{}[]#=-+ \t\n/!^"
22 #define TOK2(a) ((a)[0] << 16 | (a)[1] << 8)
23 #define TOK3(a) ((a)[0] << 16 | (a)[1] << 8 | (a)[2])
26 static char buf
[BUFSIZE
];
29 static char cs
[BUFSIZE
];
32 static void gen(unsigned long i
)
34 memcpy(cs
+ cslen
, &i
, 4);
38 static int tok_read(char *s
)
41 while (isspace(buf
[cur
]))
43 if (buf
[cur
] == '/' && buf
[cur
+ 1] == '*') {
44 while (buf
[cur
] && (buf
[cur
] != '*' || buf
[cur
+ 1] != '/'))
48 if (buf
[cur
] == ';' || buf
[cur
] == '@') {
49 while (buf
[cur
] && buf
[cur
] != '\n')
55 if (!strchr(DELIMS
, buf
[cur
])) {
56 while (!strchr(DELIMS
, buf
[cur
]))
66 static char tok
[TOKLEN
];
67 static char tokc
[TOKLEN
];
70 /* next token in lower-case */
71 static char *tok_get(void)
85 /* next token in original case */
86 static char *tok_case(void)
92 /* have a look at the next token */
93 static char *tok_see(void)
101 static char *digs
= "0123456789abcdef";
103 static long num(char *s
, int bits
)
108 if (*s
== '-' || *s
== '+') {
112 if (s
[0] == '0' && s
[1] == 'x') {
117 int d
= strchr(digs
, *s
) - digs
;
124 return bits
< 32 ? n
& ((1ul << bits
) - 1) : n
;
128 #define NEXTERNS 1024
131 static char locals
[NLOCALS
][NAMELEN
];
132 static char loffs
[NLOCALS
];
134 static char externs
[NEXTERNS
][NAMELEN
];
136 static char globals
[NEXTERNS
][NAMELEN
];
139 static void label_extern(char *name
)
141 int idx
= nexterns
++;
142 strcpy(externs
[idx
], name
);
145 static void label_global(char *name
)
147 int idx
= nglobals
++;
148 strcpy(globals
[idx
], name
);
151 static void label_local(char *name
)
154 strcpy(locals
[idx
], name
);
156 out_sym(locals
[idx
], OUT_CS
, loffs
[idx
], 0);
159 static int label_isextern(char *name
)
162 for (i
= 0; i
< nexterns
; i
++)
163 if (!strcmp(name
, externs
[i
]))
168 static int label_offset(char *name
)
171 for (i
= 0; i
< nlocals
; i
++)
172 if (!strcmp(name
, locals
[i
]))
177 static void label_write(void)
180 for (i
= 0; i
< nglobals
; i
++)
181 out_sym(globals
[i
], OUT_GLOB
| OUT_CS
,
182 label_offset(globals
[i
]), 0);
187 /* absolute relocations */
188 static char absns
[NRELOCS
][NAMELEN
]; /* symbol name */
189 static long absos
[NRELOCS
]; /* relocation location */
191 /* relative relocations */
192 static char relns
[NRELOCS
][NAMELEN
]; /* symbol name */
193 static long relos
[NRELOCS
]; /* relocation location */
194 static long relas
[NRELOCS
]; /* relocation addend */
195 static long relbs
[NRELOCS
]; /* relocation bits: ldrh=8, 12=ldr, 24=bl */
198 static void reloc_rel(char *name
, long off
, int bits
)
201 strcpy(relns
[idx
], name
);
207 static void reloc_abs(char *name
)
210 strcpy(absns
[idx
], name
);
214 #define CSBEG_NAME "__neatas_cs"
216 /* fill immediate value for bl instruction */
217 static void bl_imm(long *dst
, long imm
)
219 imm
= ((*dst
<< 2) + imm
) >> 2;
220 *dst
= (*dst
& 0xff000000) | (imm
& 0x00ffffff);
223 /* fill immediate value for ldr instruction */
224 static void ldr_imm(long *dst
, long imm
, int half
)
226 /* set u-bit for negative offsets */
232 *dst
= (*dst
& 0xfffff000) | ((*dst
+ imm
) & 0x00000fff);
234 *dst
= (*dst
& 0xfffff0f0) |
235 (imm
& 0x0f) | ((imm
& 0xf0) << 4);
238 static void reloc_write(void)
241 out_sym(CSBEG_NAME
, OUT_CS
, 0, 0);
242 for (i
= 0; i
< nabs
; i
++) {
243 if (label_isextern(absns
[i
])) {
244 out_rel(absns
[i
], OUT_CS
, absos
[i
]);
246 long off
= label_offset(absns
[i
]);
247 out_rel(CSBEG_NAME
, OUT_CS
, absos
[i
]);
248 *(long *) (cs
+ absos
[i
]) += off
;
251 for (i
= 0; i
< nrel
; i
++) {
252 long *dst
= (void *) cs
+ relos
[i
];
254 if (label_isextern(relns
[i
])) {
255 out_rel(relns
[i
], OUT_CS
| OUT_REL24
, relos
[i
]);
256 bl_imm(dst
, relas
[i
] - 8);
259 off
= relas
[i
] + label_offset(relns
[i
]) - relos
[i
] - 8;
264 ldr_imm(dst
, off
, relbs
[i
] == 8);
271 static long dat_offs
[NDATS
]; /* data immediate value */
272 static long dat_locs
[NDATS
]; /* address of pointing ldr */
273 static char dat_names
[NDATS
][NAMELEN
]; /* relocation data symbol name */
276 static void pool_num(long num
)
280 dat_locs
[idx
] = cslen
;
283 static void pool_reloc(char *name
, long off
)
287 dat_locs
[idx
] = cslen
;
288 strcpy(dat_names
[idx
], name
);
291 static void pool_write(void)
294 for (i
= 0; i
< ndats
; i
++) {
296 long *loc
= (void *) cs
+ dat_locs
[i
];
297 int off
= cslen
- dat_locs
[i
] - 8;
298 reloc_abs(dat_names
[i
]);
299 /* ldrh needs special care */
300 if (*loc
& (1 << 26))
301 *loc
= (*loc
& 0xfffff000) | (off
& 0x00000fff);
303 *loc
= (*loc
& 0xfffff0f0) | (off
& 0x0f) |
310 static char *dpops
[] = {
311 "and", "eor", "sub", "rsb", "add", "adc", "sbc", "rsc",
312 "tst", "teq", "cmp", "cmn", "orr", "mov", "bic", "mvn"
315 static char *conds
[] = {
316 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
317 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
320 static char *regs
[] = {
321 "a1", "a2", "a3", "a4", "v1", "v2", "v3", "v4",
322 "v5", "v6", "v7", "v8", "ip", "sp", "lr", "pc"
325 static int get_reg(char *s
)
328 if (s
[0] == 'f' && s
[1] == 'p')
330 for (i
= 0; i
< 16; i
++)
331 if (TOK2(s
) == TOK2(regs
[i
]))
338 static void fill_buf(int fd
)
342 while ((nr
= read(fd
, buf
+ len
, sizeof(buf
) - len
- 1)) > 0)
347 static int tok_jmp(char *s
)
349 if (!strcmp(s
, tok_see())) {
356 static void die(char *msg
)
360 for (i
= 0; i
< cur
; i
++)
363 fprintf(stderr
, "%s:%d: %s\n", src
, lineno
, msg
);
367 static void tok_expect(char *s
)
369 if (strcmp(s
, tok_get()))
373 static int get_cond(char *s
)
376 if (s
[0] == 'h' && s
[1] == 's')
378 if (s
[0] == 'l' && s
[1] == 'o')
380 for (i
= 0; i
< 16; i
++)
381 if (TOK2(s
) == TOK2(conds
[i
]))
386 static int add_op(char *s
)
389 for (i
= 0; i
< 16; i
++)
390 if (TOK3(s
) == TOK3(dpops
[i
]))
395 static int shiftmode(char *s
)
397 if (TOK3(s
) == TOK3("lsl"))
399 if (TOK3(s
) == TOK3("lsr"))
401 if (TOK3(s
) == TOK3("asr"))
403 if (TOK3(s
) == TOK3("ror"))
408 static int ldr_word(void)
418 return (u
<< 23) | num(tok_get(), 12);
422 rm
= get_reg(tok_get());
424 sm
= shiftmode(tok_get());
426 shifts
= num(tok_get(), 8);
428 return (1 << 25) | (u
<< 23) | (shifts
<< 7) | (sm
<< 5) | rm
;
431 static int ldr_half(int s
, int h
)
434 int o
= 0x90 | (s
<< 6) | (h
<< 5);
436 return o
| (1 << 22);
439 n
= num(tok_get(), 8);
440 return o
| (1 << 22) | (u
<< 23) | (n
& 0x0f) | ((n
& 0xf0) << 4);
443 return o
| (u
<< 23) | get_reg(tok_get());
446 static long ldr_off(void)
451 off
-= num(tok_get(), 32);
455 off
+= num(tok_get(), 32);
464 * single data transfer:
465 * +------------------------------------------+
466 * |COND|01|I|P|U|B|W|L| Rn | Rd | offset |
467 * +------------------------------------------+
469 * I: immediate/offset
470 * P: post/pre indexing
476 * Rd: source/destination register
478 * I=1 offset=| immediate |
479 * I=0 offset=| shift | Rm |
481 * halfword and signed data transfer
482 * +----------------------------------------------+
483 * |COND|000|P|U|0|W|L| Rn | Rd |0000|1|S|H|1| Rm |
484 * +----------------------------------------------+
486 * +----------------------------------------------+
487 * |COND|000|P|U|1|W|L| Rn | Rd |off1|1|S|H|1|off2|
488 * +----------------------------------------------+
493 static int ldr(char *cmd
)
503 if (TOK3(cmd
) != TOK3("ldr") && TOK3(cmd
) != TOK3("str"))
505 if (TOK3(cmd
) == TOK3("ldr"))
507 cond
= get_cond(cmd
+ 3);
508 cmd
+= cond
< 0 ? 2 : 5;
521 rd
= get_reg(tok_get());
523 o
= (cond
<< 28) | (l
<< 20) | (rd
<< 12) | (half
<< 5) | (sign
<< 6);
527 o
|= (1 << 26) | (byte
<< 22);
532 strcpy(sym
, tok_case());
533 pool_reloc(sym
, ldr_off());
535 strcpy(sym
, tok_case());
536 reloc_rel(sym
, ldr_off(), (half
|| sign
) ? 8 : 12);
542 gen(o
| (1 << 23) | (1 << 24) | (rn
<< 16));
545 rn
= get_reg(tok_get());
548 gen(o
| (w
<< 21) | ((half
|| sign
) ? ldr_half(sign
, half
) :
552 o
|= (1 << 24) | ((half
|| sign
) ? ldr_half(sign
, half
) : ldr_word());
560 static int ldm_regs(void)
565 int r1
= get_reg(tok_get());
569 r2
= get_reg(tok_get());
570 for (i
= r1
; i
<= r2
; i
++)
579 static int ldm_type(char *s
, int l
)
583 if (*s
== 'i' || *s
== 'd') {
587 p
= s
[0] == (l
? 'e' : 'f');
588 u
= s
[1] == (l
? 'd' : 'a');
590 return (p
<< 24) | (u
<< 23);
594 * block data transfer
595 * +----------------------------------------+
596 * |COND|100|P|U|S|W|L| Rn | reg list |
597 * +----------------------------------------+
599 * P: post/pre indexing
606 static int ldm(char *cmd
)
610 int l
= 0, w
= 0, s
= 0;
612 if (TOK3(cmd
) != TOK3("ldm") && TOK3(cmd
) != TOK3("stm"))
614 if (TOK3(cmd
) == TOK3("ldm"))
616 cond
= get_cond(cmd
+ 3);
617 o
|= ldm_type(cond
< 0 ? cmd
+ 3 : cmd
+ 5, l
);
618 rn
= get_reg(tok_get());
627 gen(o
| (cond
<< 28) | (s
<< 22) | (w
<< 21) | (l
<< 20) | (rn
<< 16));
631 static int add_encimm(unsigned n
)
634 while (i
< 12 && (n
>> ((4 + i
) << 1)))
636 return (n
>> (i
<< 1)) | (((16 - i
) & 0x0f) << 8);
639 static long add_decimm(unsigned n
)
641 int rot
= (16 - ((n
>> 8) & 0x0f)) & 0x0f;
642 return (n
& 0xff) << (rot
<< 1);
645 static int add_op2(void)
649 long n
= num(tok_get(), 32);
650 long imm
= add_encimm(n
);
651 if (add_decimm(imm
) != n
)
652 die("cannot encode immediate");
653 return (1 << 25) | imm
;
655 rm
= get_reg(tok_get());
658 sm
= shiftmode(tok_get());
660 return (num(tok_get(), 4) << 7) | (sm
<< 5) | (rm
<< 0);
661 return (get_reg(tok_get()) << 8) | (sm
<< 5) | (1 << 4) | (rm
<< 0);
666 * +---------------------------------------+
667 * |COND|00|I| op |S| Rn | Rd | operand2 |
668 * +---------------------------------------+
670 * S: set condition code
672 * Rd: destination operand
674 * I=0 operand2=| shift | Rm |
675 * I=1 operand2=|rota| imm |
677 static int add(char *cmd
)
686 cond
= get_cond(cmd
+ 3);
687 s
= cmd
[cond
< 0 ? 3 : 6] == 's';
688 if (op
== 13 || op
== 15)
690 if ((op
& 0x0c) == 0x08)
694 if ((op
& 0xc) != 0x8) {
695 rd
= get_reg(tok_get());
699 rn
= get_reg(tok_get());
702 gen((cond
<< 28) | (s
<< 20) | (op
<< 21) | (rn
<< 16) | (rd
<< 12) | add_op2());
708 * +----------------------------------------+
709 * |COND|000000|A|S| Rd | Rn | Rs |1001| Rm |
710 * +----------------------------------------+
714 * C: set condition codes
716 * I=0 operand2=| shift | Rm |
717 * I=1 operand2=|rota| imm |
719 static int mul(char *cmd
)
722 int rd
, rm
, rs
, rn
= 0;
725 if (TOK3(cmd
) != TOK3("mul") && TOK3(cmd
) != TOK3("mla"))
727 if (TOK3(cmd
) == TOK3("mla"))
729 cond
= get_cond(cmd
+ 3);
730 s
= cmd
[cond
< 0 ? 3 : 6] == 's';
733 rd
= get_reg(tok_get());
735 rm
= get_reg(tok_get());
737 rs
= get_reg(tok_get());
740 rn
= get_reg(tok_get());
742 gen((cond
<< 28) | (a
<< 21) | (s
<< 20) | (rd
<< 16) |
743 (rn
<< 12) | (rs
<< 8) | (9 << 4) | (rm
<< 0));
748 * software interrupt:
749 * +----------------------------------+
751 * +----------------------------------+
754 static int swi(char *cmd
)
758 if (TOK3(cmd
) != TOK3("swi"))
760 cond
= get_cond(cmd
+ 3);
764 n
= num(tok_get(), 24);
765 gen((cond
<< 28) | (0xf << 24) | n
);
771 * +-----------------------------------+
772 * |COND|101|L| offset |
773 * +-----------------------------------+
777 static int bl(char *cmd
)
788 cond
= get_cond(cmd
);
791 strcpy(sym
, tok_case());
792 reloc_rel(sym
, ldr_off(), 24);
793 gen((cond
<< 28) | (5 << 25) | (l
<< 24));
798 * move PSR to a register
799 * +-------------------------------------+
800 * |COND|00010|P|001111| Rd |000000000000|
801 * +-------------------------------------+
803 * move a register to PSR
804 * +--------------------------------------+
805 * |COND|00|I|10|P|1010001111| source op |
806 * +--------------------------------------+
810 * I=0 source=|00000000| Rm |
811 * I=1 source=|rot | imm_u8 |
813 static int msr(char *cmd
)
818 static int directive(char *cmd
)
822 if (!strcmp(".extern", cmd
)) {
823 label_extern(tok_case());
825 if (!strcmp(".global", cmd
)) {
826 label_global(tok_case());
828 if (!strcmp(".word", cmd
)) {
831 reloc_abs(tok_case());
834 gen(num(tok_get(), 32));
836 } while (!tok_jmp(","));
841 static int stmt(void)
844 char first_case
[TOKLEN
];
845 strcpy(first
, tok_see());
846 strcpy(first_case
, tok_case());
849 label_local(first_case
);
852 if (!directive(first
))
871 int main(int argc
, char *argv
[])
876 while (i
< argc
&& argv
[i
][0] == '-') {
877 if (argv
[i
][1] == 'o')
878 strcpy(obj
, argv
[++i
]);
882 fprintf(stderr
, "neatcc: no file given\n");
885 strcpy(src
, argv
[i
]);
886 ifd
= open(src
, O_RDONLY
);
898 while (*s
&& *s
!= '.')
904 ofd
= open(obj
, O_WRONLY
| O_TRUNC
| O_CREAT
, 0600);
905 out_write(ofd
, cs
, cslen
, cs
, 0);