fix "add r0, r1, r2, lsl #1"
[neatas.git] / neatas.c
blob3fe812a775b11b901e9cd3fe7f3a4957163c8c84
1 /*
2 * neatas - a small arm assembler
4 * Copyright (C) 2011 Ali Gholami Rudi
6 * This program is released under GNU GPL version 2.
7 */
8 #include <ctype.h>
9 #include <fcntl.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <unistd.h>
14 #include <sys/types.h>
15 #include <sys/stat.h>
16 #include "out.h"
/* size of the source and code buffers, and of one token */
#define BUFSIZE		(1 << 14)
#define TOKLEN		128

/* characters that end a word token */
#define DELIMS		",:{}[]#=-+ \t\n/!^"
/* pack the first two (three) characters of a string into one int */
#define TOK2(a)		(((a)[0] << 16) | ((a)[1] << 8))
#define TOK3(a)		(((a)[0] << 16) | ((a)[1] << 8) | (a)[2])

static char buf[BUFSIZE];	/* source text being assembled */
static int cur;			/* read position in buf[] */

static char cs[BUFSIZE];	/* generated code section */
static int cslen;		/* number of bytes used in cs[] */
31 static void gen(unsigned long i)
33 memcpy(cs + cslen, &i, 4);
34 cslen += 4;
37 static int tok_read(char *s)
39 while (1) {
40 while (isspace(buf[cur]))
41 cur++;
42 if (buf[cur] == '/' && buf[cur + 1] == '*') {
43 while (buf[cur] && (buf[cur] != '*' || buf[cur + 1] != '/'))
44 cur++;
45 continue;
47 if (buf[cur] == ';' || buf[cur] == '@') {
48 while (buf[cur] && buf[cur] != '\n')
49 cur++;
50 continue;
52 break;
54 if (!strchr(DELIMS, buf[cur])) {
55 while (!strchr(DELIMS, buf[cur]))
56 *s++ = buf[cur++];
57 *s = '\0';
58 return 0;
60 s[0] = buf[cur++];
61 s[1] = '\0';
62 return s[0] != 0;
65 static char tok[TOKLEN];
66 static char tokc[TOKLEN];
67 static int tok_next;
69 /* next token in lower-case */
70 static char *tok_get(void)
72 char *s = tokc;
73 char *d = tok;
74 if (!tok_next) {
75 tok_read(tokc);
76 while (*s)
77 *d++ = tolower(*s++);
78 *d = '\0';
80 tok_next = 0;
81 return tok;
84 /* next token in original case */
85 static char *tok_case(void)
87 tok_get();
88 return tokc;
91 /* have a look at the next token */
92 static char *tok_see(void)
94 if (!tok_next)
95 tok_get();
96 tok_next = 1;
97 return tok;
static char *digs = "0123456789abcdef";

/*
 * Convert the string s to a number.
 *
 * s may start with '+' or '-', followed by decimal digits or by "0x"
 * and lower-case hexadecimal digits.  Conversion stops at the first
 * character that is not a digit of the selected base, so malformed
 * input can no longer index outside the digit table.
 *
 * If bits is less than 32 the result is reduced to its low `bits` bits
 * (negative values wrap, e.g. -1 in 8 bits is 255); otherwise the value
 * is returned unchanged.
 */
static long num(char *s, int bits)
{
	int b = 10;
	int neg = 0;
	long n = 0;
	if (*s == '-' || *s == '+') {
		neg = *s == '-';
		s++;
	}
	if (s[0] == '0' && s[1] == 'x') {
		b = 16;
		s += 2;
	}
	for (; *s; s++) {
		char *p = strchr(digs, *s);
		/* reject non-digits and digits too large for the base */
		if (!p || p - digs >= b)
			break;
		n = n * b + (p - digs);
	}
	if (neg)
		n = -n;
	return bits < 32 ? n & ((1ul << bits) - 1) : n;
}
#define NLOCALS		1024
#define NEXTERNS	1024
#define NAMELEN		32

static char locals[NLOCALS][NAMELEN];	/* names of labels defined in this file */
/* code-section offset of each local label; an int, as a char cannot hold offsets past 127 */
static int loffs[NLOCALS];
static int nlocals;
static char externs[NEXTERNS][NAMELEN];	/* names declared external */
static int nexterns;
static char globals[NEXTERNS][NAMELEN];	/* names declared global */
static int nglobals;
138 static void label_extern(char *name)
140 int idx = nexterns++;
141 strcpy(externs[idx], name);
144 static void label_global(char *name)
146 int idx = nglobals++;
147 strcpy(globals[idx], name);
150 static void label_local(char *name)
152 int idx = nlocals++;
153 strcpy(locals[idx], name);
154 loffs[idx] = cslen;
155 out_sym(locals[idx], OUT_CS, loffs[idx], 0);
158 static int label_isextern(char *name)
160 int i;
161 for (i = 0; i < nexterns; i++)
162 if (!strcmp(name, externs[i]))
163 return 1;
164 return 0;
167 static int label_offset(char *name)
169 int i;
170 for (i = 0; i < nlocals; i++)
171 if (!strcmp(name, locals[i]))
172 return loffs[i];
173 return 0;
176 static void label_write(void)
178 int i;
179 for (i = 0; i < nglobals; i++)
180 out_sym(globals[i], OUT_GLOB | OUT_CS,
181 label_offset(globals[i]), 0);
184 #define NRELOCS 1024
186 /* absolute relocations */
187 static char absns[NRELOCS][NAMELEN]; /* symbol name */
188 static long absos[NRELOCS]; /* relocation location */
189 static int nabs;
190 /* relative relocations */
191 static char relns[NRELOCS][NAMELEN]; /* symbol name */
192 static long relos[NRELOCS]; /* relocation location */
193 static long relas[NRELOCS]; /* relocation addend */
194 static long relbs[NRELOCS]; /* relocation bits: ldrh=8, 12=ldr, 24=bl */
195 static int nrel;
197 static void reloc_rel(char *name, long off, int bits)
199 int idx = nrel++;
200 strcpy(relns[idx], name);
201 relos[idx] = cslen;
202 relas[idx] = off;
203 relbs[idx] = bits;
206 static void reloc_abs(char *name)
208 int idx = nabs++;
209 strcpy(absns[idx], name);
210 absos[idx] = cslen;
#define CSBEG_NAME		"__neatas_cs"

/*
 * Fill the immediate field of the branch instruction at *dst.
 *
 * imm is a byte displacement.  It is added to the displacement already
 * stored in the low 24 bits of the instruction (a word count), and the
 * sum, divided by four, is written back to those 24 bits.
 *
 * The arithmetic is done on unsigned values to avoid shifting a
 * negative long, and every bit above the 24-bit field is preserved, so
 * a long wider than 32 bits does not wipe the bytes that follow the
 * instruction.
 */
static void bl_imm(long *dst, long imm)
{
	unsigned long insn = (unsigned long) *dst;
	unsigned long disp = ((insn << 2) + (unsigned long) imm) >> 2;
	*dst = (long) ((insn & ~0x00fffffful) | (disp & 0x00fffffful));
}
/*
 * Fill the immediate offset of the load/store instruction at *dst.
 *
 * imm:  byte offset; a negative value toggles the U bit (bit 23) and
 *       its magnitude is stored
 * half: zero for the 12-bit word/byte form, where imm is added to the
 *       offset already in the instruction; nonzero for the halfword
 *       form, where the low and high nibbles of imm go to bits 0-3 and
 *       8-11
 *
 * Bits above the 32-bit instruction are preserved when long is wider
 * than 32 bits.
 */
static void ldr_imm(long *dst, long imm, int half)
{
	/* toggle the u-bit for negative offsets; ldr() emits label loads with it set */
	if (imm < 0) {
		*dst ^= (1 << 23);
		imm = -imm;
	}
	if (!half)
		*dst = (*dst & ~0x00000fffl) | ((*dst + imm) & 0x00000fff);
	else
		*dst = (*dst & ~0x00000f0fl) |
			(imm & 0x0f) | ((imm & 0xf0) << 4);
}
237 static void reloc_write(void)
239 int i;
240 out_sym(CSBEG_NAME, OUT_CS, 0, 0);
241 for (i = 0; i < nabs; i++) {
242 if (label_isextern(absns[i])) {
243 out_rel(absns[i], OUT_CS, absos[i]);
244 } else {
245 long off = label_offset(absns[i]);
246 out_rel(CSBEG_NAME, OUT_CS, absos[i]);
247 *(long *) (cs + absos[i]) += off;
250 for (i = 0; i < nrel; i++) {
251 long *dst = (void *) cs + relos[i];
252 long off;
253 if (label_isextern(relns[i])) {
254 out_rel(relns[i], OUT_CS | OUT_REL24, relos[i]);
255 bl_imm(dst, relas[i] - 8);
256 continue;
258 off = relas[i] + label_offset(relns[i]) - relos[i] - 8;
259 /* bl instruction */
260 if (relbs[i] == 24)
261 bl_imm(dst, off);
262 else
263 ldr_imm(dst, off, relbs[i] == 8);
267 #define NDATS 1024
269 /* data pool */
270 static long dat_offs[NDATS]; /* data immediate value */
271 static long dat_locs[NDATS]; /* address of pointing ldr */
272 static char dat_names[NDATS][NAMELEN]; /* relocation data symbol name */
273 static int ndats;
275 static void pool_num(long num)
277 int idx = ndats++;
278 dat_offs[idx] = num;
279 dat_locs[idx] = cslen;
282 static void pool_reloc(char *name, long off)
284 int idx = ndats++;
285 dat_offs[idx] = off;
286 dat_locs[idx] = cslen;
287 strcpy(dat_names[idx], name);
290 static void pool_write(void)
292 int i;
293 for (i = 0; i < ndats; i++) {
294 if (dat_names[i]) {
295 long *loc = (void *) cs + dat_locs[i];
296 int off = cslen - dat_locs[i] - 8;
297 reloc_abs(dat_names[i]);
298 /* ldrh needs special care */
299 if (*loc & (1 << 26))
300 *loc = (*loc & 0xfffff000) | (off & 0x00000fff);
301 else
302 *loc = (*loc & 0xfffff0f0) | (off & 0x0f) |
303 ((off & 0xf0) << 4);
305 gen(dat_offs[i]);
/* data processing mnemonics, in opcode order */
static char *dpops[] = {
	"and", "eor", "sub", "rsb",
	"add", "adc", "sbc", "rsc",
	"tst", "teq", "cmp", "cmn",
	"orr", "mov", "bic", "mvn"
};

/* condition suffixes, in condition-code order */
static char *conds[] = {
	"eq", "ne", "cs", "cc",
	"mi", "pl", "vs", "vc",
	"hi", "ls", "ge", "lt",
	"gt", "le", "al", "nv"
};
/* procedure-call names of the sixteen registers, in register order */
static char *regs[] = {
	"a1", "a2", "a3", "a4",
	"v1", "v2", "v3", "v4",
	"v5", "v6", "v7", "v8",
	"ip", "sp", "lr", "pc"
};

/*
 * Return the register number named by s, or -1 if s is not a register.
 *
 * Only the first two characters are compared with "fp" and the names
 * in regs[]; any other name starting with 'r' is converted with atoi()
 * from its second character on.
 */
static int get_reg(char *s)
{
	int r = 0;
	if (s[0] == 'f' && s[1] == 'p')
		return 11;
	while (r < 16) {
		if (s[0] == regs[r][0] && s[1] == regs[r][1])
			return r;
		r++;
	}
	return s[0] == 'r' ? atoi(s + 1) : -1;
}
337 static void fill_buf(int fd)
339 int len = 0;
340 int nr;
341 while ((nr = read(fd, buf + len, sizeof(buf) - len - 1)) > 0)
342 len += nr;
343 buf[len] = '\0';
/*
 * Consume the next token if it equals s.
 * Returns 0 when the token was consumed and 1 when it was left alone.
 */
static int tok_jmp(char *s)
{
	if (strcmp(s, tok_see()) != 0)
		return 1;
	tok_get();
	return 0;
}
/* consume the next token; report a syntax error and exit unless it equals s */
static void tok_expect(char *s)
{
	if (strcmp(s, tok_get()) == 0)
		return;
	fprintf(stderr, "syntax error\n");
	exit(1);
}
363 static int get_cond(char *s)
365 int i;
366 if (s[0] == 'h' && s[1] == 's')
367 return 2;
368 if (s[0] == 'l' && s[1] == 'o')
369 return 3;
370 for (i = 0; i < 16; i++)
371 if (TOK2(s) == TOK2(conds[i]))
372 return i;
373 return -1;
376 static int add_op(char *s)
378 int i;
379 for (i = 0; i < 16; i++)
380 if (TOK3(s) == TOK3(dpops[i]))
381 return i;
382 return -1;
/*
 * Return the shift type named by the first three characters of s:
 * 0 for lsl, 1 for lsr, 2 for asr and 3 for ror.  Any other name
 * yields 0.
 */
static int shiftmode(char *s)
{
	static char *modes[] = {"lsl", "lsr", "asr", "ror"};
	int m;
	for (m = 0; m < 4; m++)
		if (!strncmp(s, modes[m], 3))
			return m;
	return 0;
}
/*
 * Parse the optional offset of a word/byte load or store and return
 * the I, U and offset bits of the instruction.
 *
 * Accepted forms, each introduced by a comma:
 *   , #imm      12-bit immediate, optionally preceded by '-'
 *   , rm        register, optionally preceded by '-'
 *   , rm, shift #n
 * Without a comma no token is consumed and 0 is returned.
 *
 * The shift amount occupies bits 7-11, so it is read as a 5-bit value;
 * a wider value would spill into the destination register field.
 */
static int ldr_word(void)
{
	int sm = 0;
	int rm;
	int shifts = 0;
	int u = 1;
	if (tok_jmp(","))
		return 0;
	if (!tok_jmp("#")) {
		/* tok_jmp() returns 1 when there is no minus sign: offset is added */
		u = tok_jmp("-");
		return (u << 23) | num(tok_get(), 12);
	}
	if (!tok_jmp("-"))
		u = 0;
	rm = get_reg(tok_get());
	if (!tok_jmp(",")) {
		sm = shiftmode(tok_get());
		tok_expect("#");
		shifts = num(tok_get(), 5);
	}
	return (1 << 25) | (u << 23) | (shifts << 7) | (sm << 5) | rm;
}
/*
 * Parse the optional offset of a halfword/signed load or store and
 * return the offset bits of the instruction together with the S and H
 * bits given by s and h.
 *
 * Without a comma the result is an immediate offset of zero.  After a
 * comma either "#imm" (8 bits, split into two nibbles) or a register
 * is read; a leading '-' clears the U bit.
 */
static int ldr_half(int s, int h)
{
	int bits = 0x90 | (s << 6) | (h << 5);
	int up;
	if (tok_jmp(","))
		return bits | (1 << 22);
	if (tok_jmp("#")) {
		/* register offset */
		up = tok_jmp("-");
		return bits | (up << 23) | get_reg(tok_get());
	} else {
		int imm;
		up = tok_jmp("-");
		imm = num(tok_get(), 8);
		return bits | (1 << 22) | (up << 23) |
			(imm & 0x0f) | ((imm & 0xf0) << 4);
	}
}
/*
 * Read a possibly empty sequence of "+ number" and "- number" terms
 * and return their sum.
 */
static long ldr_off(void)
{
	long total = 0;
	for (;;) {
		if (!tok_jmp("-"))
			total -= num(tok_get(), 32);
		else if (!tok_jmp("+"))
			total += num(tok_get(), 32);
		else
			return total;
	}
}
454 * single data transfer:
455 * +------------------------------------------+
456 * |COND|01|I|P|U|B|W|L| Rn | Rd | offset |
457 * +------------------------------------------+
459 * I: immediate/offset
460 * P: post/pre indexing
461 * U: down/up
462 * B: byte/word
463 * W: writeback
464 * L: store/load
465 * Rn: base register
466 * Rd: source/destination register
468 * I=1 offset=| immediate |
469 * I=0 offset=| shift | Rm |
471 * halfword and signed data transfer
472 * +----------------------------------------------+
473 * |COND|000|P|U|0|W|L| Rn | Rd |0000|1|S|H|1| Rm |
474 * +----------------------------------------------+
476 * +----------------------------------------------+
477 * |COND|000|P|U|1|W|L| Rn | Rd |off1|1|S|H|1|off2|
478 * +----------------------------------------------+
480 * S: singed
481 * H: halfword
483 static int ldr(char *cmd)
485 int l = 0;
486 int rd, rn;
487 int cond;
488 int w = 0;
489 int sign = 0;
490 int byte = 0;
491 int half = 0;
492 int o;
493 if (TOK3(cmd) != TOK3("ldr") && TOK3(cmd) != TOK3("str"))
494 return 1;
495 if (TOK3(cmd) == TOK3("ldr"))
496 l = 1;
497 cond = get_cond(cmd + 3);
498 cmd += cond < 0 ? 2 : 5;
499 if (cond < 0)
500 cond = 14;
501 while (*++cmd) {
502 if (*cmd == 't')
503 w = 1;
504 if (*cmd == 'b')
505 byte = 1;
506 if (*cmd == 'h')
507 half = 1;
508 if (*cmd == 's')
509 sign = 1;
511 rd = get_reg(tok_get());
512 tok_expect(",");
513 o = (cond << 28) | (l << 20) | (rd << 12) | (half << 5) | (sign << 6);
514 if (half || sign)
515 o |= 0x90;
516 else
517 o |= (1 << 26);
518 if (tok_jmp("[")) {
519 char sym[NAMELEN];
520 rn = 15;
521 if (!tok_jmp("=")) {
522 strcpy(sym, tok_case());
523 pool_reloc(sym, ldr_off());
524 } else {
525 strcpy(sym, tok_case());
526 reloc_rel(sym, ldr_off(), (half || sign) ? 8 : 12);
528 if (half || sign)
529 o |= (1 << 22);
530 else
531 o |= (1 << 26);
532 gen(o | (1 << 23) | (1 << 24) | (rn << 16));
533 return 0;
535 rn = get_reg(tok_get());
536 o |= (rn << 16);
537 if (!tok_jmp("]")) {
538 gen(o | (w << 21) | ((half || sign) ? ldr_half(sign, half) :
539 ldr_word()));
540 return 0;
542 o |= (1 << 24) | ((half || sign) ? ldr_half(sign, half) : ldr_word());
543 tok_expect("]");
544 if (!tok_jmp("!"))
545 o |= (1 << 21);
546 gen(o);
547 return 0;
/*
 * Parse a register list in braces and return its bit mask.
 * Entries are separated by commas; "ra-rb" selects every register
 * from ra to rb.
 */
static int ldm_regs(void)
{
	int mask = 0;
	tok_expect("{");
	do {
		int lo = get_reg(tok_get());
		int hi = lo;
		int r;
		if (!tok_jmp("-"))
			hi = get_reg(tok_get());
		for (r = lo; r <= hi; r++)
			mask |= (1 << r);
	} while (!tok_jmp(","));
	tok_expect("}");
	return mask;
}
/*
 * Return the P (bit 24) and U (bit 23) bits for the addressing mode
 * suffix s of a block transfer; l is nonzero for loads.
 *
 * ia/ib/da/db: the first letter selects increment (U=1) or decrement,
 * the second selects before (P=1) or after.
 * fd/ed/fa/ea: stack names, whose meaning depends on load/store.
 * An empty suffix means increment after.
 */
static int ldm_type(char *s, int l)
{
	int p, u;
	if (!s[0]) {
		p = 0;
		u = 1;
	} else if (s[0] == 'i' || s[0] == 'd') {
		u = s[0] == 'i';
		p = s[1] == 'b';
	} else {
		p = s[0] == (l ? 'e' : 'f');
		u = s[1] == (l ? 'd' : 'a');
	}
	return (p << 24) | (u << 23);
}
/*
 * block data transfer
 * +----------------------------------------+
 * |COND|100|P|U|S|W|L| Rn |    reg list    |
 * +----------------------------------------+
 *
 * P: post/pre indexing
 * U: down/up
 * S: PSR/user bit
 * W: write back
 * L: store/load
 * Rn: base register
 *
 * Assemble an ldm or stm instruction whose mnemonic is cmd; returns 1
 * without consuming input if cmd is neither, and 0 after generating
 * the instruction.  '!' after the base register requests write back
 * and '^' after the register list sets the S bit.
 */
static int ldm(char *cmd)
{
	int load = TOK3(cmd) == TOK3("ldm");
	int cond, rn;
	int wback, psr;
	int o = 4 << 25;
	if (!load && TOK3(cmd) != TOK3("stm"))
		return 1;
	cond = get_cond(cmd + 3);
	o |= ldm_type(cmd + (cond < 0 ? 3 : 5), load);
	if (cond < 0)
		cond = 14;
	rn = get_reg(tok_get());
	wback = !tok_jmp("!");
	tok_expect(",");
	o |= ldm_regs();
	psr = !tok_jmp("^");
	gen(o | (cond << 28) | (psr << 22) | (wback << 21) |
		(load << 20) | (rn << 16));
	return 0;
}
/*
 * Parse the second operand of a data processing instruction and return
 * its bits (the I bit and the 12-bit operand2 field).
 *
 * Accepted forms:
 *   #imm             8-bit immediate, no rotation
 *   rm               register
 *   rm, shift #n     register shifted by a constant
 *   rm, shift rs     register shifted by a register
 *
 * The constant shift amount occupies bits 7-11 and is read as a 5-bit
 * value, so amounts from 16 to 31 are no longer truncated.
 */
static int add_op2(void)
{
	int sm, rm;
	if (!tok_jmp("#"))
		return (1 << 25) | num(tok_get(), 8);
	rm = get_reg(tok_get());
	if (tok_jmp(","))
		return rm;
	sm = shiftmode(tok_get());
	if (!tok_jmp("#"))
		return (num(tok_get(), 5) << 7) | (sm << 5) | (rm << 0);
	return (get_reg(tok_get()) << 8) | (sm << 5) | (1 << 4) | (rm << 0);
}
/*
 * data processing:
 * +---------------------------------------+
 * |COND|00|I| op |S| Rn | Rd |  operand2  |
 * +---------------------------------------+
 *
 * S: set condition code
 * Rn: first operand
 * Rd: destination operand
 *
 * I=0 operand2=| shift  | Rm   |
 * I=1 operand2=|rota|   imm    |
 *
 * Assemble a data processing instruction whose mnemonic is cmd;
 * returns 1 without consuming input if cmd is not one of dpops[], and
 * 0 after generating the instruction.  The mnemonic may be followed by
 * a condition and then by 's'.  mov and mvn take no first operand; the
 * comparison instructions take no destination and always set S.
 */
static int add(char *cmd)
{
	int op, cond;
	int rd = 0, rn = 0;
	int nops = 2;
	int s = 0;
	op = add_op(cmd);
	if (op < 0)
		return 1;
	cond = get_cond(cmd + 3);
	/* 's' follows the 3-letter mnemonic and the 2-letter condition, if any */
	s = cmd[cond < 0 ? 3 : 5] == 's';
	if (op == 13 || op == 15)
		nops = 1;
	if ((op & 0x0c) == 0x08)
		s = 1;
	if (cond < 0)
		cond = 14;
	if ((op & 0xc) != 0x8) {
		rd = get_reg(tok_get());
		tok_expect(",");
	}
	if (nops > 1) {
		rn = get_reg(tok_get());
		tok_expect(",");
	}
	gen((cond << 28) | (s << 20) | (op << 21) | (rn << 16) | (rd << 12) | add_op2());
	return 0;
}
/*
 * multiply
 * +----------------------------------------+
 * |COND|000000|A|S| Rd | Rn | Rs |1001| Rm |
 * +----------------------------------------+
 *
 * Rd: destination
 * A: accumulate
 * S: set condition codes
 *
 * Assemble "mul rd, rm, rs" or "mla rd, rm, rs, rn"; returns 1 without
 * consuming input if cmd is neither, and 0 after generating the
 * instruction.  The mnemonic may be followed by a condition and then
 * by 's'.
 */
static int mul(char *cmd)
{
	int cond;
	int rd, rm, rs, rn = 0;
	int s = 0;
	int a = 0;
	if (TOK3(cmd) != TOK3("mul") && TOK3(cmd) != TOK3("mla"))
		return 1;
	if (TOK3(cmd) == TOK3("mla"))
		a = 1;
	cond = get_cond(cmd + 3);
	/* 's' follows the 3-letter mnemonic and the 2-letter condition, if any */
	s = cmd[cond < 0 ? 3 : 5] == 's';
	if (cond < 0)
		cond = 14;
	rd = get_reg(tok_get());
	tok_expect(",");
	rm = get_reg(tok_get());
	tok_expect(",");
	rs = get_reg(tok_get());
	if (a) {
		tok_expect(",");
		rn = get_reg(tok_get());
	}
	gen((cond << 28) | (a << 21) | (s << 20) | (rd << 16) |
		(rn << 12) | (rs << 8) | (9 << 4) | (rm << 0));
	return 0;
}
/*
 * software interrupt:
 * +----------------------------------+
 * |COND|1111|                        |
 * +----------------------------------+
 *
 * Assemble "swi [#]number"; returns 1 without consuming input if cmd
 * is not swi, and 0 after generating the instruction.  The number
 * fills the low 24 bits.
 */
static int swi(char *cmd)
{
	int cond;
	int arg;
	if (TOK3(cmd) != TOK3("swi"))
		return 1;
	cond = get_cond(cmd + 3);
	cond = cond == -1 ? 14 : cond;
	/* the '#' before the number is optional */
	tok_jmp("#");
	arg = num(tok_get(), 24);
	gen((cond << 28) | (0xf << 24) | arg);
	return 0;
}
741 * branch:
742 * +-----------------------------------+
743 * |COND|101|L| offset |
744 * +-----------------------------------+
746 * L: link
748 static int bl(char *cmd)
750 int l = 0;
751 int cond;
752 char sym[NAMELEN];
753 if (*cmd++ != 'b')
754 return 1;
755 if (*cmd == 'l') {
756 l = 1;
757 cmd++;
759 cond = get_cond(cmd);
760 if (cond == -1)
761 cond = 14;
762 strcpy(sym, tok_case());
763 reloc_rel(sym, ldr_off(), 24);
764 gen((cond << 28) | (5 << 25) | (l << 24));
765 return 0;
/*
 * move PSR to a register
 * +-------------------------------------+
 * |COND|00010|P|001111| Rd |000000000000|
 * +-------------------------------------+
 *
 * move a register to PSR
 * +--------------------------------------+
 * |COND|00|I|10|P|1010001111| source op  |
 * +--------------------------------------+
 *
 * P: CPSR/SPSR_cur
 *
 * I=0 source=|00000000| Rm |
 * I=1 source=|rot | imm_u8 |
 *
 * Placeholder: no mnemonic is recognised, so 1 is always returned.
 */
static int msr(char *cmd)
{
	(void) cmd;
	return 1;
}
/*
 * Handle an assembler directive.
 *
 * Returns 1 if cmd does not start with '.', and 0 otherwise, also for
 * directives that are not recognised.  .extern and .global take a
 * symbol name; .word takes a comma-separated list of numbers or
 * "=symbol" references with an optional offset.
 */
static int directive(char *cmd)
{
	if (cmd[0] != '.')
		return 1;
	if (strcmp(".extern", cmd) == 0) {
		label_extern(tok_case());
	} else if (strcmp(".global", cmd) == 0) {
		label_global(tok_case());
	} else if (strcmp(".word", cmd) == 0) {
		do {
			if (tok_jmp("=")) {
				gen(num(tok_get(), 32));
			} else {
				reloc_abs(tok_case());
				gen(ldr_off());
			}
		} while (!tok_jmp(","));
	}
	return 0;
}
812 static int stmt(void)
814 char first[TOKLEN];
815 char first_case[TOKLEN];
816 strcpy(first, tok_see());
817 strcpy(first_case, tok_case());
818 /* a label */
819 if (!tok_jmp(":")) {
820 label_local(first_case);
821 return 0;
823 if (!directive(first))
824 return 0;
825 if (!add(first))
826 return 0;
827 if (!mul(first))
828 return 0;
829 if (!ldr(first))
830 return 0;
831 if (!ldm(first))
832 return 0;
833 if (!msr(first))
834 return 0;
835 if (!swi(first))
836 return 0;
837 if (!bl(first))
838 return 0;
839 return 1;
842 int main(int argc, char *argv[])
844 char obj[128] = "";
845 char *src;
846 int ofd, ifd;
847 int i = 1;
848 while (i < argc && argv[i][0] == '-') {
849 if (argv[i][1] == 'o')
850 strcpy(obj, argv[++i]);
851 i++;
853 if (i == argc) {
854 fprintf(stderr, "neatcc: no file given\n");
855 return 1;
857 src = argv[i];
858 ifd = open(src, O_RDONLY);
859 fill_buf(ifd);
860 close(ifd);
861 out_init(0);
862 while (!stmt())
864 label_write();
865 pool_write();
866 reloc_write();
867 if (!*obj) {
868 char *s = obj;
869 strcpy(obj, src);
870 while (*s && *s != '.')
871 s++;
872 *s++ = '.';
873 *s++ = 'o';
874 *s++ = '\0';
876 ofd = open(obj, O_WRONLY | O_TRUNC | O_CREAT, 0600);
877 out_write(ofd, cs, cslen, cs, 0);
878 close(ofd);
879 return 0;