exit if instruction immediate cannot be encoded
[neatas.git] / neatas.c
blob2d5a056669cd2723cb3a1805f0ce8d73b56aca90
1 /*
2 * neatas - a small arm assembler
4 * Copyright (C) 2011 Ali Gholami Rudi
6 * This program is released under GNU GPL version 2.
7 */
8 #include <ctype.h>
9 #include <fcntl.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <unistd.h>
14 #include <sys/types.h>
15 #include <sys/stat.h>
16 #include "out.h"
/* capacity of the source and code buffers, and of a single token */
#define BUFSIZE		(1 << 14)
#define TOKLEN		128

/* characters that terminate a word token */
#define DELIMS		",:{}[]#=-+ \t\n/!^"
/* pack the first two or three characters of a string into one int */
#define TOK2(a)		((a)[0] << 16 | (a)[1] << 8)
#define TOK3(a)		((a)[0] << 16 | (a)[1] << 8 | (a)[2])

static char src[256];		/* path of the input file */
static char buf[BUFSIZE];	/* NUL-terminated contents of the input file */
static int cur;			/* current read position in buf */
29 static char cs[BUFSIZE];
30 static int cslen;
32 static void gen(unsigned long i)
34 memcpy(cs + cslen, &i, 4);
35 cslen += 4;
38 static int tok_read(char *s)
40 while (1) {
41 while (isspace(buf[cur]))
42 cur++;
43 if (buf[cur] == '/' && buf[cur + 1] == '*') {
44 while (buf[cur] && (buf[cur] != '*' || buf[cur + 1] != '/'))
45 cur++;
46 continue;
48 if (buf[cur] == ';' || buf[cur] == '@') {
49 while (buf[cur] && buf[cur] != '\n')
50 cur++;
51 continue;
53 break;
55 if (!strchr(DELIMS, buf[cur])) {
56 while (!strchr(DELIMS, buf[cur]))
57 *s++ = buf[cur++];
58 *s = '\0';
59 return 0;
61 s[0] = buf[cur++];
62 s[1] = '\0';
63 return s[0] != 0;
66 static char tok[TOKLEN];
67 static char tokc[TOKLEN];
68 static int tok_next;
70 /* next token in lower-case */
71 static char *tok_get(void)
73 char *s = tokc;
74 char *d = tok;
75 if (!tok_next) {
76 tok_read(tokc);
77 while (*s)
78 *d++ = tolower(*s++);
79 *d = '\0';
81 tok_next = 0;
82 return tok;
85 /* next token in original case */
86 static char *tok_case(void)
88 tok_get();
89 return tokc;
92 /* have a look at the next token */
93 static char *tok_see(void)
95 if (!tok_next)
96 tok_get();
97 tok_next = 1;
98 return tok;
static char *digs = "0123456789abcdef";

/*
 * Convert the string s to a number.
 *
 * An optional '+' or '-' sign and an optional "0x" prefix (hexadecimal)
 * are accepted; otherwise the number is decimal.  Parsing stops at the
 * first character that is not a digit of the selected base, instead of
 * computing with a NULL pointer as before.
 *
 * When bits is less than 32 the result is masked to its low bits bits
 * (so negative values come back in two's complement form).
 */
static long num(char *s, int bits)
{
	int b = 10;
	int neg = 0;
	long n = 0;
	if (*s == '-' || *s == '+') {
		neg = *s == '-';
		s++;
	}
	if (s[0] == '0' && s[1] == 'x') {
		b = 16;
		s += 2;
	}
	for (; *s; s++) {
		char *d = strchr(digs, *s);
		if (!d || d - digs >= b)
			break;
		n = n * b + (d - digs);
	}
	if (neg)
		n = -n;
	return bits < 32 ? n & ((1ul << bits) - 1) : n;
}
#define NLOCALS		1024
#define NEXTERNS	1024
#define NAMELEN		32

static char locals[NLOCALS][NAMELEN];	/* labels defined in this file */
/* code-section offset of each local label; int, since a char cannot hold offsets above 127 */
static int loffs[NLOCALS];
static int nlocals;
static char externs[NEXTERNS][NAMELEN];	/* names given to .extern */
static int nexterns;
static char globals[NEXTERNS][NAMELEN];	/* names given to .global */
static int nglobals;
139 static void label_extern(char *name)
141 int idx = nexterns++;
142 strcpy(externs[idx], name);
145 static void label_global(char *name)
147 int idx = nglobals++;
148 strcpy(globals[idx], name);
151 static void label_local(char *name)
153 int idx = nlocals++;
154 strcpy(locals[idx], name);
155 loffs[idx] = cslen;
156 out_sym(locals[idx], OUT_CS, loffs[idx], 0);
159 static int label_isextern(char *name)
161 int i;
162 for (i = 0; i < nexterns; i++)
163 if (!strcmp(name, externs[i]))
164 return 1;
165 return 0;
168 static int label_offset(char *name)
170 int i;
171 for (i = 0; i < nlocals; i++)
172 if (!strcmp(name, locals[i]))
173 return loffs[i];
174 return 0;
177 static void label_write(void)
179 int i;
180 for (i = 0; i < nglobals; i++)
181 out_sym(globals[i], OUT_GLOB | OUT_CS,
182 label_offset(globals[i]), 0);
185 #define NRELOCS 1024
187 /* absolute relocations */
188 static char absns[NRELOCS][NAMELEN]; /* symbol name */
189 static long absos[NRELOCS]; /* relocation location */
190 static int nabs;
191 /* relative relocations */
192 static char relns[NRELOCS][NAMELEN]; /* symbol name */
193 static long relos[NRELOCS]; /* relocation location */
194 static long relas[NRELOCS]; /* relocation addend */
195 static long relbs[NRELOCS]; /* relocation bits: ldrh=8, 12=ldr, 24=bl */
196 static int nrel;
198 static void reloc_rel(char *name, long off, int bits)
200 int idx = nrel++;
201 strcpy(relns[idx], name);
202 relos[idx] = cslen;
203 relas[idx] = off;
204 relbs[idx] = bits;
207 static void reloc_abs(char *name)
209 int idx = nabs++;
210 strcpy(absns[idx], name);
211 absos[idx] = cslen;
#define CSBEG_NAME	"__neatas_cs"

/*
 * fill immediate value for bl instruction
 *
 * The low 24 bits of *dst hold a word offset.  The byte offset imm is
 * added to it and the sum is stored back as a word offset.  All other
 * bits of *dst are preserved, including those above bit 31 when long is
 * wider than 32 bits.  Unsigned arithmetic avoids shifting a signed
 * value.
 */
static void bl_imm(long *dst, long imm)
{
	unsigned long insn = (unsigned long) *dst;
	unsigned long sum = (insn << 2) + (unsigned long) imm;
	*dst = (long) ((insn & ~0x00fffffful) | ((sum >> 2) & 0x00fffffful));
}
/*
 * fill immediate value for ldr instruction
 *
 * For word/byte transfers (half == 0) the 12-bit offset field becomes
 * the low 12 bits of *dst + imm; for halfword/signed transfers the
 * offset is split into two nibbles at bits 0-3 and 8-11.  Bits outside
 * the offset field are preserved, including those above bit 31 when
 * long is wider than 32 bits.
 */
static void ldr_imm(long *dst, long imm, int half)
{
	/* negative offsets flip the u-bit and store the magnitude */
	if (imm < 0) {
		*dst ^= (1 << 23);
		imm = -imm;
	}
	if (!half)
		*dst = (*dst & ~0x00000fffl) | ((*dst + imm) & 0x00000fff);
	else
		*dst = (*dst & ~0x00000f0fl) |
			(imm & 0x0f) | ((imm & 0xf0) << 4);
}
238 static void reloc_write(void)
240 int i;
241 out_sym(CSBEG_NAME, OUT_CS, 0, 0);
242 for (i = 0; i < nabs; i++) {
243 if (label_isextern(absns[i])) {
244 out_rel(absns[i], OUT_CS, absos[i]);
245 } else {
246 long off = label_offset(absns[i]);
247 out_rel(CSBEG_NAME, OUT_CS, absos[i]);
248 *(long *) (cs + absos[i]) += off;
251 for (i = 0; i < nrel; i++) {
252 long *dst = (void *) cs + relos[i];
253 long off;
254 if (label_isextern(relns[i])) {
255 out_rel(relns[i], OUT_CS | OUT_REL24, relos[i]);
256 bl_imm(dst, relas[i] - 8);
257 continue;
259 off = relas[i] + label_offset(relns[i]) - relos[i] - 8;
260 /* bl instruction */
261 if (relbs[i] == 24)
262 bl_imm(dst, off);
263 else
264 ldr_imm(dst, off, relbs[i] == 8);
268 #define NDATS 1024
270 /* data pool */
271 static long dat_offs[NDATS]; /* data immediate value */
272 static long dat_locs[NDATS]; /* address of pointing ldr */
273 static char dat_names[NDATS][NAMELEN]; /* relocation data symbol name */
274 static int ndats;
276 static void pool_num(long num)
278 int idx = ndats++;
279 dat_offs[idx] = num;
280 dat_locs[idx] = cslen;
283 static void pool_reloc(char *name, long off)
285 int idx = ndats++;
286 dat_offs[idx] = off;
287 dat_locs[idx] = cslen;
288 strcpy(dat_names[idx], name);
291 static void pool_write(void)
293 int i;
294 for (i = 0; i < ndats; i++) {
295 if (dat_names[i]) {
296 long *loc = (void *) cs + dat_locs[i];
297 int off = cslen - dat_locs[i] - 8;
298 reloc_abs(dat_names[i]);
299 /* ldrh needs special care */
300 if (*loc & (1 << 26))
301 *loc = (*loc & 0xfffff000) | (off & 0x00000fff);
302 else
303 *loc = (*loc & 0xfffff0f0) | (off & 0x0f) |
304 ((off & 0xf0) << 4);
306 gen(dat_offs[i]);
/* data processing mnemonics; the index is the opcode */
static char *dpops[] = {
	"and", "eor", "sub", "rsb",
	"add", "adc", "sbc", "rsc",
	"tst", "teq", "cmp", "cmn",
	"orr", "mov", "bic", "mvn",
};

/* condition suffixes; the index is the condition code */
static char *conds[] = {
	"eq", "ne", "cs", "cc",
	"mi", "pl", "vs", "vc",
	"hi", "ls", "ge", "lt",
	"gt", "le", "al", "nv",
};

/* register aliases; the index is the register number */
static char *regs[] = {
	"a1", "a2", "a3", "a4",
	"v1", "v2", "v3", "v4",
	"v5", "v6", "v7", "v8",
	"ip", "sp", "lr", "pc",
};
325 static int get_reg(char *s)
327 int i;
328 if (s[0] == 'f' && s[1] == 'p')
329 return 11;
330 for (i = 0; i < 16; i++)
331 if (TOK2(s) == TOK2(regs[i]))
332 return i;
333 if (s[0] == 'r')
334 return atoi(s + 1);
335 return -1;
338 static void fill_buf(int fd)
340 int len = 0;
341 int nr;
342 while ((nr = read(fd, buf + len, sizeof(buf) - len - 1)) > 0)
343 len += nr;
344 buf[len] = '\0';
/* consume the next token if it equals s; return 0 if it did, 1 otherwise */
static int tok_jmp(char *s)
{
	if (strcmp(s, tok_see()) != 0)
		return 1;
	tok_get();
	return 0;
}
356 static void die(char *msg)
358 int lineno = 1;
359 int i;
360 for (i = 0; i < cur; i++)
361 if (buf[i] == '\n')
362 lineno++;
363 fprintf(stderr, "%s:%d: %s\n", src, lineno, msg);
364 exit(1);
/* consume the next token and report a syntax error unless it equals s */
static void tok_expect(char *s)
{
	char *t = tok_get();
	if (strcmp(s, t) != 0)
		die("syntax error");
}
373 static int get_cond(char *s)
375 int i;
376 if (s[0] == 'h' && s[1] == 's')
377 return 2;
378 if (s[0] == 'l' && s[1] == 'o')
379 return 3;
380 for (i = 0; i < 16; i++)
381 if (TOK2(s) == TOK2(conds[i]))
382 return i;
383 return -1;
386 static int add_op(char *s)
388 int i;
389 for (i = 0; i < 16; i++)
390 if (TOK3(s) == TOK3(dpops[i]))
391 return i;
392 return -1;
/* return the shift type for lsl/lsr/asr/ror (0..3); unknown names give 0 */
static int shiftmode(char *s)
{
	static char *modes[] = {"lsl", "lsr", "asr", "ror"};
	int i;
	for (i = 0; i < 4; i++)
		if (TOK3(s) == TOK3(modes[i]))
			return i;
	return 0;
}
/*
 * Parse the optional offset of a word/byte transfer, starting at the
 * comma before it, and return the offset bits of the instruction:
 * nothing, "#[-]imm", or "[-]Rm[, shift #n]".
 *
 * The shift amount is limited to its 5-bit field; parsing it with 8
 * bits let large values spill into the Rd field.
 */
static int ldr_word(void)
{
	int sm = 0;
	int rm;
	int shifts = 0;
	int u = 1;
	if (tok_jmp(","))
		return 0;
	if (!tok_jmp("#")) {
		/* tok_jmp() returns 1 when there is no '-', i.e. the offset is added */
		u = tok_jmp("-");
		return (u << 23) | num(tok_get(), 12);
	}
	if (!tok_jmp("-"))
		u = 0;
	rm = get_reg(tok_get());
	if (!tok_jmp(",")) {
		sm = shiftmode(tok_get());
		tok_expect("#");
		shifts = num(tok_get(), 5);
	}
	return (1 << 25) | (u << 23) | (shifts << 7) | (sm << 5) | rm;
}
/*
 * Parse the optional offset of a halfword/signed transfer, starting at
 * the comma before it, and return the offset bits together with the S
 * and H bits: nothing, "#[-]imm" or "[-]Rm".
 */
static int ldr_half(int s, int h)
{
	int base = 0x90 | (s << 6) | (h << 5);
	int up, n;
	if (tok_jmp(","))
		return base | (1 << 22);
	if (tok_jmp("#")) {
		/* register offset */
		up = tok_jmp("-");
		return base | (up << 23) | get_reg(tok_get());
	}
	/* immediate offset, split into two nibbles */
	up = tok_jmp("-");
	n = num(tok_get(), 8);
	return base | (1 << 22) | (up << 23) | (n & 0x0f) | ((n & 0xf0) << 4);
}
/* parse a possibly empty sequence of "+n" and "-n" terms and return their sum */
static long ldr_off(void)
{
	long total = 0;
	for (;;) {
		if (!tok_jmp("-"))
			total -= num(tok_get(), 32);
		else if (!tok_jmp("+"))
			total += num(tok_get(), 32);
		else
			return total;
	}
}
464 * single data transfer:
465 * +------------------------------------------+
466 * |COND|01|I|P|U|B|W|L| Rn | Rd | offset |
467 * +------------------------------------------+
469 * I: immediate/offset
470 * P: post/pre indexing
471 * U: down/up
472 * B: byte/word
473 * W: writeback
474 * L: store/load
475 * Rn: base register
476 * Rd: source/destination register
478 * I=1 offset=| immediate |
479 * I=0 offset=| shift | Rm |
481 * halfword and signed data transfer
482 * +----------------------------------------------+
483 * |COND|000|P|U|0|W|L| Rn | Rd |0000|1|S|H|1| Rm |
484 * +----------------------------------------------+
486 * +----------------------------------------------+
487 * |COND|000|P|U|1|W|L| Rn | Rd |off1|1|S|H|1|off2|
488 * +----------------------------------------------+
490 * S: singed
491 * H: halfword
493 static int ldr(char *cmd)
495 int l = 0;
496 int rd, rn;
497 int cond;
498 int w = 0;
499 int sign = 0;
500 int byte = 0;
501 int half = 0;
502 int o;
503 if (TOK3(cmd) != TOK3("ldr") && TOK3(cmd) != TOK3("str"))
504 return 1;
505 if (TOK3(cmd) == TOK3("ldr"))
506 l = 1;
507 cond = get_cond(cmd + 3);
508 cmd += cond < 0 ? 2 : 5;
509 if (cond < 0)
510 cond = 14;
511 while (*++cmd) {
512 if (*cmd == 't')
513 w = 1;
514 if (*cmd == 'b')
515 byte = 1;
516 if (*cmd == 'h')
517 half = 1;
518 if (*cmd == 's')
519 sign = 1;
521 rd = get_reg(tok_get());
522 tok_expect(",");
523 o = (cond << 28) | (l << 20) | (rd << 12) | (half << 5) | (sign << 6);
524 if (half || sign)
525 o |= 0x90;
526 else
527 o |= (1 << 26) | (byte << 22);
528 if (tok_jmp("[")) {
529 char sym[NAMELEN];
530 rn = 15;
531 if (!tok_jmp("=")) {
532 strcpy(sym, tok_case());
533 pool_reloc(sym, ldr_off());
534 } else {
535 strcpy(sym, tok_case());
536 reloc_rel(sym, ldr_off(), (half || sign) ? 8 : 12);
538 if (half || sign)
539 o |= (1 << 22);
540 else
541 o |= (1 << 26);
542 gen(o | (1 << 23) | (1 << 24) | (rn << 16));
543 return 0;
545 rn = get_reg(tok_get());
546 o |= (rn << 16);
547 if (!tok_jmp("]")) {
548 gen(o | (w << 21) | ((half || sign) ? ldr_half(sign, half) :
549 ldr_word()));
550 return 0;
552 o |= (1 << 24) | ((half || sign) ? ldr_half(sign, half) : ldr_word());
553 tok_expect("]");
554 if (!tok_jmp("!"))
555 o |= (1 << 21);
556 gen(o);
557 return 0;
/*
 * Parse a register list "{r1, r2-r3, ...}" and return its bit mask.
 * Registers outside 0..15 are reported as an error instead of being
 * used as a shift count.
 */
static int ldm_regs(void)
{
	int o = 0;
	tok_expect("{");
	while (1) {
		int r1 = get_reg(tok_get());
		int r2 = r1;
		int i;
		if (!tok_jmp("-"))
			r2 = get_reg(tok_get());
		if (r1 < 0 || r1 > 15 || r2 < 0 || r2 > 15)
			die("bad register list");
		for (i = r1; i <= r2; i++)
			o |= (1 << i);
		if (tok_jmp(","))
			break;
	}
	tok_expect("}");
	return o;
}
/*
 * Return the P (bit 24) and U (bit 23) bits for the two-letter
 * addressing suffix s of an ldm (l != 0) or stm (l == 0) instruction.
 *
 * ia/ib/da/db: the first letter selects increment (U=1) or decrement,
 * the second selects before (P=1) or after.  The two were swapped
 * before, so that e.g. "ia" was encoded as "db".
 *
 * fd/fa/ed/ea: the stack forms map to different P/U bits for loads and
 * stores.
 */
static int ldm_type(char *s, int l)
{
	int p = 0;
	int u = 0;
	if (*s == 'i' || *s == 'd') {
		p = s[1] == 'b';
		u = s[0] == 'i';
	} else {
		p = s[0] == (l ? 'e' : 'f');
		u = s[1] == (l ? 'd' : 'a');
	}
	return (p << 24) | (u << 23);
}
/*
 * block data transfer
 * +----------------------------------------+
 * |COND|100|P|U|S|W|L| Rn |    reg list    |
 * +----------------------------------------+
 *
 * P: post/pre indexing
 * U: down/up
 * S: PSR/user bit
 * W: write back
 * L: load/store
 * Rn: base register
 *
 * Assemble an ldm/stm instruction whose mnemonic is cmd.  Returns 1 if
 * cmd is not ldm or stm, 0 after generating the instruction.
 */
static int ldm(char *cmd)
{
	int isload, cond, rn, wback, psr;
	int o = 4 << 25;
	if (TOK3(cmd) != TOK3("ldm") && TOK3(cmd) != TOK3("stm"))
		return 1;
	isload = TOK3(cmd) == TOK3("ldm");
	cond = get_cond(cmd + 3);
	/* the addressing suffix follows the condition, if there is one */
	o |= ldm_type(cmd + (cond < 0 ? 3 : 5), isload);
	rn = get_reg(tok_get());
	wback = !tok_jmp("!");
	tok_expect(",");
	if (cond < 0)
		cond = 14;
	o |= ldm_regs();
	psr = !tok_jmp("^");
	gen(o | (cond << 28) | (psr << 22) | (wback << 21) |
		(isload << 20) | (rn << 16));
	return 0;
}
/*
 * Encode n as a data processing immediate: an 8-bit value in bits 0-7
 * and a rotation count in bits 8-11; the value is rotated right by
 * twice the count.
 *
 * The simple case, an 8-bit value shifted left by an even amount, is
 * encoded exactly as before.  Values that need a rotation wrapping
 * around bit 31 (such as 0xc000003f) are now encoded too.  If n cannot
 * be encoded, the result decodes to a different value, which callers
 * detect by decoding it again.
 */
static int add_encimm(unsigned n)
{
	unsigned long v = n & 0xfffffffful;
	int i = 0;
	int rot;
	while (i < 12 && (v >> ((4 + i) << 1)))
		i++;
	/* an 8-bit value shifted left by 2 * i bits */
	if (((v >> (i << 1)) << (i << 1)) == v)
		return (v >> (i << 1)) | (((16 - i) & 0x0f) << 8);
	/* look for a rotation that brings all set bits into the low byte */
	for (rot = 1; rot < 16; rot++) {
		int sh = rot << 1;
		unsigned long r = ((v << sh) | (v >> (32 - sh))) & 0xfffffffful;
		if (r <= 0xff)
			return r | (rot << 8);
	}
	/* not encodable: the truncated form does not decode back to n */
	return (v >> (i << 1)) | (((16 - i) & 0x0f) << 8);
}
/*
 * Decode a data processing immediate: the 8-bit value in bits 0-7 of n
 * rotated right by twice the count in bits 8-11.  Unlike the plain left
 * shift used before, bits rotated out of the low end reappear at the
 * top, so every rotation decodes correctly.
 */
static long add_decimm(unsigned n)
{
	unsigned long imm = n & 0xff;
	int sh = ((n >> 8) & 0x0f) << 1;
	if (!sh)
		return imm;
	return ((imm >> sh) | (imm << (32 - sh))) & 0xfffffffful;
}
/*
 * Parse the second operand of a data processing instruction and return
 * its bits: "#imm", "Rm", "Rm, shift #n" or "Rm, shift Rs".
 *
 * The immediate shift amount occupies five bits (7-11); parsing it with
 * only four bits turned e.g. "lsl #16" into "lsl #0".
 */
static int add_op2(void)
{
	int sm, rm;
	if (!tok_jmp("#")) {
		long n = num(tok_get(), 32);
		long imm = add_encimm(n);
		/* decoding must give the value back, or it is not encodable */
		if (add_decimm(imm) != n)
			die("cannot encode immediate");
		return (1 << 25) | imm;
	}
	rm = get_reg(tok_get());
	if (tok_jmp(","))
		return rm;
	sm = shiftmode(tok_get());
	if (!tok_jmp("#"))
		return (num(tok_get(), 5) << 7) | (sm << 5) | (rm << 0);
	return (get_reg(tok_get()) << 8) | (sm << 5) | (1 << 4) | (rm << 0);
}
/*
 * data processing:
 * +---------------------------------------+
 * |COND|00|I| op |S| Rn | Rd |  operand2  |
 * +---------------------------------------+
 *
 * S: set condition code
 * Rn: first operand
 * Rd: destination operand
 *
 * I=0 operand2=| shift | Rm |
 * I=1 operand2=|rota|  imm  |
 *
 * Assemble a data processing instruction whose mnemonic is cmd.
 * Returns 1 if cmd is not one of the mnemonics in dpops, 0 after
 * generating the instruction.
 */
static int add(char *cmd)
{
	int op, cond;
	int rd = 0, rn = 0;
	int nops = 2;
	int s = 0;
	op = add_op(cmd);
	if (op < 0)
		return 1;
	cond = get_cond(cmd + 3);
	/* the 's' flag follows the mnemonic (3) and the condition (2), if any */
	s = cmd[cond < 0 ? 3 : 5] == 's';
	/* mov and mvn have no first operand */
	if (op == 13 || op == 15)
		nops = 1;
	/* tst, teq, cmp and cmn always set the flags and have no destination */
	if ((op & 0x0c) == 0x08)
		s = 1;
	if (cond < 0)
		cond = 14;
	if ((op & 0xc) != 0x8) {
		rd = get_reg(tok_get());
		tok_expect(",");
	}
	if (nops > 1) {
		rn = get_reg(tok_get());
		tok_expect(",");
	}
	gen((cond << 28) | (s << 20) | (op << 21) | (rn << 16) | (rd << 12) | add_op2());
	return 0;
}
/*
 * multiply
 * +----------------------------------------+
 * |COND|000000|A|S| Rd | Rn | Rs |1001| Rm |
 * +----------------------------------------+
 *
 * Rd: destination
 * A: accumulate
 * S: set condition codes
 *
 * Assemble "mul Rd, Rm, Rs" or "mla Rd, Rm, Rs, Rn".  Returns 1 if cmd
 * is neither, 0 after generating the instruction.
 */
static int mul(char *cmd)
{
	int cond;
	int rd, rm, rs, rn = 0;
	int s = 0;
	int a = 0;
	if (TOK3(cmd) != TOK3("mul") && TOK3(cmd) != TOK3("mla"))
		return 1;
	if (TOK3(cmd) == TOK3("mla"))
		a = 1;
	cond = get_cond(cmd + 3);
	/* the 's' flag follows the mnemonic (3) and the condition (2), if any */
	s = cmd[cond < 0 ? 3 : 5] == 's';
	if (cond < 0)
		cond = 14;
	rd = get_reg(tok_get());
	tok_expect(",");
	rm = get_reg(tok_get());
	tok_expect(",");
	rs = get_reg(tok_get());
	if (a) {
		tok_expect(",");
		rn = get_reg(tok_get());
	}
	gen((cond << 28) | (a << 21) | (s << 20) | (rd << 16) |
		(rn << 12) | (rs << 8) | (9 << 4) | (rm << 0));
	return 0;
}
/*
 * software interrupt:
 * +----------------------------------+
 * |COND|1111|                        |
 * +----------------------------------+
 *
 * Assemble "swi [#]n".  Returns 1 if cmd is not swi, 0 after generating
 * the instruction.
 */
static int swi(char *cmd)
{
	int cond, n;
	if (TOK3(cmd) != TOK3("swi"))
		return 1;
	cond = get_cond(cmd + 3);
	if (cond < 0)
		cond = 14;
	/* the '#' before the number is optional */
	tok_jmp("#");
	n = num(tok_get(), 24);
	gen((cond << 28) | (0xf << 24) | n);
	return 0;
}
770 * branch:
771 * +-----------------------------------+
772 * |COND|101|L| offset |
773 * +-----------------------------------+
775 * L: link
777 static int bl(char *cmd)
779 int l = 0;
780 int cond;
781 char sym[NAMELEN];
782 if (*cmd++ != 'b')
783 return 1;
784 if (*cmd == 'l') {
785 l = 1;
786 cmd++;
788 cond = get_cond(cmd);
789 if (cond == -1)
790 cond = 14;
791 strcpy(sym, tok_case());
792 reloc_rel(sym, ldr_off(), 24);
793 gen((cond << 28) | (5 << 25) | (l << 24));
794 return 0;
/*
 * move PSR to a register
 * +-------------------------------------+
 * |COND|00010|P|001111| Rd |000000000000|
 * +-------------------------------------+
 *
 * move a register to PSR
 * +--------------------------------------+
 * |COND|00|I|10|P|1010001111| source op  |
 * +--------------------------------------+
 *
 * P: CPSR/SPSR_cur
 *
 * I=0 source=|00000000| Rm |
 * I=1 source=|rot | imm_u8 |
 *
 * Not implemented: always returns 1, so no statement is matched here.
 */
static int msr(char *cmd)
{
	(void) cmd;
	return 1;
}
/*
 * Handle an assembler directive.  Returns 1 if cmd does not start with
 * '.', and 0 otherwise; directives other than .extern, .global and
 * .word are accepted without doing anything.
 */
static int directive(char *cmd)
{
	if (cmd[0] != '.')
		return 1;
	if (strcmp(cmd, ".extern") == 0)
		label_extern(tok_case());
	if (strcmp(cmd, ".global") == 0)
		label_global(tok_case());
	if (strcmp(cmd, ".word") != 0)
		return 0;
	/* a comma separated list of numbers and "=symbol[+/-offset]" items */
	do {
		if (tok_jmp("=")) {
			gen(num(tok_get(), 32));
		} else {
			reloc_abs(tok_case());
			gen(ldr_off());
		}
	} while (!tok_jmp(","));
	return 0;
}
841 static int stmt(void)
843 char first[TOKLEN];
844 char first_case[TOKLEN];
845 strcpy(first, tok_see());
846 strcpy(first_case, tok_case());
847 /* a label */
848 if (!tok_jmp(":")) {
849 label_local(first_case);
850 return 0;
852 if (!directive(first))
853 return 0;
854 if (!add(first))
855 return 0;
856 if (!mul(first))
857 return 0;
858 if (!ldr(first))
859 return 0;
860 if (!ldm(first))
861 return 0;
862 if (!msr(first))
863 return 0;
864 if (!swi(first))
865 return 0;
866 if (!bl(first))
867 return 0;
868 return 1;
871 int main(int argc, char *argv[])
873 char obj[128] = "";
874 int ofd, ifd;
875 int i = 1;
876 while (i < argc && argv[i][0] == '-') {
877 if (argv[i][1] == 'o')
878 strcpy(obj, argv[++i]);
879 i++;
881 if (i == argc) {
882 fprintf(stderr, "neatcc: no file given\n");
883 return 1;
885 strcpy(src, argv[i]);
886 ifd = open(src, O_RDONLY);
887 fill_buf(ifd);
888 close(ifd);
889 out_init(0);
890 while (!stmt())
892 label_write();
893 pool_write();
894 reloc_write();
895 if (!*obj) {
896 char *s = obj;
897 strcpy(obj, src);
898 while (*s && *s != '.')
899 s++;
900 *s++ = '.';
901 *s++ = 'o';
902 *s++ = '\0';
904 ofd = open(obj, O_WRONLY | O_TRUNC | O_CREAT, 0600);
905 out_write(ofd, cs, cslen, cs, 0);
906 close(ofd);
907 return 0;