Make get_tok_str support NULL as second param.
[tinycc.git] / arm-gen.c
blob1ee008f02f4181f0b35f3a4a1833d8560a39faab
1 /*
2 * ARMv4 code generator for TCC
4 * Copyright (c) 2003 Daniel Glöckner
5 * Copyright (c) 2012 Thomas Preud'homme
7 * Based on i386-gen.c by Fabrice Bellard
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #ifdef TARGET_DEFS_ONLY
26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
27 #error "Currently TinyCC only supports float computation with VFP instructions"
28 #endif
30 /* number of available registers */
31 #ifdef TCC_ARM_VFP
32 #define NB_REGS 13
33 #else
34 #define NB_REGS 9
35 #endif
37 #ifndef TCC_ARM_VERSION
38 # define TCC_ARM_VERSION 5
39 #endif
41 /* A register can belong to several classes. The classes must be
42 sorted from more general to more precise (see the gv2() code, which
43 makes assumptions about this ordering). */
44 #define RC_INT 0x0001 /* generic integer register */
45 #define RC_FLOAT 0x0002 /* generic float register */
46 #define RC_R0 0x0004
47 #define RC_R1 0x0008
48 #define RC_R2 0x0010
49 #define RC_R3 0x0020
50 #define RC_R12 0x0040
51 #define RC_F0 0x0080
52 #define RC_F1 0x0100
53 #define RC_F2 0x0200
54 #define RC_F3 0x0400
55 #ifdef TCC_ARM_VFP
56 #define RC_F4 0x0800
57 #define RC_F5 0x1000
58 #define RC_F6 0x2000
59 #define RC_F7 0x4000
60 #endif
61 #define RC_IRET RC_R0 /* function return: integer register */
62 #define RC_LRET RC_R1 /* function return: second integer register */
63 #define RC_FRET RC_F0 /* function return: float register */
65 /* pretty names for the registers */
66 enum {
67 TREG_R0 = 0,
68 TREG_R1,
69 TREG_R2,
70 TREG_R3,
71 TREG_R12,
72 TREG_F0,
73 TREG_F1,
74 TREG_F2,
75 TREG_F3,
76 #ifdef TCC_ARM_VFP
77 TREG_F4,
78 TREG_F5,
79 TREG_F6,
80 TREG_F7,
81 #endif
84 #ifdef TCC_ARM_VFP
85 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
86 #endif
88 /* return registers for function */
89 #define REG_IRET TREG_R0 /* single word int return register */
90 #define REG_LRET TREG_R1 /* second word return register (for long long) */
91 #define REG_FRET TREG_F0 /* float return register */
93 #ifdef TCC_ARM_EABI
94 #define TOK___divdi3 TOK___aeabi_ldivmod
95 #define TOK___moddi3 TOK___aeabi_ldivmod
96 #define TOK___udivdi3 TOK___aeabi_uldivmod
97 #define TOK___umoddi3 TOK___aeabi_uldivmod
98 #endif
100 /* defined if function parameters must be evaluated in reverse order */
101 #define INVERT_FUNC_PARAMS
103 /* defined if structures are passed as pointers. Otherwise structures
104 are directly pushed on stack. */
105 /* #define FUNC_STRUCT_PARAM_AS_PTR */
107 /* pointer size, in bytes */
108 #define PTR_SIZE 4
110 /* long double size and alignment, in bytes */
111 #ifdef TCC_ARM_VFP
112 #define LDOUBLE_SIZE 8
113 #endif
115 #ifndef LDOUBLE_SIZE
116 #define LDOUBLE_SIZE 8
117 #endif
119 #ifdef TCC_ARM_EABI
120 #define LDOUBLE_ALIGN 8
121 #else
122 #define LDOUBLE_ALIGN 4
123 #endif
125 /* maximum alignment (for aligned attribute support) */
126 #define MAX_ALIGN 8
128 #define CHAR_IS_UNSIGNED
130 /******************************************************/
131 /* ELF defines */
133 #define EM_TCC_TARGET EM_ARM
135 /* relocation type for 32 bit data relocation */
136 #define R_DATA_32 R_ARM_ABS32
137 #define R_DATA_PTR R_ARM_ABS32
138 #define R_JMP_SLOT R_ARM_JUMP_SLOT
139 #define R_COPY R_ARM_COPY
141 #define ELF_START_ADDR 0x00008000
142 #define ELF_PAGE_SIZE 0x1000
144 enum float_abi {
145 ARM_SOFTFP_FLOAT,
146 ARM_HARD_FLOAT,
149 /******************************************************/
150 #else /* ! TARGET_DEFS_ONLY */
151 /******************************************************/
152 #include "tcc.h"
154 enum float_abi float_abi;
/* Register class bitmask of each of the NB_REGS registers, indexed by
   register number (same order as the TREG_* enumerators). Every entry
   combines one generic class (RC_INT or RC_FLOAT) with the bit that is
   specific to that register. */
156 ST_DATA const int reg_classes[NB_REGS] = {
157 /* r0 */ RC_INT | RC_R0,
158 /* r1 */ RC_INT | RC_R1,
159 /* r2 */ RC_INT | RC_R2,
160 /* r3 */ RC_INT | RC_R3,
161 /* r12 */ RC_INT | RC_R12,
162 /* f0 */ RC_FLOAT | RC_F0,
163 /* f1 */ RC_FLOAT | RC_F1,
164 /* f2 */ RC_FLOAT | RC_F2,
165 /* f3 */ RC_FLOAT | RC_F3,
/* the four extra float registers only exist in the VFP configuration */
166 #ifdef TCC_ARM_VFP
167 /* d4/s8 */ RC_FLOAT | RC_F4,
168 /* d5/s10 */ RC_FLOAT | RC_F5,
169 /* d6/s12 */ RC_FLOAT | RC_F6,
170 /* d7/s14 */ RC_FLOAT | RC_F7,
171 #endif
174 static int func_sub_sp_offset, last_itod_magic;
175 static int leaffunc;
177 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
178 static CType float_type, double_type, func_float_type, func_double_type;
179 ST_FUNC void arm_init(struct TCCState *s)
181 float_type.t = VT_FLOAT;
182 double_type.t = VT_DOUBLE;
183 func_float_type.t = VT_FUNC;
184 func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
185 func_double_type.t = VT_FUNC;
186 func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
188 float_abi = s->float_abi;
189 #ifndef TCC_ARM_HARDFLOAT
190 tcc_warning("soft float ABI currently not supported: default to softfp");
191 #endif
193 #else
194 #define func_float_type func_old_type
195 #define func_double_type func_old_type
196 #define func_ldouble_type func_old_type
197 ST_FUNC void arm_init(struct TCCState *s)
199 #if !defined (TCC_ARM_VFP)
200 tcc_warning("Support for FPA is deprecated and will be removed in next"
201 " release");
202 #endif
203 #if !defined (TCC_ARM_EABI)
204 tcc_warning("Support for OABI is deprecated and will be removed in next"
205 " release");
206 #endif
208 #endif
210 static int two2mask(int a,int b) {
211 return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
214 static int regmask(int r) {
215 return reg_classes[r]&~(RC_INT|RC_FLOAT);
218 /******************************************************/
#ifdef TCC_ARM_EABI
/* Return the path of the ELF interpreter matching the float ABI stored
   in the compiler state 's'. */
char *default_elfinterp(struct TCCState *s)
{
  return s->float_abi == ARM_HARD_FLOAT
         ? "/lib/ld-linux-armhf.so.3"
         : "/lib/ld-linux.so.3";
}
#endif
230 void o(uint32_t i)
232 /* this is a good place to start adding big-endian support*/
233 int ind1;
235 ind1 = ind + 4;
236 if (!cur_text_section)
237 tcc_error("compiler error! This happens f.ex. if the compiler\n"
238 "can't evaluate constant expressions outside of a function.");
239 if (ind1 > cur_text_section->data_allocated)
240 section_realloc(cur_text_section, ind1);
241 cur_text_section->data[ind++] = i&255;
242 i>>=8;
243 cur_text_section->data[ind++] = i&255;
244 i>>=8;
245 cur_text_section->data[ind++] = i&255;
246 i>>=8;
247 cur_text_section->data[ind++] = i;
/* Try to encode constant 'c' as the immediate operand of the ARM
   data-processing instruction 'op' (immediate field left empty).

   An immediate is an 8-bit value rotated right by an even amount. When 'c'
   itself cannot be encoded, the complementary instruction is tried with the
   negated (add <-> sub) or inverted (mov <-> mvn, and <-> bic) constant.
   xor/and/orr with an all-ones constant are turned into the equivalent
   mvn/mov forms.

   Returns the complete instruction word, or 0 if no single instruction
   can express the operation. */
static uint32_t stuff_const(uint32_t op, uint32_t c)
{
  int try_neg = 0;
  uint32_t nc = 0, negop = 0;

  switch (op & 0x1F00000) {
  case 0x800000: /* add */
  case 0x400000: /* sub */
    try_neg = 1;
    negop = op ^ 0xC00000;
    nc = -c;
    break;
  case 0x1A00000: /* mov */
  case 0x1E00000: /* mvn */
    try_neg = 1;
    negop = op ^ 0x400000;
    nc = ~c;
    break;
  case 0x200000: /* xor */
    if (c == ~(uint32_t)0)
      return (op & 0xF010F000) | ((op >> 16) & 0xF) | 0x1E00000;
    break;
  case 0x0: /* and */
    if (c == ~(uint32_t)0)
      return (op & 0xF010F000) | ((op >> 16) & 0xF) | 0x1A00000;
    /* fall through: and with c can be expressed as bic with ~c */
  case 0x1C00000: /* bic */
    try_neg = 1;
    negop = op ^ 0x1C00000;
    nc = ~c;
    break;
  case 0x1800000: /* orr */
    if (c == ~(uint32_t)0)
      return (op & 0xFFF0FFFF) | 0x1E00000;
    break;
  }
  do {
    uint32_t m;
    int i;

    if (c < 256) /* catch undefined <<32 */
      return op | c;
    for (i = 2; i < 32; i += 2) {
      /* mask of the 8-bit window rotated right by i; computed on uint32_t
         because shifting a signed 0xff into/past bit 31 is undefined */
      m = ((uint32_t)0xff >> i) | ((uint32_t)0xff << (32 - i));
      if (!(c & ~m))
        return op | ((uint32_t)i << 7) | (c << i) | (c >> (32 - i));
    }
    op = negop;
    c = nc;
  } while (try_neg--);
  return 0;
}
303 //only add,sub
/* Emit instruction 'op' (add or sub, see above) with constant 'v', using
   several instructions when stuff_const() cannot encode 'v' in one.
   Attempts, in order: one instruction; two instructions; two instructions
   of the opposite operation on -v; three instructions; three instructions
   of the opposite operation on -v; finally an unconditional split of 'v'
   into its four bytes (four instructions). */
304 void stuff_const_harder(uint32_t op, uint32_t v) {
305 uint32_t x;
306 x=stuff_const(op,v);
307 if(x)
308 o(x);
309 else {
310 uint32_t a[16], nv, no, o2, n2;
311 int i,j,k;
312 a[0]=0xff;
/* o2: 'op' with bits 16-19 replaced by a copy of its bits 12-15; used for
   every instruction after the first one (presumably so that they
   accumulate into the destination register -- confirm against the ARM
   data-processing encoding) */
313 o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
/* a[i] is the byte mask 0xff rotated right by 2*i bits */
314 for(i=1;i<16;i++)
315 a[i]=(a[i-1]>>2)|(a[i-1]<<30);
/* two instructions: v covered by two rotated byte masks */
316 for(i=0;i<12;i++)
317 for(j=i<4?i+12:15;j>=i+4;j--)
318 if((v&(a[i]|a[j]))==v) {
319 o(stuff_const(op,v&a[i]));
320 o(stuff_const(o2,v&a[j]));
321 return;
/* same search with the opcode bits 0xC00000 flipped and the negated value */
323 no=op^0xC00000;
324 n2=o2^0xC00000;
325 nv=-v;
326 for(i=0;i<12;i++)
327 for(j=i<4?i+12:15;j>=i+4;j--)
328 if((nv&(a[i]|a[j]))==nv) {
329 o(stuff_const(no,nv&a[i]));
330 o(stuff_const(n2,nv&a[j]));
331 return;
/* three instructions: v covered by three rotated byte masks */
333 for(i=0;i<8;i++)
334 for(j=i+4;j<12;j++)
335 for(k=i<4?i+12:15;k>=j+4;k--)
336 if((v&(a[i]|a[j]|a[k]))==v) {
337 o(stuff_const(op,v&a[i]));
338 o(stuff_const(o2,v&a[j]));
339 o(stuff_const(o2,v&a[k]));
340 return;
/* three instructions on the negated value (n2 computed above is reused) */
342 no=op^0xC00000;
343 nv=-v;
344 for(i=0;i<8;i++)
345 for(j=i+4;j<12;j++)
346 for(k=i<4?i+12:15;k>=j+4;k--)
347 if((nv&(a[i]|a[j]|a[k]))==nv) {
348 o(stuff_const(no,nv&a[i]));
349 o(stuff_const(n2,nv&a[j]));
350 o(stuff_const(n2,nv&a[k]));
351 return;
/* last resort: one instruction per byte of v (a[0], a[4], a[8] and a[12]
   are the four byte-aligned masks) */
353 o(stuff_const(op,v&a[0]));
354 o(stuff_const(o2,v&a[4]));
355 o(stuff_const(o2,v&a[8]));
356 o(stuff_const(o2,v&a[12]));
360 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
362 addr-=pos+8;
363 addr/=4;
364 if(addr>=0x1000000 || addr<-0x1000000) {
365 if(fail)
366 tcc_error("FIXME: function bigger than 32MB");
367 return 0;
369 return 0x0A000000|(addr&0xffffff);
372 int decbranch(int pos)
374 int x;
375 x=*(uint32_t *)(cur_text_section->data + pos);
376 x&=0x00ffffff;
377 if(x&0x800000)
378 x-=0x1000000;
379 return x*4+pos+8;
382 /* output a symbol and patch all calls to it */
383 void gsym_addr(int t, int a)
385 uint32_t *x;
386 int lt;
387 while(t) {
388 x=(uint32_t *)(cur_text_section->data + t);
389 t=decbranch(lt=t);
390 if(a==lt+4)
391 *x=0xE1A00000; // nop
392 else {
393 *x &= 0xff000000;
394 *x |= encbranch(lt,a,1);
399 void gsym(int t)
401 gsym_addr(t, ind);
404 #ifdef TCC_ARM_VFP
405 static uint32_t vfpr(int r)
407 if(r<TREG_F0 || r>TREG_F7)
408 tcc_error("compiler error! register %i is no vfp register",r);
409 return r-5;
411 #else
412 static uint32_t fpr(int r)
414 if(r<TREG_F0 || r>TREG_F3)
415 tcc_error("compiler error! register %i is no fpa register",r);
416 return r-5;
418 #endif
/* Map an integer register number to the ARM core register number used in
   instruction encodings: 0-3 map to themselves, 4 maps to r12 and 14 (lr)
   is passed through. Any other number is a compiler error. */
static uint32_t intr(int r)
{
  int in_range = r >= 0 && r <= 4;

  if (r == 4)
    return 12;
  if (!in_range && r != 14)
    tcc_error("compiler error! register %i is no int register",r);
  return r;
}
/* Make the offset of a memory access fit the addressing mode.

   If *off is larger than 'maxoff' or not a multiple of 1 << shift, an
   add/sub into lr (register 14) is emitted that absorbs the part of the
   offset that does not fit; *base is switched to lr and *off (possibly
   *sgn too) is updated to the remaining displacement.

   base:   in/out base register number
   off:    in/out offset magnitude
   sgn:    in/out non-zero when the offset is to be subtracted
   maxoff: largest offset the instruction can encode
   shift:  log2 of the required offset alignment */
static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
{
  uint32_t insn, enc;
  int rounded_up;

  if (!(*off > maxoff || (*off & ((1 << shift) - 1))))
    return; /* offset already encodable */

  insn = *sgn ? 0xE240E000 : 0xE280E000;
  insn |= (*base) << 16;
  *base = 14; /* lr */

  /* first choice: move the high part of the offset into lr */
  enc = stuff_const(insn, *off & ~maxoff);
  if (enc) {
    o(enc);
    *off &= maxoff;
    return;
  }

  /* second choice: overshoot to the next multiple and come back with the
     opposite sign */
  rounded_up = (*off + maxoff) & ~maxoff;
  enc = stuff_const(insn, rounded_up);
  if (enc) {
    o(enc);
    *sgn = !*sgn;
    *off = rounded_up - *off;
    return;
  }

  /* otherwise use as many instructions as it takes */
  stuff_const_harder(insn, *off & ~maxoff);
  *off &= maxoff;
}
456 static uint32_t mapcc(int cc)
458 switch(cc)
460 case TOK_ULT:
461 return 0x30000000; /* CC/LO */
462 case TOK_UGE:
463 return 0x20000000; /* CS/HS */
464 case TOK_EQ:
465 return 0x00000000; /* EQ */
466 case TOK_NE:
467 return 0x10000000; /* NE */
468 case TOK_ULE:
469 return 0x90000000; /* LS */
470 case TOK_UGT:
471 return 0x80000000; /* HI */
472 case TOK_Nset:
473 return 0x40000000; /* MI */
474 case TOK_Nclear:
475 return 0x50000000; /* PL */
476 case TOK_LT:
477 return 0xB0000000; /* LT */
478 case TOK_GE:
479 return 0xA0000000; /* GE */
480 case TOK_LE:
481 return 0xD0000000; /* LE */
482 case TOK_GT:
483 return 0xC0000000; /* GT */
485 tcc_error("unexpected condition code");
486 return 0xE0000000; /* AL */
489 static int negcc(int cc)
491 switch(cc)
493 case TOK_ULT:
494 return TOK_UGE;
495 case TOK_UGE:
496 return TOK_ULT;
497 case TOK_EQ:
498 return TOK_NE;
499 case TOK_NE:
500 return TOK_EQ;
501 case TOK_ULE:
502 return TOK_UGT;
503 case TOK_UGT:
504 return TOK_ULE;
505 case TOK_Nset:
506 return TOK_Nclear;
507 case TOK_Nclear:
508 return TOK_Nset;
509 case TOK_LT:
510 return TOK_GE;
511 case TOK_GE:
512 return TOK_LT;
513 case TOK_LE:
514 return TOK_GT;
515 case TOK_GT:
516 return TOK_LE;
518 tcc_error("unexpected condition code");
519 return TOK_NE;
522 /* Load the value described by 'sv' into register 'r'.
   Lvalues are read from memory (relative to fp, to lr after the address
   has been loaded there, or to an integer register); non-lvalues cover
   constants, addresses of locals, comparison results (VT_CMP), jump
   results (VT_JMP / VT_JMPI) and register-to-register copies. Any other
   combination ends in tcc_error(). */
523 void load(int r, SValue *sv)
525 int v, ft, fc, fr, sign;
526 uint32_t op;
527 SValue v1;
529 fr = sv->r;
530 ft = sv->type.t;
531 fc = sv->c.ul;
/* split the constant into a magnitude (fc) and a sign flag */
533 if(fc>=0)
534 sign=0;
535 else {
536 sign=1;
537 fc=-fc;
540 v = fr & VT_VALMASK;
541 if (fr & VT_LVAL) {
/* memory operand: determine the base register, then emit the load */
542 uint32_t base = 0xB; // fp
543 if(v == VT_LLOCAL) {
/* the local slot holds a pointer: load that pointer into lr first */
544 v1.type.t = VT_PTR;
545 v1.r = VT_LOCAL | VT_LVAL;
546 v1.c.ul = sv->c.ul;
547 load(base=14 /* lr */, &v1);
548 fc=sign=0;
549 v=VT_LOCAL;
550 } else if(v == VT_CONST) {
/* absolute address: materialise it in lr, then load through lr */
551 v1.type.t = VT_PTR;
552 v1.r = fr&~VT_LVAL;
553 v1.c.ul = sv->c.ul;
554 v1.sym=sv->sym;
555 load(base=14, &v1);
556 fc=sign=0;
557 v=VT_LOCAL;
558 } else if(v < VT_CONST) {
/* address already in an integer register */
559 base=intr(v);
560 fc=sign=0;
561 v=VT_LOCAL;
563 if(v == VT_LOCAL) {
564 if(is_float(ft)) {
/* float loads take a word-aligned offset of at most 1020 */
565 calcaddr(&base,&fc,&sign,1020,2);
566 #ifdef TCC_ARM_VFP
567 op=0xED100A00; /* flds */
568 if(!sign)
569 op|=0x800000;
570 if ((ft & VT_BTYPE) != VT_FLOAT)
571 op|=0x100; /* flds -> fldd */
572 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
573 #else
574 op=0xED100100;
575 if(!sign)
576 op|=0x800000;
577 #if LDOUBLE_SIZE == 8
578 if ((ft & VT_BTYPE) != VT_FLOAT)
579 op|=0x8000;
580 #else
581 if ((ft & VT_BTYPE) == VT_DOUBLE)
582 op|=0x8000;
583 else if ((ft & VT_BTYPE) == VT_LDOUBLE)
584 op|=0x400000;
585 #endif
586 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
587 #endif
588 } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
589 || (ft & VT_BTYPE) == VT_SHORT) {
/* signed byte and (un)signed halfword loads: offset limited to 255 and
   split into two nibbles in the encoding */
590 calcaddr(&base,&fc,&sign,255,0);
591 op=0xE1500090;
592 if ((ft & VT_BTYPE) == VT_SHORT)
593 op|=0x20;
594 if ((ft & VT_UNSIGNED) == 0)
595 op|=0x40;
596 if(!sign)
597 op|=0x800000;
598 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
599 } else {
/* word and unsigned byte loads: offset limited to 4095 */
600 calcaddr(&base,&fc,&sign,4095,0);
601 op=0xE5100000;
602 if(!sign)
603 op|=0x800000;
604 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
605 op|=0x400000;
606 o(op|(intr(r)<<12)|fc|(base<<16));
608 return;
610 } else {
611 if (v == VT_CONST) {
/* constant: a single mov/mvn when encodable; otherwise (or when a symbol
   is involved) the word is emitted inline after a pc-relative load and a
   branch that skips it */
612 op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
613 if (fr & VT_SYM || !op) {
614 o(0xE59F0000|(intr(r)<<12));
615 o(0xEA000000);
616 if(fr & VT_SYM)
617 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
618 o(sv->c.ul);
619 } else
620 o(op);
621 return;
622 } else if (v == VT_LOCAL) {
/* address of a local: fp + constant, with the same inline-word fallback
   followed by an add of fp */
623 op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
624 if (fr & VT_SYM || !op) {
625 o(0xE59F0000|(intr(r)<<12));
626 o(0xEA000000);
627 if(fr & VT_SYM) // needed ?
628 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
629 o(sv->c.ul);
630 o(0xE08B0000|(intr(r)<<12)|intr(r));
631 } else
632 o(op);
633 return;
634 } else if(v == VT_CMP) {
/* comparison result: conditional mov of 1, then of 0 under the negated
   condition */
635 o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
636 o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
637 return;
638 } else if (v == VT_JMP || v == VT_JMPI) {
/* jump result: the fall-through path loads t and skips the next
   instruction; the pending jumps are resolved to that next instruction,
   which loads t^1 */
639 int t;
640 t = v & 1;
641 o(0xE3A00000|(intr(r)<<12)|t);
642 o(0xEA000000);
643 gsym(sv->c.ul);
644 o(0xE3A00000|(intr(r)<<12)|(t^1));
645 return;
646 } else if (v < VT_CONST) {
/* plain register to register copy */
647 if(is_float(ft))
648 #ifdef TCC_ARM_VFP
649 o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
650 #else
651 o(0xEE008180|(fpr(r)<<12)|fpr(v));
652 #endif
653 else
654 o(0xE1A00000|(intr(r)<<12)|intr(v));
655 return;
658 tcc_error("load unimplemented!");
661 /* Store register 'r' into the lvalue described by 'sv'.
   The destination is addressed relative to fp, to an integer register
   holding the address, or to lr after an absolute address has been loaded
   there. Any destination that cannot be reduced to one of these ends in
   tcc_error(). */
662 void store(int r, SValue *sv)
664 SValue v1;
665 int v, ft, fc, fr, sign;
666 uint32_t op;
668 fr = sv->r;
669 ft = sv->type.t;
670 fc = sv->c.ul;
/* split the constant into a magnitude (fc) and a sign flag */
672 if(fc>=0)
673 sign=0;
674 else {
675 sign=1;
676 fc=-fc;
679 v = fr & VT_VALMASK;
680 if (fr & VT_LVAL || fr == VT_LOCAL) {
681 uint32_t base = 0xb;
682 if(v < VT_CONST) {
/* address already in an integer register */
683 base=intr(v);
684 v=VT_LOCAL;
685 fc=sign=0;
686 } else if(v == VT_CONST) {
/* absolute address: materialise it in lr, then store through lr */
687 v1.type.t = ft;
688 v1.r = fr&~VT_LVAL;
689 v1.c.ul = sv->c.ul;
690 v1.sym=sv->sym;
691 load(base=14, &v1);
692 fc=sign=0;
693 v=VT_LOCAL;
695 if(v == VT_LOCAL) {
696 if(is_float(ft)) {
/* float stores take a word-aligned offset of at most 1020 */
697 calcaddr(&base,&fc,&sign,1020,2);
698 #ifdef TCC_ARM_VFP
699 op=0xED000A00; /* fsts */
700 if(!sign)
701 op|=0x800000;
702 if ((ft & VT_BTYPE) != VT_FLOAT)
703 op|=0x100; /* fsts -> fstd */
704 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
705 #else
706 op=0xED000100;
707 if(!sign)
708 op|=0x800000;
709 #if LDOUBLE_SIZE == 8
710 if ((ft & VT_BTYPE) != VT_FLOAT)
711 op|=0x8000;
712 #else
713 if ((ft & VT_BTYPE) == VT_DOUBLE)
714 op|=0x8000;
715 if ((ft & VT_BTYPE) == VT_LDOUBLE)
716 op|=0x400000;
717 #endif
718 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
719 #endif
720 return;
721 } else if((ft & VT_BTYPE) == VT_SHORT) {
/* halfword store: offset limited to 255 and split into two nibbles */
722 calcaddr(&base,&fc,&sign,255,0);
723 op=0xE14000B0;
724 if(!sign)
725 op|=0x800000;
726 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
727 } else {
/* word and byte stores: offset limited to 4095 */
728 calcaddr(&base,&fc,&sign,4095,0);
729 op=0xE5000000;
730 if(!sign)
731 op|=0x800000;
732 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
733 op|=0x400000;
734 o(op|(intr(r)<<12)|fc|(base<<16));
736 return;
739 tcc_error("store unimplemented");
/* Emit code adding 'val' (which may be negative) to the stack pointer,
   using as many instructions as the constant requires. */
static void gadd_sp(int val)
{
  const uint32_t add_sp_sp = 0xE28DD000; /* add sp, sp, #0 */

  stuff_const_harder(add_sp_sp, val);
}
747 /* 'is_jmp' is '1' if it is a jump */
748 static void gcall_or_jmp(int is_jmp)
750 int r;
751 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
752 uint32_t x;
753 /* constant case */
754 x=encbranch(ind,ind+vtop->c.ul,0);
755 if(x) {
756 if (vtop->r & VT_SYM) {
757 /* relocation case */
758 greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
759 } else
760 put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
761 o(x|(is_jmp?0xE0000000:0xE1000000));
762 } else {
763 if(!is_jmp)
764 o(0xE28FE004); // add lr,pc,#4
765 o(0xE51FF004); // ldr pc,[pc,#-4]
766 if (vtop->r & VT_SYM)
767 greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
768 o(vtop->c.ul);
770 } else {
771 /* otherwise, indirect call */
772 r = gv(RC_INT);
773 if(!is_jmp)
774 o(0xE1A0E00F); // mov lr,pc
775 o(0xE1A0F000|intr(r)); // mov pc,r
779 /* Return whether a structure is an homogeneous float aggregate or not.
780 The answer is true if all the elements of the structure are of the same
781 primitive float type and there is less than 4 elements.
783 type: the type corresponding to the structure to be tested */
784 static int is_hgen_float_aggr(CType *type)
786 if ((type->t & VT_BTYPE) == VT_STRUCT) {
787 struct Sym *ref;
788 int btype, nb_fields = 0;
790 ref = type->ref->next;
791 btype = ref->type.t & VT_BTYPE;
792 if (btype == VT_FLOAT || btype == VT_DOUBLE) {
793 for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
794 return !ref && nb_fields <= 4;
797 return 0;
struct avail_regs {
  /* Single-precision registers skipped to align a double. A new hole can
     only appear after the previous one has been filled or skipped for
     good, but entries are never recycled: up to 4 holes (e.g. s1, s5, s9
     and s13) can be recorded over the 16 registers. */
  signed char avail[4];
  int first_hole; /* first available hole */
  int last_hole; /* last available hole (none if equal to first_hole) */
  int first_free_reg; /* next free register in the sequence, hole excluded */
};

#define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0, 0 }, 0, 0, 0 }

/* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
   param) according to the rules described in the procedure call standard for
   the ARM architecture (AAPCS). If found, the registers are assigned to this
   VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
   and the parameter is a single float.

   avregs: opaque structure to keep track of available VFP co-processor regs
   align: alignment constraints for the param, as returned by type_size()
   size: size of the parameter, as returned by type_size()

   Returns the number of the first single-precision register assigned, or -1
   if the parameter does not fit in the 16 registers. After a failure every
   later call on the same avregs returns -1 as well. */
int assign_vfpreg(struct avail_regs *avregs, int align, int size)
{
  int first_reg = 0;

  if (avregs->first_free_reg == -1)
    return -1;
  if (align >> 3) { /* double alignment */
    first_reg = avregs->first_free_reg;
    /* alignment constraint not respected so use next reg and record hole */
    if (first_reg & 1)
      avregs->avail[avregs->last_hole++] = first_reg++;
  } else { /* no special alignment (float or array of float) */
    /* if single float and a hole is available, assign the param to it */
    if (size == 4 && avregs->first_hole != avregs->last_hole)
      return avregs->avail[avregs->first_hole++];
    else
      first_reg = avregs->first_free_reg;
  }
  if (first_reg + size / 4 <= 16) {
    avregs->first_free_reg = first_reg + size / 4;
    return first_reg;
  }
  avregs->first_free_reg = -1;
  return -1;
}
844 /* Returns whether all params need to be passed in core registers or not.
845 This is the case for function part of the runtime ABI. */
846 int floats_in_core_regs(SValue *sval)
848 if (!sval->sym)
849 return 0;
851 switch (sval->sym->v) {
852 case TOK___floatundisf:
853 case TOK___floatundidf:
854 case TOK___fixunssfdi:
855 case TOK___fixunsdfdi:
856 #ifndef TCC_ARM_VFP
857 case TOK___fixunsxfdi:
858 #endif
859 case TOK___floatdisf:
860 case TOK___floatdidf:
861 case TOK___fixsfdi:
862 case TOK___fixdfdi:
863 return 1;
865 default:
866 return 0;
870 /* Return the number of registers needed to return the struct, or 0 if
871 returning via struct pointer. */
872 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align) {
873 #ifdef TCC_ARM_EABI
874 int size, align;
875 size = type_size(vt, &align);
876 if (float_abi == ARM_HARD_FLOAT && !variadic &&
877 (is_float(vt->t) || is_hgen_float_aggr(vt))) {
878 *ret_align = 8;
879 ret->ref = NULL;
880 ret->t = VT_DOUBLE;
881 return (size + 7) >> 3;
882 } else if (size <= 4) {
883 *ret_align = 4;
884 ret->ref = NULL;
885 ret->t = VT_INT;
886 return 1;
887 } else
888 return 0;
889 #else
890 return 0;
891 #endif
894 /* Parameters are classified according to how they are copied to their final
895 destination for the function call. Because the copying is performed class
896 after class according to the order in the enum below, it is important that
897 some constraints about the order of the members of this enum are respected:
898 - CORE_STRUCT_CLASS must come after STACK_CLASS;
899 - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
900 VFP_STRUCT_CLASS;
901 - VFP_STRUCT_CLASS must come after VFP_CLASS.
902 See the comment for the main loop in copy_params() for the reason.
   NB_CLASSES is the number of classes and sizes the per-class list array
   in struct plan. */
903 enum reg_class {
904 STACK_CLASS = 0,
905 CORE_STRUCT_CLASS,
906 VFP_CLASS,
907 VFP_STRUCT_CLASS,
908 CORE_CLASS,
909 NB_CLASSES
/* Placement decided for one parameter of a call. Plans of the same class
   are chained through 'prev', most recently added first. */
912 struct param_plan {
913 int start; /* first reg or addr used depending on the class */
914 int end; /* last reg used or next free addr depending on the class */
915 SValue *sval; /* pointer to SValue on the value stack */
916 struct param_plan *prev; /* previous element in this class */
/* Overall parameter assignment for one call: the storage for every
   parameter plan plus the head of each per-class list (indexed by
   enum reg_class). */
919 struct plan {
920 struct param_plan *pplans; /* array of all the param plans */
921 struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
/* Record 'pplan' as the newest plan of class 'class': link it to the
   current head of that class list, copy it into the next free slot of
   plan->pplans and make that slot the new head.
   The macro pastes "_nb" onto the name given as 'plan', so an integer
   variable called <plan>_nb (the number of slots used so far) must be in
   scope where the macro is expanded; it is post-incremented. */
924 #define add_param_plan(plan,pplan,class) \
925 do { \
926 pplan.prev = plan->clsplans[class]; \
927 plan->pplans[plan ## _nb] = pplan; \
928 plan->clsplans[class] = &plan->pplans[plan ## _nb++]; \
929 } while(0)
931 /* Assign parameters to registers and stack with alignment according to the
932 rules in the procedure call standard for the ARM architecture (AAPCS).
933 The overall assignment is recorded in an array of per parameter structures
934 called parameter plans. The parameter plans are also further organized in a
935 number of linked lists, one per class of parameter (see the comment for the
936 definition of enum reg_class).
938 nb_args: number of parameters of the function for which a call is generated
939 float_abi: float ABI in use for this function call
940 plan: the structure where the overall assignment is recorded
941 todo: a bitmap that records which core registers hold a parameter
943 Returns the amount of stack space needed for parameter passing
945 Note: this function allocates an array in plan->pplans with tcc_malloc. It
946 is the responsibility of the caller to free this array once used (i.e. not
947 before copy_params). */
948 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
950 int i, size, align;
951 int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
/* slot counter used implicitly by the add_param_plan() macro */
952 int plan_nb = 0;
953 struct param_plan pplan;
954 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
956 ncrn = nsaa = 0;
957 *todo = 0;
958 plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
959 memset(plan->clsplans, 0, sizeof(plan->clsplans));
/* i runs from nb_args-1 down to 0, i.e. from vtop[-(nb_args-1)] to vtop[0] */
960 for(i = nb_args; i-- ;) {
961 int j, start_vfpreg = 0;
962 size = type_size(&vtop[-i].type, &align);
/* round size and alignment up to a multiple of 4 bytes */
963 size = (size + 3) & ~3;
964 align = (align + 3) & ~3;
965 switch(vtop[-i].type.t & VT_BTYPE) {
966 case VT_STRUCT:
967 case VT_FLOAT:
968 case VT_DOUBLE:
969 case VT_LDOUBLE:
970 if (float_abi == ARM_HARD_FLOAT) {
971 int is_hfa = 0; /* Homogeneous float aggregate */
973 if (is_float(vtop[-i].type.t)
974 || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
975 int end_vfpreg;
977 start_vfpreg = assign_vfpreg(&avregs, align, size);
978 end_vfpreg = start_vfpreg + ((size - 1) >> 2);
979 if (start_vfpreg >= 0) {
980 pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
981 if (is_hfa)
982 add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
983 else
984 add_param_plan(plan, pplan, VFP_CLASS);
985 continue;
986 } else
/* no VFP register left: leave the switch, the parameter goes on the stack */
987 break;
/* align the next core register number to the parameter's alignment */
990 ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
991 if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
992 /* The parameter is allocated both in core register and on stack. As
993 * such, it can be of either class: it would either be the last of
994 * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
995 for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
996 *todo|=(1<<j);
997 pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
998 add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
999 ncrn += size/4;
/* the part that did not fit in r0-r3 occupies the start of the stack area */
1000 if (ncrn > 4)
1001 nsaa = (ncrn - 4) * 4;
1002 } else {
/* no core register may be used after a parameter went to the stack */
1003 ncrn = 4;
1004 break;
1006 continue;
1007 default:
1008 if (ncrn < 4) {
1009 int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
1011 if (is_long) {
/* a long long starts on an even register number */
1012 ncrn = (ncrn + 1) & -2;
1013 if (ncrn == 4)
1014 break;
1016 pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
1017 ncrn++;
1018 if (is_long)
1019 pplan.end = ncrn++;
1020 add_param_plan(plan, pplan, CORE_CLASS);
1021 continue;
/* reached through 'break': the parameter is passed on the stack */
1024 nsaa = (nsaa + (align - 1)) & ~(align - 1);
1025 pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
1026 add_param_plan(plan, pplan, STACK_CLASS);
1027 nsaa += size; /* size already rounded up before */
1029 return nsaa;
1032 #undef add_param_plan
1034 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1035 function call.
   The per-class lists of 'plan' are processed in the order of enum
   reg_class; afterwards the core registers listed in 'todo' are popped
   from the stack and extra SValues are pushed to pin them.
1037 nb_args: number of parameters the function takes
1038 plan: the overall assignment plan for parameters
1039 todo: a bitmap indicating what core reg will hold a parameter
1041 Returns the number of SValue added by this function on the value stack */
1042 static int copy_params(int nb_args, struct plan *plan, int todo)
1044 int size, align, r, i, nb_extra_sval = 0;
1045 struct param_plan *pplan;
1047 /* Several constraints require parameters to be copied in a specific order:
1048 - structures are copied to the stack before being loaded in a reg;
1049 - floats loaded to an odd numbered VFP reg are first copied to the
1050 preceding even numbered VFP reg and then moved to the next VFP reg.
1052 It is thus important that:
1053 - structures assigned to core regs must be copied after parameters
1054 assigned to the stack but before structures assigned to VFP regs because
1055 a structure can lie partly in core registers and partly on the stack;
1056 - parameters assigned to the stack and all structures be copied before
1057 parameters assigned to a core reg since copying a parameter to the stack
1058 require using a core reg;
1059 - parameters assigned to VFP regs be copied before structures assigned to
1060 VFP regs as the copy might use an even numbered VFP reg that already
1061 holds part of a structure. */
1062 for(i = 0; i < NB_CLASSES; i++) {
1063 for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
/* work on a copy pushed on top of the value stack */
1064 vpushv(pplan->sval);
1065 pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1066 switch(i) {
1067 case STACK_CLASS:
1068 case CORE_STRUCT_CLASS:
1069 case VFP_STRUCT_CLASS:
1070 if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1071 int padding = 0;
1072 size = type_size(&pplan->sval->type, &align);
1073 /* align to stack align size */
1074 size = (size + 3) & ~3;
1075 if (i == STACK_CLASS && pplan->prev)
1076 padding = pplan->start - pplan->prev->end;
1077 size += padding; /* Add padding if any */
1078 /* allocate the necessary size on stack */
1079 gadd_sp(-size);
1080 /* generate structure store */
1081 r = get_reg(RC_INT);
1082 o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1083 vset(&vtop->type, r | VT_LVAL, 0);
1084 vswap();
1085 vstore(); /* memcpy to current sp + potential padding */
1087 /* Homogeneous float aggregate are loaded to VFP registers
1088 immediately since there is no way of loading data in multiple
1089 non consecutive VFP registers as what is done for other
1090 structures (see the use of todo). */
1091 if (i == VFP_STRUCT_CLASS) {
1092 int first = pplan->start, nb = pplan->end - first + 1;
1093 /* vpop.32 {pplan->start, ..., pplan->end} */
1094 o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1095 /* No need to write the register used to a SValue since VFP regs
1096 cannot be used for gcall_or_jmp */
1098 } else {
/* scalar passed on the stack: load it in a register and push it */
1099 if (is_float(pplan->sval->type.t)) {
1100 #ifdef TCC_ARM_VFP
1101 r = vfpr(gv(RC_FLOAT)) << 12;
1102 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1103 size = 4;
1104 else {
1105 size = 8;
1106 r |= 0x101; /* vpush.32 -> vpush.64 */
1108 o(0xED2D0A01 + r); /* vpush */
1109 #else
1110 r = fpr(gv(RC_FLOAT)) << 12;
1111 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1112 size = 4;
1113 else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1114 size = 8;
1115 else
1116 size = LDOUBLE_SIZE;
1118 if (size == 12)
1119 r |= 0x400000;
1120 else if(size == 8)
1121 r|=0x8000;
1123 o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1124 #endif
1125 } else {
1126 /* simple type (currently always same size) */
1127 /* XXX: implicit cast ? */
1128 size=4;
1129 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
/* long long: split it in two words and push one word here, the other
   word is pushed by the common code below */
1130 lexpand_nr();
1131 size = 8;
1132 r = gv(RC_INT);
1133 o(0xE52D0004|(intr(r)<<12)); /* push r */
1134 vtop--;
1136 r = gv(RC_INT);
1137 o(0xE52D0004|(intr(r)<<12)); /* push r */
1139 if (i == STACK_CLASS && pplan->prev)
1140 gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1142 break;
1144 case VFP_CLASS:
/* load the value in the double register containing the target register */
1145 gv(regmask(TREG_F0 + (pplan->start >> 1)));
1146 if (pplan->start & 1) { /* Must be in upper part of double register */
1147 o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1148 vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1150 break;
1152 case CORE_CLASS:
1153 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
/* long long: one word goes in register 'end' and is recorded in r2 */
1154 lexpand_nr();
1155 gv(regmask(pplan->end));
1156 pplan->sval->r2 = vtop->r;
1157 vtop--;
1159 gv(regmask(pplan->start));
1160 /* Mark register as used so that gcall_or_jmp use another one
1161 (regs >=4 are free as never used to pass parameters) */
1162 pplan->sval->r = vtop->r;
1163 break;
/* drop the working copy pushed at the top of the loop */
1165 vtop--;
1169 /* Manually free remaining registers since next parameters are loaded
1170 * manually, without the help of gv(int). */
1171 save_regs(nb_args);
1173 if(todo) {
1174 o(0xE8BD0000|todo); /* pop {todo} */
1175 for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1176 int r;
1177 pplan->sval->r = pplan->start;
1178 /* An SValue can only pin 2 registers at best (r and r2) but a structure
1179 can occupy more than 2 registers. Thus, we need to push on the value
1180 stack some fake parameter to have on SValue for each registers used
1181 by a structure (r2 is not used). */
1182 for (r = pplan->start + 1; r <= pplan->end; r++) {
1183 if (todo & (1 << r)) {
1184 nb_extra_sval++;
1185 vpushi(0);
1186 vtop->r = r;
1191 return nb_extra_sval;
1194 /* Generate function call. The function address is pushed first, then
1195 all the parameters in call order. This function pops all the
1196 parameters and the function address. */
/* nb_args is the number of arguments currently on the value stack above the
   function SValue (the function itself is read from vtop[-nb_args]).
   Outline of what the body does, in order:
     - under EABI hard-float, temporarily switch float_abi to softfp for
       variadic callees or when floats_in_core_regs() says so;
     - materialise a pending compare/jump result sitting on top of the stack;
     - let assign_regs() plan where each argument goes and report the number
       of stack bytes needed;
     - under EABI, pad the stack area to a multiple of 8 bytes;
     - copy_params() places the arguments, then the call is emitted;
     - stack arguments are released, a softfp float result is moved into the
       VFP return register, the value stack is popped and float_abi restored. */
1197 void gfunc_call(int nb_args)
1199 int r, args_size;
/* def_float_abi remembers the global float ABI so it can be restored on exit */
1200 int variadic, def_float_abi = float_abi;
1201 int todo;
1202 struct plan plan;
1204 #ifdef TCC_ARM_EABI
1205 if (float_abi == ARM_HARD_FLOAT) {
/* FUNC_ELLIPSIS in the function type marks a variadic callee */
1206 variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1207 if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1208 float_abi = ARM_SOFTFP_FLOAT;
1210 #endif
1211 /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1212 VT_JMP anywhere except on the top of the stack because it would complicate
1213 the code generator. */
1214 r = vtop->r & VT_VALMASK;
/* (r & ~1) == VT_JMP matches both VT_JMP and VT_JMPI (gtst() treats them as a pair) */
1215 if (r == VT_CMP || (r & ~1) == VT_JMP)
1216 gv(RC_INT);
/* assign_regs() fills 'plan' and 'todo' and returns the size of the stack
   argument area in bytes */
1218 args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1220 #ifdef TCC_ARM_EABI
1221 if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
/* NOTE(review): the single 4-byte pad below is only sufficient if args_size is
   always a multiple of 4 - confirm against assign_regs() */
1222 args_size = (args_size + 7) & ~7;
1223 o(0xE24DD004); /* sub sp, sp, #4 */
1225 #endif
/* copy_params() returns how many extra fake SValues it pushed; they must be
   counted so that the final pop removes them as well */
1227 nb_args += copy_params(nb_args, &plan, todo);
1228 tcc_free(plan.pplans);
1230 /* Move fct SValue on top as required by gcall_or_jmp */
1231 vrotb(nb_args + 1);
1232 gcall_or_jmp(0);
1233 if (args_size)
1234 gadd_sp(args_size); /* pop all parameters passed on the stack */
1235 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
/* With softfp the callee returns floating point values in core registers;
   move them into s0 / d0. vtop is still the function SValue here, so
   type.ref->type is its return type. */
1236 if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1237 if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1238 o(0xEE000A10); /*vmov s0, r0 */
1239 } else {
1240 o(0xEE000B10); /* vmov.32 d0[0], r0 */
1241 o(0xEE201B10); /* vmov.32 d0[1], r1 */
1244 #endif
1245 vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1246 leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
/* undo the temporary softfp switch made at the top */
1247 float_abi = def_float_abi;
1250 /* generate function prolog of type 't' */
/* func_type describes the function being compiled. The body works in two
   passes over the parameter list:
     1. count how many core-register words (n) and VFP single-register words
        (nf) the parameters occupy, then emit the pushes that spill those
        registers, save fp/ip/lr and set up fp;
     2. walk the parameters again, compute each one's offset and declare it
        as a local variable with sym_push().
   It also resets the per-function state last_itod_magic, leaffunc and loc. */
1251 void gfunc_prolog(CType *func_type)
1253 Sym *sym,*sym2;
1254 int n, nf, size, align, struct_ret = 0;
1255 int addr, pn, sn; /* pn=core, sn=stack */
1256 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1257 CType ret_type;
1259 sym = func_type->ref;
1260 func_vt = sym->type;
1261 func_var = (func_type->ref->c == FUNC_ELLIPSIS);
1263 n = nf = 0;
/* A struct result for which gfunc_sret() returns 0 consumes one core
   register word (presumably the hidden result pointer - confirm) */
1264 if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1265 !gfunc_sret(&func_vt, func_var, &ret_type, &align))
1267 n++;
1268 struct_ret = 1;
1269 func_vc = 12; /* Offset from fp of the place to store the result */
/* Pass 1: count register words; stops once both 4 core words and 16 VFP
   words are accounted for */
1271 for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
1272 size = type_size(&sym2->type, &align);
1273 #ifdef TCC_ARM_EABI
1274 if (float_abi == ARM_HARD_FLOAT && !func_var &&
1275 (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
/* nf tracks the highest VFP word index used so far, plus one */
1276 int tmpnf = assign_vfpreg(&avregs, align, size);
1277 tmpnf += (size + 3) / 4;
1278 nf = (tmpnf > nf) ? tmpnf : nf;
1279 } else
1280 #endif
1281 if (n < 4)
1282 n += (size + 3) / 4;
1284 o(0xE1A0C00D); /* mov ip,sp */
/* variadic functions always spill all four core argument registers */
1285 if (func_var)
1286 n=4;
1287 if (n) {
1288 if(n>4)
1289 n=4;
1290 #ifdef TCC_ARM_EABI
/* round up to an even number of registers */
1291 n=(n+1)&-2;
1292 #endif
1293 o(0xE92D0000|((1<<n)-1)); /* save r0-r3 (the first n core registers) on stack if needed */
1295 if (nf) {
1296 if (nf>16)
1297 nf=16;
1298 nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1299 o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1301 o(0xE92D5800); /* save fp, ip, lr */
1302 o(0xE1A0B00D); /* mov fp, sp */
/* remember where the placeholder is so gfunc_epilog() can patch it */
1303 func_sub_sp_offset = ind;
1304 o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1306 #ifdef TCC_ARM_EABI
1307 if (float_abi == ARM_HARD_FLOAT) {
/* the saved VFP registers sit between fp and the saved core registers */
1308 func_vc += nf * 4;
/* restart VFP register allocation for the second pass */
1309 avregs = AVAIL_REGS_INITIALIZER;
1311 #endif
/* Pass 2: pn = next core register word, sn = next stack word */
1312 pn = struct_ret, sn = 0;
1313 while ((sym = sym->next)) {
1314 CType *type;
1315 type = &sym->type;
1316 size = type_size(type, &align);
/* from here on size is in 4-byte words and align is a multiple of 4 bytes */
1317 size = (size + 3) >> 2;
1318 align = (align + 3) & ~3;
1319 #ifdef TCC_ARM_EABI
1320 if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
1321 || is_hgen_float_aggr(&sym->type))) {
1322 int fpn = assign_vfpreg(&avregs, align, size << 2);
/* a negative result means no VFP register was available: use the stack */
1323 if (fpn >= 0)
1324 addr = fpn * 4;
1325 else
1326 goto from_stack;
1327 } else
1328 #endif
1329 if (pn < 4) {
1330 #ifdef TCC_ARM_EABI
/* align the register index to the parameter's alignment (in words) */
1331 pn = (pn + (align-1)/4) & -(align/4);
1332 #endif
1333 addr = (nf + pn) * 4;
1334 pn += size;
/* parameter straddles registers and stack: record the words that spill over */
1335 if (!sn && pn > 4)
1336 sn = (pn - 4);
1337 } else {
1338 from_stack:
1339 #ifdef TCC_ARM_EABI
1340 sn = (sn + (align-1)/4) & -(align/4);
1341 #endif
1342 addr = (n + nf + sn) * 4;
1343 sn += size;
/* NOTE(review): the +12 presumably skips the three words (fp, ip, lr) pushed
   just before fp was set - confirm */
1345 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
1346 addr + 12);
1348 last_itod_magic=0;
/* assume a leaf function until gfunc_call() clears the flag */
1349 leaffunc = 1;
1350 loc = 0;
1353 /* generate function epilog */
/* Emits the return sequence and then back-patches the placeholder nop that
   gfunc_prolog() left at func_sub_sp_offset with the instruction(s) that
   reserve the local stack frame, whose size is only known now (from loc). */
1354 void gfunc_epilog(void)
1356 uint32_t x;
1357 int diff;
1358 /* Copy float return value to core register if base standard is used and
1359 float computation is made with VFP */
1360 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1361 if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1362 if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1363 o(0xEE100A10); /* fmrs r0, s0 */
1364 else {
1365 o(0xEE100B10); /* fmrdl r0, d0 */
1366 o(0xEE301B10); /* fmrdh r1, d0 */
1369 #endif
1370 o(0xE89BA800); /* restore fp, sp, pc */
/* frame size: -loc rounded up to a multiple of 4 bytes */
1371 diff = (-loc + 3) & -4;
1372 #ifdef TCC_ARM_EABI
/* for non-leaf functions make diff + 4 a multiple of 8
   (NOTE(review): presumably to keep sp 8-byte aligned at call sites - confirm) */
1373 if(!leaffunc)
1374 diff = ((diff + 11) & -8) - 4;
1375 #endif
1376 if(diff > 0) {
/* stuff_const() returns 0 when diff cannot be encoded as an immediate */
1377 x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1378 if(x)
1379 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1380 else {
/* Fallback: emit an out-of-line stub after the function that loads diff
   from a literal word and returns with mov pc,lr, then patch the
   placeholder with a branch to that stub. */
1381 int addr;
1382 addr=ind;
1383 o(0xE59FC004); /* ldr ip,[pc+4] */
1384 o(0xE04BD00C); /* sub sp,fp,ip */
1385 o(0xE1A0F00E); /* mov pc,lr */
1386 o(diff);
/* NOTE(review): since the stub returns through lr, 0xE1000000 combined with
   the bits supplied by encbranch() presumably forms a branch-with-link - confirm */
1387 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1392 /* generate a jump to a label */
1393 int gjmp(int t)
1395 int r;
1396 r=ind;
1397 o(0xE0000000|encbranch(r,t,1));
1398 return r;
/* Emit a jump to the fixed address 'a'. Same as gjmp(), except that the
   position of the emitted branch is not reported to the caller. */
void gjmp_addr(int a)
{
    (void)gjmp(a);
}
1407 /* generate a test. set 'inv' to invert test. Stack entry is popped */
/* 't' is the jump target / head of an existing jump chain; the (possibly
   new) chain head is returned. Two kinds of stack entry are handled:
     - VT_CMP: a conditional branch on the condition stored in vtop->c.i
       (negated when 'inv' is set) is emitted and linked to 't';
     - VT_JMP / VT_JMPI: vtop->c.i is itself a chain of pending jumps, which
       is either merged with 't' or resolved here with gsym(). */
1408 int gtst(int inv, int t)
1410 int v, r;
1411 uint32_t op;
1412 v = vtop->r & VT_VALMASK;
/* r = position of the instruction about to be emitted */
1413 r=ind;
1414 if (v == VT_CMP) {
1415 op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1416 op|=encbranch(r,t,1);
1417 o(op);
1418 t=r;
1419 } else { /* VT_JMP || VT_JMPI */
/* the low bit of v distinguishes VT_JMP from VT_JMPI; when it matches 'inv'
   the pending jumps already go where the test wants them to go */
1420 if ((v & 1) == inv) {
1421 if(!vtop->c.i)
1422 vtop->c.i=t;
1423 else {
1424 uint32_t *x;
1425 int p,lp;
1426 if(t) {
/* walk to the last branch of the chain (decbranch() yields 0 at the end)
   and redirect it to 't', i.e. append 't' to the chain */
1427 p = vtop->c.i;
1428 do {
1429 p = decbranch(lp=p);
1430 } while(p);
1431 x = (uint32_t *)(cur_text_section->data + lp);
/* keep the top 8 bits of the instruction, replace the offset field */
1432 *x &= 0xff000000;
1433 *x |= encbranch(lp,t,1);
1435 t = vtop->c.i;
1437 } else {
/* opposite sense: jump over, and make the pending jumps land here */
1438 t = gjmp(t);
1439 gsym(vtop->c.i);
1442 vtop--;
1443 return t;
1446 /* generate an integer binary operation */
/* Operates on the two topmost value-stack entries (vtop[-1] op vtop[0]) and
   leaves the result in their place.
   The first switch classifies 'op' into a category 'c' and picks an opcode:
     c == 1  data-processing instruction, 'opc' is the value shifted to bit 20;
     c == 2  shift, 'opc' is the shift type;
     c == 3  call to a runtime helper named by 'func';
   '*' and TOK_UMULL are emitted directly and return early. Any operator not
   listed is handled as a comparison (see the default case). */
1447 void gen_opi(int op)
1449 int c, func = 0;
1450 uint32_t opc = 0, r, fr;
/* register the helper's result is read from (only used for c == 3) */
1451 unsigned short retreg = REG_IRET;
1453 c=0;
1454 switch(op) {
1455 case '+':
1456 opc = 0x8;
1457 c=1;
1458 break;
1459 case TOK_ADDC1: /* add with carry generation */
1460 opc = 0x9;
1461 c=1;
1462 break;
1463 case '-':
1464 opc = 0x4;
1465 c=1;
1466 break;
1467 case TOK_SUBC1: /* sub with carry generation */
1468 opc = 0x5;
1469 c=1;
1470 break;
1471 case TOK_ADDC2: /* add with carry use */
1472 opc = 0xA;
1473 c=1;
1474 break;
1475 case TOK_SUBC2: /* sub with carry use */
1476 opc = 0xC;
1477 c=1;
1478 break;
1479 case '&':
1480 opc = 0x0;
1481 c=1;
1482 break;
1483 case '^':
1484 opc = 0x2;
1485 c=1;
1486 break;
1487 case '|':
1488 opc = 0x18;
1489 c=1;
1490 break;
1491 case '*':
/* both operands in registers; the result reuses the register of vtop[-1] */
1492 gv2(RC_INT, RC_INT);
1493 r = vtop[-1].r;
1494 fr = vtop[0].r;
1495 vtop--;
1496 o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1497 return;
1498 case TOK_SHL:
1499 opc = 0;
1500 c=2;
1501 break;
1502 case TOK_SHR:
1503 opc = 1;
1504 c=2;
1505 break;
1506 case TOK_SAR:
1507 opc = 2;
1508 c=2;
1509 break;
1510 case '/':
1511 case TOK_PDIV:
1512 func=TOK___divsi3;
1513 c=3;
1514 break;
1515 case TOK_UDIV:
1516 func=TOK___udivsi3;
1517 c=3;
1518 break;
1519 case '%':
1520 #ifdef TCC_ARM_EABI
/* NOTE(review): the result is read from REG_LRET, presumably because the
   EABI divmod helper returns the remainder in the second result register */
1521 func=TOK___aeabi_idivmod;
1522 retreg=REG_LRET;
1523 #else
1524 func=TOK___modsi3;
1525 #endif
1526 c=3;
1527 break;
1528 case TOK_UMOD:
1529 #ifdef TCC_ARM_EABI
1530 func=TOK___aeabi_uidivmod;
1531 retreg=REG_LRET;
1532 #else
1533 func=TOK___umodsi3;
1534 #endif
1535 c=3;
1536 break;
1537 case TOK_UMULL:
/* 64-bit product: vtop[-1].r2 receives a fresh register for one half and
   vtop[-1].r a register different from the original operand (kept in c) */
1538 gv2(RC_INT, RC_INT);
1539 r=intr(vtop[-1].r2=get_reg(RC_INT));
1540 c=vtop[-1].r;
1541 vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1542 vtop--;
1543 o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1544 return;
1545 default:
/* everything else is treated as a comparison; the result becomes VT_CMP at
   the 'done' label when op lies in TOK_ULT..TOK_GT */
1546 opc = 0x15;
1547 c=1;
1548 break;
1550 switch(c) {
1551 case 1:
/* constant on the left of a subtraction: swap the operands and use the
   reversed-subtract form so the constant can still be an immediate */
1552 if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1553 if(opc == 4 || opc == 5 || opc == 0xc) {
1554 vswap();
1555 opc|=2; // sub -> rsb
/* a pending compare/jump on top must be turned into a register value first */
1558 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1559 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1560 gv(RC_INT);
/* load the first operand into a register; c holds its register number */
1561 vswap();
1562 c=intr(gv(RC_INT));
1563 vswap();
1564 opc=0xE0000000|(opc<<20)|(c<<16);
1565 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
/* try the immediate form; stuff_const() returns 0 if the constant does not fit */
1566 uint32_t x;
1567 x=stuff_const(opc|0x2000000,vtop->c.i);
1568 if(x) {
1569 r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1570 o(x|(r<<12));
1571 goto done;
/* register-register form */
1574 fr=intr(gv(RC_INT));
1575 r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1576 o(opc|(r<<12)|fr);
1577 done:
1578 vtop--;
/* comparison operators leave their result in the CPU flags */
1579 if (op >= TOK_ULT && op <= TOK_GT) {
1580 vtop->r = VT_CMP;
1581 vtop->c.i = op;
1583 break;
1584 case 2:
/* shifts: opc selects the shift type field */
1585 opc=0xE1A00000|(opc<<5);
1586 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1587 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1588 gv(RC_INT);
1589 vswap();
1590 r=intr(gv(RC_INT));
1591 vswap();
1592 opc|=r;
1593 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
/* constant shift amount: only the low 5 bits are used */
1594 fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1595 c = vtop->c.i & 0x1f;
1596 o(opc|(c<<7)|(fr<<12));
1597 } else {
/* shift amount taken from a register */
1598 fr=intr(gv(RC_INT));
1599 c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1600 o(opc|(c<<12)|(fr<<8)|0x10);
1602 vtop--;
1603 break;
1604 case 3:
/* runtime helper: push the function, rotate it below the two operands,
   call it, then push a placeholder whose register is the result register */
1605 vpush_global_sym(&func_old_type, func);
1606 vrott(3);
1607 gfunc_call(2);
1608 vpushi(0);
1609 vtop->r = retreg;
1610 break;
1611 default:
1612 tcc_error("gen_opi %i unimplemented!",op);
1616 #ifdef TCC_ARM_VFP
1617 static int is_zero(int i)
1619 if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1620 return 0;
1621 if (vtop[i].type.t == VT_FLOAT)
1622 return (vtop[i].c.f == 0.f);
1623 else if (vtop[i].type.t == VT_DOUBLE)
1624 return (vtop[i].c.d == 0.0);
1625 return (vtop[i].c.ld == 0.l);
1628 /* generate a floating point operation 'v = t1 op t2' instruction. The
1629 * two operands are guaranteed to have the same floating point type */
/* VFP variant. t1 is vtop[-1], t2 is vtop[0]. Arithmetic results end up in a
   float register on the value stack; comparisons leave VT_CMP on the stack.
   Additions/subtractions with a constant zero operand are simplified using
   is_zero(). */
1630 void gen_opf(int op)
1632 uint32_t x;
/* fneg: set when '0 - x' is turned into a single-operand negate */
1633 int fneg=0,r;
/* base opcode; T2CPR() adds the bits that depend on the operand type */
1634 x=0xEE000A00|T2CPR(vtop->type.t);
1635 switch(op) {
1636 case '+':
/* bring a zero operand to the top, then drop it: x + 0 -> x */
1637 if(is_zero(-1))
1638 vswap();
1639 if(is_zero(0)) {
1640 vtop--;
1641 return;
1643 x|=0x300000;
1644 break;
1645 case '-':
1646 x|=0x300040;
/* x - 0 -> x */
1647 if(is_zero(0)) {
1648 vtop--;
1649 return;
/* 0 - x -> -x : drop the zero and negate the remaining operand */
1651 if(is_zero(-1)) {
1652 x|=0x810000; /* fsubX -> fnegX */
1653 vswap();
1654 vtop--;
1655 fneg=1;
1657 break;
1658 case '*':
1659 x|=0x200000;
1660 break;
1661 case '/':
1662 x|=0x800000;
1663 break;
1664 default:
1665 if(op < TOK_ULT || op > TOK_GT) {
1666 tcc_error("unknown fp op %x!",op);
1667 return;
/* zero on the left: swap operands so the compare-with-zero form can be
   used, and mirror the comparison accordingly */
1669 if(is_zero(-1)) {
1670 vswap();
1671 switch(op) {
1672 case TOK_LT: op=TOK_GT; break;
1673 case TOK_GE: op=TOK_ULE; break;
1674 case TOK_LE: op=TOK_GE; break;
1675 case TOK_GT: op=TOK_ULT; break;
1678 x|=0xB40040; /* fcmpX */
1679 if(op!=TOK_EQ && op!=TOK_NE)
1680 x|=0x80; /* fcmpX -> fcmpeX */
1681 if(is_zero(0)) {
1682 vtop--;
1683 o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1684 } else {
1685 x|=vfpr(gv(RC_FLOAT));
1686 vswap();
1687 o(x|(vfpr(gv(RC_FLOAT))<<12));
1688 vtop--;
/* transfer the VFP status flags to the CPU flags */
1690 o(0xEEF1FA10); /* fmstat */
/* remap the operator to the condition that is tested on the CPU flags
   after fmstat (NOTE(review): presumably chosen for correct behaviour with
   unordered operands - confirm) */
1692 switch(op) {
1693 case TOK_LE: op=TOK_ULE; break;
1694 case TOK_LT: op=TOK_ULT; break;
1695 case TOK_UGE: op=TOK_GE; break;
1696 case TOK_UGT: op=TOK_GT; break;
1699 vtop->r = VT_CMP;
1700 vtop->c.i = op;
1701 return;
/* arithmetic: r accumulates the register mask of the source operands, which
   is passed to get_reg_ex() when choosing the destination register */
1703 r=gv(RC_FLOAT);
1704 x|=vfpr(r);
1705 r=regmask(r);
1706 if(!fneg) {
1707 int r2;
1708 vswap();
1709 r2=gv(RC_FLOAT);
1710 x|=vfpr(r2)<<16;
1711 r|=regmask(r2);
1713 vtop->r=get_reg_ex(RC_FLOAT,r);
/* the negate case already popped its second operand above */
1714 if(!fneg)
1715 vtop--;
1716 o(x|(vfpr(vtop->r)<<12));
1719 #else
1720 static uint32_t is_fconst()
1722 long double f;
1723 uint32_t r;
1724 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1725 return 0;
1726 if (vtop->type.t == VT_FLOAT)
1727 f = vtop->c.f;
1728 else if (vtop->type.t == VT_DOUBLE)
1729 f = vtop->c.d;
1730 else
1731 f = vtop->c.ld;
1732 if(!ieee_finite(f))
1733 return 0;
1734 r=0x8;
1735 if(f<0.0) {
1736 r=0x18;
1737 f=-f;
1739 if(f==0.0)
1740 return r;
1741 if(f==1.0)
1742 return r|1;
1743 if(f==2.0)
1744 return r|2;
1745 if(f==3.0)
1746 return r|3;
1747 if(f==4.0)
1748 return r|4;
1749 if(f==5.0)
1750 return r|5;
1751 if(f==0.5)
1752 return r|6;
1753 if(f==10.0)
1754 return r|7;
1755 return 0;
1758 /* generate a floating point operation 'v = t1 op t2' instruction. The
1759 two operands are guaranteed to have the same floating point type */
/* FPA variant. c1 and c2 are the is_fconst() codes of t1 (vtop[-1]) and
   t2 (vtop[0]): 0 means "not an immediate constant", a value <= 0xf is a
   non-negative immediate and a value > 0xf a negative one. Each case picks
   the instruction so that an immediate can be used as second operand where
   possible; r is the first operand register field, r2 the second operand
   (register or immediate code). */
1760 void gen_opf(int op)
1762 uint32_t x, r, r2, c1, c2;
1763 //fputs("gen_opf\n",stderr);
/* is_fconst() looks at vtop only, hence the swap to classify t1 */
1764 vswap();
1765 c1 = is_fconst();
1766 vswap();
1767 c2 = is_fconst();
1768 x=0xEE000100;
/* precision bits of the opcode depend on the operand type */
1769 #if LDOUBLE_SIZE == 8
1770 if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1771 x|=0x80;
1772 #else
1773 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1774 x|=0x80;
1775 else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1776 x|=0x80000;
1777 #endif
1778 switch(op)
1780 case '+':
/* addition commutes: if t2 is not an immediate, try t1 in that role */
1781 if(!c2) {
1782 vswap();
1783 c2=c1;
1785 vswap();
1786 r=fpr(gv(RC_FLOAT));
1787 vswap();
1788 if(c2) {
/* adding a negative immediate is done as subtracting its magnitude */
1789 if(c2>0xf)
1790 x|=0x200000; // suf
1791 r2=c2&0xf;
1792 } else {
1793 r2=fpr(gv(RC_FLOAT));
1795 break;
1796 case '-':
1797 if(c2) {
/* non-negative immediate: subtract it; negative immediate: the opcode is
   left as the add form and the magnitude is used */
1798 if(c2<=0xf)
1799 x|=0x200000; // suf
1800 r2=c2&0xf;
1801 vswap();
1802 r=fpr(gv(RC_FLOAT));
1803 vswap();
1804 } else if(c1 && c1<=0xf) {
/* immediate on the left: use the reversed subtract */
1805 x|=0x300000; // rsf
1806 r2=c1;
1807 r=fpr(gv(RC_FLOAT));
1808 vswap();
1809 } else {
1810 x|=0x200000; // suf
1811 vswap();
1812 r=fpr(gv(RC_FLOAT));
1813 vswap();
1814 r2=fpr(gv(RC_FLOAT));
1816 break;
1817 case '*':
/* multiplication commutes; only non-negative immediates are usable */
1818 if(!c2 || c2>0xf) {
1819 vswap();
1820 c2=c1;
1822 vswap();
1823 r=fpr(gv(RC_FLOAT));
1824 vswap();
1825 if(c2 && c2<=0xf)
1826 r2=c2;
1827 else
1828 r2=fpr(gv(RC_FLOAT));
1829 x|=0x100000; // muf
1830 break;
1831 case '/':
1832 if(c2 && c2<=0xf) {
1833 x|=0x400000; // dvf
1834 r2=c2;
1835 vswap();
1836 r=fpr(gv(RC_FLOAT));
1837 vswap();
1838 } else if(c1 && c1<=0xf) {
/* immediate on the left: use the reversed divide */
1839 x|=0x500000; // rdf
1840 r2=c1;
1841 r=fpr(gv(RC_FLOAT));
1842 vswap();
1843 } else {
1844 x|=0x400000; // dvf
1845 vswap();
1846 r=fpr(gv(RC_FLOAT));
1847 vswap();
1848 r2=fpr(gv(RC_FLOAT));
1850 break;
1851 default:
1852 if(op >= TOK_ULT && op <= TOK_GT) {
1853 x|=0xd0f110; // cmfe
1854 /* bug (intention?) in Linux FPU emulator
1855 doesn't set carry if equal */
1856 switch(op) {
1857 case TOK_ULT:
1858 case TOK_UGE:
1859 case TOK_ULE:
1860 case TOK_UGT:
1861 tcc_error("unsigned comparision on floats?");
1862 break;
1863 case TOK_LT:
1864 op=TOK_Nset;
1865 break;
1866 case TOK_LE:
1867 op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1868 break;
1869 case TOK_EQ:
1870 case TOK_NE:
1871 x&=~0x400000; // cmfe -> cmf
1872 break;
/* only t1 is an immediate: swap the operands and mirror the condition */
1874 if(c1 && !c2) {
1875 c2=c1;
1876 vswap();
1877 switch(op) {
1878 case TOK_Nset:
1879 op=TOK_GT;
1880 break;
1881 case TOK_GE:
1882 op=TOK_ULE;
1883 break;
1884 case TOK_ULE:
1885 op=TOK_GE;
1886 break;
1887 case TOK_GT:
1888 op=TOK_Nset;
1889 break;
1892 vswap();
1893 r=fpr(gv(RC_FLOAT));
1894 vswap();
1895 if(c2) {
/* NOTE(review): for a negative immediate bit 0x200000 is set, presumably
   selecting the compare-negated form - confirm */
1896 if(c2>0xf)
1897 x|=0x200000;
1898 r2=c2&0xf;
1899 } else {
1900 r2=fpr(gv(RC_FLOAT));
/* the result of a comparison lives in the CPU flags */
1902 vtop[-1].r = VT_CMP;
1903 vtop[-1].c.i = op;
1904 } else {
1905 tcc_error("unknown fp op %x!",op);
1906 return;
/* destination field: 15 for comparisons, otherwise a float register that
   get_reg_ex() picks given the registers of the operands */
1909 if(vtop[-1].r == VT_CMP)
1910 c1=15;
1911 else {
1912 c1=vtop->r;
/* NOTE(review): bit 3 of r2 is set for is_fconst() immediates, in which case
   vtop->r is presumably not a register - confirm fpr() never sets bit 3 */
1913 if(r2&0x8)
1914 c1=vtop[-1].r;
1915 vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1916 c1=fpr(vtop[-1].r);
1918 vtop--;
1919 o(x|(r<<16)|(c1<<12)|r2);
1921 #endif
1923 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1924 and 'long long' cases. */
/* The integer to convert is on top of the value stack; 't' is the target
   floating point type. int/short/byte sources are converted inline; long
   long sources go through a runtime helper selected by target type and
   signedness. Any other combination is reported with tcc_error(). */
1925 ST_FUNC void gen_cvt_itof1(int t)
1927 uint32_t r, r2;
1928 int bt;
1929 bt=vtop->type.t & VT_BTYPE;
1930 if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1931 #ifndef TCC_ARM_VFP
1932 uint32_t dsize = 0;
1933 #endif
1934 r=intr(gv(RC_INT));
1935 #ifdef TCC_ARM_VFP
/* move the integer into a VFP register, then convert in place */
1936 r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1937 o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
/* same register as source and destination of the conversion */
1938 r2|=r2<<12;
1939 if(!(vtop->type.t & VT_UNSIGNED))
1940 r2|=0x80; /* fuitoX -> fsitoX */
1941 o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1942 #else
1943 r2=fpr(vtop->r=get_reg(RC_FLOAT));
1944 if((t & VT_BTYPE) != VT_FLOAT)
1945 dsize=0x80; /* flts -> fltd */
1946 o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
/* The conversion above is signed. For unsigned int, compare the source with
   zero and conditionally add 4294967296.0 (2^32) to the result. */
1947 if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1948 uint32_t off = 0;
1949 o(0xE3500000|(r<<12)); /* cmp */
1950 r=fpr(get_reg(RC_FLOAT));
/* reuse a previously emitted 2^32 literal if it is still within reach
   (offset in words must fit in 8 bits), otherwise emit a new one */
1951 if(last_itod_magic) {
1952 off=ind+8-last_itod_magic;
1953 off/=4;
1954 if(off>255)
1955 off=0;
1957 o(0xBD1F0100|(r<<12)|off); /* ldflts */
1958 if(!off) {
/* branch over the literal word placed inline in the code */
1959 o(0xEA000000); /* b */
1960 last_itod_magic=ind;
1961 o(0x4F800000); /* 4294967296.0f */
1963 o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1965 #endif
1966 return;
1967 } else if(bt == VT_LLONG) {
/* func is only meaningful when func_type has been set */
1968 int func;
1969 CType *func_type = 0;
1970 if((t & VT_BTYPE) == VT_FLOAT) {
1971 func_type = &func_float_type;
1972 if(vtop->type.t & VT_UNSIGNED)
1973 func=TOK___floatundisf;
1974 else
1975 func=TOK___floatdisf;
1976 #if LDOUBLE_SIZE != 8
1977 } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1978 func_type = &func_ldouble_type;
1979 if(vtop->type.t & VT_UNSIGNED)
1980 func=TOK___floatundixf;
1981 else
1982 func=TOK___floatdixf;
1983 } else if((t & VT_BTYPE) == VT_DOUBLE) {
1984 #else
1985 } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1986 #endif
1987 func_type = &func_double_type;
1988 if(vtop->type.t & VT_UNSIGNED)
1989 func=TOK___floatundidf;
1990 else
1991 func=TOK___floatdidf;
1993 if(func_type) {
/* call helper(value); the result is taken from TREG_F0 */
1994 vpush_global_sym(func_type, func);
1995 vswap();
1996 gfunc_call(1);
1997 vpushi(0);
1998 vtop->r=TREG_F0;
1999 return;
2002 tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
2005 /* convert fp to int 't' type */
/* The floating point value is on top of the value stack. Conversions to int
   are done inline where an instruction is available (always with VFP, only
   the signed case with FPA); the remaining int cases and conversions to
   long long call a runtime helper. Anything else ends in tcc_error(). */
2006 void gen_cvt_ftoi(int t)
2008 uint32_t r, r2;
2009 int u, func = 0;
/* u = unsigned flag of the target, t = its basic type, r2 = source basic type */
2010 u=t&VT_UNSIGNED;
2011 t&=VT_BTYPE;
2012 r2=vtop->type.t & VT_BTYPE;
2013 if(t==VT_INT) {
2014 #ifdef TCC_ARM_VFP
2015 r=vfpr(gv(RC_FLOAT));
/* bit 0x10000 is set for the signed conversion */
2016 u=u?0:0x10000;
2017 o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
/* move the converted value from the VFP register to a core register */
2018 r2=intr(vtop->r=get_reg(RC_INT));
2019 o(0xEE100A10|(r<<16)|(r2<<12));
2020 return;
2021 #else
2022 if(u) {
/* FPA: unsigned conversion goes through a helper chosen by source type */
2023 if(r2 == VT_FLOAT)
2024 func=TOK___fixunssfsi;
2025 #if LDOUBLE_SIZE != 8
2026 else if(r2 == VT_LDOUBLE)
2027 func=TOK___fixunsxfsi;
2028 else if(r2 == VT_DOUBLE)
2029 #else
2030 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2031 #endif
2032 func=TOK___fixunsdfsi;
2033 } else {
/* FPA: signed conversion is a single instruction */
2034 r=fpr(gv(RC_FLOAT));
2035 r2=intr(vtop->r=get_reg(RC_INT));
2036 o(0xEE100170|(r2<<12)|r);
2037 return;
2039 #endif
2040 } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2041 if(r2 == VT_FLOAT)
2042 func=TOK___fixsfdi;
2043 #if LDOUBLE_SIZE != 8
2044 else if(r2 == VT_LDOUBLE)
2045 func=TOK___fixxfdi;
2046 else if(r2 == VT_DOUBLE)
2047 #else
2048 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2049 #endif
2050 func=TOK___fixdfdi;
/* func stays 0 when no helper matched; that falls through to the error */
2052 if(func) {
2053 vpush_global_sym(&func_old_type, func);
2054 vswap();
2055 gfunc_call(1);
2056 vpushi(0);
/* a long long result occupies the register pair REG_IRET / REG_LRET */
2057 if(t == VT_LLONG)
2058 vtop->r2 = REG_LRET;
2059 vtop->r = REG_IRET;
2060 return;
2062 tcc_error("unimplemented gen_cvt_ftoi!");
2065 /* convert from one floating point type to another */
2066 void gen_cvt_ftof(int t)
2068 #ifdef TCC_ARM_VFP
2069 if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2070 uint32_t r = vfpr(gv(RC_FLOAT));
2071 o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2073 #else
2074 /* all we have to do on i386 and FPA ARM is to put the float in a register */
2075 gv(RC_FLOAT);
2076 #endif
2079 /* computed goto support */
2080 void ggoto(void)
2082 gcall_or_jmp(1);
2083 vtop--;
2086 /* Save the stack pointer onto the stack and return the location of its address */
2087 ST_FUNC void gen_vla_sp_save(int addr) {
2088 tcc_error("variable length arrays unsupported for this target");
2091 /* Restore the SP from a location on the stack */
2092 ST_FUNC void gen_vla_sp_restore(int addr) {
2093 tcc_error("variable length arrays unsupported for this target");
2096 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2097 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2098 tcc_error("variable length arrays unsupported for this target");
2101 /* end of ARM code generator */
2102 /*************************************************************/
2103 #endif
2104 /*************************************************************/