tests2: cleanup
[tinycc.git] / arm-gen.c
bloba9c05feb7c4399dc1da1af8aa5cf6506231492c8
1 /*
2 * ARMv4 code generator for TCC
4 * Copyright (c) 2003 Daniel Glöckner
5 * Copyright (c) 2012 Thomas Preud'homme
7 * Based on i386-gen.c by Fabrice Bellard
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #ifdef TARGET_DEFS_ONLY
26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
27 #error "Currently TinyCC only supports float computation with VFP instructions"
28 #endif
30 /* number of available registers */
31 #ifdef TCC_ARM_VFP
32 #define NB_REGS 13
33 #else
34 #define NB_REGS 9
35 #endif
37 #ifndef TCC_ARM_VERSION
38 # define TCC_ARM_VERSION 5
39 #endif
41 /* A register can belong to several classes. The classes must be
42 sorted from more general to more precise (see the gv2() code, which
43 makes assumptions about this ordering). */
44 #define RC_INT 0x0001 /* generic integer register */
45 #define RC_FLOAT 0x0002 /* generic float register */
46 #define RC_R0 0x0004
47 #define RC_R1 0x0008
48 #define RC_R2 0x0010
49 #define RC_R3 0x0020
50 #define RC_R12 0x0040
51 #define RC_F0 0x0080
52 #define RC_F1 0x0100
53 #define RC_F2 0x0200
54 #define RC_F3 0x0400
55 #ifdef TCC_ARM_VFP
56 #define RC_F4 0x0800
57 #define RC_F5 0x1000
58 #define RC_F6 0x2000
59 #define RC_F7 0x4000
60 #endif
61 #define RC_IRET RC_R0 /* function return: integer register */
62 #define RC_LRET RC_R1 /* function return: second integer register */
63 #define RC_FRET RC_F0 /* function return: float register */
65 /* pretty names for the registers */
/* The enumerator values double as indices into reg_classes[]: r0-r3 are 0-3,
   r12 is 4 and the float registers start at 5. The last four float
   registers only exist when TCC_ARM_VFP is defined. */
66 enum {
67 TREG_R0 = 0,
68 TREG_R1,
69 TREG_R2,
70 TREG_R3,
71 TREG_R12,
72 TREG_F0,
73 TREG_F1,
74 TREG_F2,
75 TREG_F3,
76 #ifdef TCC_ARM_VFP
77 TREG_F4,
78 TREG_F5,
79 TREG_F6,
80 TREG_F7,
81 #endif
84 #ifdef TCC_ARM_VFP
85 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
86 #endif
88 /* return registers for function */
89 #define REG_IRET TREG_R0 /* single word int return register */
90 #define REG_LRET TREG_R1 /* second word return register (for long long) */
91 #define REG_FRET TREG_F0 /* float return register */
93 #ifdef TCC_ARM_EABI
94 #define TOK___divdi3 TOK___aeabi_ldivmod
95 #define TOK___moddi3 TOK___aeabi_ldivmod
96 #define TOK___udivdi3 TOK___aeabi_uldivmod
97 #define TOK___umoddi3 TOK___aeabi_uldivmod
98 #endif
100 /* defined if function parameters must be evaluated in reverse order */
101 #define INVERT_FUNC_PARAMS
103 /* defined if structures are passed as pointers. Otherwise structures
104 are directly pushed on stack. */
105 /* #define FUNC_STRUCT_PARAM_AS_PTR */
107 /* pointer size, in bytes */
108 #define PTR_SIZE 4
110 /* long double size and alignment, in bytes */
111 #ifdef TCC_ARM_VFP
112 #define LDOUBLE_SIZE 8
113 #endif
115 #ifndef LDOUBLE_SIZE
116 #define LDOUBLE_SIZE 8
117 #endif
119 #ifdef TCC_ARM_EABI
120 #define LDOUBLE_ALIGN 8
121 #else
122 #define LDOUBLE_ALIGN 4
123 #endif
125 /* maximum alignment (for aligned attribute support) */
126 #define MAX_ALIGN 8
128 #define CHAR_IS_UNSIGNED
130 /******************************************************/
131 /* ELF defines */
133 #define EM_TCC_TARGET EM_ARM
135 /* relocation type for 32 bit data relocation */
136 #define R_DATA_32 R_ARM_ABS32
137 #define R_DATA_PTR R_ARM_ABS32
138 #define R_JMP_SLOT R_ARM_JUMP_SLOT
139 #define R_COPY R_ARM_COPY
141 #define ELF_START_ADDR 0x00008000
142 #define ELF_PAGE_SIZE 0x1000
/* Floating-point calling conventions known to the ARM back end. The code
   generator compares its global float_abi setting against these values. */
144 enum float_abi {
145 ARM_SOFTFP_FLOAT,
146 ARM_HARD_FLOAT,
149 /******************************************************/
150 #else /* ! TARGET_DEFS_ONLY */
151 /******************************************************/
152 #include "tcc.h"
154 enum float_abi float_abi;
/* Register-class bitmask of every allocatable register, indexed by its TREG_*
   number. Each entry combines the generic class (RC_INT or RC_FLOAT) with the
   bit that identifies that one register. */
156 ST_DATA const int reg_classes[NB_REGS] = {
157 /* r0 */ RC_INT | RC_R0,
158 /* r1 */ RC_INT | RC_R1,
159 /* r2 */ RC_INT | RC_R2,
160 /* r3 */ RC_INT | RC_R3,
161 /* r12 */ RC_INT | RC_R12,
162 /* f0 */ RC_FLOAT | RC_F0,
163 /* f1 */ RC_FLOAT | RC_F1,
164 /* f2 */ RC_FLOAT | RC_F2,
165 /* f3 */ RC_FLOAT | RC_F3,
166 #ifdef TCC_ARM_VFP
167 /* d4/s8 */ RC_FLOAT | RC_F4,
168 /* d5/s10 */ RC_FLOAT | RC_F5,
169 /* d6/s12 */ RC_FLOAT | RC_F6,
170 /* d7/s14 */ RC_FLOAT | RC_F7,
171 #endif
174 static int func_sub_sp_offset, last_itod_magic;
175 static int leaffunc;
177 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
178 static CType float_type, double_type, func_float_type, func_double_type;
179 ST_FUNC void arm_init(struct TCCState *s)
181 float_type.t = VT_FLOAT;
182 double_type.t = VT_DOUBLE;
183 func_float_type.t = VT_FUNC;
184 func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
185 func_double_type.t = VT_FUNC;
186 func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
188 float_abi = s->float_abi;
189 #ifndef TCC_ARM_HARDFLOAT
190 tcc_warning("soft float ABI currently not supported: default to softfp");
191 #endif
193 #else
194 #define func_float_type func_old_type
195 #define func_double_type func_old_type
196 #define func_ldouble_type func_old_type
197 ST_FUNC void arm_init(struct TCCState *s)
199 #if !defined (TCC_ARM_VFP)
200 tcc_warning("Support for FPA is deprecated and will be removed in next"
201 " release");
202 #endif
203 #if !defined (TCC_ARM_EABI)
204 tcc_warning("Support for OABI is deprecated and will be removed in next"
205 " release");
206 #endif
208 #endif
210 static int two2mask(int a,int b) {
211 return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
214 static int regmask(int r) {
215 return reg_classes[r]&~(RC_INT|RC_FLOAT);
218 /******************************************************/
#ifdef TCC_ARM_EABI
/* Return the path of the dynamic loader matching the float ABI selected in
   the compilation state s. */
char *default_elfinterp(struct TCCState *s)
{
    return s->float_abi == ARM_HARD_FLOAT ? "/lib/ld-linux-armhf.so.3"
                                          : "/lib/ld-linux.so.3";
}
#endif
230 void o(uint32_t i)
232 /* this is a good place to start adding big-endian support*/
233 int ind1;
235 ind1 = ind + 4;
236 if (!cur_text_section)
237 tcc_error("compiler error! This happens f.ex. if the compiler\n"
238 "can't evaluate constant expressions outside of a function.");
239 if (ind1 > cur_text_section->data_allocated)
240 section_realloc(cur_text_section, ind1);
241 cur_text_section->data[ind++] = i&255;
242 i>>=8;
243 cur_text_section->data[ind++] = i&255;
244 i>>=8;
245 cur_text_section->data[ind++] = i&255;
246 i>>=8;
247 cur_text_section->data[ind++] = i;
/* Try to encode constant c as the immediate operand of the ARM
   data-processing instruction op (an 8-bit value rotated right by an even
   amount).

   When c itself cannot be encoded, the complementary instruction is tried
   with the complementary constant: add <-> sub with -c, mov <-> mvn and
   and <-> bic with ~c. A few all-ones operands are rewritten directly
   (xor -> mvn, and -> mov, orr -> mvn).

   op: instruction word with an empty immediate field
   c:  constant to encode

   Returns the complete instruction word, or 0 when no single instruction
   can express the constant. */
static uint32_t stuff_const(uint32_t op, uint32_t c)
{
  const uint32_t all_ones = ~(uint32_t)0;
  int try_neg = 0;
  uint32_t nc = 0, negop = 0;

  switch (op & 0x1F00000) {
  case 0x800000: //add
  case 0x400000: //sub
    try_neg = 1;
    negop = op ^ 0xC00000;
    nc = -c;
    break;
  case 0x1A00000: //mov
  case 0x1E00000: //mvn
    try_neg = 1;
    negop = op ^ 0x400000;
    nc = ~c;
    break;
  case 0x200000: //xor
    if (c == all_ones)
      return (op & 0xF010F000) | ((op >> 16) & 0xF) | 0x1E00000;
    break;
  case 0x0: //and
    if (c == all_ones)
      return (op & 0xF010F000) | ((op >> 16) & 0xF) | 0x1A00000;
    /* fall through: and with c can also be tried as bic with ~c */
  case 0x1C00000: //bic
    try_neg = 1;
    negop = op ^ 0x1C00000;
    nc = ~c;
    break;
  case 0x1800000: //orr
    if (c == all_ones)
      return (op & 0xFFF0FFFF) | 0x1E00000;
    break;
  default:
    break;
  }
  do {
    int i;

    if (c < 256) /* catch undefined <<32 */
      return op | c;
    for (i = 2; i < 32; i += 2) {
      /* 0xff rotated right by i; computed on unsigned operands because
         shifting a signed int into the sign bit is undefined behaviour */
      uint32_t m = ((uint32_t)0xff >> i) | ((uint32_t)0xff << (32 - i));

      if (!(c & ~m))
        return op | ((uint32_t)i << 7) | (c << i) | (c >> (32 - i));
    }
    op = negop;
    c = nc;
  } while (try_neg--);
  return 0;
}
303 //only add,sub
/* Emit op with the arbitrary 32-bit constant v, using as few instructions as
   this search finds. A single instruction is tried first via stuff_const();
   otherwise v is split into two, three or finally four byte-sized pieces,
   trying both v with op and -v with the opposite add/sub opcode. */
304 void stuff_const_harder(uint32_t op, uint32_t v) {
305 uint32_t x;
306 x=stuff_const(op,v);
307 if(x)
308 o(x);
309 else {
310 uint32_t a[16], nv, no, o2, n2;
311 int i,j,k;
/* a[i] is the byte mask 0xff rotated right by 2*i bits */
312 a[0]=0xff;
/* o2 is op with bits 12-15 copied into bits 16-19, so that follow-up
   instructions take the destination of the first one as their source */
313 o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
314 for(i=1;i<16;i++)
315 a[i]=(a[i-1]>>2)|(a[i-1]<<30);
/* two pieces covering v */
316 for(i=0;i<12;i++)
317 for(j=i<4?i+12:15;j>=i+4;j--)
318 if((v&(a[i]|a[j]))==v) {
319 o(stuff_const(op,v&a[i]));
320 o(stuff_const(o2,v&a[j]));
321 return;
/* two pieces covering -v, using the opposite opcode (add <-> sub) */
323 no=op^0xC00000;
324 n2=o2^0xC00000;
325 nv=-v;
326 for(i=0;i<12;i++)
327 for(j=i<4?i+12:15;j>=i+4;j--)
328 if((nv&(a[i]|a[j]))==nv) {
329 o(stuff_const(no,nv&a[i]));
330 o(stuff_const(n2,nv&a[j]));
331 return;
/* three pieces covering v */
333 for(i=0;i<8;i++)
334 for(j=i+4;j<12;j++)
335 for(k=i<4?i+12:15;k>=j+4;k--)
336 if((v&(a[i]|a[j]|a[k]))==v) {
337 o(stuff_const(op,v&a[i]));
338 o(stuff_const(o2,v&a[j]));
339 o(stuff_const(o2,v&a[k]));
340 return;
/* three pieces covering -v (no and nv are simply recomputed here) */
342 no=op^0xC00000;
343 nv=-v;
344 for(i=0;i<8;i++)
345 for(j=i+4;j<12;j++)
346 for(k=i<4?i+12:15;k>=j+4;k--)
347 if((nv&(a[i]|a[j]|a[k]))==nv) {
348 o(stuff_const(no,nv&a[i]));
349 o(stuff_const(n2,nv&a[j]));
350 o(stuff_const(n2,nv&a[k]));
351 return;
/* last resort: one instruction per byte of v */
353 o(stuff_const(op,v&a[0]));
354 o(stuff_const(o2,v&a[4]));
355 o(stuff_const(o2,v&a[8]));
356 o(stuff_const(o2,v&a[12]));
360 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
362 addr-=pos+8;
363 addr/=4;
364 if(addr>=0x1000000 || addr<-0x1000000) {
365 if(fail)
366 tcc_error("FIXME: function bigger than 32MB");
367 return 0;
369 return 0x0A000000|(addr&0xffffff);
372 int decbranch(int pos)
374 int x;
375 x=*(uint32_t *)(cur_text_section->data + pos);
376 x&=0x00ffffff;
377 if(x&0x800000)
378 x-=0x1000000;
379 return x*4+pos+8;
382 /* output a symbol and patch all calls to it */
383 void gsym_addr(int t, int a)
385 uint32_t *x;
386 int lt;
387 while(t) {
388 x=(uint32_t *)(cur_text_section->data + t);
389 t=decbranch(lt=t);
390 if(a==lt+4)
391 *x=0xE1A00000; // nop
392 else {
393 *x &= 0xff000000;
394 *x |= encbranch(lt,a,1);
/* Resolve the chain of pending branches starting at t so that they all
   target the current output position 'ind'. */
399 void gsym(int t)
401 gsym_addr(t, ind);
404 #ifdef TCC_ARM_VFP
405 static uint32_t vfpr(int r)
407 if(r<TREG_F0 || r>TREG_F7)
408 tcc_error("compiler error! register %i is no vfp register",r);
409 return r-5;
411 #else
412 static uint32_t fpr(int r)
414 if(r<TREG_F0 || r>TREG_F3)
415 tcc_error("compiler error! register %i is no fpa register",r);
416 return r-5;
418 #endif
/* Convert TCC integer register r into the ARM core register number.
   Index 4 stands for r12; 0-3 map to themselves and 14 (lr) is accepted
   as is. Any other value is a compiler error. */
static uint32_t intr(int r)
{
  int is_low_reg = r >= 0 && r <= 4;

  if (r == 4)
    return 12;
  if (!is_low_reg && r != 14)
    tcc_error("compiler error! register %i is no int register",r);
  return r;
}
/* Make a base + offset address fit the offset field of a load/store.

   Nothing happens when *off is at most maxoff and has none of its low
   'shift' bits set. Otherwise an add/sub into lr is emitted that absorbs
   the part of the offset that does not fit, *base becomes lr (14), and
   *off / *sgn are updated to describe the remaining displacement.

   base:   in/out base register number
   off:    in/out offset magnitude
   sgn:    in/out, non-zero when the offset is to be subtracted
   maxoff: largest offset the instruction can encode
   shift:  number of low offset bits that must be zero */
static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
{
  uint32_t insn, enc;
  int rounded_up;

  if (*off <= maxoff && !(*off & ((1 << shift) - 1)))
    return;

  /* add lr, base, #imm  or  sub lr, base, #imm */
  insn = *sgn ? 0xE240E000 : 0xE280E000;
  insn |= (*base) << 16;
  *base = 14; // lr

  /* first choice: move the high part of the offset into lr */
  enc = stuff_const(insn, *off & ~maxoff);
  if (enc) {
    o(enc);
    *off &= maxoff;
    return;
  }

  /* second choice: overshoot, then step back with the opposite sign */
  rounded_up = (*off + maxoff) & ~maxoff;
  enc = stuff_const(insn, rounded_up);
  if (enc) {
    o(enc);
    *sgn = !*sgn;
    *off = rounded_up - *off;
    return;
  }

  /* otherwise build the high part with several instructions */
  stuff_const_harder(insn, *off & ~maxoff);
  *off &= maxoff;
}
456 static uint32_t mapcc(int cc)
458 switch(cc)
460 case TOK_ULT:
461 return 0x30000000; /* CC/LO */
462 case TOK_UGE:
463 return 0x20000000; /* CS/HS */
464 case TOK_EQ:
465 return 0x00000000; /* EQ */
466 case TOK_NE:
467 return 0x10000000; /* NE */
468 case TOK_ULE:
469 return 0x90000000; /* LS */
470 case TOK_UGT:
471 return 0x80000000; /* HI */
472 case TOK_Nset:
473 return 0x40000000; /* MI */
474 case TOK_Nclear:
475 return 0x50000000; /* PL */
476 case TOK_LT:
477 return 0xB0000000; /* LT */
478 case TOK_GE:
479 return 0xA0000000; /* GE */
480 case TOK_LE:
481 return 0xD0000000; /* LE */
482 case TOK_GT:
483 return 0xC0000000; /* GT */
485 tcc_error("unexpected condition code");
486 return 0xE0000000; /* AL */
489 static int negcc(int cc)
491 switch(cc)
493 case TOK_ULT:
494 return TOK_UGE;
495 case TOK_UGE:
496 return TOK_ULT;
497 case TOK_EQ:
498 return TOK_NE;
499 case TOK_NE:
500 return TOK_EQ;
501 case TOK_ULE:
502 return TOK_UGT;
503 case TOK_UGT:
504 return TOK_ULE;
505 case TOK_Nset:
506 return TOK_Nclear;
507 case TOK_Nclear:
508 return TOK_Nset;
509 case TOK_LT:
510 return TOK_GE;
511 case TOK_GE:
512 return TOK_LT;
513 case TOK_LE:
514 return TOK_GT;
515 case TOK_GT:
516 return TOK_LE;
518 tcc_error("unexpected condition code");
519 return TOK_NE;
522 /* load 'r' from value 'sv' */
/* Emits the instruction(s) that bring the value described by sv into
   register r. Lvalues become memory loads; constants, frame addresses,
   comparison results, jump chains and other registers are materialised
   directly. Any other kind of value ends in "load unimplemented!". */
523 void load(int r, SValue *sv)
525 int v, ft, fc, fr, sign;
526 uint32_t op;
527 SValue v1;
529 fr = sv->r;
530 ft = sv->type.t;
531 fc = sv->c.ul;
/* split the offset into magnitude (fc) and direction (sign): the load
   opcodes below get bit 0x800000 set only when sign is 0 */
533 if(fc>=0)
534 sign=0;
535 else {
536 sign=1;
537 fc=-fc;
540 v = fr & VT_VALMASK;
541 if (fr & VT_LVAL) {
542 uint32_t base = 0xB; // fp
/* VT_LLOCAL: the pointer itself is first loaded from the frame into lr,
   then used as base with a zero offset */
543 if(v == VT_LLOCAL) {
544 v1.type.t = VT_PTR;
545 v1.r = VT_LOCAL | VT_LVAL;
546 v1.c.ul = sv->c.ul;
547 load(base=14 /* lr */, &v1);
548 fc=sign=0;
549 v=VT_LOCAL;
/* constant address: materialise it in lr first */
550 } else if(v == VT_CONST) {
551 v1.type.t = VT_PTR;
552 v1.r = fr&~VT_LVAL;
553 v1.c.ul = sv->c.ul;
554 v1.sym=sv->sym;
555 load(base=14, &v1);
556 fc=sign=0;
557 v=VT_LOCAL;
/* the address is already held in register v */
558 } else if(v < VT_CONST) {
559 base=intr(v);
560 fc=sign=0;
561 v=VT_LOCAL;
/* from here on every lvalue is a base register plus offset fc */
563 if(v == VT_LOCAL) {
564 if(is_float(ft)) {
/* float loads: offset limited to 1020 and required to be a multiple of 4 */
565 calcaddr(&base,&fc,&sign,1020,2);
566 #ifdef TCC_ARM_VFP
567 op=0xED100A00; /* flds */
568 if(!sign)
569 op|=0x800000;
570 if ((ft & VT_BTYPE) != VT_FLOAT)
571 op|=0x100; /* flds -> fldd */
572 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
573 #else
574 op=0xED100100;
575 if(!sign)
576 op|=0x800000;
577 #if LDOUBLE_SIZE == 8
578 if ((ft & VT_BTYPE) != VT_FLOAT)
579 op|=0x8000;
580 #else
581 if ((ft & VT_BTYPE) == VT_DOUBLE)
582 op|=0x8000;
583 else if ((ft & VT_BTYPE) == VT_LDOUBLE)
584 op|=0x400000;
585 #endif
586 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
587 #endif
/* signed bytes and all shorts: offset limited to 255, encoded as two
   separate nibbles */
588 } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
589 || (ft & VT_BTYPE) == VT_SHORT) {
590 calcaddr(&base,&fc,&sign,255,0);
591 op=0xE1500090;
592 if ((ft & VT_BTYPE) == VT_SHORT)
593 op|=0x20;
594 if ((ft & VT_UNSIGNED) == 0)
595 op|=0x40;
596 if(!sign)
597 op|=0x800000;
598 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
/* everything else: offset limited to 4095; bit 0x400000 is added for
   byte and bool types */
599 } else {
600 calcaddr(&base,&fc,&sign,4095,0);
601 op=0xE5100000;
602 if(!sign)
603 op|=0x800000;
604 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
605 op|=0x400000;
606 o(op|(intr(r)<<12)|fc|(base<<16));
608 return;
610 } else {
/* constant value: a single instruction when stuff_const() can encode it;
   otherwise (or whenever a symbol is involved) the word is placed in the
   instruction stream, with an R_ARM_ABS32 relocation for symbols */
611 if (v == VT_CONST) {
612 op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
613 if (fr & VT_SYM || !op) {
614 o(0xE59F0000|(intr(r)<<12));
615 o(0xEA000000);
616 if(fr & VT_SYM)
617 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
618 o(sv->c.ul);
619 } else
620 o(op);
621 return;
/* address of a local: the offset is added to register 11 (fp), through an
   in-stream literal when it cannot be encoded as an immediate */
622 } else if (v == VT_LOCAL) {
623 op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
624 if (fr & VT_SYM || !op) {
625 o(0xE59F0000|(intr(r)<<12));
626 o(0xEA000000);
627 if(fr & VT_SYM) // needed ?
628 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
629 o(sv->c.ul);
630 o(0xE08B0000|(intr(r)<<12)|intr(r));
631 } else
632 o(op);
633 return;
/* comparison result: r = 1 under the condition, r = 0 under its negation */
634 } else if(v == VT_CMP) {
635 o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
636 o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
637 return;
/* pending jump chain: r = t on the fall-through path, r = t^1 on the path
   reached through the jumps resolved by gsym() */
638 } else if (v == VT_JMP || v == VT_JMPI) {
639 int t;
640 t = v & 1;
641 o(0xE3A00000|(intr(r)<<12)|t);
642 o(0xEA000000);
643 gsym(sv->c.ul);
644 o(0xE3A00000|(intr(r)<<12)|(t^1));
645 return;
/* register to register copy */
646 } else if (v < VT_CONST) {
647 if(is_float(ft))
648 #ifdef TCC_ARM_VFP
649 o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
650 #else
651 o(0xEE008180|(fpr(r)<<12)|fpr(v));
652 #endif
653 else
654 o(0xE1A00000|(intr(r)<<12)|intr(v));
655 return;
658 tcc_error("load unimplemented!");
661 /* store register 'r' in lvalue 'v' */
/* Emits the instruction that writes register r to the memory location
   described by sv. The destination may be a frame slot, an address held in
   a register or a constant/symbol address; anything else ends in
   "store unimplemented". */
662 void store(int r, SValue *sv)
664 SValue v1;
665 int v, ft, fc, fr, sign;
666 uint32_t op;
668 fr = sv->r;
669 ft = sv->type.t;
670 fc = sv->c.ul;
/* split the offset into magnitude (fc) and direction (sign): the store
   opcodes below get bit 0x800000 set only when sign is 0 */
672 if(fc>=0)
673 sign=0;
674 else {
675 sign=1;
676 fc=-fc;
679 v = fr & VT_VALMASK;
680 if (fr & VT_LVAL || fr == VT_LOCAL) {
681 uint32_t base = 0xb;
/* the address is held in register v */
682 if(v < VT_CONST) {
683 base=intr(v);
684 v=VT_LOCAL;
685 fc=sign=0;
/* constant address: load it into lr (14) first */
686 } else if(v == VT_CONST) {
687 v1.type.t = ft;
688 v1.r = fr&~VT_LVAL;
689 v1.c.ul = sv->c.ul;
690 v1.sym=sv->sym;
691 load(base=14, &v1);
692 fc=sign=0;
693 v=VT_LOCAL;
/* from here on the destination is a base register plus offset fc */
695 if(v == VT_LOCAL) {
696 if(is_float(ft)) {
/* float stores: offset limited to 1020 and required to be a multiple of 4 */
697 calcaddr(&base,&fc,&sign,1020,2);
698 #ifdef TCC_ARM_VFP
699 op=0xED000A00; /* fsts */
700 if(!sign)
701 op|=0x800000;
702 if ((ft & VT_BTYPE) != VT_FLOAT)
703 op|=0x100; /* fsts -> fstd */
704 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
705 #else
706 op=0xED000100;
707 if(!sign)
708 op|=0x800000;
709 #if LDOUBLE_SIZE == 8
710 if ((ft & VT_BTYPE) != VT_FLOAT)
711 op|=0x8000;
712 #else
713 if ((ft & VT_BTYPE) == VT_DOUBLE)
714 op|=0x8000;
715 if ((ft & VT_BTYPE) == VT_LDOUBLE)
716 op|=0x400000;
717 #endif
718 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
719 #endif
720 return;
/* shorts: offset limited to 255, encoded as two separate nibbles */
721 } else if((ft & VT_BTYPE) == VT_SHORT) {
722 calcaddr(&base,&fc,&sign,255,0);
723 op=0xE14000B0;
724 if(!sign)
725 op|=0x800000;
726 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
/* everything else: offset limited to 4095; bit 0x400000 is added for
   byte and bool types */
727 } else {
728 calcaddr(&base,&fc,&sign,4095,0);
729 op=0xE5000000;
730 if(!sign)
731 op|=0x800000;
732 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
733 op|=0x400000;
734 o(op|(intr(r)<<12)|fc|(base<<16));
736 return;
739 tcc_error("store unimplemented");
/* Emit the instruction(s) adding val to the stack pointer. A negative val
   reserves stack space, a positive one releases it. stuff_const_harder()
   takes care of values that need more than one instruction. */
742 static void gadd_sp(int val)
744 stuff_const_harder(0xE28DD000,val);
747 /* 'is_jmp' is '1' if it is a jump */
/* Emits a call (is_jmp == 0) or a jump (is_jmp != 0) to the value on top of
   the value stack. Constant targets get a relocated pc-relative branch when
   the displacement can be encoded, otherwise an absolute address word loaded
   into pc; any other target is loaded into an integer register first. */
748 static void gcall_or_jmp(int is_jmp)
750 int r;
751 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
752 uint32_t x;
753 /* constant case */
/* fail == 0: encbranch() returns 0 instead of raising an error when the
   displacement does not fit */
754 x=encbranch(ind,ind+vtop->c.ul,0);
755 if(x) {
756 if (vtop->r & VT_SYM) {
757 /* relocation case */
758 greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
759 } else
760 put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
/* condition "always" is added here; the extra 0x01000000 turns the plain
   branch into a branch-with-link for calls */
761 o(x|(is_jmp?0xE0000000:0xE1000000));
762 } else {
/* out of branch range: the absolute target address is emitted as a data
   word right after the instruction that loads it into pc */
763 if(!is_jmp)
764 o(0xE28FE004); // add lr,pc,#4
765 o(0xE51FF004); // ldr pc,[pc,#-4]
766 if (vtop->r & VT_SYM)
767 greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
768 o(vtop->c.ul);
770 } else {
771 /* otherwise, indirect call */
772 r = gv(RC_INT);
773 if(!is_jmp)
774 o(0xE1A0E00F); // mov lr,pc
775 o(0xE1A0F000|intr(r)); // mov pc,r
779 /* Return whether a structure is an homogeneous float aggregate or not.
780 The answer is true if all the elements of the structure are of the same
781 primitive float type and there is less than 4 elements.
783 type: the type corresponding to the structure to be tested */
784 static int is_hgen_float_aggr(CType *type)
786 if ((type->t & VT_BTYPE) == VT_STRUCT) {
787 struct Sym *ref;
788 int btype, nb_fields = 0;
790 ref = type->ref->next;
791 btype = ref->type.t & VT_BTYPE;
792 if (btype == VT_FLOAT || btype == VT_DOUBLE) {
793 for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
794 return !ref && nb_fields <= 4;
797 return 0;
struct avail_regs {
  /* Single-precision registers skipped to align a double. A hole is only
     created at an odd register that is followed by a double, so with 16
     registers at most 4 holes can exist (s1, s5, s9 and s13; or s3, s7, s11
     and s15). */
  signed char avail[4];
  int first_hole; /* first available hole */
  int last_hole; /* last available hole (none if equal to first_hole) */
  int first_free_reg; /* next free register in the sequence, hole excluded */
};

#define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0, 0 }, 0, 0, 0 }

/* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
   param) according to the rules described in the procedure call standard for
   the ARM architecture (AAPCS). If found, the registers are assigned to this
   VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
   and the parameter is a single float.

   avregs: opaque structure to keep track of available VFP co-processor regs
   align: alignment constraints for the param, as returned by type_size()
   size: size of the parameter, as returned by type_size()

   Returns the number of the first single-precision register assigned, or -1
   when the parameter does not fit. After the first failure every later call
   returns -1 as well. */
int assign_vfpreg(struct avail_regs *avregs, int align, int size)
{
  int first_reg = 0;

  if (avregs->first_free_reg == -1)
    return -1;
  if (align >> 3) { /* double alignment */
    first_reg = avregs->first_free_reg;
    /* alignment constraint not respected so use next reg and record hole */
    if (first_reg & 1)
      avregs->avail[avregs->last_hole++] = first_reg++;
  } else { /* no special alignment (float or array of float) */
    /* if single float and a hole is available, assign the param to it */
    if (size == 4 && avregs->first_hole != avregs->last_hole)
      return avregs->avail[avregs->first_hole++];
    else
      first_reg = avregs->first_free_reg;
  }
  if (first_reg + size / 4 <= 16) {
    avregs->first_free_reg = first_reg + size / 4;
    return first_reg;
  }
  avregs->first_free_reg = -1;
  return -1;
}
844 /* Returns whether all params need to be passed in core registers or not.
845 This is the case for function part of the runtime ABI. */
846 int floats_in_core_regs(SValue *sval)
848 if (!sval->sym)
849 return 0;
851 switch (sval->sym->v) {
852 case TOK___floatundisf:
853 case TOK___floatundidf:
854 case TOK___fixunssfdi:
855 case TOK___fixunsdfdi:
856 #ifndef TCC_ARM_VFP
857 case TOK___fixunsxfdi:
858 #endif
859 case TOK___floatdisf:
860 case TOK___floatdidf:
861 case TOK___fixsfdi:
862 case TOK___fixdfdi:
863 return 1;
865 default:
866 return 0;
870 /* Return the number of registers needed to return the struct, or 0 if
871 returning via struct pointer. */
872 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align) {
873 #ifdef TCC_ARM_EABI
874 int size, align;
875 size = type_size(vt, &align);
876 if (float_abi == ARM_HARD_FLOAT && !variadic &&
877 (is_float(vt->t) || is_hgen_float_aggr(vt))) {
878 *ret_align = 8;
879 ret->ref = NULL;
880 ret->t = VT_DOUBLE;
881 return (size + 7) >> 3;
882 } else if (size <= 4) {
883 *ret_align = 4;
884 ret->ref = NULL;
885 ret->t = VT_INT;
886 return 1;
887 } else
888 return 0;
889 #else
890 return 0;
891 #endif
894 /* Parameters are classified according to how they are copied to their final
895 destination for the function call. Because the copying is performed class
896 after class according to the order in the enum below, it is important that
897 some constraints about the order of the members of this enum are respected:
898 - CORE_STRUCT_CLASS must come after STACK_CLASS;
899 - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
900 VFP_STRUCT_CLASS;
901 - VFP_STRUCT_CLASS must come after VFP_CLASS.
902 See the comment for the main loop in copy_params() for the reason. */
903 enum reg_class {
904 STACK_CLASS = 0,
905 CORE_STRUCT_CLASS,
906 VFP_CLASS,
907 VFP_STRUCT_CLASS,
908 CORE_CLASS,
/* number of classes; used as the size of plan.clsplans[] */
909 NB_CLASSES
/* Where one call argument goes. Entries of the same class are chained
   through 'prev', most recently added entry first. */
912 struct param_plan {
913 int start; /* first reg or addr used depending on the class */
914 int end; /* last reg used or next free addr depending on the class */
915 SValue *sval; /* pointer to SValue on the value stack */
916 struct param_plan *prev; /* previous element in this class */
/* Complete argument assignment for one call: the storage for all
   per-parameter plans plus, for each class, the head of its linked list
   (NULL when the class has no parameter). */
919 struct plan {
920 struct param_plan *pplans; /* array of all the param plans */
921 struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
/* Append pplan to plan->pplans and make it the new head of the list of the
   given class. The macro relies on a variable named <plan>_nb being in scope
   at the call site (token pasting on the first argument); it is used as the
   next free index in plan->pplans and is incremented. */
924 #define add_param_plan(plan,pplan,class) \
925 do { \
926 pplan.prev = plan->clsplans[class]; \
927 plan->pplans[plan ## _nb] = pplan; \
928 plan->clsplans[class] = &plan->pplans[plan ## _nb++]; \
929 } while(0)
931 /* Assign parameters to registers and stack with alignment according to the
932 rules in the procedure call standard for the ARM architecture (AAPCS).
933 The overall assignment is recorded in an array of per parameter structures
934 called parameter plans. The parameter plans are also further organized in a
935 number of linked lists, one per class of parameter (see the comment for the
936 definition of enum reg_class).
938 nb_args: number of parameters of the function for which a call is generated
939 float_abi: float ABI in use for this function call
940 plan: the structure where the overall assignment is recorded
941 todo: a bitmap that records which core registers hold a parameter
943 Returns the amount of stack space needed for parameter passing
945 Note: this function allocates an array in plan->pplans with tcc_malloc. It
946 is the responsibility of the caller to free this array once used (i.e. not
947 before copy_params). */
948 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
950 int i, size, align;
951 int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
/* plan_nb is the index used by the add_param_plan() macro (plan ## _nb) */
952 int plan_nb = 0;
953 struct param_plan pplan;
954 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
956 ncrn = nsaa = 0;
957 *todo = 0;
958 plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
959 memset(plan->clsplans, 0, sizeof(plan->clsplans));
/* walk the arguments from vtop[-(nb_args-1)] up to vtop[0] */
960 for(i = nb_args; i-- ;) {
961 int j, start_vfpreg = 0;
962 CType type = vtop[-i].type;
963 type.t &= ~VT_ARRAY;
/* size and alignment are rounded up to a multiple of 4 bytes */
964 size = type_size(&type, &align);
965 size = (size + 3) & ~3;
966 align = (align + 3) & ~3;
967 switch(vtop[-i].type.t & VT_BTYPE) {
968 case VT_STRUCT:
969 case VT_FLOAT:
970 case VT_DOUBLE:
971 case VT_LDOUBLE:
/* hard-float ABI: floats and homogeneous float aggregates go to VFP
   registers while assign_vfpreg() still finds room for them */
972 if (float_abi == ARM_HARD_FLOAT) {
973 int is_hfa = 0; /* Homogeneous float aggregate */
975 if (is_float(vtop[-i].type.t)
976 || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
977 int end_vfpreg;
979 start_vfpreg = assign_vfpreg(&avregs, align, size);
980 end_vfpreg = start_vfpreg + ((size - 1) >> 2);
981 if (start_vfpreg >= 0) {
982 pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
983 if (is_hfa)
984 add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
985 else
986 add_param_plan(plan, pplan, VFP_CLASS);
987 continue;
988 } else
989 break;
/* otherwise try the core registers, after aligning the register number */
992 ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
993 if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
994 /* The parameter is allocated both in core register and on stack. As
995 * such, it can be of either class: it would either be the last of
996 * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
997 for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
998 *todo|=(1<<j);
999 pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
1000 add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
1001 ncrn += size/4;
1002 if (ncrn > 4)
1003 nsaa = (ncrn - 4) * 4;
1004 } else {
1005 ncrn = 4;
1006 break;
1008 continue;
1009 default:
1010 if (ncrn < 4) {
1011 int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
/* a long long starts on an even core register number */
1013 if (is_long) {
1014 ncrn = (ncrn + 1) & -2;
1015 if (ncrn == 4)
1016 break;
1018 pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
1019 ncrn++;
1020 if (is_long)
1021 pplan.end = ncrn++;
1022 add_param_plan(plan, pplan, CORE_CLASS);
1023 continue;
/* reached through 'break' above: the parameter goes on the stack */
1026 nsaa = (nsaa + (align - 1)) & ~(align - 1);
1027 pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
1028 add_param_plan(plan, pplan, STACK_CLASS);
1029 nsaa += size; /* size already rounded up before */
1031 return nsaa;
1034 #undef add_param_plan
1036 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1037 function call.
1039 nb_args: number of parameters the function takes
1040 plan: the overall assignment plan for parameters
1041 todo: a bitmap indicating what core reg will hold a parameter
1043 Returns the number of SValue added by this function on the value stack */
1044 static int copy_params(int nb_args, struct plan *plan, int todo)
1046 int size, align, r, i, nb_extra_sval = 0;
1047 struct param_plan *pplan;
1049 /* Several constraints require parameters to be copied in a specific order:
1050 - structures are copied to the stack before being loaded in a reg;
1051 - floats loaded to an odd numbered VFP reg are first copied to the
1052 preceding even numbered VFP reg and then moved to the next VFP reg.
1054 It is thus important that:
1055 - structures assigned to core regs must be copied after parameters
1056 assigned to the stack but before structures assigned to VFP regs because
1057 a structure can lie partly in core registers and partly on the stack;
1058 - parameters assigned to the stack and all structures be copied before
1059 parameters assigned to a core reg since copying a parameter to the stack
1060 require using a core reg;
1061 - parameters assigned to VFP regs be copied before structures assigned to
1062 VFP regs as the copy might use an even numbered VFP reg that already
1063 holds part of a structure. */
/* classes are processed in enum reg_class order; inside a class the list is
   walked from the most recently planned parameter backwards */
1064 for(i = 0; i < NB_CLASSES; i++) {
1065 for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
/* work on a copy pushed on top of the value stack */
1066 vpushv(pplan->sval);
1067 pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1068 switch(i) {
1069 case STACK_CLASS:
1070 case CORE_STRUCT_CLASS:
1071 case VFP_STRUCT_CLASS:
1072 if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1073 int padding = 0;
1074 size = type_size(&pplan->sval->type, &align);
1075 /* align to stack align size */
1076 size = (size + 3) & ~3;
1077 if (i == STACK_CLASS && pplan->prev)
1078 padding = pplan->start - pplan->prev->end;
1079 size += padding; /* Add padding if any */
1080 /* allocate the necessary size on stack */
1081 gadd_sp(-size);
1082 /* generate structure store */
1083 r = get_reg(RC_INT);
1084 o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1085 vset(&vtop->type, r | VT_LVAL, 0);
1086 vswap();
1087 vstore(); /* memcpy to current sp + potential padding */
1089 /* Homogeneous float aggregate are loaded to VFP registers
1090 immediately since there is no way of loading data in multiple
1091 non consecutive VFP registers as what is done for other
1092 structures (see the use of todo). */
1093 if (i == VFP_STRUCT_CLASS) {
1094 int first = pplan->start, nb = pplan->end - first + 1;
1095 /* vpop.32 {pplan->start, ..., pplan->end} */
1096 o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1097 /* No need to write the register used to a SValue since VFP regs
1098 cannot be used for gcall_or_jmp */
/* non-structure values are pushed on the stack from a register */
1100 } else {
1101 if (is_float(pplan->sval->type.t)) {
1102 #ifdef TCC_ARM_VFP
1103 r = vfpr(gv(RC_FLOAT)) << 12;
1104 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1105 size = 4;
1106 else {
1107 size = 8;
1108 r |= 0x101; /* vpush.32 -> vpush.64 */
1110 o(0xED2D0A01 + r); /* vpush */
1111 #else
1112 r = fpr(gv(RC_FLOAT)) << 12;
1113 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1114 size = 4;
1115 else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1116 size = 8;
1117 else
1118 size = LDOUBLE_SIZE;
1120 if (size == 12)
1121 r |= 0x400000;
1122 else if(size == 8)
1123 r|=0x8000;
1125 o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1126 #endif
1127 } else {
1128 /* simple type (currently always same size) */
1129 /* XXX: implicit cast ? */
1130 size=4;
/* a long long is split by lexpand_nr() and pushed as two words */
1131 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1132 lexpand_nr();
1133 size = 8;
1134 r = gv(RC_INT);
1135 o(0xE52D0004|(intr(r)<<12)); /* push r */
1136 vtop--;
1138 r = gv(RC_INT);
1139 o(0xE52D0004|(intr(r)<<12)); /* push r */
1141 if (i == STACK_CLASS && pplan->prev)
1142 gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1144 break;
/* plain float/double: load it into the float register that contains the
   assigned single-precision register (start >> 1) */
1146 case VFP_CLASS:
1147 gv(regmask(TREG_F0 + (pplan->start >> 1)));
1148 if (pplan->start & 1) { /* Must be in upper part of double register */
1149 o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1150 vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1152 break;
/* scalar in core registers: load into the planned register(s) and record
   them in the original stack entry */
1154 case CORE_CLASS:
1155 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1156 lexpand_nr();
1157 gv(regmask(pplan->end));
1158 pplan->sval->r2 = vtop->r;
1159 vtop--;
1161 gv(regmask(pplan->start));
1162 /* Mark register as used so that gcall_or_jmp use another one
1163 (regs >=4 are free as never used to pass parameters) */
1164 pplan->sval->r = vtop->r;
1165 break;
/* drop the working copy pushed at the top of the loop body */
1167 vtop--;
1171 /* Manually free remaining registers since next parameters are loaded
1172 * manually, without the help of gv(int). */
1173 save_regs(nb_args);
/* structures planned for core registers were copied to the stack above;
   pop their register part now */
1175 if(todo) {
1176 o(0xE8BD0000|todo); /* pop {todo} */
1177 for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1178 int r;
1179 pplan->sval->r = pplan->start;
1180 /* An SValue can only pin 2 registers at best (r and r2) but a structure
1181 can occupy more than 2 registers. Thus, we need to push on the value
1182 stack some fake parameter to have on SValue for each registers used
1183 by a structure (r2 is not used). */
1184 for (r = pplan->start + 1; r <= pplan->end; r++) {
1185 if (todo & (1 << r)) {
1186 nb_extra_sval++;
1187 vpushi(0);
1188 vtop->r = r;
1193 return nb_extra_sval;
1196 /* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address.

   nb_args is the number of argument values on the value stack; the
   function value itself is read at vtop[-nb_args]. The global float_abi
   may be switched temporarily for this call and is restored from
   def_float_abi before returning. */
1199 void gfunc_call(int nb_args)
1201 int r, args_size;
1202 int variadic, def_float_abi = float_abi;
1203 int todo;
1204 struct plan plan;
1206 #ifdef TCC_ARM_EABI
/* With the hard-float ABI, variadic callees and callees for which
   floats_in_core_regs() is true are called with the softfp convention. */
1207 if (float_abi == ARM_HARD_FLOAT) {
1208 variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1209 if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1210 float_abi = ARM_SOFTFP_FLOAT;
1212 #endif
1213 /* cannot let cpu flags if other instruction are generated. Also avoid leaving
   VT_JMP anywhere except on the top of the stack because it would complicate
   the code generator. */
1216 r = vtop->r & VT_VALMASK;
1217 if (r == VT_CMP || (r & ~1) == VT_JMP)
1218 gv(RC_INT);
/* args_size is later handed to gadd_sp() to pop the stack-passed
   parameters; plan and todo are filled in for copy_params(). */
1220 args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1222 #ifdef TCC_ARM_EABI
1223 if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1224 args_size = (args_size + 7) & ~7;
1225 o(0xE24DD004); /* sub sp, sp, #4 */
1227 #endif
/* copy_params() returns the number of extra SValues it pushed, so they
   are counted as arguments when the value stack is unwound below. */
1229 nb_args += copy_params(nb_args, &plan, todo);
1230 tcc_free(plan.pplans);
1232 /* Move fct SValue on top as required by gcall_or_jmp */
1233 vrotb(nb_args + 1);
1234 gcall_or_jmp(0);
1235 if (args_size)
1236 gadd_sp(args_size); /* pop all parameters passed on the stack */
1237 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
/* A softfp callee returns floating-point results in core registers:
   move them into s0/d0. */
1238 if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1239 if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1240 o(0xEE000A10); /*vmov s0, r0 */
1241 } else {
1242 o(0xEE000B10); /* vmov.32 d0[0], r0 */
1243 o(0xEE201B10); /* vmov.32 d0[1], r1 */
1246 #endif
1247 vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1248 leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1249 float_abi = def_float_abi;
1252 /* Generate the prolog of the function whose type is 'func_type'.
   Emits the register-saving instructions, sets up fp, reserves one
   instruction slot (at func_sub_sp_offset) that gfunc_epilog() patches
   with the stack adjustment, and declares every parameter as a local
   symbol at its frame offset. Also resets last_itod_magic, leaffunc
   and loc for the new function. */
1253 void gfunc_prolog(CType *func_type)
1255 Sym *sym,*sym2;
1256 int n, nf, size, align, struct_ret = 0;
1257 int addr, pn, sn; /* pn=core, sn=stack */
1258 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1259 CType ret_type;
1261 sym = func_type->ref;
1262 func_vt = sym->type;
1263 func_var = (func_type->ref->c == FUNC_ELLIPSIS);
1265 n = nf = 0;
/* A struct result that gfunc_sret() does not return in registers takes
   one core register word for the result address. */
1266 if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1267 !gfunc_sret(&func_vt, func_var, &ret_type, &align))
1269 n++;
1270 struct_ret = 1;
1271 func_vc = 12; /* Offset from fp of the place to store the result */
/* First pass: count the core register words (n) and the VFP
   single-precision slots (nf) taken by the parameters. */
1273 for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
1274 size = type_size(&sym2->type, &align);
1275 #ifdef TCC_ARM_EABI
1276 if (float_abi == ARM_HARD_FLOAT && !func_var &&
1277 (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
1278 int tmpnf = assign_vfpreg(&avregs, align, size);
1279 tmpnf += (size + 3) / 4;
1280 nf = (tmpnf > nf) ? tmpnf : nf;
1281 } else
1282 #endif
1283 if (n < 4)
1284 n += (size + 3) / 4;
1286 o(0xE1A0C00D); /* mov ip,sp */
/* Variadic functions always save all four argument registers. */
1287 if (func_var)
1288 n=4;
1289 if (n) {
1290 if(n>4)
1291 n=4;
1292 #ifdef TCC_ARM_EABI
1293 n=(n+1)&-2;
1294 #endif
1295 o(0xE92D0000|((1<<n)-1)); /* save r0..r(n-1) on stack if needed (n <= 4, so at most r0-r3) */
1297 if (nf) {
1298 if (nf>16)
1299 nf=16;
1300 nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1301 o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1303 o(0xE92D5800); /* save fp, ip, lr */
1304 o(0xE1A0B00D); /* mov fp, sp */
1305 func_sub_sp_offset = ind;
1306 o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1308 #ifdef TCC_ARM_EABI
1309 if (float_abi == ARM_HARD_FLOAT) {
1310 func_vc += nf * 4;
1311 avregs = AVAIL_REGS_INITIALIZER;
1313 #endif
/* Second pass: compute the frame offset of every parameter (size is
   converted to words, align rounded up to a multiple of 4) and push it
   as a local. */
1314 pn = struct_ret, sn = 0;
1315 while ((sym = sym->next)) {
1316 CType *type;
1317 type = &sym->type;
1318 size = type_size(type, &align);
1319 size = (size + 3) >> 2;
1320 align = (align + 3) & ~3;
1321 #ifdef TCC_ARM_EABI
1322 if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
1323 || is_hgen_float_aggr(&sym->type))) {
1324 int fpn = assign_vfpreg(&avregs, align, size << 2);
1325 if (fpn >= 0)
1326 addr = fpn * 4;
1327 else
1328 goto from_stack;
1329 } else
1330 #endif
1331 if (pn < 4) {
1332 #ifdef TCC_ARM_EABI
1333 pn = (pn + (align-1)/4) & -(align/4);
1334 #endif
1335 addr = (nf + pn) * 4;
1336 pn += size;
/* Parameter straddles registers and stack: record the words that
   spilled past the fourth core register. */
1337 if (!sn && pn > 4)
1338 sn = (pn - 4);
1339 } else {
1340 from_stack:
1341 #ifdef TCC_ARM_EABI
1342 sn = (sn + (align-1)/4) & -(align/4);
1343 #endif
1344 addr = (n + nf + sn) * 4;
1345 sn += size;
/* The extra 12 bytes skip the three words (fp, ip, lr) pushed just
   below the saved argument registers. */
1347 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
1348 addr + 12);
1350 last_itod_magic=0;
1351 leaffunc = 1;
1352 loc = 0;
1355 /* generate function epilog */
1356 void gfunc_epilog(void)
1358 uint32_t x;
1359 int diff;
1360 /* Copy float return value to core register if base standard is used and
1361 float computation is made with VFP */
1362 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1363 if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1364 if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1365 o(0xEE100A10); /* fmrs r0, s0 */
1366 else {
1367 o(0xEE100B10); /* fmrdl r0, d0 */
1368 o(0xEE301B10); /* fmrdh r1, d0 */
1371 #endif
1372 o(0xE89BA800); /* restore fp, sp, pc */
1373 diff = (-loc + 3) & -4;
1374 #ifdef TCC_ARM_EABI
1375 if(!leaffunc)
1376 diff = ((diff + 11) & -8) - 4;
1377 #endif
1378 if(diff > 0) {
1379 x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1380 if(x)
1381 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1382 else {
1383 int addr;
1384 addr=ind;
1385 o(0xE59FC004); /* ldr ip,[pc+4] */
1386 o(0xE04BD00C); /* sub sp,fp,ip */
1387 o(0xE1A0F00E); /* mov pc,lr */
1388 o(diff);
1389 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1394 /* generate a jump to a label */
1395 int gjmp(int t)
1397 int r;
1398 r=ind;
1399 o(0xE0000000|encbranch(r,t,1));
1400 return r;
/* Emit a jump to the fixed code address 'a'; the position of the emitted
   branch is not needed by callers and is discarded. */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1409 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1410 int gtst(int inv, int t)
1412 int v, r;
1413 uint32_t op;
1414 v = vtop->r & VT_VALMASK;
1415 r=ind;
1416 if (v == VT_CMP) {
1417 op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1418 op|=encbranch(r,t,1);
1419 o(op);
1420 t=r;
1421 } else { /* VT_JMP || VT_JMPI */
1422 if ((v & 1) == inv) {
1423 if(!vtop->c.i)
1424 vtop->c.i=t;
1425 else {
1426 uint32_t *x;
1427 int p,lp;
1428 if(t) {
1429 p = vtop->c.i;
1430 do {
1431 p = decbranch(lp=p);
1432 } while(p);
1433 x = (uint32_t *)(cur_text_section->data + lp);
1434 *x &= 0xff000000;
1435 *x |= encbranch(lp,t,1);
1437 t = vtop->c.i;
1439 } else {
1440 t = gjmp(t);
1441 gsym(vtop->c.i);
1444 vtop--;
1445 return t;
1448 /* Generate an integer binary operation 'op' on the two top entries of
   the value stack, leaving the result on top.

   The first switch classifies the operation through 'c':
     c == 1: data-processing instruction, 'opc' holds opcode and S bit
     c == 2: shift, 'opc' holds the shift type
     c == 3: call to the runtime helper 'func'
   '*' and TOK_UMULL are emitted directly and return early. Any other
   operator falls into the default case and is handled as a comparison. */
1449 void gen_opi(int op)
1451 int c, func = 0;
1452 uint32_t opc = 0, r, fr;
1453 unsigned short retreg = REG_IRET;
1455 c=0;
1456 switch(op) {
1457 case '+':
1458 opc = 0x8;
1459 c=1;
1460 break;
1461 case TOK_ADDC1: /* add with carry generation */
1462 opc = 0x9;
1463 c=1;
1464 break;
1465 case '-':
1466 opc = 0x4;
1467 c=1;
1468 break;
1469 case TOK_SUBC1: /* sub with carry generation */
1470 opc = 0x5;
1471 c=1;
1472 break;
1473 case TOK_ADDC2: /* add with carry use */
1474 opc = 0xA;
1475 c=1;
1476 break;
1477 case TOK_SUBC2: /* sub with carry use */
1478 opc = 0xC;
1479 c=1;
1480 break;
1481 case '&':
1482 opc = 0x0;
1483 c=1;
1484 break;
1485 case '^':
1486 opc = 0x2;
1487 c=1;
1488 break;
1489 case '|':
1490 opc = 0x18;
1491 c=1;
1492 break;
1493 case '*':
/* Both operands in registers; the result reuses the register of the
   first operand. */
1494 gv2(RC_INT, RC_INT);
1495 r = vtop[-1].r;
1496 fr = vtop[0].r;
1497 vtop--;
1498 o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1499 return;
1500 case TOK_SHL:
1501 opc = 0;
1502 c=2;
1503 break;
1504 case TOK_SHR:
1505 opc = 1;
1506 c=2;
1507 break;
1508 case TOK_SAR:
1509 opc = 2;
1510 c=2;
1511 break;
1512 case '/':
1513 case TOK_PDIV:
1514 func=TOK___divsi3;
1515 c=3;
1516 break;
1517 case TOK_UDIV:
1518 func=TOK___udivsi3;
1519 c=3;
1520 break;
1521 case '%':
1522 #ifdef TCC_ARM_EABI
/* The EABI helper result is read from REG_LRET instead of REG_IRET. */
1523 func=TOK___aeabi_idivmod;
1524 retreg=REG_LRET;
1525 #else
1526 func=TOK___modsi3;
1527 #endif
1528 c=3;
1529 break;
1530 case TOK_UMOD:
1531 #ifdef TCC_ARM_EABI
1532 func=TOK___aeabi_uidivmod;
1533 retreg=REG_LRET;
1534 #else
1535 func=TOK___umodsi3;
1536 #endif
1537 c=3;
1538 break;
1539 case TOK_UMULL:
/* 32x32 multiply with a two-register result held in r and r2 of the
   result SValue. */
1540 gv2(RC_INT, RC_INT);
1541 r=intr(vtop[-1].r2=get_reg(RC_INT));
1542 c=vtop[-1].r;
1543 vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1544 vtop--;
1545 o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1546 return;
1547 default:
/* Remaining operators are comparisons; the result becomes VT_CMP at
   the 'done' label below. */
1548 opc = 0x15;
1549 c=1;
1550 break;
1552 switch(c) {
1553 case 1:
/* Constant first operand of a subtraction: swap the operands and use
   the reversed form of the instruction. */
1554 if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1555 if(opc == 4 || opc == 5 || opc == 0xc) {
1556 vswap();
1557 opc|=2; // sub -> rsb
1560 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1561 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1562 gv(RC_INT);
1563 vswap();
1564 c=intr(gv(RC_INT));
1565 vswap();
1566 opc=0xE0000000|(opc<<20)|(c<<16);
/* Try to encode a constant second operand as an immediate; stuff_const()
   yields 0 when that is not possible. */
1567 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1568 uint32_t x;
1569 x=stuff_const(opc|0x2000000,vtop->c.i);
1570 if(x) {
1571 r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1572 o(x|(r<<12));
1573 goto done;
1576 fr=intr(gv(RC_INT));
1577 r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1578 o(opc|(r<<12)|fr);
1579 done:
1580 vtop--;
1581 if (op >= TOK_ULT && op <= TOK_GT) {
1582 vtop->r = VT_CMP;
1583 vtop->c.i = op;
1585 break;
1586 case 2:
1587 opc=0xE1A00000|(opc<<5);
1588 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1589 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1590 gv(RC_INT);
1591 vswap();
1592 r=intr(gv(RC_INT));
1593 vswap();
1594 opc|=r;
/* Constant shift amount: encoded in the instruction, masked to 0..31. */
1595 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1596 fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1597 c = vtop->c.i & 0x1f;
1598 o(opc|(c<<7)|(fr<<12));
1599 } else {
/* Shift amount taken from a register. */
1600 fr=intr(gv(RC_INT));
1601 c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1602 o(opc|(c<<12)|(fr<<8)|0x10);
1604 vtop--;
1605 break;
1606 case 3:
/* Call func(a, b) and push a placeholder whose register is the one
   holding the helper's result. */
1607 vpush_global_sym(&func_old_type, func);
1608 vrott(3);
1609 gfunc_call(2);
1610 vpushi(0);
1611 vtop->r = retreg;
1612 break;
1613 default:
1614 tcc_error("gen_opi %i unimplemented!",op);
1618 #ifdef TCC_ARM_VFP
1619 static int is_zero(int i)
1621 if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1622 return 0;
1623 if (vtop[i].type.t == VT_FLOAT)
1624 return (vtop[i].c.f == 0.f);
1625 else if (vtop[i].type.t == VT_DOUBLE)
1626 return (vtop[i].c.d == 0.0);
1627 return (vtop[i].c.ld == 0.l);
1630 /* Generate a floating point operation 'v = t1 op t2' instruction (VFP
   version). The two operands are guaranteed to have the same floating
   point type. Arithmetic leaves its result in a float register on top
   of the value stack; comparisons leave VT_CMP with the condition to
   test in c.i. Operations with a constant zero operand are simplified
   using is_zero(). */
1632 void gen_opf(int op)
1634 uint32_t x;
1635 int fneg=0,r;
1636 x=0xEE000A00|T2CPR(vtop->type.t);
1637 switch(op) {
1638 case '+':
/* Bring a constant zero to the top; adding zero just drops it. */
1639 if(is_zero(-1))
1640 vswap();
1641 if(is_zero(0)) {
1642 vtop--;
1643 return;
1645 x|=0x300000;
1646 break;
1647 case '-':
1648 x|=0x300040;
/* x - 0: drop the zero. */
1649 if(is_zero(0)) {
1650 vtop--;
1651 return;
/* 0 - x: single-operand negate; only x stays on the stack. */
1653 if(is_zero(-1)) {
1654 x|=0x810000; /* fsubX -> fnegX */
1655 vswap();
1656 vtop--;
1657 fneg=1;
1659 break;
1660 case '*':
1661 x|=0x200000;
1662 break;
1663 case '/':
1664 x|=0x800000;
1665 break;
1666 default:
1667 if(op < TOK_ULT || op > TOK_GT) {
1668 tcc_error("unknown fp op %x!",op);
1669 return;
/* Zero on the left: swap the operands and mirror the comparison so the
   compare-with-zero form can be used below. */
1671 if(is_zero(-1)) {
1672 vswap();
1673 switch(op) {
1674 case TOK_LT: op=TOK_GT; break;
1675 case TOK_GE: op=TOK_ULE; break;
1676 case TOK_LE: op=TOK_GE; break;
1677 case TOK_GT: op=TOK_ULT; break;
1680 x|=0xB40040; /* fcmpX */
1681 if(op!=TOK_EQ && op!=TOK_NE)
1682 x|=0x80; /* fcmpX -> fcmpeX */
1683 if(is_zero(0)) {
1684 vtop--;
1685 o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1686 } else {
1687 x|=vfpr(gv(RC_FLOAT));
1688 vswap();
1689 o(x|(vfpr(gv(RC_FLOAT))<<12));
1690 vtop--;
1692 o(0xEEF1FA10); /* fmstat */
/* Remap the comparison token to the condition that is tested on the
   flags transferred by fmstat. */
1694 switch(op) {
1695 case TOK_LE: op=TOK_ULE; break;
1696 case TOK_LT: op=TOK_ULT; break;
1697 case TOK_UGE: op=TOK_GE; break;
1698 case TOK_UGT: op=TOK_GT; break;
1701 vtop->r = VT_CMP;
1702 vtop->c.i = op;
1703 return;
/* Arithmetic: load the operand(s) into float registers and pick a
   destination register; 'r' accumulates the mask of the source registers
   passed to get_reg_ex(). */
1705 r=gv(RC_FLOAT);
1706 x|=vfpr(r);
1707 r=regmask(r);
1708 if(!fneg) {
1709 int r2;
1710 vswap();
1711 r2=gv(RC_FLOAT);
1712 x|=vfpr(r2)<<16;
1713 r|=regmask(r2);
1715 vtop->r=get_reg_ex(RC_FLOAT,r);
1716 if(!fneg)
1717 vtop--;
1718 o(x|(vfpr(vtop->r)<<12));
1721 #else
1722 static uint32_t is_fconst()
1724 long double f;
1725 uint32_t r;
1726 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1727 return 0;
1728 if (vtop->type.t == VT_FLOAT)
1729 f = vtop->c.f;
1730 else if (vtop->type.t == VT_DOUBLE)
1731 f = vtop->c.d;
1732 else
1733 f = vtop->c.ld;
1734 if(!ieee_finite(f))
1735 return 0;
1736 r=0x8;
1737 if(f<0.0) {
1738 r=0x18;
1739 f=-f;
1741 if(f==0.0)
1742 return r;
1743 if(f==1.0)
1744 return r|1;
1745 if(f==2.0)
1746 return r|2;
1747 if(f==3.0)
1748 return r|3;
1749 if(f==4.0)
1750 return r|4;
1751 if(f==5.0)
1752 return r|5;
1753 if(f==0.5)
1754 return r|6;
1755 if(f==10.0)
1756 return r|7;
1757 return 0;
1760 /* Generate a floating point operation 'v = t1 op t2' instruction (FPA
   version). The two operands are guaranteed to have the same floating
   point type.

   c1 and c2 are the is_fconst() classifications of the first and second
   operand: 0 when the operand cannot be an immediate, a value <= 0xf for
   a non-negative immediate, and a value > 0xf for a negative one. r is
   the register of the first instruction operand and r2 is either a
   register or an immediate code for the second. */
1762 void gen_opf(int op)
1764 uint32_t x, r, r2, c1, c2;
1765 //fputs("gen_opf\n",stderr);
1766 vswap();
1767 c1 = is_fconst();
1768 vswap();
1769 c2 = is_fconst();
1770 x=0xEE000100;
/* Select the precision bits of the instruction from the operand type. */
1771 #if LDOUBLE_SIZE == 8
1772 if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1773 x|=0x80;
1774 #else
1775 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1776 x|=0x80;
1777 else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1778 x|=0x80000;
1779 #endif
1780 switch(op)
1782 case '+':
/* Addition commutes: if only the first operand is an immediate, swap so
   that the immediate becomes the second operand. */
1783 if(!c2) {
1784 vswap();
1785 c2=c1;
1787 vswap();
1788 r=fpr(gv(RC_FLOAT));
1789 vswap();
1790 if(c2) {
/* Adding a negative immediate is done by subtracting its magnitude. */
1791 if(c2>0xf)
1792 x|=0x200000; // suf
1793 r2=c2&0xf;
1794 } else {
1795 r2=fpr(gv(RC_FLOAT));
1797 break;
1798 case '-':
1799 if(c2) {
/* Subtracting a negative immediate leaves the opcode bits unset (add);
   a non-negative one uses suf. */
1800 if(c2<=0xf)
1801 x|=0x200000; // suf
1802 r2=c2&0xf;
1803 vswap();
1804 r=fpr(gv(RC_FLOAT));
1805 vswap();
1806 } else if(c1 && c1<=0xf) {
1807 x|=0x300000; // rsf
1808 r2=c1;
1809 r=fpr(gv(RC_FLOAT));
1810 vswap();
1811 } else {
1812 x|=0x200000; // suf
1813 vswap();
1814 r=fpr(gv(RC_FLOAT));
1815 vswap();
1816 r2=fpr(gv(RC_FLOAT));
1818 break;
1819 case '*':
/* Multiplication commutes: prefer a non-negative immediate as the
   second operand. */
1820 if(!c2 || c2>0xf) {
1821 vswap();
1822 c2=c1;
1824 vswap();
1825 r=fpr(gv(RC_FLOAT));
1826 vswap();
1827 if(c2 && c2<=0xf)
1828 r2=c2;
1829 else
1830 r2=fpr(gv(RC_FLOAT));
1831 x|=0x100000; // muf
1832 break;
1833 case '/':
1834 if(c2 && c2<=0xf) {
1835 x|=0x400000; // dvf
1836 r2=c2;
1837 vswap();
1838 r=fpr(gv(RC_FLOAT));
1839 vswap();
1840 } else if(c1 && c1<=0xf) {
1841 x|=0x500000; // rdf
1842 r2=c1;
1843 r=fpr(gv(RC_FLOAT));
1844 vswap();
1845 } else {
1846 x|=0x400000; // dvf
1847 vswap();
1848 r=fpr(gv(RC_FLOAT));
1849 vswap();
1850 r2=fpr(gv(RC_FLOAT));
1852 break;
1853 default:
1854 if(op >= TOK_ULT && op <= TOK_GT) {
1855 x|=0xd0f110; // cmfe
1856 /* bug (intention?) in Linux FPU emulator
   doesn't set carry if equal */
1858 switch(op) {
1859 case TOK_ULT:
1860 case TOK_UGE:
1861 case TOK_ULE:
1862 case TOK_UGT:
1863 tcc_error("unsigned comparision on floats?");
1864 break;
1865 case TOK_LT:
1866 op=TOK_Nset;
1867 break;
1868 case TOK_LE:
1869 op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1870 break;
1871 case TOK_EQ:
1872 case TOK_NE:
1873 x&=~0x400000; // cmfe -> cmf
1874 break;
/* Only the first operand is an immediate: swap the operands and mirror
   the condition accordingly. */
1876 if(c1 && !c2) {
1877 c2=c1;
1878 vswap();
1879 switch(op) {
1880 case TOK_Nset:
1881 op=TOK_GT;
1882 break;
1883 case TOK_GE:
1884 op=TOK_ULE;
1885 break;
1886 case TOK_ULE:
1887 op=TOK_GE;
1888 break;
1889 case TOK_GT:
1890 op=TOK_Nset;
1891 break;
1894 vswap();
1895 r=fpr(gv(RC_FLOAT));
1896 vswap();
1897 if(c2) {
1898 if(c2>0xf)
1899 x|=0x200000;
1900 r2=c2&0xf;
1901 } else {
1902 r2=fpr(gv(RC_FLOAT));
1904 vtop[-1].r = VT_CMP;
1905 vtop[-1].c.i = op;
1906 } else {
1907 tcc_error("unknown fp op %x!",op);
1908 return;
/* Destination field: 15 for a comparison, otherwise a float register
   allocated for the result. When r2 is an immediate (bit 3 set) only the
   first operand's register is passed to two2mask(). */
1911 if(vtop[-1].r == VT_CMP)
1912 c1=15;
1913 else {
1914 c1=vtop->r;
1915 if(r2&0x8)
1916 c1=vtop[-1].r;
1917 vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1918 c1=fpr(vtop[-1].r);
1920 vtop--;
1921 o(x|(r<<16)|(c1<<12)|r2);
1923 #endif
1925 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1926 and 'long long' cases. */
1927 ST_FUNC void gen_cvt_itof1(int t)
1929 uint32_t r, r2;
1930 int bt;
1931 bt=vtop->type.t & VT_BTYPE;
1932 if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1933 #ifndef TCC_ARM_VFP
1934 uint32_t dsize = 0;
1935 #endif
1936 r=intr(gv(RC_INT));
1937 #ifdef TCC_ARM_VFP
1938 r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1939 o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1940 r2|=r2<<12;
1941 if(!(vtop->type.t & VT_UNSIGNED))
1942 r2|=0x80; /* fuitoX -> fsituX */
1943 o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1944 #else
1945 r2=fpr(vtop->r=get_reg(RC_FLOAT));
1946 if((t & VT_BTYPE) != VT_FLOAT)
1947 dsize=0x80; /* flts -> fltd */
1948 o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1949 if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1950 uint32_t off = 0;
1951 o(0xE3500000|(r<<12)); /* cmp */
1952 r=fpr(get_reg(RC_FLOAT));
1953 if(last_itod_magic) {
1954 off=ind+8-last_itod_magic;
1955 off/=4;
1956 if(off>255)
1957 off=0;
1959 o(0xBD1F0100|(r<<12)|off); /* ldflts */
1960 if(!off) {
1961 o(0xEA000000); /* b */
1962 last_itod_magic=ind;
1963 o(0x4F800000); /* 4294967296.0f */
1965 o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1967 #endif
1968 return;
1969 } else if(bt == VT_LLONG) {
1970 int func;
1971 CType *func_type = 0;
1972 if((t & VT_BTYPE) == VT_FLOAT) {
1973 func_type = &func_float_type;
1974 if(vtop->type.t & VT_UNSIGNED)
1975 func=TOK___floatundisf;
1976 else
1977 func=TOK___floatdisf;
1978 #if LDOUBLE_SIZE != 8
1979 } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1980 func_type = &func_ldouble_type;
1981 if(vtop->type.t & VT_UNSIGNED)
1982 func=TOK___floatundixf;
1983 else
1984 func=TOK___floatdixf;
1985 } else if((t & VT_BTYPE) == VT_DOUBLE) {
1986 #else
1987 } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1988 #endif
1989 func_type = &func_double_type;
1990 if(vtop->type.t & VT_UNSIGNED)
1991 func=TOK___floatundidf;
1992 else
1993 func=TOK___floatdidf;
1995 if(func_type) {
1996 vpush_global_sym(func_type, func);
1997 vswap();
1998 gfunc_call(1);
1999 vpushi(0);
2000 vtop->r=TREG_F0;
2001 return;
2004 tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
2007 /* Convert the floating-point value on top of the value stack to the
   integer type 't'.

   With VFP, conversion to int is emitted inline. Without VFP, signed
   conversion to int is emitted inline and unsigned conversion goes
   through a runtime helper. Conversion to long long always uses a
   runtime helper. Unsupported combinations end in tcc_error(). */
2008 void gen_cvt_ftoi(int t)
2010 uint32_t r, r2;
2011 int u, func = 0;
/* u: unsigned flag of the target; t: basic target type; r2: basic type
   of the source value. */
2012 u=t&VT_UNSIGNED;
2013 t&=VT_BTYPE;
2014 r2=vtop->type.t & VT_BTYPE;
2015 if(t==VT_INT) {
2016 #ifdef TCC_ARM_VFP
2017 r=vfpr(gv(RC_FLOAT));
2018 u=u?0:0x10000;
2019 o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
/* Move the converted value from the float register to a core register. */
2020 r2=intr(vtop->r=get_reg(RC_INT));
2021 o(0xEE100A10|(r<<16)|(r2<<12));
2022 return;
2023 #else
2024 if(u) {
2025 if(r2 == VT_FLOAT)
2026 func=TOK___fixunssfsi;
2027 #if LDOUBLE_SIZE != 8
2028 else if(r2 == VT_LDOUBLE)
2029 func=TOK___fixunsxfsi;
2030 else if(r2 == VT_DOUBLE)
2031 #else
2032 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2033 #endif
2034 func=TOK___fixunsdfsi;
2035 } else {
2036 r=fpr(gv(RC_FLOAT));
2037 r2=intr(vtop->r=get_reg(RC_INT));
2038 o(0xEE100170|(r2<<12)|r);
2039 return;
2041 #endif
2042 } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2043 if(r2 == VT_FLOAT)
2044 func=TOK___fixsfdi;
2045 #if LDOUBLE_SIZE != 8
2046 else if(r2 == VT_LDOUBLE)
2047 func=TOK___fixxfdi;
2048 else if(r2 == VT_DOUBLE)
2049 #else
2050 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2051 #endif
2052 func=TOK___fixdfdi;
/* Call the selected helper with the value as its single argument; the
   result is taken from REG_IRET (and REG_LRET for long long). */
2054 if(func) {
2055 vpush_global_sym(&func_old_type, func);
2056 vswap();
2057 gfunc_call(1);
2058 vpushi(0);
2059 if(t == VT_LLONG)
2060 vtop->r2 = REG_LRET;
2061 vtop->r = REG_IRET;
2062 return;
2064 tcc_error("unimplemented gen_cvt_ftoi!");
2067 /* convert from one floating point type to another */
2068 void gen_cvt_ftof(int t)
2070 #ifdef TCC_ARM_VFP
2071 if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2072 uint32_t r = vfpr(gv(RC_FLOAT));
2073 o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2075 #else
2076 /* all we have to do on i386 and FPA ARM is to put the float in a register */
2077 gv(RC_FLOAT);
2078 #endif
2081 /* computed goto support */
2082 void ggoto(void)
2084 gcall_or_jmp(1);
2085 vtop--;
2088 /* Save the stack pointer onto the stack and return the location of its address */
2089 ST_FUNC void gen_vla_sp_save(int addr) {
2090 tcc_error("variable length arrays unsupported for this target");
2093 /* Restore the SP from a location on the stack */
2094 ST_FUNC void gen_vla_sp_restore(int addr) {
2095 tcc_error("variable length arrays unsupported for this target");
2098 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2099 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2100 tcc_error("variable length arrays unsupported for this target");
2103 /* end of ARM code generator */
2104 /*************************************************************/
2105 #endif
2106 /*************************************************************/