Disable floating-point test for ARM soft-float
[tinycc.git] / arm-gen.c
blob3bb5326c8adc50c023c73a5e397b7bffc630deb6
1 /*
2 * ARMv4 code generator for TCC
4 * Copyright (c) 2003 Daniel Glöckner
5 * Copyright (c) 2012 Thomas Preud'homme
7 * Based on i386-gen.c by Fabrice Bellard
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #ifdef TARGET_DEFS_ONLY
/* This branch only provides target constants, macros and enums; the code
   generator itself lives in the #else branch further down. */
26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
27 #error "Currently TinyCC only supports float computation with VFP instructions"
28 #endif
30 /* number of available registers */
/* 5 integer registers (r0-r3, r12) plus 8 float registers with VFP or
   4 without; must match the TREG_* enum and the reg_classes[] table. */
31 #ifdef TCC_ARM_VFP
32 #define NB_REGS 13
33 #else
34 #define NB_REGS 9
35 #endif
/* TCC_ARM_VERSION defaults to 5 when the build does not set it. */
37 #ifndef TCC_ARM_VERSION
38 # define TCC_ARM_VERSION 5
39 #endif
41 /* a register can belong to several classes. The classes must be
42 sorted from more general to more precise (see gv2() code which makes
43 assumptions about it). */
/* Each class is a distinct bit so classes can be OR-ed into masks. */
44 #define RC_INT 0x0001 /* generic integer register */
45 #define RC_FLOAT 0x0002 /* generic float register */
46 #define RC_R0 0x0004
47 #define RC_R1 0x0008
48 #define RC_R2 0x0010
49 #define RC_R3 0x0020
50 #define RC_R12 0x0040
51 #define RC_F0 0x0080
52 #define RC_F1 0x0100
53 #define RC_F2 0x0200
54 #define RC_F3 0x0400
55 #ifdef TCC_ARM_VFP
56 #define RC_F4 0x0800
57 #define RC_F5 0x1000
58 #define RC_F6 0x2000
59 #define RC_F7 0x4000
60 #endif
61 #define RC_IRET RC_R0 /* function return: integer register */
62 #define RC_LRET RC_R1 /* function return: second integer register */
63 #define RC_FRET RC_F0 /* function return: float register */
65 /* pretty names for the registers */
/* The enumerators are TCC register indices (TREG_R12 is index 4,
   TREG_F0 is index 5), not hardware register numbers. */
66 enum {
67 TREG_R0 = 0,
68 TREG_R1,
69 TREG_R2,
70 TREG_R3,
71 TREG_R12,
72 TREG_F0,
73 TREG_F1,
74 TREG_F2,
75 TREG_F3,
76 #ifdef TCC_ARM_VFP
77 TREG_F4,
78 TREG_F5,
79 TREG_F6,
80 TREG_F7,
81 #endif
84 #ifdef TCC_ARM_VFP
/* T2CPR(t) is 0 when the basic type of t is VT_FLOAT and 0x100 otherwise;
   the result is OR-ed into VFP opcodes (see the fcpyX emission in load()). */
85 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
86 #endif
88 /* return registers for function */
89 #define REG_IRET TREG_R0 /* single word int return register */
90 #define REG_LRET TREG_R1 /* second word return register (for long long) */
91 #define REG_FRET TREG_F0 /* float return register */
/* With EABI the 64-bit division and modulo helper tokens are redirected:
   both signed helpers map to __aeabi_ldivmod and both unsigned helpers to
   __aeabi_uldivmod. */
93 #ifdef TCC_ARM_EABI
94 #define TOK___divdi3 TOK___aeabi_ldivmod
95 #define TOK___moddi3 TOK___aeabi_ldivmod
96 #define TOK___udivdi3 TOK___aeabi_uldivmod
97 #define TOK___umoddi3 TOK___aeabi_uldivmod
98 #endif
100 /* defined if function parameters must be evaluated in reverse order */
101 #define INVERT_FUNC_PARAMS
103 /* defined if structures are passed as pointers. Otherwise structures
104 are directly pushed on stack. */
105 /* #define FUNC_STRUCT_PARAM_AS_PTR */
107 /* pointer size, in bytes */
108 #define PTR_SIZE 4
110 /* long double size and alignment, in bytes */
/* NOTE: both the VFP case and the fallback below define LDOUBLE_SIZE as 8,
   so the value is 8 in every configuration visible here. */
111 #ifdef TCC_ARM_VFP
112 #define LDOUBLE_SIZE 8
113 #endif
115 #ifndef LDOUBLE_SIZE
116 #define LDOUBLE_SIZE 8
117 #endif
119 #ifdef TCC_ARM_EABI
120 #define LDOUBLE_ALIGN 8
121 #else
122 #define LDOUBLE_ALIGN 4
123 #endif
125 /* maximum alignment (for aligned attribute support) */
126 #define MAX_ALIGN 8
128 #define CHAR_IS_UNSIGNED
130 /******************************************************/
131 /* ELF defines */
133 #define EM_TCC_TARGET EM_ARM
135 /* relocation type for 32 bit data relocation */
136 #define R_DATA_32 R_ARM_ABS32
137 #define R_DATA_PTR R_ARM_ABS32
138 #define R_JMP_SLOT R_ARM_JUMP_SLOT
139 #define R_COPY R_ARM_COPY
141 #define ELF_START_ADDR 0x00008000
142 #define ELF_PAGE_SIZE 0x1000
/* Floating-point calling convention selector; compared against
   ARM_HARD_FLOAT / ARM_SOFTFP_FLOAT by the call and return code. */
144 enum float_abi {
145 ARM_SOFTFP_FLOAT,
146 ARM_HARD_FLOAT,
149 /******************************************************/
150 #else /* ! TARGET_DEFS_ONLY */
151 /******************************************************/
152 #include "tcc.h"
/* Float ABI currently in effect; assigned from s->float_abi in arm_init()
   and temporarily overridden in gfunc_call(). */
154 enum float_abi float_abi;
/* Class mask of every TCC register, indexed by TREG_* value: one generic
   class bit (RC_INT or RC_FLOAT) plus the register's own class bit. */
156 ST_DATA const int reg_classes[NB_REGS] = {
157 /* r0 */ RC_INT | RC_R0,
158 /* r1 */ RC_INT | RC_R1,
159 /* r2 */ RC_INT | RC_R2,
160 /* r3 */ RC_INT | RC_R3,
161 /* r12 */ RC_INT | RC_R12,
162 /* f0 */ RC_FLOAT | RC_F0,
163 /* f1 */ RC_FLOAT | RC_F1,
164 /* f2 */ RC_FLOAT | RC_F2,
165 /* f3 */ RC_FLOAT | RC_F3,
166 #ifdef TCC_ARM_VFP
167 /* d4/s8 */ RC_FLOAT | RC_F4,
168 /* d5/s10 */ RC_FLOAT | RC_F5,
169 /* d6/s12 */ RC_FLOAT | RC_F6,
170 /* d7/s14 */ RC_FLOAT | RC_F7,
171 #endif
/* File-local code generator state. NOTE(review): none of these three
   variables is used in the part of the file visible here; their meaning
   must be confirmed against the function prolog/epilog code. */
174 static int func_sub_sp_offset, last_itod_magic;
175 static int leaffunc;
/* arm_init() exists in two variants selected at compile time.
   EABI + VFP: builds the float/double CTypes and two function types whose
   return-type symbols are created with sym_push(), then copies the float
   ABI selected in the TCCState into the global float_abi.
   Otherwise: the func_*_type names alias func_old_type and only
   deprecation warnings are issued. */
177 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
178 static CType float_type, double_type, func_float_type, func_double_type;
179 ST_FUNC void arm_init(struct TCCState *s)
181 float_type.t = VT_FLOAT;
182 double_type.t = VT_DOUBLE;
183 func_float_type.t = VT_FUNC;
184 func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
185 func_double_type.t = VT_FUNC;
186 func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
188 float_abi = s->float_abi;
/* The warning depends only on the build-time macro, not on s->float_abi. */
189 #ifndef TCC_ARM_HARDFLOAT
190 tcc_warning("soft float ABI currently not supported: default to softfp");
191 #endif
193 #else
194 #define func_float_type func_old_type
195 #define func_double_type func_old_type
196 #define func_ldouble_type func_old_type
197 ST_FUNC void arm_init(struct TCCState *s)
/* In this variant 's' is not read; the function only emits warnings. */
199 #if !defined (TCC_ARM_VFP)
200 tcc_warning("Support for FPA is deprecated and will be removed in next"
201 " release");
202 #endif
203 #if !defined (TCC_ARM_EABI)
204 tcc_warning("Support for OABI is deprecated and will be removed in next"
205 " release");
206 #endif
208 #endif
210 static int two2mask(int a,int b) {
211 return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
214 static int regmask(int r) {
215 return reg_classes[r]&~(RC_INT|RC_FLOAT);
218 /******************************************************/
#if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
/* Return the ELF interpreter path matching the float ABI recorded in 's':
   the "armhf" loader for the hard-float ABI, the plain one otherwise. */
char *default_elfinterp(struct TCCState *s)
{
    return s->float_abi == ARM_HARD_FLOAT
        ? "/lib/ld-linux-armhf.so.3"
        : "/lib/ld-linux.so.3";
}
#endif
230 void o(uint32_t i)
232 /* this is a good place to start adding big-endian support*/
233 int ind1;
235 ind1 = ind + 4;
236 if (!cur_text_section)
237 tcc_error("compiler error! This happens f.ex. if the compiler\n"
238 "can't evaluate constant expressions outside of a function.");
239 if (ind1 > cur_text_section->data_allocated)
240 section_realloc(cur_text_section, ind1);
241 cur_text_section->data[ind++] = i&255;
242 i>>=8;
243 cur_text_section->data[ind++] = i&255;
244 i>>=8;
245 cur_text_section->data[ind++] = i&255;
246 i>>=8;
247 cur_text_section->data[ind++] = i;
/* Try to fold the constant 'c' into the data-processing instruction 'op'
   as an ARM immediate (an 8-bit value rotated right by an even amount).

   If 'c' itself cannot be encoded, an equivalent instruction is tried where
   one exists: add <-> sub with -c, mov <-> mvn with ~c, and <-> bic with ~c.
   A constant of all ones is special-cased for xor, and and orr.

   op: instruction word with the immediate field clear
   c:  constant to encode

   Returns the complete instruction word, or 0 when no single instruction
   can express the operation. */
static uint32_t stuff_const(uint32_t op, uint32_t c)
{
  int try_neg=0;
  uint32_t nc = 0, negop = 0;

  switch(op&0x1F00000)
  {
    case 0x800000: //add
    case 0x400000: //sub
      try_neg=1;
      negop=op^0xC00000;
      nc=-c;
      break;
    case 0x1A00000: //mov
    case 0x1E00000: //mvn
      try_neg=1;
      negop=op^0x400000;
      nc=~c;
      break;
    case 0x200000: //xor
      if(c==0xFFFFFFFFu)
        return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
      break;
    case 0x0: //and
      if(c==0xFFFFFFFFu)
        return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
      /* fall through: and with c may be expressed as bic with ~c */
    case 0x1C00000: //bic
      try_neg=1;
      negop=op^0x1C00000;
      nc=~c;
      break;
    case 0x1800000: //orr
      if(c==0xFFFFFFFFu)
        return (op&0xFFF0FFFF)|0x1E00000;
      break;
  }
  do {
    uint32_t m;
    int i;
    if(c<256) /* catch undefined <<32 */
      return op|c;
    for(i=2;i<32;i+=2) {
      /* m is 0xff rotated right by i; unsigned operands keep the left
         shift well defined for every i */
      m=(0xffu>>i)|(0xffu<<(32-i));
      if(!(c&~m))
        return op|(i<<7)|(c<<i)|(c>>(32-i));
    }
    op=negop;
    c=nc;
  } while(try_neg--);
  return 0;
}
/* Emit code adding or subtracting an arbitrary 32-bit constant 'v'.
   A single instruction is tried first through stuff_const(). Otherwise the
   constant is split into chunks that each fit an 8-bit rotated immediate:
   two chunks of v, two chunks of -v with the opposite opcode, three chunks
   of v, three chunks of -v, and finally one instruction per byte of v. */
303 //only add,sub
304 void stuff_const_harder(uint32_t op, uint32_t v) {
305 uint32_t x;
306 x=stuff_const(op,v);
307 if(x)
308 o(x);
309 else {
310 uint32_t a[16], nv, no, o2, n2;
311 int i,j,k;
/* a[i] is the mask 0xff rotated right by 2*i bits */
312 a[0]=0xff;
/* o2: same instruction with the destination register field (bits 12-15)
   copied into the first-operand field (bits 16-19), so that follow-up
   instructions accumulate into the destination */
313 o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
314 for(i=1;i<16;i++)
315 a[i]=(a[i-1]>>2)|(a[i-1]<<30);
/* two chunks covering v */
316 for(i=0;i<12;i++)
317 for(j=i<4?i+12:15;j>=i+4;j--)
318 if((v&(a[i]|a[j]))==v) {
319 o(stuff_const(op,v&a[i]));
320 o(stuff_const(o2,v&a[j]));
321 return;
/* no/n2: opposite operation (add <-> sub) applied to the negated constant */
323 no=op^0xC00000;
324 n2=o2^0xC00000;
325 nv=-v;
326 for(i=0;i<12;i++)
327 for(j=i<4?i+12:15;j>=i+4;j--)
328 if((nv&(a[i]|a[j]))==nv) {
329 o(stuff_const(no,nv&a[i]));
330 o(stuff_const(n2,nv&a[j]));
331 return;
/* three chunks covering v */
333 for(i=0;i<8;i++)
334 for(j=i+4;j<12;j++)
335 for(k=i<4?i+12:15;k>=j+4;k--)
336 if((v&(a[i]|a[j]|a[k]))==v) {
337 o(stuff_const(op,v&a[i]));
338 o(stuff_const(o2,v&a[j]));
339 o(stuff_const(o2,v&a[k]));
340 return;
/* no and nv still hold these values from above; the reassignment is
   redundant but harmless */
342 no=op^0xC00000;
343 nv=-v;
344 for(i=0;i<8;i++)
345 for(j=i+4;j<12;j++)
346 for(k=i<4?i+12:15;k>=j+4;k--)
347 if((nv&(a[i]|a[j]|a[k]))==nv) {
348 o(stuff_const(no,nv&a[i]));
349 o(stuff_const(n2,nv&a[j]));
350 o(stuff_const(n2,nv&a[k]));
351 return;
/* last resort: four instructions, one per byte of v
   (a[0], a[4], a[8], a[12] select the four bytes) */
353 o(stuff_const(op,v&a[0]));
354 o(stuff_const(o2,v&a[4]));
355 o(stuff_const(o2,v&a[8]));
356 o(stuff_const(o2,v&a[12]));
360 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
362 addr-=pos+8;
363 addr/=4;
364 if(addr>=0x1000000 || addr<-0x1000000) {
365 if(fail)
366 tcc_error("FIXME: function bigger than 32MB");
367 return 0;
369 return 0x0A000000|(addr&0xffffff);
372 int decbranch(int pos)
374 int x;
375 x=*(uint32_t *)(cur_text_section->data + pos);
376 x&=0x00ffffff;
377 if(x&0x800000)
378 x-=0x1000000;
379 return x*4+pos+8;
382 /* output a symbol and patch all calls to it */
383 void gsym_addr(int t, int a)
385 uint32_t *x;
386 int lt;
387 while(t) {
388 x=(uint32_t *)(cur_text_section->data + t);
389 t=decbranch(lt=t);
390 if(a==lt+4)
391 *x=0xE1A00000; // nop
392 else {
393 *x &= 0xff000000;
394 *x |= encbranch(lt,a,1);
399 void gsym(int t)
401 gsym_addr(t, ind);
404 #ifdef TCC_ARM_VFP
405 static uint32_t vfpr(int r)
407 if(r<TREG_F0 || r>TREG_F7)
408 tcc_error("compiler error! register %i is no vfp register",r);
409 return r-5;
411 #else
412 static uint32_t fpr(int r)
414 if(r<TREG_F0 || r>TREG_F3)
415 tcc_error("compiler error! register %i is no fpa register",r);
416 return r-5;
418 #endif
/* Translate TCC integer register 'r' into the ARM core register number:
   index 4 stands for r12, indices 0-3 and 14 map to themselves. Any other
   value is a compiler error. */
static uint32_t intr(int r)
{
  switch (r) {
  case 4:
    return 12;
  case 0:
  case 1:
  case 2:
  case 3:
  case 14:
    return r;
  default:
    tcc_error("compiler error! register %i is no int register",r);
    return r;
  }
}
/* Make a base + offset address fit the offset field of a load/store.

   base:   in/out, base register number; set to 14 (lr) when code is emitted
   off:    in/out, offset magnitude
   sgn:    in/out, non-zero when the offset is to be subtracted
   maxoff: largest offset the instruction can encode
   shift:  number of low offset bits that must be zero

   Nothing is emitted when *off already fits. Otherwise an add (or sub when
   *sgn is set) of the part of the offset outside maxoff is emitted into lr,
   and *off is reduced to the remainder. */
429 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
431 if(*off>maxoff || *off&((1<<shift)-1)) {
432 uint32_t x, y;
/* x: add lr, <base>, #imm -- or sub when the offset is negative */
433 x=0xE280E000;
434 if(*sgn)
435 x=0xE240E000;
436 x|=(*base)<<16;
437 *base=14; // lr
/* first attempt: move the bits of the offset above maxoff into lr */
438 y=stuff_const(x,*off&~maxoff);
439 if(y) {
440 o(y);
441 *off&=maxoff;
442 return;
/* second attempt: overshoot to the rounded-up value and come back with a
   remainder of the opposite sign */
444 y=stuff_const(x,(*off+maxoff)&~maxoff);
445 if(y) {
446 o(y);
447 *sgn=!*sgn;
448 *off=((*off+maxoff)&~maxoff)-*off;
449 return;
/* fallback: build the high part with as many instructions as needed */
451 stuff_const_harder(x,*off&~maxoff);
452 *off&=maxoff;
456 static uint32_t mapcc(int cc)
458 switch(cc)
460 case TOK_ULT:
461 return 0x30000000; /* CC/LO */
462 case TOK_UGE:
463 return 0x20000000; /* CS/HS */
464 case TOK_EQ:
465 return 0x00000000; /* EQ */
466 case TOK_NE:
467 return 0x10000000; /* NE */
468 case TOK_ULE:
469 return 0x90000000; /* LS */
470 case TOK_UGT:
471 return 0x80000000; /* HI */
472 case TOK_Nset:
473 return 0x40000000; /* MI */
474 case TOK_Nclear:
475 return 0x50000000; /* PL */
476 case TOK_LT:
477 return 0xB0000000; /* LT */
478 case TOK_GE:
479 return 0xA0000000; /* GE */
480 case TOK_LE:
481 return 0xD0000000; /* LE */
482 case TOK_GT:
483 return 0xC0000000; /* GT */
485 tcc_error("unexpected condition code");
486 return 0xE0000000; /* AL */
489 static int negcc(int cc)
491 switch(cc)
493 case TOK_ULT:
494 return TOK_UGE;
495 case TOK_UGE:
496 return TOK_ULT;
497 case TOK_EQ:
498 return TOK_NE;
499 case TOK_NE:
500 return TOK_EQ;
501 case TOK_ULE:
502 return TOK_UGT;
503 case TOK_UGT:
504 return TOK_ULE;
505 case TOK_Nset:
506 return TOK_Nclear;
507 case TOK_Nclear:
508 return TOK_Nset;
509 case TOK_LT:
510 return TOK_GE;
511 case TOK_GE:
512 return TOK_LT;
513 case TOK_LE:
514 return TOK_GT;
515 case TOK_GT:
516 return TOK_LE;
518 tcc_error("unexpected condition code");
519 return TOK_NE;
/* Generate code loading the value described by 'sv' into register 'r'.
   Lvalues are read from memory (relative to fp, to lr after the address was
   computed, or to an integer register holding the address). Non-lvalues
   cover constants and symbol addresses, addresses of locals, comparison
   results, jump results and register-to-register copies. Any other case
   ends in the "load unimplemented!" error. */
522 /* load 'r' from value 'sv' */
523 void load(int r, SValue *sv)
525 int v, ft, fc, fr, sign;
526 uint32_t op;
527 SValue v1;
529 fr = sv->r;
530 ft = sv->type.t;
531 fc = sv->c.ul;
/* split the constant into a magnitude (fc) and a direction flag (sign) */
533 if(fc>=0)
534 sign=0;
535 else {
536 sign=1;
537 fc=-fc;
540 v = fr & VT_VALMASK;
541 if (fr & VT_LVAL) {
542 uint32_t base = 0xB; // fp
543 if(v == VT_LLOCAL) {
/* the local slot holds a pointer: load it into lr, then dereference lr */
544 v1.type.t = VT_PTR;
545 v1.r = VT_LOCAL | VT_LVAL;
546 v1.c.ul = sv->c.ul;
547 load(base=14 /* lr */, &v1);
548 fc=sign=0;
549 v=VT_LOCAL;
550 } else if(v == VT_CONST) {
/* absolute or symbol address: materialize it in lr first */
551 v1.type.t = VT_PTR;
552 v1.r = fr&~VT_LVAL;
553 v1.c.ul = sv->c.ul;
554 v1.sym=sv->sym;
555 load(base=14, &v1);
556 fc=sign=0;
557 v=VT_LOCAL;
558 } else if(v < VT_CONST) {
/* the address is already in integer register v */
559 base=intr(v);
560 fc=sign=0;
561 v=VT_LOCAL;
/* from here on every handled lvalue is "base register + fc" */
563 if(v == VT_LOCAL) {
564 if(is_float(ft)) {
565 calcaddr(&base,&fc,&sign,1020,2);
566 #ifdef TCC_ARM_VFP
567 op=0xED100A00; /* flds */
/* bit 23 selects adding the offset instead of subtracting it */
568 if(!sign)
569 op|=0x800000;
570 if ((ft & VT_BTYPE) != VT_FLOAT)
571 op|=0x100; /* flds -> fldd */
572 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
573 #else
574 op=0xED100100;
575 if(!sign)
576 op|=0x800000;
577 #if LDOUBLE_SIZE == 8
578 if ((ft & VT_BTYPE) != VT_FLOAT)
579 op|=0x8000;
580 #else
581 if ((ft & VT_BTYPE) == VT_DOUBLE)
582 op|=0x8000;
583 else if ((ft & VT_BTYPE) == VT_LDOUBLE)
584 op|=0x400000;
585 #endif
586 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
587 #endif
588 } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
589 || (ft & VT_BTYPE) == VT_SHORT) {
/* signed byte and all halfword loads: offset limited to 255 and split into
   two 4-bit fields of the instruction */
590 calcaddr(&base,&fc,&sign,255,0);
591 op=0xE1500090;
592 if ((ft & VT_BTYPE) == VT_SHORT)
593 op|=0x20;
594 if ((ft & VT_UNSIGNED) == 0)
595 op|=0x40;
596 if(!sign)
597 op|=0x800000;
598 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
599 } else {
/* word and unsigned byte loads: 12-bit offset */
600 calcaddr(&base,&fc,&sign,4095,0);
601 op=0xE5100000;
602 if(!sign)
603 op|=0x800000;
604 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
605 op|=0x400000;
606 o(op|(intr(r)<<12)|fc|(base<<16));
608 return;
610 } else {
611 if (v == VT_CONST) {
/* try a single mov/mvn; otherwise (or when a symbol needs a relocation)
   emit a pc-relative load, a branch over the data word, then the word */
612 op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
613 if (fr & VT_SYM || !op) {
614 o(0xE59F0000|(intr(r)<<12));
615 o(0xEA000000);
616 if(fr & VT_SYM)
617 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
618 o(sv->c.ul);
619 } else
620 o(op);
621 return;
622 } else if (v == VT_LOCAL) {
/* address of a local: fp + constant, with the same in-line data word
   fallback followed by an add of fp */
623 op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
624 if (fr & VT_SYM || !op) {
625 o(0xE59F0000|(intr(r)<<12));
626 o(0xEA000000);
627 if(fr & VT_SYM) // needed ?
628 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
629 o(sv->c.ul);
630 o(0xE08B0000|(intr(r)<<12)|intr(r));
631 } else
632 o(op);
633 return;
634 } else if(v == VT_CMP) {
/* two conditional moves: 1 under the condition, 0 under its negation */
635 o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
636 o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
637 return;
638 } else if (v == VT_JMP || v == VT_JMPI) {
/* fall-through path loads t and skips the next instruction; the pending
   jumps are resolved onto that next instruction, which loads t^1 */
639 int t;
640 t = v & 1;
641 o(0xE3A00000|(intr(r)<<12)|t);
642 o(0xEA000000);
643 gsym(sv->c.ul);
644 o(0xE3A00000|(intr(r)<<12)|(t^1));
645 return;
646 } else if (v < VT_CONST) {
/* register to register copy */
647 if(is_float(ft))
648 #ifdef TCC_ARM_VFP
649 o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
650 #else
651 o(0xEE008180|(fpr(r)<<12)|fpr(v));
652 #endif
653 else
654 o(0xE1A00000|(intr(r)<<12)|intr(v));
655 return;
658 tcc_error("load unimplemented!");
/* Generate code storing register 'r' into the location described by 'sv'.
   The destination is addressed relative to fp, to an integer register
   holding the address, or to lr after a constant or symbol address was
   loaded into it. Any destination not handled ends in the
   "store unimplemented" error. */
661 /* store register 'r' in lvalue 'v' */
662 void store(int r, SValue *sv)
664 SValue v1;
665 int v, ft, fc, fr, sign;
666 uint32_t op;
668 fr = sv->r;
669 ft = sv->type.t;
670 fc = sv->c.ul;
/* split the constant into a magnitude (fc) and a direction flag (sign) */
672 if(fc>=0)
673 sign=0;
674 else {
675 sign=1;
676 fc=-fc;
679 v = fr & VT_VALMASK;
680 if (fr & VT_LVAL || fr == VT_LOCAL) {
681 uint32_t base = 0xb;
682 if(v < VT_CONST) {
/* the address is in integer register v */
683 base=intr(v);
684 v=VT_LOCAL;
685 fc=sign=0;
686 } else if(v == VT_CONST) {
/* constant or symbol address: materialize it in lr first */
687 v1.type.t = ft;
688 v1.r = fr&~VT_LVAL;
689 v1.c.ul = sv->c.ul;
690 v1.sym=sv->sym;
691 load(base=14, &v1);
692 fc=sign=0;
693 v=VT_LOCAL;
695 if(v == VT_LOCAL) {
696 if(is_float(ft)) {
697 calcaddr(&base,&fc,&sign,1020,2);
698 #ifdef TCC_ARM_VFP
699 op=0xED000A00; /* fsts */
/* bit 23 selects adding the offset instead of subtracting it */
700 if(!sign)
701 op|=0x800000;
702 if ((ft & VT_BTYPE) != VT_FLOAT)
703 op|=0x100; /* fsts -> fstd */
704 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
705 #else
706 op=0xED000100;
707 if(!sign)
708 op|=0x800000;
709 #if LDOUBLE_SIZE == 8
710 if ((ft & VT_BTYPE) != VT_FLOAT)
711 op|=0x8000;
712 #else
713 if ((ft & VT_BTYPE) == VT_DOUBLE)
714 op|=0x8000;
715 if ((ft & VT_BTYPE) == VT_LDOUBLE)
716 op|=0x400000;
717 #endif
718 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
719 #endif
720 return;
721 } else if((ft & VT_BTYPE) == VT_SHORT) {
/* halfword store: offset limited to 255 and split into two 4-bit fields */
722 calcaddr(&base,&fc,&sign,255,0);
723 op=0xE14000B0;
724 if(!sign)
725 op|=0x800000;
726 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
727 } else {
/* word and byte stores: 12-bit offset */
728 calcaddr(&base,&fc,&sign,4095,0);
729 op=0xE5000000;
730 if(!sign)
731 op|=0x800000;
732 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
733 op|=0x400000;
734 o(op|(intr(r)<<12)|fc|(base<<16));
736 return;
739 tcc_error("store unimplemented");
/* Adjust the stack pointer by 'val' bytes (negative values allocate stack
   space), using as many instructions as the constant requires. */
static void gadd_sp(int val)
{
  const uint32_t add_sp_sp = 0xE28DD000; /* add sp, sp, #imm */

  stuff_const_harder(add_sp_sp, val);
}
747 /* 'is_jmp' is '1' if it is a jump */
748 static void gcall_or_jmp(int is_jmp)
750 int r;
751 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
752 uint32_t x;
753 /* constant case */
754 x=encbranch(ind,ind+vtop->c.ul,0);
755 if(x) {
756 if (vtop->r & VT_SYM) {
757 /* relocation case */
758 greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
759 } else
760 put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
761 o(x|(is_jmp?0xE0000000:0xE1000000));
762 } else {
763 if(!is_jmp)
764 o(0xE28FE004); // add lr,pc,#4
765 o(0xE51FF004); // ldr pc,[pc,#-4]
766 if (vtop->r & VT_SYM)
767 greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
768 o(vtop->c.ul);
770 } else {
771 /* otherwise, indirect call */
772 r = gv(RC_INT);
773 if(!is_jmp)
774 o(0xE1A0E00F); // mov lr,pc
775 o(0xE1A0F000|intr(r)); // mov pc,r
779 /* Return whether a structure is an homogeneous float aggregate or not.
780 The answer is true if all the elements of the structure are of the same
781 primitive float type and there is less than 4 elements.
783 type: the type corresponding to the structure to be tested */
784 static int is_hgen_float_aggr(CType *type)
786 if ((type->t & VT_BTYPE) == VT_STRUCT) {
787 struct Sym *ref;
788 int btype, nb_fields = 0;
790 ref = type->ref->next;
791 btype = ref->type.t & VT_BTYPE;
792 if (btype == VT_FLOAT || btype == VT_DOUBLE) {
793 for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
794 return !ref && nb_fields <= 4;
797 return 0;
/* Bookkeeping for the allocation of single-precision VFP argument registers
   (numbered 0..15). */
struct avail_regs {
  signed char avail[3]; /* registers skipped to satisfy double alignment
                           (3 holes max with only float and double
                           alignments) */
  int first_hole; /* index in avail[] of the next hole to hand out */
  int last_hole; /* index in avail[] where the next hole is recorded; no hole
                    is available when equal to first_hole */
  int first_free_reg; /* next register of the sequence, holes excluded;
                         -1 once an allocation has failed */
};

#define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }

/* Pick the VFP registers for one VFP co-processor register candidate,
   following the procedure call standard for the ARM architecture (AAPCS).
   Registers are handed out in sequence; a double-aligned parameter starts on
   an even register, the skipped odd register being remembered as a hole that
   a later single float may fill.

   avregs: allocation state, updated by the call
   align: alignment constraint of the parameter, as returned by type_size()
   size: size of the parameter, as returned by type_size()

   Returns the first register assigned, or -1 when the parameter does not
   fit; after such a failure every later call returns -1 as well. */
int assign_vfpreg(struct avail_regs *avregs, int align, int size)
{
  int nb_regs = size / 4;
  int start;

  if (avregs->first_free_reg == -1)
    return -1;

  if (!(align >> 3)) {
    /* no special alignment (float or array of float): a lone float goes
       into a pending hole when there is one */
    if (size == 4 && avregs->first_hole != avregs->last_hole)
      return avregs->avail[avregs->first_hole++];
    start = avregs->first_free_reg;
  } else {
    /* double alignment: skip an odd register and record it as a hole */
    start = avregs->first_free_reg;
    if (start & 1) {
      avregs->avail[avregs->last_hole] = start;
      avregs->last_hole++;
      start++;
    }
  }

  if (start + nb_regs > 16) {
    avregs->first_free_reg = -1;
    return -1;
  }
  avregs->first_free_reg = start + nb_regs;
  return start;
}
844 /* Returns whether all params need to be passed in core registers or not.
845 This is the case for function part of the runtime ABI. */
846 int floats_in_core_regs(SValue *sval)
848 if (!sval->sym)
849 return 0;
851 switch (sval->sym->v) {
852 case TOK___floatundisf:
853 case TOK___floatundidf:
854 case TOK___fixunssfdi:
855 case TOK___fixunsdfdi:
856 #ifndef TCC_ARM_VFP
857 case TOK___fixunsxfdi:
858 #endif
859 case TOK___floatdisf:
860 case TOK___floatdidf:
861 case TOK___fixsfdi:
862 case TOK___fixdfdi:
863 return 1;
865 default:
866 return 0;
870 /* Return the number of registers needed to return the struct, or 0 if
871 returning via struct pointer. */
872 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align) {
873 #ifdef TCC_ARM_EABI
874 int size, align;
875 size = type_size(vt, &align);
876 if (float_abi == ARM_HARD_FLOAT && !variadic &&
877 (is_float(vt->t) || is_hgen_float_aggr(vt))) {
878 *ret_align = 8;
879 ret->ref = NULL;
880 ret->t = VT_DOUBLE;
881 return (size + 7) >> 3;
882 } else if (size <= 4) {
883 *ret_align = 4;
884 ret->ref = NULL;
885 ret->t = VT_INT;
886 return 1;
887 } else
888 return 0;
889 #else
890 return 0;
891 #endif
894 /* Parameters are classified according to how they are copied to their final
895 destination for the function call. Because the copying is performed class
896 after class according to the order in the enum below, it is important that
897 some constraints about the order of the members of this enum are respected:
898 - CORE_STRUCT_CLASS must come after STACK_CLASS;
899 - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
900 VFP_STRUCT_CLASS;
901 - VFP_STRUCT_CLASS must come after VFP_CLASS.
902 See the comment for the main loop in copy_params() for the reason. */
/* NB_CLASSES is the number of classes; it sizes the per-class list heads in
   struct plan and bounds the class loop of copy_params(). */
903 enum reg_class {
904 STACK_CLASS = 0,
905 CORE_STRUCT_CLASS,
906 VFP_CLASS,
907 VFP_STRUCT_CLASS,
908 CORE_CLASS,
909 NB_CLASSES
/* Placement decided for one call parameter. Plans of the same class are
   chained through 'prev', the most recently added plan being the head. */
912 struct param_plan {
913 int start; /* first reg or addr used depending on the class */
914 int end; /* last reg used or next free addr depending on the class */
915 SValue *sval; /* pointer to SValue on the value stack */
916 struct param_plan *prev; /* previous element in this class */
/* Complete placement of all the parameters of one call. */
919 struct plan {
920 struct param_plan *pplans; /* array of all the param plans */
921 struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
/* Append 'pplan' to 'plan' as the new head of the list of 'class'.
   The macro relies on a variable named <plan>_nb in the calling scope (the
   number of plans stored so far, incremented here); it modifies pplan.prev
   and evaluates its arguments several times. */
924 #define add_param_plan(plan,pplan,class) \
925 do { \
926 pplan.prev = plan->clsplans[class]; \
927 plan->pplans[plan ## _nb] = pplan; \
928 plan->clsplans[class] = &plan->pplans[plan ## _nb++]; \
929 } while(0)
931 /* Assign parameters to registers and stack with alignment according to the
932 rules in the procedure call standard for the ARM architecture (AAPCS).
933 The overall assignment is recorded in an array of per parameter structures
934 called parameter plans. The parameter plans are also further organized in a
935 number of linked lists, one per class of parameter (see the comment for the
936 definition of enum reg_class).
938 nb_args: number of parameters of the function for which a call is generated
939 float_abi: float ABI in use for this function call
940 plan: the structure where the overall assignment is recorded
941 todo: a bitmap that records which core registers hold a parameter
943 Returns the amount of stack space needed for parameter passing
945 Note: this function allocates an array in plan->pplans with tcc_malloc. It
946 is the responsibility of the caller to free this array once used (ie not
947 before copy_params). */
948 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
950 int i, size, align;
951 int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
/* plan_nb is the counter add_param_plan() reaches through plan ## _nb */
952 int plan_nb = 0;
953 struct param_plan pplan;
954 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
956 ncrn = nsaa = 0;
957 *todo = 0;
958 plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
959 memset(plan->clsplans, 0, sizeof(plan->clsplans));
/* i runs from nb_args-1 down to 0, so vtop[-i] walks the parameters from the
   deepest value stack entry up to the top */
960 for(i = nb_args; i-- ;) {
961 int j, start_vfpreg = 0;
962 CType type = vtop[-i].type;
963 type.t &= ~VT_ARRAY;
964 size = type_size(&type, &align);
/* both size and alignment are rounded up to a multiple of 4 */
965 size = (size + 3) & ~3;
966 align = (align + 3) & ~3;
967 switch(vtop[-i].type.t & VT_BTYPE) {
968 case VT_STRUCT:
969 case VT_FLOAT:
970 case VT_DOUBLE:
971 case VT_LDOUBLE:
972 if (float_abi == ARM_HARD_FLOAT) {
973 int is_hfa = 0; /* Homogeneous float aggregate */
975 if (is_float(vtop[-i].type.t)
976 || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
977 int end_vfpreg;
979 start_vfpreg = assign_vfpreg(&avregs, align, size);
980 end_vfpreg = start_vfpreg + ((size - 1) >> 2);
981 if (start_vfpreg >= 0) {
982 pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
983 if (is_hfa)
984 add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
985 else
986 add_param_plan(plan, pplan, VFP_CLASS);
987 continue;
988 } else
/* no VFP register left: leave the switch, the parameter goes on the stack */
989 break;
/* round the next core register number up to the alignment of the parameter */
992 ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
993 if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
994 /* The parameter is allocated both in core register and on stack. As
995 * such, it can be of either class: it would either be the last of
996 * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
997 for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
998 *todo|=(1<<j);
999 pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
1000 add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
1001 ncrn += size/4;
/* the part that did not fit in r0-r3 occupies the start of the stack area */
1002 if (ncrn > 4)
1003 nsaa = (ncrn - 4) * 4;
1004 } else {
1005 ncrn = 4;
1006 break;
1008 continue;
1009 default:
1010 if (ncrn < 4) {
1011 int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
/* a long long starts on an even core register */
1013 if (is_long) {
1014 ncrn = (ncrn + 1) & -2;
1015 if (ncrn == 4)
1016 break;
1018 pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
1019 ncrn++;
1020 if (is_long)
1021 pplan.end = ncrn++;
1022 add_param_plan(plan, pplan, CORE_CLASS);
1023 continue;
/* reached through the 'break' statements above: the parameter goes on the
   stack at the next suitably aligned address */
1026 nsaa = (nsaa + (align - 1)) & ~(align - 1);
1027 pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
1028 add_param_plan(plan, pplan, STACK_CLASS);
1029 nsaa += size; /* size already rounded up before */
1031 return nsaa;
1034 #undef add_param_plan
1036 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1037 function call.
1039 nb_args: number of parameters the function takes
1040 plan: the overall assignment plan for parameters
1041 todo: a bitmap indicating what core reg will hold a parameter
1043 Returns the number of SValue added by this function on the value stack */
1044 static int copy_params(int nb_args, struct plan *plan, int todo)
1046 int size, align, r, i, nb_extra_sval = 0;
1047 struct param_plan *pplan;
1049 /* Several constraints require parameters to be copied in a specific order:
1050 - structures are copied to the stack before being loaded in a reg;
1051 - floats loaded to an odd numbered VFP reg are first copied to the
1052 preceding even numbered VFP reg and then moved to the next VFP reg.
1054 It is thus important that:
1055 - structures assigned to core regs must be copied after parameters
1056 assigned to the stack but before structures assigned to VFP regs because
1057 a structure can lie partly in core registers and partly on the stack;
1058 - parameters assigned to the stack and all structures be copied before
1059 parameters assigned to a core reg since copying a parameter to the stack
1060 require using a core reg;
1061 - parameters assigned to VFP regs be copied before structures assigned to
1062 VFP regs as the copy might use an even numbered VFP reg that already
1063 holds part of a structure. */
/* classes are processed in enum reg_class order; within a class the list is
   walked from the most recently added plan backwards through 'prev' */
1064 for(i = 0; i < NB_CLASSES; i++) {
1065 for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
/* work on a copy pushed on top of the value stack; it is popped again at
   the end of each iteration */
1066 vpushv(pplan->sval);
1067 pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1068 switch(i) {
1069 case STACK_CLASS:
1070 case CORE_STRUCT_CLASS:
1071 case VFP_STRUCT_CLASS:
1072 if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1073 int padding = 0;
1074 size = type_size(&pplan->sval->type, &align);
1075 /* align to stack align size */
1076 size = (size + 3) & ~3;
1077 if (i == STACK_CLASS && pplan->prev)
1078 padding = pplan->start - pplan->prev->end;
1079 size += padding; /* Add padding if any */
1080 /* allocate the necessary size on stack */
1081 gadd_sp(-size);
1082 /* generate structure store */
1083 r = get_reg(RC_INT);
1084 o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1085 vset(&vtop->type, r | VT_LVAL, 0);
1086 vswap();
1087 vstore(); /* memcpy to current sp + potential padding */
1089 /* Homogeneous float aggregates are loaded to VFP registers
1090 immediately since there is no way of loading data in multiple
1091 non consecutive VFP registers as what is done for other
1092 structures (see the use of todo). */
1093 if (i == VFP_STRUCT_CLASS) {
1094 int first = pplan->start, nb = pplan->end - first + 1;
1095 /* vpop.32 {pplan->start, ..., pplan->end} */
1096 o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1097 /* No need to write the register used to a SValue since VFP regs
1098 cannot be used for gcall_or_jmp */
1100 } else {
1101 if (is_float(pplan->sval->type.t)) {
1102 #ifdef TCC_ARM_VFP
1103 r = vfpr(gv(RC_FLOAT)) << 12;
1104 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1105 size = 4;
1106 else {
1107 size = 8;
1108 r |= 0x101; /* vpush.32 -> vpush.64 */
1110 o(0xED2D0A01 + r); /* vpush */
1111 #else
1112 r = fpr(gv(RC_FLOAT)) << 12;
1113 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1114 size = 4;
1115 else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1116 size = 8;
1117 else
1118 size = LDOUBLE_SIZE;
1120 if (size == 12)
1121 r |= 0x400000;
1122 else if(size == 8)
1123 r|=0x8000;
1125 o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1126 #endif
1127 } else {
1128 /* simple type (currently always same size) */
1129 /* XXX: implicit cast ? */
1130 size=4;
1131 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
/* a long long is split in two words, each pushed separately */
1132 lexpand_nr();
1133 size = 8;
1134 r = gv(RC_INT);
1135 o(0xE52D0004|(intr(r)<<12)); /* push r */
1136 vtop--;
1138 r = gv(RC_INT);
1139 o(0xE52D0004|(intr(r)<<12)); /* push r */
1141 if (i == STACK_CLASS && pplan->prev)
1142 gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1144 break;
1146 case VFP_CLASS:
/* load into the TCC float register holding VFP register pplan->start / 2 */
1147 gv(regmask(TREG_F0 + (pplan->start >> 1)));
1148 if (pplan->start & 1) { /* Must be in upper part of double register */
1149 o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1150 vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1152 break;
1154 case CORE_CLASS:
1155 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1156 lexpand_nr();
1157 gv(regmask(pplan->end));
1158 pplan->sval->r2 = vtop->r;
1159 vtop--;
1161 gv(regmask(pplan->start));
1162 /* Mark register as used so that gcall_or_jmp use another one
1163 (regs >=4 are free as never used to pass parameters) */
1164 pplan->sval->r = vtop->r;
1165 break;
1167 vtop--;
1171 /* Manually free remaining registers since next parameters are loaded
1172 * manually, without the help of gv(int). */
1173 save_regs(nb_args);
/* structures assigned to core registers were copied to the stack above: pop
   them into the registers recorded in the todo bitmap */
1175 if(todo) {
1176 o(0xE8BD0000|todo); /* pop {todo} */
1177 for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1178 int r;
1179 pplan->sval->r = pplan->start;
1180 /* An SValue can only pin 2 registers at best (r and r2) but a structure
1181 can occupy more than 2 registers. Thus, we need to push on the value
1182 stack some fake parameter to have one SValue for each register used
1183 by a structure (r2 is not used). */
1184 for (r = pplan->start + 1; r <= pplan->end; r++) {
1185 if (todo & (1 << r)) {
1186 nb_extra_sval++;
1187 vpushi(0);
1188 vtop->r = r;
1193 return nb_extra_sval;
1196 /* Generate function call. The function address is pushed first, then
1197 all the parameters in call order. This function pops all the
1198 parameters and the function address. */
1199 void gfunc_call(int nb_args)
1201 int r, args_size;
/* float_abi may be switched to softfp for the duration of this call; the
   value saved here is written back at the end of the function */
1202 int def_float_abi = float_abi;
1203 int todo;
1204 struct plan plan;
1206 #ifdef TCC_ARM_EABI
1207 int variadic;
/* With the hard-float ABI, variadic callees and callees for which
   floats_in_core_regs() is true are called with the softfp convention */
1209 if (float_abi == ARM_HARD_FLOAT) {
1210 variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1211 if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1212 float_abi = ARM_SOFTFP_FLOAT;
1214 #endif
1215 /* cannot leave the value in cpu flags if other instructions are generated.
1216 Also avoid leaving VT_JMP anywhere except on the top of the stack because
1217 it would complicate the code generator. */
1218 r = vtop->r & VT_VALMASK;
1219 if (r == VT_CMP || (r & ~1) == VT_JMP)
1220 gv(RC_INT);
/* assign_regs() fills 'plan' and 'todo'; the value it returns is the number
   of bytes given back to sp after the call (see gadd_sp below) */
1222 args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1224 #ifdef TCC_ARM_EABI
/* NOTE(review): only 4 bytes of padding are emitted although args_size is
   rounded up to a multiple of 8; this presumably relies on args_size always
   being a multiple of 4 - confirm */
1225 if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1226 args_size = (args_size + 7) & ~7;
1227 o(0xE24DD004); /* sub sp, sp, #4 */
1229 #endif
/* copy_params() returns the number of extra SValues it pushed; they are
   counted as arguments so that they get popped from the value stack below */
1231 nb_args += copy_params(nb_args, &plan, todo);
1232 tcc_free(plan.pplans);
1234 /* Move fct SValue on top as required by gcall_or_jmp */
1235 vrotb(nb_args + 1);
1236 gcall_or_jmp(0);
1237 if (args_size)
1238 gadd_sp(args_size); /* pop all parameters passed on the stack */
1239 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
/* softfp call returning a float or double: copy the result from r0 (float)
   or r0/r1 (double) into s0/d0 */
1240 if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1241 if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1242 o(0xEE000A10); /*vmov s0, r0 */
1243 } else {
1244 o(0xEE000B10); /* vmov.32 d0[0], r0 */
1245 o(0xEE201B10); /* vmov.32 d0[1], r1 */
1248 #endif
1249 vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1250 leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
/* undo the temporary switch to softfp made at the top of the function */
1251 float_abi = def_float_abi;
1254 /* generate function prolog for the function whose type is 'func_type' */
1255 void gfunc_prolog(CType *func_type)
1257 Sym *sym,*sym2;
/* n: number of core register words, nf: number of VFP single registers that
   the prolog stores on the stack */
1258 int n, nf, size, align, struct_ret = 0;
1259 int addr, pn, sn; /* pn=core, sn=stack */
1260 CType ret_type;
1262 #ifdef TCC_ARM_EABI
1263 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1264 #endif
1266 sym = func_type->ref;
1267 func_vt = sym->type;
1268 func_var = (func_type->ref->c == FUNC_ELLIPSIS);
1270 n = nf = 0;
/* a struct result for which gfunc_sret() returns 0 counts as one core
   register word and has its frame offset recorded in func_vc */
1271 if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1272 !gfunc_sret(&func_vt, func_var, &ret_type, &align))
1274 n++;
1275 struct_ret = 1;
1276 func_vc = 12; /* Offset from fp of the place to store the result */
/* First pass over the parameters: count how many core register words (n)
   and VFP single registers (nf) have to be saved */
1278 for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
1279 size = type_size(&sym2->type, &align);
1280 #ifdef TCC_ARM_EABI
1281 if (float_abi == ARM_HARD_FLOAT && !func_var &&
1282 (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
1283 int tmpnf = assign_vfpreg(&avregs, align, size);
1284 tmpnf += (size + 3) / 4;
1285 nf = (tmpnf > nf) ? tmpnf : nf;
1286 } else
1287 #endif
1288 if (n < 4)
1289 n += (size + 3) / 4;
1291 o(0xE1A0C00D); /* mov ip,sp */
/* a variadic function always stores all four core argument registers */
1292 if (func_var)
1293 n=4;
1294 if (n) {
1295 if(n>4)
1296 n=4;
1297 #ifdef TCC_ARM_EABI
1298 n=(n+1)&-2; /* round up to an even number of registers */
1299 #endif
1300 o(0xE92D0000|((1<<n)-1)); /* save r0-r3 (the first n of them) on stack if needed */
1302 if (nf) {
1303 if (nf>16)
1304 nf=16;
1305 nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1306 o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1308 o(0xE92D5800); /* save fp, ip, lr */
1309 o(0xE1A0B00D); /* mov fp, sp */
/* remember where the placeholder instruction is; gfunc_epilog overwrites it
   once the size of the local area is known */
1310 func_sub_sp_offset = ind;
1311 o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1313 #ifdef TCC_ARM_EABI
1314 if (float_abi == ARM_HARD_FLOAT) {
1315 func_vc += nf * 4;
1316 avregs = AVAIL_REGS_INITIALIZER;
1318 #endif
/* Second pass: compute the frame offset of every parameter and declare it
   as a local variable. pn counts core register words, sn stack words. */
1319 pn = struct_ret, sn = 0;
1320 while ((sym = sym->next)) {
1321 CType *type;
1322 type = &sym->type;
1323 size = type_size(type, &align);
1324 size = (size + 3) >> 2; /* size in words from here on */
1325 align = (align + 3) & ~3;
1326 #ifdef TCC_ARM_EABI
1327 if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
1328 || is_hgen_float_aggr(&sym->type))) {
1329 int fpn = assign_vfpreg(&avregs, align, size << 2);
1330 if (fpn >= 0)
1331 addr = fpn * 4;
1332 else
1333 goto from_stack;
1334 } else
1335 #endif
1336 if (pn < 4) {
1337 #ifdef TCC_ARM_EABI
1338 pn = (pn + (align-1)/4) & -(align/4);
1339 #endif
1340 addr = (nf + pn) * 4;
1341 pn += size;
/* parameter straddles the register/stack boundary: its tail already uses
   stack words */
1342 if (!sn && pn > 4)
1343 sn = (pn - 4);
1344 } else {
1345 #ifdef TCC_ARM_EABI
1346 from_stack:
1347 sn = (sn + (align-1)/4) & -(align/4);
1348 #endif
1349 addr = (n + nf + sn) * 4;
1350 sn += size;
/* NOTE(review): the constant 12 presumably skips the three words (fp, ip,
   lr) stored just below the saved argument registers - confirm */
1352 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
1353 addr + 12);
1355 last_itod_magic=0;
1356 leaffunc = 1;
1357 loc = 0;
1360 /* generate function epilog */
1361 void gfunc_epilog(void)
1363 uint32_t x;
1364 int diff;
1365 /* Copy float return value to core register if base standard is used and
1366 float computation is made with VFP */
1367 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1368 if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1369 if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1370 o(0xEE100A10); /* fmrs r0, s0 */
1371 else {
1372 o(0xEE100B10); /* fmrdl r0, d0 */
1373 o(0xEE301B10); /* fmrdh r1, d0 */
1376 #endif
1377 o(0xE89BA800); /* restore fp, sp, pc */
/* diff is -loc rounded up to a multiple of 4: the amount to subtract from
   fp to obtain sp in the prolog */
1378 diff = (-loc + 3) & -4;
1379 #ifdef TCC_ARM_EABI
/* NOTE(review): for non-leaf functions the size is adjusted, presumably to
   keep sp 8 byte aligned at call sites - confirm */
1380 if(!leaffunc)
1381 diff = ((diff + 11) & -8) - 4;
1382 #endif
1383 if(diff > 0) {
/* try to encode the adjustment as one instruction; stuff_const() gives 0
   when the constant cannot be encoded */
1384 x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1385 if(x)
/* overwrite the placeholder emitted by gfunc_prolog */
1386 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1387 else {
/* emit an out-of-line sequence that loads diff from a literal word, and
   patch the placeholder with a branch to that sequence, which returns
   through lr */
1388 int addr;
1389 addr=ind;
1390 o(0xE59FC004); /* ldr ip,[pc+4] */
1391 o(0xE04BD00C); /* sub sp,fp,ip */
1392 o(0xE1A0F00E); /* mov pc,lr */
1393 o(diff);
1394 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1399 /* generate a jump to a label */
1400 int gjmp(int t)
1402 int r;
1403 r=ind;
1404 o(0xE0000000|encbranch(r,t,1));
1405 return r;
/* Emit a jump to the fixed address 'a'. The position returned by gjmp()
   is not needed here. */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1414 /* generate a test. set 'inv' to invert test. Stack entry is popped.
   The returned value replaces 't' for the caller. */
1415 int gtst(int inv, int t)
1417 int v, r;
1418 uint32_t op;
1419 v = vtop->r & VT_VALMASK;
1420 r=ind;
1421 if (v == VT_CMP) {
/* the value lives in the cpu flags: emit a conditional branch (condition
   negated when 'inv' is set) encoded with 't', and return its position */
1422 op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1423 op|=encbranch(r,t,1);
1424 o(op);
1425 t=r;
1426 } else if (v == VT_JMP || v == VT_JMPI) {
/* the value is a list of pending jumps starting at vtop->c.i */
1427 if ((v & 1) == inv) {
1428 if(!vtop->c.i)
1429 vtop->c.i=t;
1430 else {
1431 uint32_t *x;
1432 int p,lp;
1433 if(t) {
/* walk the list to its last branch (decbranch() returns 0 there) and
   re-encode that branch with 't' */
1434 p = vtop->c.i;
1435 do {
1436 p = decbranch(lp=p);
1437 } while(p);
1438 x = (uint32_t *)(cur_text_section->data + lp);
1439 *x &= 0xff000000;
1440 *x |= encbranch(lp,t,1);
1442 t = vtop->c.i;
1444 } else {
/* opposite sense: emit a new jump for 't' and resolve the pending list to
   the current position */
1445 t = gjmp(t);
1446 gsym(vtop->c.i);
1449 vtop--;
1450 return t;
1453 /* generate an integer binary operation */
1454 void gen_opi(int op)
/* opc holds the opcode bits of the operation. c selects how the operation
   is emitted by the second switch below: 1 = data processing instruction,
   2 = shift, 3 = call of the helper function 'func'. */
1456 int c, func = 0;
1457 uint32_t opc = 0, r, fr;
1458 unsigned short retreg = REG_IRET;
1460 c=0;
1461 switch(op) {
1462 case '+':
1463 opc = 0x8;
1464 c=1;
1465 break;
1466 case TOK_ADDC1: /* add with carry generation */
1467 opc = 0x9;
1468 c=1;
1469 break;
1470 case '-':
1471 opc = 0x4;
1472 c=1;
1473 break;
1474 case TOK_SUBC1: /* sub with carry generation */
1475 opc = 0x5;
1476 c=1;
1477 break;
1478 case TOK_ADDC2: /* add with carry use */
1479 opc = 0xA;
1480 c=1;
1481 break;
1482 case TOK_SUBC2: /* sub with carry use */
1483 opc = 0xC;
1484 c=1;
1485 break;
1486 case '&':
1487 opc = 0x0;
1488 c=1;
1489 break;
1490 case '^':
1491 opc = 0x2;
1492 c=1;
1493 break;
1494 case '|':
1495 opc = 0x18;
1496 c=1;
1497 break;
1498 case '*':
/* multiplication is emitted directly; the result replaces vtop[-1] */
1499 gv2(RC_INT, RC_INT);
1500 r = vtop[-1].r;
1501 fr = vtop[0].r;
1502 vtop--;
1503 o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1504 return;
1505 case TOK_SHL:
1506 opc = 0;
1507 c=2;
1508 break;
1509 case TOK_SHR:
1510 opc = 1;
1511 c=2;
1512 break;
1513 case TOK_SAR:
1514 opc = 2;
1515 c=2;
1516 break;
1517 case '/':
1518 case TOK_PDIV:
1519 func=TOK___divsi3;
1520 c=3;
1521 break;
1522 case TOK_UDIV:
1523 func=TOK___udivsi3;
1524 c=3;
1525 break;
1526 case '%':
1527 #ifdef TCC_ARM_EABI
/* with EABI the result of the divmod helper is taken from REG_LRET */
1528 func=TOK___aeabi_idivmod;
1529 retreg=REG_LRET;
1530 #else
1531 func=TOK___modsi3;
1532 #endif
1533 c=3;
1534 break;
1535 case TOK_UMOD:
1536 #ifdef TCC_ARM_EABI
1537 func=TOK___aeabi_uidivmod;
1538 retreg=REG_LRET;
1539 #else
1540 func=TOK___umodsi3;
1541 #endif
1542 c=3;
1543 break;
1544 case TOK_UMULL:
/* the result uses two registers: r2 of the entry gets a fresh register */
1545 gv2(RC_INT, RC_INT);
1546 r=intr(vtop[-1].r2=get_reg(RC_INT));
1547 c=vtop[-1].r;
1548 vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1549 vtop--;
1550 o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1551 return;
1552 default:
/* every other operator is emitted with opcode 0x15; comparison operators
   get their result marked as VT_CMP at the 'done' label below */
1553 opc = 0x15;
1554 c=1;
1555 break;
1557 switch(c) {
1558 case 1:
/* constant first operand of a subtraction: swap the operands and use the
   reverse form of the instruction */
1559 if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1560 if(opc == 4 || opc == 5 || opc == 0xc) {
1561 vswap();
1562 opc|=2; // sub -> rsb
1565 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1566 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1567 gv(RC_INT);
1568 vswap();
1569 c=intr(gv(RC_INT));
1570 vswap();
1571 opc=0xE0000000|(opc<<20)|(c<<16);
/* use an immediate operand when stuff_const() can encode the constant */
1572 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1573 uint32_t x;
1574 x=stuff_const(opc|0x2000000,vtop->c.i);
1575 if(x) {
1576 r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1577 o(x|(r<<12));
1578 goto done;
1581 fr=intr(gv(RC_INT));
1582 r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1583 o(opc|(r<<12)|fr);
1584 done:
1585 vtop--;
/* comparison: the result is in the cpu flags, described by VT_CMP and op */
1586 if (op >= TOK_ULT && op <= TOK_GT) {
1587 vtop->r = VT_CMP;
1588 vtop->c.i = op;
1590 break;
1591 case 2:
1592 opc=0xE1A00000|(opc<<5);
1593 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1594 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1595 gv(RC_INT);
1596 vswap();
1597 r=intr(gv(RC_INT));
1598 vswap();
1599 opc|=r;
/* constant shift count: encoded in the instruction (low 5 bits only);
   otherwise the count is taken from a register */
1600 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1601 fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1602 c = vtop->c.i & 0x1f;
1603 o(opc|(c<<7)|(fr<<12));
1604 } else {
1605 fr=intr(gv(RC_INT));
1606 c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1607 o(opc|(c<<12)|(fr<<8)|0x10);
1609 vtop--;
1610 break;
1611 case 3:
/* call the helper with both operands as arguments, then push a value
   located in the register that holds the result */
1612 vpush_global_sym(&func_old_type, func);
1613 vrott(3);
1614 gfunc_call(2);
1615 vpushi(0);
1616 vtop->r = retreg;
1617 break;
1618 default:
1619 tcc_error("gen_opi %i unimplemented!",op);
1623 #ifdef TCC_ARM_VFP
1624 static int is_zero(int i)
1626 if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1627 return 0;
1628 if (vtop[i].type.t == VT_FLOAT)
1629 return (vtop[i].c.f == 0.f);
1630 else if (vtop[i].type.t == VT_DOUBLE)
1631 return (vtop[i].c.d == 0.0);
1632 return (vtop[i].c.ld == 0.l);
1635 /* generate a floating point operation 'v = t1 op t2' instruction. The
1636 * two operands are guaranteed to have the same floating point type */
1637 void gen_opf(int op)
/* x accumulates the instruction word; fneg is set when '0 - v' is turned
   into a negation, which has a single operand */
1639 uint32_t x;
1640 int fneg=0,r;
1641 x=0xEE000A00|T2CPR(vtop->type.t);
1642 switch(op) {
1643 case '+':
/* adding a zero constant: drop the constant and emit nothing */
1644 if(is_zero(-1))
1645 vswap();
1646 if(is_zero(0)) {
1647 vtop--;
1648 return;
1650 x|=0x300000;
1651 break;
1652 case '-':
1653 x|=0x300040;
/* v - 0: drop the constant and emit nothing */
1654 if(is_zero(0)) {
1655 vtop--;
1656 return;
/* 0 - v: drop the constant and negate v instead */
1658 if(is_zero(-1)) {
1659 x|=0x810000; /* fsubX -> fnegX */
1660 vswap();
1661 vtop--;
1662 fneg=1;
1664 break;
1665 case '*':
1666 x|=0x200000;
1667 break;
1668 case '/':
1669 x|=0x800000;
1670 break;
1671 default:
1672 if(op < TOK_ULT || op > TOK_GT) {
1673 tcc_error("unknown fp op %x!",op);
1674 return;
/* zero constant as first operand: swap the operands and mirror the
   comparison so that the compare-with-zero form can be used */
1676 if(is_zero(-1)) {
1677 vswap();
1678 switch(op) {
1679 case TOK_LT: op=TOK_GT; break;
1680 case TOK_GE: op=TOK_ULE; break;
1681 case TOK_LE: op=TOK_GE; break;
1682 case TOK_GT: op=TOK_ULT; break;
1685 x|=0xB40040; /* fcmpX */
1686 if(op!=TOK_EQ && op!=TOK_NE)
1687 x|=0x80; /* fcmpX -> fcmpeX */
1688 if(is_zero(0)) {
1689 vtop--;
1690 o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1691 } else {
1692 x|=vfpr(gv(RC_FLOAT));
1693 vswap();
1694 o(x|(vfpr(gv(RC_FLOAT))<<12));
1695 vtop--;
1697 o(0xEEF1FA10); /* fmstat */
/* NOTE(review): the operator is remapped to the condition tested after
   fmstat, presumably so that unordered operands compare false - confirm */
1699 switch(op) {
1700 case TOK_LE: op=TOK_ULE; break;
1701 case TOK_LT: op=TOK_ULT; break;
1702 case TOK_UGE: op=TOK_GE; break;
1703 case TOK_UGT: op=TOK_GT; break;
/* the result of the comparison is left in the cpu flags */
1706 vtop->r = VT_CMP;
1707 vtop->c.i = op;
1708 return;
/* arithmetic: load the operand(s) into float registers; 'r' collects the
   register mask handed to get_reg_ex() for the result register */
1710 r=gv(RC_FLOAT);
1711 x|=vfpr(r);
1712 r=regmask(r);
1713 if(!fneg) {
1714 int r2;
1715 vswap();
1716 r2=gv(RC_FLOAT);
1717 x|=vfpr(r2)<<16;
1718 r|=regmask(r2);
1720 vtop->r=get_reg_ex(RC_FLOAT,r);
1721 if(!fneg)
1722 vtop--;
1723 o(x|(vfpr(vtop->r)<<12));
1726 #else
1727 static uint32_t is_fconst()
1729 long double f;
1730 uint32_t r;
1731 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1732 return 0;
1733 if (vtop->type.t == VT_FLOAT)
1734 f = vtop->c.f;
1735 else if (vtop->type.t == VT_DOUBLE)
1736 f = vtop->c.d;
1737 else
1738 f = vtop->c.ld;
1739 if(!ieee_finite(f))
1740 return 0;
1741 r=0x8;
1742 if(f<0.0) {
1743 r=0x18;
1744 f=-f;
1746 if(f==0.0)
1747 return r;
1748 if(f==1.0)
1749 return r|1;
1750 if(f==2.0)
1751 return r|2;
1752 if(f==3.0)
1753 return r|3;
1754 if(f==4.0)
1755 return r|4;
1756 if(f==5.0)
1757 return r|5;
1758 if(f==0.5)
1759 return r|6;
1760 if(f==10.0)
1761 return r|7;
1762 return 0;
1765 /* generate a floating point operation 'v = t1 op t2' instruction. The
1766 two operands are guaranteed to have the same floating point type */
1767 void gen_opf(int op)
/* c1/c2: is_fconst() result for the first/second operand (0 when the
   operand is not an immediate constant, above 0xf when it is the negative
   of one). r/r2: the two operand fields of the instruction. */
1769 uint32_t x, r, r2, c1, c2;
1770 //fputs("gen_opf\n",stderr);
1771 vswap();
1772 c1 = is_fconst();
1773 vswap();
1774 c2 = is_fconst();
1775 x=0xEE000100;
/* select the precision bits from the operand type */
1776 #if LDOUBLE_SIZE == 8
1777 if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1778 x|=0x80;
1779 #else
1780 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1781 x|=0x80;
1782 else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1783 x|=0x80000;
1784 #endif
1785 switch(op)
1787 case '+':
/* the operation is commutative: if only the first operand is a usable
   constant, swap so that it becomes the second one */
1788 if(!c2) {
1789 vswap();
1790 c2=c1;
1792 vswap();
1793 r=fpr(gv(RC_FLOAT));
1794 vswap();
1795 if(c2) {
/* adding the negative of an immediate is emitted as a subtraction */
1796 if(c2>0xf)
1797 x|=0x200000; // suf
1798 r2=c2&0xf;
1799 } else {
1800 r2=fpr(gv(RC_FLOAT));
1802 break;
1803 case '-':
1804 if(c2) {
/* subtracting the negative of an immediate is emitted as an addition */
1805 if(c2<=0xf)
1806 x|=0x200000; // suf
1807 r2=c2&0xf;
1808 vswap();
1809 r=fpr(gv(RC_FLOAT));
1810 vswap();
1811 } else if(c1 && c1<=0xf) {
/* constant first operand: use the reverse subtraction */
1812 x|=0x300000; // rsf
1813 r2=c1;
1814 r=fpr(gv(RC_FLOAT));
1815 vswap();
1816 } else {
1817 x|=0x200000; // suf
1818 vswap();
1819 r=fpr(gv(RC_FLOAT));
1820 vswap();
1821 r2=fpr(gv(RC_FLOAT));
1823 break;
1824 case '*':
/* only non-negative immediates are used here; prefer the first operand
   when the second one is not such a constant */
1825 if(!c2 || c2>0xf) {
1826 vswap();
1827 c2=c1;
1829 vswap();
1830 r=fpr(gv(RC_FLOAT));
1831 vswap();
1832 if(c2 && c2<=0xf)
1833 r2=c2;
1834 else
1835 r2=fpr(gv(RC_FLOAT));
1836 x|=0x100000; // muf
1837 break;
1838 case '/':
1839 if(c2 && c2<=0xf) {
1840 x|=0x400000; // dvf
1841 r2=c2;
1842 vswap();
1843 r=fpr(gv(RC_FLOAT));
1844 vswap();
1845 } else if(c1 && c1<=0xf) {
/* constant first operand: use the reverse division */
1846 x|=0x500000; // rdf
1847 r2=c1;
1848 r=fpr(gv(RC_FLOAT));
1849 vswap();
1850 } else {
1851 x|=0x400000; // dvf
1852 vswap();
1853 r=fpr(gv(RC_FLOAT));
1854 vswap();
1855 r2=fpr(gv(RC_FLOAT));
1857 break;
1858 default:
1859 if(op >= TOK_ULT && op <= TOK_GT) {
1860 x|=0xd0f110; // cmfe
1861 /* bug (intention?) in Linux FPU emulator
1862 doesn't set carry if equal */
1863 switch(op) {
1864 case TOK_ULT:
1865 case TOK_UGE:
1866 case TOK_ULE:
1867 case TOK_UGT:
1868 tcc_error("unsigned comparison on floats?");
1869 break;
1870 case TOK_LT:
1871 op=TOK_Nset;
1872 break;
1873 case TOK_LE:
1874 op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1875 break;
1876 case TOK_EQ:
1877 case TOK_NE:
1878 x&=~0x400000; // cmfe -> cmf
1879 break;
/* only the first operand is a constant: swap the operands and mirror the
   comparison accordingly */
1881 if(c1 && !c2) {
1882 c2=c1;
1883 vswap();
1884 switch(op) {
1885 case TOK_Nset:
1886 op=TOK_GT;
1887 break;
1888 case TOK_GE:
1889 op=TOK_ULE;
1890 break;
1891 case TOK_ULE:
1892 op=TOK_GE;
1893 break;
1894 case TOK_GT:
1895 op=TOK_Nset;
1896 break;
1899 vswap();
1900 r=fpr(gv(RC_FLOAT));
1901 vswap();
1902 if(c2) {
1903 if(c2>0xf)
1904 x|=0x200000;
1905 r2=c2&0xf;
1906 } else {
1907 r2=fpr(gv(RC_FLOAT));
/* the result of the comparison is left in the cpu flags */
1909 vtop[-1].r = VT_CMP;
1910 vtop[-1].c.i = op;
1911 } else {
1912 tcc_error("unknown fp op %x!",op);
1913 return;
/* destination field: 15 for a comparison, otherwise a float register
   obtained from get_reg_ex() */
1916 if(vtop[-1].r == VT_CMP)
1917 c1=15;
1918 else {
1919 c1=vtop->r;
/* NOTE(review): bit 0x8 of r2 presumably marks an immediate operand (it is
   set in every non-zero is_fconst() result), in which case vtop->r holds no
   register - confirm */
1920 if(r2&0x8)
1921 c1=vtop[-1].r;
1922 vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1923 c1=fpr(vtop[-1].r);
1925 vtop--;
1926 o(x|(r<<16)|(c1<<12)|r2);
1928 #endif
1930 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1931 and 'long long' cases. */
1932 ST_FUNC void gen_cvt_itof1(int t)
1934 uint32_t r, r2;
1935 int bt;
1936 bt=vtop->type.t & VT_BTYPE;
/* int, short and byte sources are converted with inline instructions */
1937 if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1938 #ifndef TCC_ARM_VFP
1939 uint32_t dsize = 0;
1940 #endif
1941 r=intr(gv(RC_INT));
1942 #ifdef TCC_ARM_VFP
/* move the integer into a float register, then convert it in place */
1943 r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1944 o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1945 r2|=r2<<12;
1946 if(!(vtop->type.t & VT_UNSIGNED))
1947 r2|=0x80; /* fuitoX -> fsitoX */
1948 o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1949 #else
1950 r2=fpr(vtop->r=get_reg(RC_FLOAT));
1951 if((t & VT_BTYPE) != VT_FLOAT)
1952 dsize=0x80; /* flts -> fltd */
1953 o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
/* unsigned int source: after the compare, conditionally load the constant
   4294967296.0f and add it to the converted value.
   NOTE(review): the conditional instructions presumably execute only when
   the source looked negative as a signed value - confirm */
1954 if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1955 uint32_t off = 0;
1956 o(0xE3500000|(r<<12)); /* cmp */
1957 r=fpr(get_reg(RC_FLOAT));
/* last_itod_magic is the position of the constant emitted by an earlier
   conversion; it is reused when it is at most 255 words away */
1958 if(last_itod_magic) {
1959 off=ind+8-last_itod_magic;
1960 off/=4;
1961 if(off>255)
1962 off=0;
1964 o(0xBD1F0100|(r<<12)|off); /* ldflts */
/* no reusable constant: emit it inline and branch over it */
1965 if(!off) {
1966 o(0xEA000000); /* b */
1967 last_itod_magic=ind;
1968 o(0x4F800000); /* 4294967296.0f */
1970 o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1972 #endif
1973 return;
1974 } else if(bt == VT_LLONG) {
/* long long sources are converted by a helper function selected from the
   destination type and the signedness of the source */
1975 int func;
1976 CType *func_type = 0;
1977 if((t & VT_BTYPE) == VT_FLOAT) {
1978 func_type = &func_float_type;
1979 if(vtop->type.t & VT_UNSIGNED)
1980 func=TOK___floatundisf;
1981 else
1982 func=TOK___floatdisf;
1983 #if LDOUBLE_SIZE != 8
1984 } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1985 func_type = &func_ldouble_type;
1986 if(vtop->type.t & VT_UNSIGNED)
1987 func=TOK___floatundixf;
1988 else
1989 func=TOK___floatdixf;
1990 } else if((t & VT_BTYPE) == VT_DOUBLE) {
1991 #else
1992 } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1993 #endif
1994 func_type = &func_double_type;
1995 if(vtop->type.t & VT_UNSIGNED)
1996 func=TOK___floatundidf;
1997 else
1998 func=TOK___floatdidf;
/* func_type stays 0 for an unsupported destination type, in which case the
   error below is reported */
2000 if(func_type) {
2001 vpush_global_sym(func_type, func);
2002 vswap();
2003 gfunc_call(1);
2004 vpushi(0);
2005 vtop->r=TREG_F0;
2006 return;
2009 tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
2012 /* convert fp to int 't' type */
2013 void gen_cvt_ftoi(int t)
/* u: signedness of the destination, r2: basic type of the source value,
   func: helper function to call (0 when none was selected) */
2015 uint32_t r, r2;
2016 int u, func = 0;
2017 u=t&VT_UNSIGNED;
2018 t&=VT_BTYPE;
2019 r2=vtop->type.t & VT_BTYPE;
2020 if(t==VT_INT) {
2021 #ifdef TCC_ARM_VFP
/* convert inside the float register, then move the result to an integer
   register */
2022 r=vfpr(gv(RC_FLOAT));
2023 u=u?0:0x10000;
2024 o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
2025 r2=intr(vtop->r=get_reg(RC_INT));
2026 o(0xEE100A10|(r<<16)|(r2<<12));
2027 return;
2028 #else
/* unsigned destination: use a helper function; signed destination: one
   inline instruction */
2029 if(u) {
2030 if(r2 == VT_FLOAT)
2031 func=TOK___fixunssfsi;
2032 #if LDOUBLE_SIZE != 8
2033 else if(r2 == VT_LDOUBLE)
2034 func=TOK___fixunsxfsi;
2035 else if(r2 == VT_DOUBLE)
2036 #else
2037 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2038 #endif
2039 func=TOK___fixunsdfsi;
2040 } else {
2041 r=fpr(gv(RC_FLOAT));
2042 r2=intr(vtop->r=get_reg(RC_INT));
2043 o(0xEE100170|(r2<<12)|r);
2044 return;
2046 #endif
2047 } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2048 if(r2 == VT_FLOAT)
2049 func=TOK___fixsfdi;
2050 #if LDOUBLE_SIZE != 8
2051 else if(r2 == VT_LDOUBLE)
2052 func=TOK___fixxfdi;
2053 else if(r2 == VT_DOUBLE)
2054 #else
2055 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2056 #endif
2057 func=TOK___fixdfdi;
/* call the selected helper with the value as its only argument; the result
   is in REG_IRET, with REG_LRET as second register for long long */
2059 if(func) {
2060 vpush_global_sym(&func_old_type, func);
2061 vswap();
2062 gfunc_call(1);
2063 vpushi(0);
2064 if(t == VT_LLONG)
2065 vtop->r2 = REG_LRET;
2066 vtop->r = REG_IRET;
2067 return;
2069 tcc_error("unimplemented gen_cvt_ftoi!");
2072 /* convert from one floating point type to another */
2073 void gen_cvt_ftof(int t)
2075 #ifdef TCC_ARM_VFP
2076 if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2077 uint32_t r = vfpr(gv(RC_FLOAT));
2078 o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2080 #else
2081 /* all we have to do on i386 and FPA ARM is to put the float in a register */
2082 gv(RC_FLOAT);
2083 #endif
2086 /* computed goto support */
2087 void ggoto(void)
2089 gcall_or_jmp(1);
2090 vtop--;
2093 /* Save the stack pointer onto the stack and return the location of its address */
2094 ST_FUNC void gen_vla_sp_save(int addr) {
2095 tcc_error("variable length arrays unsupported for this target");
2098 /* Restore the SP from a location on the stack */
2099 ST_FUNC void gen_vla_sp_restore(int addr) {
2100 tcc_error("variable length arrays unsupported for this target");
2103 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2104 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2105 tcc_error("variable length arrays unsupported for this target");
2108 /* end of ARM code generator */
2109 /*************************************************************/
2110 #endif
2111 /*************************************************************/