a small revers for bcheck.o changes (d80593bc4d43)
[tinycc.git] / arm-gen.c
bloba705073617dde835b408f797b0162e51972941ed
1 /*
2 * ARMv4 code generator for TCC
4 * Copyright (c) 2003 Daniel Glöckner
5 * Copyright (c) 2012 Thomas Preud'homme
7 * Based on i386-gen.c by Fabrice Bellard
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #ifdef TARGET_DEFS_ONLY
#if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
#error "Currently TinyCC only supports float computation with VFP instructions"
#endif

/* number of registers handled by the register allocator
   (see the TREG_* enumeration and reg_classes[]) */
#ifdef TCC_ARM_VFP
#define NB_REGS 13
#else
#define NB_REGS 9
#endif

#ifndef TCC_ARM_VERSION
# define TCC_ARM_VERSION 5
#endif

/* Register classes, one bit per class.  A register may be a member of
   several classes.  Keep the bits ordered from the most general class to
   the most specific one: gv2() makes assumptions about that ordering. */
#define RC_INT   (1 << 0)  /* generic integer register */
#define RC_FLOAT (1 << 1)  /* generic float register */
#define RC_R0    (1 << 2)
#define RC_R1    (1 << 3)
#define RC_R2    (1 << 4)
#define RC_R3    (1 << 5)
#define RC_R12   (1 << 6)
#define RC_F0    (1 << 7)
#define RC_F1    (1 << 8)
#define RC_F2    (1 << 9)
#define RC_F3    (1 << 10)
#ifdef TCC_ARM_VFP
#define RC_F4    (1 << 11)
#define RC_F5    (1 << 12)
#define RC_F6    (1 << 13)
#define RC_F7    (1 << 14)
#endif

/* classes of the registers that carry a function result */
#define RC_IRET RC_R0  /* function return: integer register */
#define RC_LRET RC_R1  /* function return: second integer register */
#define RC_FRET RC_F0  /* function return: float register */
/* Symbolic indices of the registers known to the allocator.  The values
   index reg_classes[], so both lists must stay in the same order. */
enum {
    TREG_R0  = 0,
    TREG_R1  = 1,
    TREG_R2  = 2,
    TREG_R3  = 3,
    TREG_R12 = 4,
    TREG_F0  = 5,
    TREG_F1  = 6,
    TREG_F2  = 7,
    TREG_F3  = 8,
#ifdef TCC_ARM_VFP
    TREG_F4  = 9,
    TREG_F5  = 10,
    TREG_F6  = 11,
    TREG_F7  = 12,
#endif
};
/* T2CPR(t) yields 0x100 when the basic type of 't' is anything but VT_FLOAT
   and 0 for VT_FLOAT; load() ORs the result into the VFP register-copy
   opcode it emits. */
84 #ifdef TCC_ARM_VFP
85 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
86 #endif
88 /* return registers for function */
89 #define REG_IRET TREG_R0 /* single word int return register */
90 #define REG_LRET TREG_R1 /* second word return register (for long long) */
91 #define REG_FRET TREG_F0 /* float return register */
/* With the EABI the 64-bit division and modulo helper tokens are both mapped
   onto the same __aeabi_*divmod token (signed and unsigned variants). */
93 #ifdef TCC_ARM_EABI
94 #define TOK___divdi3 TOK___aeabi_ldivmod
95 #define TOK___moddi3 TOK___aeabi_ldivmod
96 #define TOK___udivdi3 TOK___aeabi_uldivmod
97 #define TOK___umoddi3 TOK___aeabi_uldivmod
98 #endif
100 /* defined if function parameters must be evaluated in reverse order */
101 #define INVERT_FUNC_PARAMS
103 /* defined if structures are passed as pointers. Otherwise structures
104 are directly pushed on stack. */
105 /* #define FUNC_STRUCT_PARAM_AS_PTR */
107 /* pointer size, in bytes */
108 #define PTR_SIZE 4
110 /* long double size and alignment, in bytes */
/* NOTE(review): both the VFP branch and the fallback below define
   LDOUBLE_SIZE as 8, so the value is 8 in every configuration visible here. */
111 #ifdef TCC_ARM_VFP
112 #define LDOUBLE_SIZE 8
113 #endif
115 #ifndef LDOUBLE_SIZE
116 #define LDOUBLE_SIZE 8
117 #endif
/* long double alignment: 8 bytes with the EABI, 4 bytes otherwise */
119 #ifdef TCC_ARM_EABI
120 #define LDOUBLE_ALIGN 8
121 #else
122 #define LDOUBLE_ALIGN 4
123 #endif
125 /* maximum alignment (for aligned attribute support) */
126 #define MAX_ALIGN 8
/* plain 'char' is treated as unsigned on this target */
128 #define CHAR_IS_UNSIGNED
130 /******************************************************/
131 /* ELF defines */
133 #define EM_TCC_TARGET EM_ARM
135 /* relocation type for 32 bit data relocation */
136 #define R_DATA_32 R_ARM_ABS32
137 #define R_DATA_PTR R_ARM_ABS32
138 #define R_JMP_SLOT R_ARM_JUMP_SLOT
139 #define R_COPY R_ARM_COPY
/* ELF start address and page size used for this target */
141 #define ELF_START_ADDR 0x00008000
142 #define ELF_PAGE_SIZE 0x1000
/* Floating-point calling convention: with ARM_HARD_FLOAT, assign_regs()
   places float parameters in VFP registers; with ARM_SOFTFP_FLOAT they go
   to core registers and the stack. */
enum float_abi {
    ARM_SOFTFP_FLOAT = 0,
    ARM_HARD_FLOAT   = 1,
};
149 /******************************************************/
150 #else /* ! TARGET_DEFS_ONLY */
151 /******************************************************/
152 #include "tcc.h"
154 enum float_abi float_abi;
156 ST_DATA const int reg_classes[NB_REGS] = {
157 /* r0 */ RC_INT | RC_R0,
158 /* r1 */ RC_INT | RC_R1,
159 /* r2 */ RC_INT | RC_R2,
160 /* r3 */ RC_INT | RC_R3,
161 /* r12 */ RC_INT | RC_R12,
162 /* f0 */ RC_FLOAT | RC_F0,
163 /* f1 */ RC_FLOAT | RC_F1,
164 /* f2 */ RC_FLOAT | RC_F2,
165 /* f3 */ RC_FLOAT | RC_F3,
166 #ifdef TCC_ARM_VFP
167 /* d4/s8 */ RC_FLOAT | RC_F4,
168 /* d5/s10 */ RC_FLOAT | RC_F5,
169 /* d6/s12 */ RC_FLOAT | RC_F6,
170 /* d7/s14 */ RC_FLOAT | RC_F7,
171 #endif
/* Code generator state private to this file.  NOTE(review): the users of
   these variables are outside the part of the file visible here. */
static int func_sub_sp_offset;
static int last_itod_magic;
static int leaffunc;
177 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
178 static CType float_type, double_type, func_float_type, func_double_type;
179 ST_FUNC void arm_init(struct TCCState *s)
181 float_type.t = VT_FLOAT;
182 double_type.t = VT_DOUBLE;
183 func_float_type.t = VT_FUNC;
184 func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
185 func_double_type.t = VT_FUNC;
186 func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
188 float_abi = s->float_abi;
189 #ifndef TCC_ARM_HARDFLOAT
190 tcc_warning("soft float ABI currently not supported: default to softfp");
191 #endif
193 #else
194 #define func_float_type func_old_type
195 #define func_double_type func_old_type
196 #define func_ldouble_type func_old_type
197 ST_FUNC void arm_init(struct TCCState *s)
199 #if !defined (TCC_ARM_VFP)
200 tcc_warning("Support for FPA is deprecated and will be removed in next"
201 " release");
202 #endif
203 #if !defined (TCC_ARM_EABI)
204 tcc_warning("Support for OABI is deprecated and will be removed in next"
205 " release");
206 #endif
208 #endif
210 static int two2mask(int a,int b) {
211 return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
214 static int regmask(int r) {
215 return reg_classes[r]&~(RC_INT|RC_FLOAT);
218 /******************************************************/
#if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
/* Return the path of the ELF interpreter matching the float ABI selected
   in 's'. */
char *default_elfinterp(struct TCCState *s)
{
    return s->float_abi == ARM_HARD_FLOAT ? "/lib/ld-linux-armhf.so.3"
                                          : "/lib/ld-linux.so.3";
}
#endif
230 void o(uint32_t i)
232 /* this is a good place to start adding big-endian support*/
233 int ind1;
235 ind1 = ind + 4;
236 if (!cur_text_section)
237 tcc_error("compiler error! This happens f.ex. if the compiler\n"
238 "can't evaluate constant expressions outside of a function.");
239 if (ind1 > cur_text_section->data_allocated)
240 section_realloc(cur_text_section, ind1);
241 cur_text_section->data[ind++] = i&255;
242 i>>=8;
243 cur_text_section->data[ind++] = i&255;
244 i>>=8;
245 cur_text_section->data[ind++] = i&255;
246 i>>=8;
247 cur_text_section->data[ind++] = i;
/* Try to encode constant 'c' as the immediate operand of the ARM
   data-processing instruction 'op'.

   An immediate is an 8-bit value rotated right by an even amount.  When 'c'
   itself does not fit, the complementary instruction is tried with the
   complementary constant: add <-> sub with -c, mov <-> mvn with ~c,
   and <-> bic with ~c.  A few all-ones cases are rewritten directly
   (eor/and/orr with 0xFFFFFFFF).

   Returns the complete instruction word, or 0 when no single instruction
   can express the constant. */
static uint32_t stuff_const(uint32_t op, uint32_t c)
{
  const uint32_t all_ones = ~(uint32_t)0;
  int try_neg = 0;
  uint32_t nc = 0, negop = 0;

  switch (op & 0x1F00000) {
  case 0x800000: /* add */
  case 0x400000: /* sub */
    try_neg = 1;
    negop = op ^ 0xC00000;
    nc = -c;
    break;
  case 0x1A00000: /* mov */
  case 0x1E00000: /* mvn */
    try_neg = 1;
    negop = op ^ 0x400000;
    nc = ~c;
    break;
  case 0x200000: /* xor */
    if (c == all_ones)
      return (op & 0xF010F000) | ((op >> 16) & 0xF) | 0x1E00000;
    break;
  case 0x0: /* and */
    if (c == all_ones)
      return (op & 0xF010F000) | ((op >> 16) & 0xF) | 0x1A00000;
    /* fall through: 'and' can be turned into 'bic' with ~c */
  case 0x1C00000: /* bic */
    try_neg = 1;
    negop = op ^ 0x1C00000;
    nc = ~c;
    break;
  case 0x1800000: /* orr */
    if (c == all_ones)
      return (op & 0xFFF0FFFF) | 0x1E00000;
    break;
  default:
    break;
  }
  do {
    uint32_t m;
    int i;

    if (c < 256) /* catch undefined <<32 */
      return op | c;
    for (i = 2; i < 32; i += 2) {
      /* 0xff rotated right by i; computed on an unsigned 32-bit value
         because shifting the int 0xff left by 24 or more overflows */
      m = ((uint32_t)0xff >> i) | ((uint32_t)0xff << (32 - i));
      if (!(c & ~m))
        return op | (i << 7) | (c << i) | (c >> (32 - i));
    }
    op = negop;
    c = nc;
  } while (try_neg--);
  return 0;
}
/* Emit 'op' (an add or sub, see the note below) with the arbitrary constant
   'v', using as many instructions as needed (at most four).

   Strategy, in order:
   1. a single instruction through stuff_const();
   2. two instructions, splitting v into two rotated 8-bit chunks;
   3. the same with the opposite operation (op ^ 0xC00000) and -v;
   4. three chunks of v;
   5. three chunks of -v with the opposite operation;
   6. otherwise four instructions, one per byte of v.

   a[i] is the mask 0xff rotated right by 2*i bits.  o2 is 'op' with its
   bits 12-15 copied into bits 16-19, and is used for every instruction
   after the first one of a sequence. */
303 //only add,sub
304 void stuff_const_harder(uint32_t op, uint32_t v) {
305 uint32_t x;
306 x=stuff_const(op,v);
307 if(x)
308 o(x);
309 else {
310 uint32_t a[16], nv, no, o2, n2;
311 int i,j,k;
312 a[0]=0xff;
313 o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
/* build the table of rotated byte masks */
314 for(i=1;i<16;i++)
315 a[i]=(a[i-1]>>2)|(a[i-1]<<30);
/* two chunks of v */
316 for(i=0;i<12;i++)
317 for(j=i<4?i+12:15;j>=i+4;j--)
318 if((v&(a[i]|a[j]))==v) {
319 o(stuff_const(op,v&a[i]));
320 o(stuff_const(o2,v&a[j]));
321 return;
/* two chunks of -v with the opposite operation */
323 no=op^0xC00000;
324 n2=o2^0xC00000;
325 nv=-v;
326 for(i=0;i<12;i++)
327 for(j=i<4?i+12:15;j>=i+4;j--)
328 if((nv&(a[i]|a[j]))==nv) {
329 o(stuff_const(no,nv&a[i]));
330 o(stuff_const(n2,nv&a[j]));
331 return;
/* three chunks of v */
333 for(i=0;i<8;i++)
334 for(j=i+4;j<12;j++)
335 for(k=i<4?i+12:15;k>=j+4;k--)
336 if((v&(a[i]|a[j]|a[k]))==v) {
337 o(stuff_const(op,v&a[i]));
338 o(stuff_const(o2,v&a[j]));
339 o(stuff_const(o2,v&a[k]));
340 return;
/* three chunks of -v with the opposite operation (no and nv are simply
   recomputed; they hold the same values as above) */
342 no=op^0xC00000;
343 nv=-v;
344 for(i=0;i<8;i++)
345 for(j=i+4;j<12;j++)
346 for(k=i<4?i+12:15;k>=j+4;k--)
347 if((nv&(a[i]|a[j]|a[k]))==nv) {
348 o(stuff_const(no,nv&a[i]));
349 o(stuff_const(n2,nv&a[j]));
350 o(stuff_const(n2,nv&a[k]));
351 return;
/* last resort: one instruction per byte of v */
353 o(stuff_const(op,v&a[0]));
354 o(stuff_const(o2,v&a[4]));
355 o(stuff_const(o2,v&a[8]));
356 o(stuff_const(o2,v&a[12]));
360 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
362 addr-=pos+8;
363 addr/=4;
364 if(addr>=0x1000000 || addr<-0x1000000) {
365 if(fail)
366 tcc_error("FIXME: function bigger than 32MB");
367 return 0;
369 return 0x0A000000|(addr&0xffffff);
372 int decbranch(int pos)
374 int x;
375 x=*(uint32_t *)(cur_text_section->data + pos);
376 x&=0x00ffffff;
377 if(x&0x800000)
378 x-=0x1000000;
379 return x*4+pos+8;
382 /* output a symbol and patch all calls to it */
383 void gsym_addr(int t, int a)
385 uint32_t *x;
386 int lt;
387 while(t) {
388 x=(uint32_t *)(cur_text_section->data + t);
389 t=decbranch(lt=t);
390 if(a==lt+4)
391 *x=0xE1A00000; // nop
392 else {
393 *x &= 0xff000000;
394 *x |= encbranch(lt,a,1);
399 void gsym(int t)
401 gsym_addr(t, ind);
404 #ifdef TCC_ARM_VFP
405 static uint32_t vfpr(int r)
407 if(r<TREG_F0 || r>TREG_F7)
408 tcc_error("compiler error! register %i is no vfp register",r);
409 return r-5;
411 #else
412 static uint32_t fpr(int r)
414 if(r<TREG_F0 || r>TREG_F3)
415 tcc_error("compiler error! register %i is no fpa register",r);
416 return r-5;
418 #endif
/* Translate allocator register 'r' into an ARM core register number:
   0..3 map to themselves, 4 (TREG_R12) maps to r12, and 14 is accepted
   unchanged (it is used as a scratch register by this file).  Anything
   else is a compiler error. */
static uint32_t intr(int r)
{
  switch (r) {
  case 4:
    return 12;
  case 0:
  case 1:
  case 2:
  case 3:
  case 14:
    return r;
  default:
    tcc_error("compiler error! register %i is no int register",r);
    return r;
  }
}
/* Make the address base +/- offset usable by a load/store whose offset
   field is limited to 'maxoff' and must be a multiple of 1 << shift.

   When *off already fits nothing is emitted.  Otherwise the part that does
   not fit is added to (or subtracted from, if *sgn) *base into lr, *base
   becomes lr (14) and *off / *sgn are updated to the remaining offset. */
static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
{
  uint32_t insn, enc;
  int rounded;

  if (!(*off > maxoff || *off & ((1 << shift) - 1)))
    return;

  insn = *sgn ? 0xE240E000 : 0xE280E000; /* sub / add lr, base, #... */
  insn |= (*base) << 16;
  *base = 14; /* lr */

  /* first choice: move the high part of the offset into lr */
  enc = stuff_const(insn, *off & ~maxoff);
  if (enc) {
    o(enc);
    *off &= maxoff;
    return;
  }

  /* second choice: overshoot to the next boundary and flip the sign of the
     remaining offset */
  rounded = (*off + maxoff) & ~maxoff;
  enc = stuff_const(insn, rounded);
  if (enc) {
    o(enc);
    *sgn = !*sgn;
    *off = rounded - *off;
    return;
  }

  /* otherwise use a multi-instruction sequence for the high part */
  stuff_const_harder(insn, *off & ~maxoff);
  *off &= maxoff;
}
456 static uint32_t mapcc(int cc)
458 switch(cc)
460 case TOK_ULT:
461 return 0x30000000; /* CC/LO */
462 case TOK_UGE:
463 return 0x20000000; /* CS/HS */
464 case TOK_EQ:
465 return 0x00000000; /* EQ */
466 case TOK_NE:
467 return 0x10000000; /* NE */
468 case TOK_ULE:
469 return 0x90000000; /* LS */
470 case TOK_UGT:
471 return 0x80000000; /* HI */
472 case TOK_Nset:
473 return 0x40000000; /* MI */
474 case TOK_Nclear:
475 return 0x50000000; /* PL */
476 case TOK_LT:
477 return 0xB0000000; /* LT */
478 case TOK_GE:
479 return 0xA0000000; /* GE */
480 case TOK_LE:
481 return 0xD0000000; /* LE */
482 case TOK_GT:
483 return 0xC0000000; /* GT */
485 tcc_error("unexpected condition code");
486 return 0xE0000000; /* AL */
489 static int negcc(int cc)
491 switch(cc)
493 case TOK_ULT:
494 return TOK_UGE;
495 case TOK_UGE:
496 return TOK_ULT;
497 case TOK_EQ:
498 return TOK_NE;
499 case TOK_NE:
500 return TOK_EQ;
501 case TOK_ULE:
502 return TOK_UGT;
503 case TOK_UGT:
504 return TOK_ULE;
505 case TOK_Nset:
506 return TOK_Nclear;
507 case TOK_Nclear:
508 return TOK_Nset;
509 case TOK_LT:
510 return TOK_GE;
511 case TOK_GE:
512 return TOK_LT;
513 case TOK_LE:
514 return TOK_GT;
515 case TOK_GT:
516 return TOK_LE;
518 tcc_error("unexpected condition code");
519 return TOK_NE;
/* Emit the code that brings the value described by 'sv' into register 'r'.

   r:  destination register (a TREG_* index; lr, 14, is used for the
       internal recursive calls)
   sv: value to load: an lvalue (memory operand), a constant or symbol
       address, a frame-relative address, a comparison result, a jump
       chain or another register.

   Unsupported combinations end in tcc_error("load unimplemented!"). */
522 /* load 'r' from value 'sv' */
523 void load(int r, SValue *sv)
525 int v, ft, fc, fr, sign;
526 uint32_t op;
527 SValue v1;
529 fr = sv->r;
530 ft = sv->type.t;
531 fc = sv->c.ul;
/* split the constant into a magnitude (fc) and a sign flag */
533 if(fc>=0)
534 sign=0;
535 else {
536 sign=1;
537 fc=-fc;
540 v = fr & VT_VALMASK;
541 if (fr & VT_LVAL) {
/* memory operand: work out the base register first */
542 uint32_t base = 0xB; // fp
543 if(v == VT_LLOCAL) {
/* the address itself is stored in a local slot: fetch it into lr, then
   dereference lr with a zero offset */
544 v1.type.t = VT_PTR;
545 v1.r = VT_LOCAL | VT_LVAL;
546 v1.c.ul = sv->c.ul;
547 load(base=14 /* lr */, &v1);
548 fc=sign=0;
549 v=VT_LOCAL;
550 } else if(v == VT_CONST) {
/* absolute or symbol address: materialize it in lr, then dereference */
551 v1.type.t = VT_PTR;
552 v1.r = fr&~VT_LVAL;
553 v1.c.ul = sv->c.ul;
554 v1.sym=sv->sym;
555 load(base=14, &v1);
556 fc=sign=0;
557 v=VT_LOCAL;
558 } else if(v < VT_CONST) {
/* the address is already held in register v */
559 base=intr(v);
560 fc=sign=0;
561 v=VT_LOCAL;
563 if(v == VT_LOCAL) {
564 if(is_float(ft)) {
/* floating point load: offset limited to 1020 and word aligned; the
   instruction stores the offset divided by 4 */
565 calcaddr(&base,&fc,&sign,1020,2);
566 #ifdef TCC_ARM_VFP
567 op=0xED100A00; /* flds */
568 if(!sign)
569 op|=0x800000;
570 if ((ft & VT_BTYPE) != VT_FLOAT)
571 op|=0x100; /* flds -> fldd */
572 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
573 #else
574 op=0xED100100;
575 if(!sign)
576 op|=0x800000;
577 #if LDOUBLE_SIZE == 8
578 if ((ft & VT_BTYPE) != VT_FLOAT)
579 op|=0x8000;
580 #else
581 if ((ft & VT_BTYPE) == VT_DOUBLE)
582 op|=0x8000;
583 else if ((ft & VT_BTYPE) == VT_LDOUBLE)
584 op|=0x400000;
585 #endif
586 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
587 #endif
588 } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
589 || (ft & VT_BTYPE) == VT_SHORT) {
/* signed byte and (un)signed halfword loads: 8-bit offset, stored as two
   nibbles in the instruction; 0x20 is set for shorts, 0x40 for signed
   types */
590 calcaddr(&base,&fc,&sign,255,0);
591 op=0xE1500090;
592 if ((ft & VT_BTYPE) == VT_SHORT)
593 op|=0x20;
594 if ((ft & VT_UNSIGNED) == 0)
595 op|=0x40;
596 if(!sign)
597 op|=0x800000;
598 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
599 } else {
/* word load, or byte load (0x400000) for the remaining byte/bool types:
   12-bit offset */
600 calcaddr(&base,&fc,&sign,4095,0);
601 op=0xE5100000;
602 if(!sign)
603 op|=0x800000;
604 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
605 op|=0x400000;
606 o(op|(intr(r)<<12)|fc|(base<<16));
608 return;
610 } else {
611 if (v == VT_CONST) {
/* constant: one immediate instruction when stuff_const() can encode it and
   no symbol is involved; otherwise the 32-bit value is placed inline in the
   code, read through pc and jumped over (the inline word gets an
   R_ARM_ABS32 relocation when a symbol is attached) */
612 op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
613 if (fr & VT_SYM || !op) {
614 o(0xE59F0000|(intr(r)<<12));
615 o(0xEA000000);
616 if(fr & VT_SYM)
617 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
618 o(sv->c.ul);
619 } else
620 o(op);
621 return;
622 } else if (v == VT_LOCAL) {
/* address of a frame-relative object: fp + constant, either as one
   immediate add or as an inline constant followed by an add of fp */
623 op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
624 if (fr & VT_SYM || !op) {
625 o(0xE59F0000|(intr(r)<<12));
626 o(0xEA000000);
627 if(fr & VT_SYM) // needed ?
628 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
629 o(sv->c.ul);
630 o(0xE08B0000|(intr(r)<<12)|intr(r));
631 } else
632 o(op);
633 return;
634 } else if(v == VT_CMP) {
/* comparison result held in the flags: two conditional moves, #1 under the
   condition and #0 under its negation */
635 o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
636 o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
637 return;
638 } else if (v == VT_JMP || v == VT_JMPI) {
/* pending jump chain: the fall-through path loads t and branches over the
   next instruction; the jump chain is resolved to that instruction, which
   loads t^1 */
639 int t;
640 t = v & 1;
641 o(0xE3A00000|(intr(r)<<12)|t);
642 o(0xEA000000);
643 gsym(sv->c.ul);
644 o(0xE3A00000|(intr(r)<<12)|(t^1));
645 return;
646 } else if (v < VT_CONST) {
/* register to register copy */
647 if(is_float(ft))
648 #ifdef TCC_ARM_VFP
649 o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
650 #else
651 o(0xEE008180|(fpr(r)<<12)|fpr(v));
652 #endif
653 else
654 o(0xE1A00000|(intr(r)<<12)|intr(v));
655 return;
658 tcc_error("load unimplemented!");
/* Emit the code that writes register 'r' to the memory location described
   by 'sv'.

   r:  source register (a TREG_* index)
   sv: destination: an lvalue whose address is frame relative, held in a
       register, or a constant / symbol address.

   Unsupported destinations end in tcc_error("store unimplemented"). */
661 /* store register 'r' in lvalue 'v' */
662 void store(int r, SValue *sv)
664 SValue v1;
665 int v, ft, fc, fr, sign;
666 uint32_t op;
668 fr = sv->r;
669 ft = sv->type.t;
670 fc = sv->c.ul;
/* split the constant into a magnitude (fc) and a sign flag */
672 if(fc>=0)
673 sign=0;
674 else {
675 sign=1;
676 fc=-fc;
679 v = fr & VT_VALMASK;
680 if (fr & VT_LVAL || fr == VT_LOCAL) {
/* default base register is 0xb (fp, as noted in load()) */
681 uint32_t base = 0xb;
682 if(v < VT_CONST) {
/* the address is held in register v */
683 base=intr(v);
684 v=VT_LOCAL;
685 fc=sign=0;
686 } else if(v == VT_CONST) {
/* constant or symbol address: materialize it in lr (14) through load() */
687 v1.type.t = ft;
688 v1.r = fr&~VT_LVAL;
689 v1.c.ul = sv->c.ul;
690 v1.sym=sv->sym;
691 load(base=14, &v1);
692 fc=sign=0;
693 v=VT_LOCAL;
695 if(v == VT_LOCAL) {
696 if(is_float(ft)) {
/* floating point store: offset limited to 1020 and word aligned; the
   instruction stores the offset divided by 4 */
697 calcaddr(&base,&fc,&sign,1020,2);
698 #ifdef TCC_ARM_VFP
699 op=0xED000A00; /* fsts */
700 if(!sign)
701 op|=0x800000;
702 if ((ft & VT_BTYPE) != VT_FLOAT)
703 op|=0x100; /* fsts -> fstd */
704 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
705 #else
706 op=0xED000100;
707 if(!sign)
708 op|=0x800000;
709 #if LDOUBLE_SIZE == 8
710 if ((ft & VT_BTYPE) != VT_FLOAT)
711 op|=0x8000;
712 #else
713 if ((ft & VT_BTYPE) == VT_DOUBLE)
714 op|=0x8000;
715 if ((ft & VT_BTYPE) == VT_LDOUBLE)
716 op|=0x400000;
717 #endif
718 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
719 #endif
720 return;
721 } else if((ft & VT_BTYPE) == VT_SHORT) {
/* halfword store: 8-bit offset stored as two nibbles */
722 calcaddr(&base,&fc,&sign,255,0);
723 op=0xE14000B0;
724 if(!sign)
725 op|=0x800000;
726 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
727 } else {
/* word store, or byte store (0x400000) for byte and bool types: 12-bit
   offset */
728 calcaddr(&base,&fc,&sign,4095,0);
729 op=0xE5000000;
730 if(!sign)
731 op|=0x800000;
732 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
733 op|=0x400000;
734 o(op|(intr(r)<<12)|fc|(base<<16));
736 return;
739 tcc_error("store unimplemented");
/* Emit code adding the (possibly negative) constant 'val' to the stack
   pointer. */
static void gadd_sp(int val)
{
  const uint32_t add_sp_sp = 0xE28DD000; /* add sp, sp, #... */

  stuff_const_harder(add_sp_sp, val);
}
747 /* 'is_jmp' is '1' if it is a jump */
748 static void gcall_or_jmp(int is_jmp)
750 int r;
751 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
752 uint32_t x;
753 /* constant case */
754 x=encbranch(ind,ind+vtop->c.ul,0);
755 if(x) {
756 if (vtop->r & VT_SYM) {
757 /* relocation case */
758 greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
759 } else
760 put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
761 o(x|(is_jmp?0xE0000000:0xE1000000));
762 } else {
763 if(!is_jmp)
764 o(0xE28FE004); // add lr,pc,#4
765 o(0xE51FF004); // ldr pc,[pc,#-4]
766 if (vtop->r & VT_SYM)
767 greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
768 o(vtop->c.ul);
770 } else {
771 /* otherwise, indirect call */
772 r = gv(RC_INT);
773 if(!is_jmp)
774 o(0xE1A0E00F); // mov lr,pc
775 o(0xE1A0F000|intr(r)); // mov pc,r
779 /* Return whether a structure is an homogeneous float aggregate or not.
780 The answer is true if all the elements of the structure are of the same
781 primitive float type and there is less than 4 elements.
783 type: the type corresponding to the structure to be tested */
784 static int is_hgen_float_aggr(CType *type)
786 if ((type->t & VT_BTYPE) == VT_STRUCT) {
787 struct Sym *ref;
788 int btype, nb_fields = 0;
790 ref = type->ref->next;
791 btype = ref->type.t & VT_BTYPE;
792 if (btype == VT_FLOAT || btype == VT_DOUBLE) {
793 for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
794 return !ref && nb_fields <= 4;
797 return 0;
/* Bookkeeping of the single precision VFP registers (s0-s15) while
   parameters are assigned to them. */
struct avail_regs {
  /* Registers skipped to satisfy a double alignment ("holes").  Entries are
     never reused, and up to 4 holes can be recorded over the 16 registers
     (e.g. parameters float, double, float, float, double, ... create holes
     s1, s5, s9 and s13), hence 4 slots. */
  signed char avail[4];
  int first_hole; /* first available hole */
  int last_hole; /* last available hole (none if equal to first_hole) */
  int first_free_reg; /* next free register in the sequence, hole excluded */
};

#define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0, 0}, 0, 0, 0 }

/* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
   param) according to the rules described in the procedure call standard for
   the ARM architecture (AAPCS). If found, the registers are assigned to this
   VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
   and the parameter is a single float.

   avregs: opaque structure to keep track of available VFP co-processor regs
   align: alignment constraints for the param, as returned by type_size()
   size: size of the parameter, as returned by type_size()

   Returns the number of the first single precision register assigned, or -1
   when the parameter does not fit; after a failure every later request
   fails as well. */
int assign_vfpreg(struct avail_regs *avregs, int align, int size)
{
  int first_reg = 0;

  if (avregs->first_free_reg == -1)
    return -1;
  if (align >> 3) { /* double alignment */
    first_reg = avregs->first_free_reg;
    /* alignment constraint not respected so use next reg and record hole */
    if (first_reg & 1)
      avregs->avail[avregs->last_hole++] = first_reg++;
  } else { /* no special alignment (float or array of float) */
    /* if single float and a hole is available, assign the param to it */
    if (size == 4 && avregs->first_hole != avregs->last_hole)
      return avregs->avail[avregs->first_hole++];
    first_reg = avregs->first_free_reg;
  }
  if (first_reg + size / 4 <= 16) {
    avregs->first_free_reg = first_reg + size / 4;
    return first_reg;
  }
  avregs->first_free_reg = -1;
  return -1;
}
844 /* Returns whether all params need to be passed in core registers or not.
845 This is the case for function part of the runtime ABI. */
846 int floats_in_core_regs(SValue *sval)
848 if (!sval->sym)
849 return 0;
851 switch (sval->sym->v) {
852 case TOK___floatundisf:
853 case TOK___floatundidf:
854 case TOK___fixunssfdi:
855 case TOK___fixunsdfdi:
856 #ifndef TCC_ARM_VFP
857 case TOK___fixunsxfdi:
858 #endif
859 case TOK___floatdisf:
860 case TOK___floatdidf:
861 case TOK___fixsfdi:
862 case TOK___fixdfdi:
863 return 1;
865 default:
866 return 0;
870 /* Return the number of registers needed to return the struct, or 0 if
871 returning via struct pointer. */
872 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) {
873 #ifdef TCC_ARM_EABI
874 int size, align;
875 size = type_size(vt, &align);
876 if (float_abi == ARM_HARD_FLOAT && !variadic &&
877 (is_float(vt->t) || is_hgen_float_aggr(vt))) {
878 *ret_align = 8;
879 *regsize = 8;
880 ret->ref = NULL;
881 ret->t = VT_DOUBLE;
882 return (size + 7) >> 3;
883 } else if (size <= 4) {
884 *ret_align = 4;
885 *regsize = 4;
886 ret->ref = NULL;
887 ret->t = VT_INT;
888 return 1;
889 } else
890 return 0;
891 #else
892 return 0;
893 #endif
/* Parameters are classified according to how they are copied to their final
   destination for the function call.  copy_params() handles the classes one
   after the other in the numerical order of this enum, so the enumerators
   must respect the following constraints:
   - CORE_STRUCT_CLASS must come after STACK_CLASS;
   - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
     VFP_STRUCT_CLASS;
   - VFP_STRUCT_CLASS must come after VFP_CLASS.
   See the comment for the main loop in copy_params() for the reason. */
enum reg_class {
  STACK_CLASS       = 0,
  CORE_STRUCT_CLASS = 1,
  VFP_CLASS         = 2,
  VFP_STRUCT_CLASS  = 3,
  CORE_CLASS        = 4,
  NB_CLASSES        = 5
};
914 struct param_plan {
915 int start; /* first reg or addr used depending on the class */
916 int end; /* last reg used or next free addr depending on the class */
917 SValue *sval; /* pointer to SValue on the value stack */
918 struct param_plan *prev; /* previous element in this class */
921 struct plan {
922 struct param_plan *pplans; /* array of all the param plans */
923 struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
/* add_param_plan(plan, pplan, class): append the param plan 'pplan' to the
   array plan->pplans and make it the head of the list of class 'class'.

   The previous head of the class becomes pplan.prev, so each per class list
   is walked from the most recently added plan to the oldest one.

   The macro pastes "_nb" to its first argument: the caller must have an
   integer variable named <plan>_nb (plan_nb in assign_regs()) that holds
   the number of entries already stored in the array; it is incremented. */
926 #define add_param_plan(plan,pplan,class) \
927 do { \
928 pplan.prev = plan->clsplans[class]; \
929 plan->pplans[plan ## _nb] = pplan; \
930 plan->clsplans[class] = &plan->pplans[plan ## _nb++]; \
931 } while(0)
933 /* Assign parameters to registers and stack with alignment according to the
934 rules in the procedure call standard for the ARM architecture (AAPCS).
935 The overall assignment is recorded in an array of per parameter structures
936 called parameter plans. The parameter plans are also further organized in a
937 number of linked lists, one per class of parameter (see the comment for the
938 definition of enum reg_class).
940 nb_args: number of parameters of the function for which a call is generated
941 float_abi: float ABI in use for this function call
942 plan: the structure where the overall assignment is recorded
943 todo: a bitmap that records which core registers hold a parameter
945 Returns the amount of stack space needed for parameter passing
947 Note: this function allocates an array in plan->pplans with tcc_malloc. It
948 is the responsibility of the caller to free this array once used (ie not
949 before copy_params). */
950 static int assign_regs(int nb_args, int float_abi, struct plan *plan, int *todo)
952 int i, size, align;
953 int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
/* plan_nb is the entry counter that add_param_plan() reaches through
   token pasting (plan ## _nb) */
954 int plan_nb = 0;
955 struct param_plan pplan;
956 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
958 ncrn = nsaa = 0;
959 *todo = 0;
960 plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
961 memset(plan->clsplans, 0, sizeof(plan->clsplans));
/* i runs from nb_args-1 down to 0, i.e. vtop[-i] goes from the deepest
   parameter on the value stack to the one on top */
962 for(i = nb_args; i-- ;) {
963 int j, start_vfpreg = 0;
964 CType type = vtop[-i].type;
965 type.t &= ~VT_ARRAY;
966 size = type_size(&type, &align);
/* size and alignment are rounded up to a multiple of 4 bytes */
967 size = (size + 3) & ~3;
968 align = (align + 3) & ~3;
969 switch(vtop[-i].type.t & VT_BTYPE) {
970 case VT_STRUCT:
971 case VT_FLOAT:
972 case VT_DOUBLE:
973 case VT_LDOUBLE:
974 if (float_abi == ARM_HARD_FLOAT) {
975 int is_hfa = 0; /* Homogeneous float aggregate */
977 if (is_float(vtop[-i].type.t)
978 || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
979 int end_vfpreg;
/* floats and homogeneous float aggregates go to VFP registers when some
   are left; otherwise 'break' sends the parameter to the stack (code after
   the switch) */
981 start_vfpreg = assign_vfpreg(&avregs, align, size);
982 end_vfpreg = start_vfpreg + ((size - 1) >> 2);
983 if (start_vfpreg >= 0) {
984 pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
985 if (is_hfa)
986 add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
987 else
988 add_param_plan(plan, pplan, VFP_CLASS);
989 continue;
990 } else
991 break;
/* core registers: advance ncrn to the parameter's alignment expressed in
   registers */
994 ncrn = (ncrn + (align-1)/4) & ~((align/4) - 1);
995 if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
996 /* The parameter is allocated both in core register and on stack. As
997 * such, it can be of either class: it would either be the last of
998 * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
999 for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
1000 *todo|=(1<<j);
1001 pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
1002 add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
1003 ncrn += size/4;
/* the part that did not fit in r0-r3 occupies the start of the stack area */
1004 if (ncrn > 4)
1005 nsaa = (ncrn - 4) * 4;
1006 } else {
/* no core register can be used any more for the following parameters */
1007 ncrn = 4;
1008 break;
1010 continue;
1011 default:
1012 if (ncrn < 4) {
1013 int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
/* a long long starts on an even register number and takes two registers;
   if none is left it goes to the stack */
1015 if (is_long) {
1016 ncrn = (ncrn + 1) & -2;
1017 if (ncrn == 4)
1018 break;
1020 pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
1021 ncrn++;
1022 if (is_long)
1023 pplan.end = ncrn++;
1024 add_param_plan(plan, pplan, CORE_CLASS);
1025 continue;
/* reached through 'break': the parameter is passed on the stack, at the
   next address that satisfies its alignment */
1028 nsaa = (nsaa + (align - 1)) & ~(align - 1);
1029 pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
1030 add_param_plan(plan, pplan, STACK_CLASS);
1031 nsaa += size; /* size already rounded up before */
1033 return nsaa;
1036 #undef add_param_plan
1038 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1039 function call.
1041 nb_args: number of parameters the function take
1042 plan: the overall assignment plan for parameters
1043 todo: a bitmap indicating what core reg will hold a parameter
1045 Returns the number of SValue added by this function on the value stack */
1046 static int copy_params(int nb_args, struct plan *plan, int todo)
1048 int size, align, r, i, nb_extra_sval = 0;
1049 struct param_plan *pplan;
1051 /* Several constraints require parameters to be copied in a specific order:
1052 - structures are copied to the stack before being loaded in a reg;
1053 - floats loaded to an odd numbered VFP reg are first copied to the
1054 preceding even numbered VFP reg and then moved to the next VFP reg.
1056 It is thus important that:
1057 - structures assigned to core regs must be copied after parameters
1058 assigned to the stack but before structures assigned to VFP regs because
1059 a structure can lie partly in core registers and partly on the stack;
1060 - parameters assigned to the stack and all structures be copied before
1061 parameters assigned to a core reg since copying a parameter to the stack
1062 require using a core reg;
1063 - parameters assigned to VFP regs be copied before structures assigned to
1064 VFP regs as the copy might use an even numbered VFP reg that already
1065 holds part of a structure. */
/* classes are processed in the numerical order of enum reg_class; inside a
   class the list is walked from the most recently added plan backwards */
1066 for(i = 0; i < NB_CLASSES; i++) {
1067 for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
/* work on a copy pushed on top of the value stack; the copy is dropped by
   the vtop-- that ends the iteration */
1068 vpushv(pplan->sval);
1069 pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1070 switch(i) {
1071 case STACK_CLASS:
1072 case CORE_STRUCT_CLASS:
1073 case VFP_STRUCT_CLASS:
1074 if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1075 int padding = 0;
1076 size = type_size(&pplan->sval->type, &align);
1077 /* align to stack align size */
1078 size = (size + 3) & ~3;
/* padding: gap between this stack parameter and the one handled next */
1079 if (i == STACK_CLASS && pplan->prev)
1080 padding = pplan->start - pplan->prev->end;
1081 size += padding; /* Add padding if any */
1082 /* allocate the necessary size on stack */
1083 gadd_sp(-size);
1084 /* generate structure store */
1085 r = get_reg(RC_INT);
1086 o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1087 vset(&vtop->type, r | VT_LVAL, 0);
1088 vswap();
1089 vstore(); /* memcpy to current sp + potential padding */
1091 /* Homogeneous float aggregate are loaded to VFP registers
1092 immediately since there is no way of loading data in multiple
1093 non consecutive VFP registers as what is done for other
1094 structures (see the use of todo). */
1095 if (i == VFP_STRUCT_CLASS) {
1096 int first = pplan->start, nb = pplan->end - first + 1;
1097 /* vpop.32 {pplan->start, ..., pplan->end} */
1098 o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1099 /* No need to write the register used to a SValue since VFP regs
1100 cannot be used for gcall_or_jmp */
1102 } else {
/* non-structure value pushed on the stack */
1103 if (is_float(pplan->sval->type.t)) {
1104 #ifdef TCC_ARM_VFP
1105 r = vfpr(gv(RC_FLOAT)) << 12;
1106 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1107 size = 4;
1108 else {
1109 size = 8;
1110 r |= 0x101; /* vpush.32 -> vpush.64 */
1112 o(0xED2D0A01 + r); /* vpush */
1113 #else
1114 r = fpr(gv(RC_FLOAT)) << 12;
1115 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1116 size = 4;
1117 else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1118 size = 8;
1119 else
1120 size = LDOUBLE_SIZE;
1122 if (size == 12)
1123 r |= 0x400000;
1124 else if(size == 8)
1125 r|=0x8000;
1127 o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1128 #endif
1129 } else {
1130 /* simple type (currently always same size) */
1131 /* XXX: implicit cast ? */
1132 size=4;
/* a long long is expanded into two words that are pushed one after the
   other */
1133 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1134 lexpand_nr();
1135 size = 8;
1136 r = gv(RC_INT);
1137 o(0xE52D0004|(intr(r)<<12)); /* push r */
1138 vtop--;
1140 r = gv(RC_INT);
1141 o(0xE52D0004|(intr(r)<<12)); /* push r */
1143 if (i == STACK_CLASS && pplan->prev)
1144 gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1146 break;
1148 case VFP_CLASS:
/* load the value in the double register that contains the target single
   register (pplan->start counts single precision registers) */
1149 gv(regmask(TREG_F0 + (pplan->start >> 1)));
1150 if (pplan->start & 1) { /* Must be in upper part of double register */
1151 o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1152 vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1154 break;
1156 case CORE_CLASS:
/* a long long takes two registers: the second word goes to register
   pplan->end and is recorded in r2 of the original entry */
1157 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1158 lexpand_nr();
1159 gv(regmask(pplan->end));
1160 pplan->sval->r2 = vtop->r;
1161 vtop--;
1163 gv(regmask(pplan->start));
1164 /* Mark register as used so that gcall_or_jmp use another one
1165 (regs >=4 are free as never used to pass parameters) */
1166 pplan->sval->r = vtop->r;
1167 break;
1169 vtop--;
1173 /* Manually free remaining registers since next parameters are loaded
1174 * manually, without the help of gv(int). */
1175 save_regs(nb_args);
/* the structures of CORE_STRUCT_CLASS were copied to the stack above: pop
   the words that belong in core registers */
1177 if(todo) {
1178 o(0xE8BD0000|todo); /* pop {todo} */
1179 for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1180 int r;
1181 pplan->sval->r = pplan->start;
1182 /* An SValue can only pin 2 registers at best (r and r2) but a structure
1183 can occupy more than 2 registers. Thus, we need to push on the value
1184 stack some fake parameter to have on SValue for each registers used
1185 by a structure (r2 is not used). */
1186 for (r = pplan->start + 1; r <= pplan->end; r++) {
1187 if (todo & (1 << r)) {
1188 nb_extra_sval++;
1189 vpushi(0);
1190 vtop->r = r;
1195 return nb_extra_sval;
1198 /* Generate function call. The function address is pushed first, then
1199 all the parameters in call order. This functions pops all the
1200 parameters and the function address. */
1201 void gfunc_call(int nb_args)
1203 int r, args_size;
1204 int def_float_abi = float_abi;
1205 int todo;
1206 struct plan plan;
1208 #ifdef TCC_ARM_EABI
1209 int variadic;
1211 if (float_abi == ARM_HARD_FLOAT) {
1212 variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1213 if (variadic || floats_in_core_regs(&vtop[-nb_args]))
1214 float_abi = ARM_SOFTFP_FLOAT;
1216 #endif
1217 /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1218 VT_JMP anywhere except on the top of the stack because it would complicate
1219 the code generator. */
1220 r = vtop->r & VT_VALMASK;
1221 if (r == VT_CMP || (r & ~1) == VT_JMP)
1222 gv(RC_INT);
1224 args_size = assign_regs(nb_args, float_abi, &plan, &todo);
1226 #ifdef TCC_ARM_EABI
1227 if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1228 args_size = (args_size + 7) & ~7;
1229 o(0xE24DD004); /* sub sp, sp, #4 */
1231 #endif
1233 nb_args += copy_params(nb_args, &plan, todo);
1234 tcc_free(plan.pplans);
1236 /* Move fct SValue on top as required by gcall_or_jmp */
1237 vrotb(nb_args + 1);
1238 gcall_or_jmp(0);
1239 if (args_size)
1240 gadd_sp(args_size); /* pop all parameters passed on the stack */
1241 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1242 if(float_abi == ARM_SOFTFP_FLOAT && is_float(vtop->type.ref->type.t)) {
1243 if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1244 o(0xEE000A10); /*vmov s0, r0 */
1245 } else {
1246 o(0xEE000B10); /* vmov.32 d0[0], r0 */
1247 o(0xEE201B10); /* vmov.32 d0[1], r1 */
1250 #endif
1251 vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1252 leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1253 float_abi = def_float_abi;
1256 /* generate function prolog of type 't' */
1257 void gfunc_prolog(CType *func_type)
1259 Sym *sym,*sym2;
1260 int n, nf, size, align, rs, struct_ret = 0;
1261 int addr, pn, sn; /* pn=core, sn=stack */
1262 CType ret_type;
1264 #ifdef TCC_ARM_EABI
1265 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1266 #endif
1268 sym = func_type->ref;
1269 func_vt = sym->type;
1270 func_var = (func_type->ref->c == FUNC_ELLIPSIS);
1272 n = nf = 0;
1273 if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1274 !gfunc_sret(&func_vt, func_var, &ret_type, &align, &rs))
1276 n++;
1277 struct_ret = 1;
1278 func_vc = 12; /* Offset from fp of the place to store the result */
1280 for(sym2 = sym->next; sym2 && (n < 4 || nf < 16); sym2 = sym2->next) {
1281 size = type_size(&sym2->type, &align);
1282 #ifdef TCC_ARM_EABI
1283 if (float_abi == ARM_HARD_FLOAT && !func_var &&
1284 (is_float(sym2->type.t) || is_hgen_float_aggr(&sym2->type))) {
1285 int tmpnf = assign_vfpreg(&avregs, align, size);
1286 tmpnf += (size + 3) / 4;
1287 nf = (tmpnf > nf) ? tmpnf : nf;
1288 } else
1289 #endif
1290 if (n < 4)
1291 n += (size + 3) / 4;
1293 o(0xE1A0C00D); /* mov ip,sp */
1294 if (func_var)
1295 n=4;
1296 if (n) {
1297 if(n>4)
1298 n=4;
1299 #ifdef TCC_ARM_EABI
1300 n=(n+1)&-2;
1301 #endif
1302 o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
1304 if (nf) {
1305 if (nf>16)
1306 nf=16;
1307 nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1308 o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1310 o(0xE92D5800); /* save fp, ip, lr */
1311 o(0xE1A0B00D); /* mov fp, sp */
1312 func_sub_sp_offset = ind;
1313 o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1315 #ifdef TCC_ARM_EABI
1316 if (float_abi == ARM_HARD_FLOAT) {
1317 func_vc += nf * 4;
1318 avregs = AVAIL_REGS_INITIALIZER;
1320 #endif
1321 pn = struct_ret, sn = 0;
1322 while ((sym = sym->next)) {
1323 CType *type;
1324 type = &sym->type;
1325 size = type_size(type, &align);
1326 size = (size + 3) >> 2;
1327 align = (align + 3) & ~3;
1328 #ifdef TCC_ARM_EABI
1329 if (float_abi == ARM_HARD_FLOAT && !func_var && (is_float(sym->type.t)
1330 || is_hgen_float_aggr(&sym->type))) {
1331 int fpn = assign_vfpreg(&avregs, align, size << 2);
1332 if (fpn >= 0)
1333 addr = fpn * 4;
1334 else
1335 goto from_stack;
1336 } else
1337 #endif
1338 if (pn < 4) {
1339 #ifdef TCC_ARM_EABI
1340 pn = (pn + (align-1)/4) & -(align/4);
1341 #endif
1342 addr = (nf + pn) * 4;
1343 pn += size;
1344 if (!sn && pn > 4)
1345 sn = (pn - 4);
1346 } else {
1347 #ifdef TCC_ARM_EABI
1348 from_stack:
1349 sn = (sn + (align-1)/4) & -(align/4);
1350 #endif
1351 addr = (n + nf + sn) * 4;
1352 sn += size;
1354 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t),
1355 addr + 12);
1357 last_itod_magic=0;
1358 leaffunc = 1;
1359 loc = 0;
1362 /* generate function epilog */
1363 void gfunc_epilog(void)
1365 uint32_t x;
1366 int diff;
1367 /* Copy float return value to core register if base standard is used and
1368 float computation is made with VFP */
1369 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1370 if ((float_abi == ARM_SOFTFP_FLOAT || func_var) && is_float(func_vt.t)) {
1371 if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1372 o(0xEE100A10); /* fmrs r0, s0 */
1373 else {
1374 o(0xEE100B10); /* fmrdl r0, d0 */
1375 o(0xEE301B10); /* fmrdh r1, d0 */
1378 #endif
1379 o(0xE89BA800); /* restore fp, sp, pc */
1380 diff = (-loc + 3) & -4;
1381 #ifdef TCC_ARM_EABI
1382 if(!leaffunc)
1383 diff = ((diff + 11) & -8) - 4;
1384 #endif
1385 if(diff > 0) {
1386 x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1387 if(x)
1388 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1389 else {
1390 int addr;
1391 addr=ind;
1392 o(0xE59FC004); /* ldr ip,[pc+4] */
1393 o(0xE04BD00C); /* sub sp,fp,ip */
1394 o(0xE1A0F00E); /* mov pc,lr */
1395 o(diff);
1396 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1401 /* generate a jump to a label */
1402 int gjmp(int t)
1404 int r;
1405 r=ind;
1406 o(0xE0000000|encbranch(r,t,1));
1407 return r;
/* Emit a jump to the fixed address 'a'; the offset returned by gjmp() is
   not needed here */
void gjmp_addr(int a)
{
  (void)gjmp(a);
}
1416 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1417 int gtst(int inv, int t)
1419 int v, r;
1420 uint32_t op;
1421 v = vtop->r & VT_VALMASK;
1422 r=ind;
1423 if (v == VT_CMP) {
1424 op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1425 op|=encbranch(r,t,1);
1426 o(op);
1427 t=r;
1428 } else if (v == VT_JMP || v == VT_JMPI) {
1429 if ((v & 1) == inv) {
1430 if(!vtop->c.i)
1431 vtop->c.i=t;
1432 else {
1433 uint32_t *x;
1434 int p,lp;
1435 if(t) {
1436 p = vtop->c.i;
1437 do {
1438 p = decbranch(lp=p);
1439 } while(p);
1440 x = (uint32_t *)(cur_text_section->data + lp);
1441 *x &= 0xff000000;
1442 *x |= encbranch(lp,t,1);
1444 t = vtop->c.i;
1446 } else {
1447 t = gjmp(t);
1448 gsym(vtop->c.i);
1451 vtop--;
1452 return t;
1455 /* generate an integer binary operation */
1456 void gen_opi(int op)
1458 int c, func = 0;
1459 uint32_t opc = 0, r, fr;
1460 unsigned short retreg = REG_IRET;
1462 c=0;
1463 switch(op) {
1464 case '+':
1465 opc = 0x8;
1466 c=1;
1467 break;
1468 case TOK_ADDC1: /* add with carry generation */
1469 opc = 0x9;
1470 c=1;
1471 break;
1472 case '-':
1473 opc = 0x4;
1474 c=1;
1475 break;
1476 case TOK_SUBC1: /* sub with carry generation */
1477 opc = 0x5;
1478 c=1;
1479 break;
1480 case TOK_ADDC2: /* add with carry use */
1481 opc = 0xA;
1482 c=1;
1483 break;
1484 case TOK_SUBC2: /* sub with carry use */
1485 opc = 0xC;
1486 c=1;
1487 break;
1488 case '&':
1489 opc = 0x0;
1490 c=1;
1491 break;
1492 case '^':
1493 opc = 0x2;
1494 c=1;
1495 break;
1496 case '|':
1497 opc = 0x18;
1498 c=1;
1499 break;
1500 case '*':
1501 gv2(RC_INT, RC_INT);
1502 r = vtop[-1].r;
1503 fr = vtop[0].r;
1504 vtop--;
1505 o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1506 return;
1507 case TOK_SHL:
1508 opc = 0;
1509 c=2;
1510 break;
1511 case TOK_SHR:
1512 opc = 1;
1513 c=2;
1514 break;
1515 case TOK_SAR:
1516 opc = 2;
1517 c=2;
1518 break;
1519 case '/':
1520 case TOK_PDIV:
1521 func=TOK___divsi3;
1522 c=3;
1523 break;
1524 case TOK_UDIV:
1525 func=TOK___udivsi3;
1526 c=3;
1527 break;
1528 case '%':
1529 #ifdef TCC_ARM_EABI
1530 func=TOK___aeabi_idivmod;
1531 retreg=REG_LRET;
1532 #else
1533 func=TOK___modsi3;
1534 #endif
1535 c=3;
1536 break;
1537 case TOK_UMOD:
1538 #ifdef TCC_ARM_EABI
1539 func=TOK___aeabi_uidivmod;
1540 retreg=REG_LRET;
1541 #else
1542 func=TOK___umodsi3;
1543 #endif
1544 c=3;
1545 break;
1546 case TOK_UMULL:
1547 gv2(RC_INT, RC_INT);
1548 r=intr(vtop[-1].r2=get_reg(RC_INT));
1549 c=vtop[-1].r;
1550 vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1551 vtop--;
1552 o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1553 return;
1554 default:
1555 opc = 0x15;
1556 c=1;
1557 break;
1559 switch(c) {
1560 case 1:
1561 if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1562 if(opc == 4 || opc == 5 || opc == 0xc) {
1563 vswap();
1564 opc|=2; // sub -> rsb
1567 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1568 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1569 gv(RC_INT);
1570 vswap();
1571 c=intr(gv(RC_INT));
1572 vswap();
1573 opc=0xE0000000|(opc<<20)|(c<<16);
1574 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1575 uint32_t x;
1576 x=stuff_const(opc|0x2000000,vtop->c.i);
1577 if(x) {
1578 r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1579 o(x|(r<<12));
1580 goto done;
1583 fr=intr(gv(RC_INT));
1584 r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1585 o(opc|(r<<12)|fr);
1586 done:
1587 vtop--;
1588 if (op >= TOK_ULT && op <= TOK_GT) {
1589 vtop->r = VT_CMP;
1590 vtop->c.i = op;
1592 break;
1593 case 2:
1594 opc=0xE1A00000|(opc<<5);
1595 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1596 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1597 gv(RC_INT);
1598 vswap();
1599 r=intr(gv(RC_INT));
1600 vswap();
1601 opc|=r;
1602 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1603 fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1604 c = vtop->c.i & 0x1f;
1605 o(opc|(c<<7)|(fr<<12));
1606 } else {
1607 fr=intr(gv(RC_INT));
1608 c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1609 o(opc|(c<<12)|(fr<<8)|0x10);
1611 vtop--;
1612 break;
1613 case 3:
1614 vpush_global_sym(&func_old_type, func);
1615 vrott(3);
1616 gfunc_call(2);
1617 vpushi(0);
1618 vtop->r = retreg;
1619 break;
1620 default:
1621 tcc_error("gen_opi %i unimplemented!",op);
1625 #ifdef TCC_ARM_VFP
1626 static int is_zero(int i)
1628 if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1629 return 0;
1630 if (vtop[i].type.t == VT_FLOAT)
1631 return (vtop[i].c.f == 0.f);
1632 else if (vtop[i].type.t == VT_DOUBLE)
1633 return (vtop[i].c.d == 0.0);
1634 return (vtop[i].c.ld == 0.l);
1637 /* generate a floating point operation 'v = t1 op t2' instruction. The
1638 * two operands are guaranted to have the same floating point type */
1639 void gen_opf(int op)
1641 uint32_t x;
1642 int fneg=0,r;
1643 x=0xEE000A00|T2CPR(vtop->type.t);
1644 switch(op) {
1645 case '+':
1646 if(is_zero(-1))
1647 vswap();
1648 if(is_zero(0)) {
1649 vtop--;
1650 return;
1652 x|=0x300000;
1653 break;
1654 case '-':
1655 x|=0x300040;
1656 if(is_zero(0)) {
1657 vtop--;
1658 return;
1660 if(is_zero(-1)) {
1661 x|=0x810000; /* fsubX -> fnegX */
1662 vswap();
1663 vtop--;
1664 fneg=1;
1666 break;
1667 case '*':
1668 x|=0x200000;
1669 break;
1670 case '/':
1671 x|=0x800000;
1672 break;
1673 default:
1674 if(op < TOK_ULT || op > TOK_GT) {
1675 tcc_error("unknown fp op %x!",op);
1676 return;
1678 if(is_zero(-1)) {
1679 vswap();
1680 switch(op) {
1681 case TOK_LT: op=TOK_GT; break;
1682 case TOK_GE: op=TOK_ULE; break;
1683 case TOK_LE: op=TOK_GE; break;
1684 case TOK_GT: op=TOK_ULT; break;
1687 x|=0xB40040; /* fcmpX */
1688 if(op!=TOK_EQ && op!=TOK_NE)
1689 x|=0x80; /* fcmpX -> fcmpeX */
1690 if(is_zero(0)) {
1691 vtop--;
1692 o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1693 } else {
1694 x|=vfpr(gv(RC_FLOAT));
1695 vswap();
1696 o(x|(vfpr(gv(RC_FLOAT))<<12));
1697 vtop--;
1699 o(0xEEF1FA10); /* fmstat */
1701 switch(op) {
1702 case TOK_LE: op=TOK_ULE; break;
1703 case TOK_LT: op=TOK_ULT; break;
1704 case TOK_UGE: op=TOK_GE; break;
1705 case TOK_UGT: op=TOK_GT; break;
1708 vtop->r = VT_CMP;
1709 vtop->c.i = op;
1710 return;
1712 r=gv(RC_FLOAT);
1713 x|=vfpr(r);
1714 r=regmask(r);
1715 if(!fneg) {
1716 int r2;
1717 vswap();
1718 r2=gv(RC_FLOAT);
1719 x|=vfpr(r2)<<16;
1720 r|=regmask(r2);
1722 vtop->r=get_reg_ex(RC_FLOAT,r);
1723 if(!fneg)
1724 vtop--;
1725 o(x|(vfpr(vtop->r)<<12));
1728 #else
1729 static uint32_t is_fconst()
1731 long double f;
1732 uint32_t r;
1733 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1734 return 0;
1735 if (vtop->type.t == VT_FLOAT)
1736 f = vtop->c.f;
1737 else if (vtop->type.t == VT_DOUBLE)
1738 f = vtop->c.d;
1739 else
1740 f = vtop->c.ld;
1741 if(!ieee_finite(f))
1742 return 0;
1743 r=0x8;
1744 if(f<0.0) {
1745 r=0x18;
1746 f=-f;
1748 if(f==0.0)
1749 return r;
1750 if(f==1.0)
1751 return r|1;
1752 if(f==2.0)
1753 return r|2;
1754 if(f==3.0)
1755 return r|3;
1756 if(f==4.0)
1757 return r|4;
1758 if(f==5.0)
1759 return r|5;
1760 if(f==0.5)
1761 return r|6;
1762 if(f==10.0)
1763 return r|7;
1764 return 0;
1767 /* generate a floating point operation 'v = t1 op t2' instruction. The
1768 two operands are guaranted to have the same floating point type */
1769 void gen_opf(int op)
1771 uint32_t x, r, r2, c1, c2;
1772 //fputs("gen_opf\n",stderr);
1773 vswap();
1774 c1 = is_fconst();
1775 vswap();
1776 c2 = is_fconst();
1777 x=0xEE000100;
1778 #if LDOUBLE_SIZE == 8
1779 if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1780 x|=0x80;
1781 #else
1782 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1783 x|=0x80;
1784 else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1785 x|=0x80000;
1786 #endif
1787 switch(op)
1789 case '+':
1790 if(!c2) {
1791 vswap();
1792 c2=c1;
1794 vswap();
1795 r=fpr(gv(RC_FLOAT));
1796 vswap();
1797 if(c2) {
1798 if(c2>0xf)
1799 x|=0x200000; // suf
1800 r2=c2&0xf;
1801 } else {
1802 r2=fpr(gv(RC_FLOAT));
1804 break;
1805 case '-':
1806 if(c2) {
1807 if(c2<=0xf)
1808 x|=0x200000; // suf
1809 r2=c2&0xf;
1810 vswap();
1811 r=fpr(gv(RC_FLOAT));
1812 vswap();
1813 } else if(c1 && c1<=0xf) {
1814 x|=0x300000; // rsf
1815 r2=c1;
1816 r=fpr(gv(RC_FLOAT));
1817 vswap();
1818 } else {
1819 x|=0x200000; // suf
1820 vswap();
1821 r=fpr(gv(RC_FLOAT));
1822 vswap();
1823 r2=fpr(gv(RC_FLOAT));
1825 break;
1826 case '*':
1827 if(!c2 || c2>0xf) {
1828 vswap();
1829 c2=c1;
1831 vswap();
1832 r=fpr(gv(RC_FLOAT));
1833 vswap();
1834 if(c2 && c2<=0xf)
1835 r2=c2;
1836 else
1837 r2=fpr(gv(RC_FLOAT));
1838 x|=0x100000; // muf
1839 break;
1840 case '/':
1841 if(c2 && c2<=0xf) {
1842 x|=0x400000; // dvf
1843 r2=c2;
1844 vswap();
1845 r=fpr(gv(RC_FLOAT));
1846 vswap();
1847 } else if(c1 && c1<=0xf) {
1848 x|=0x500000; // rdf
1849 r2=c1;
1850 r=fpr(gv(RC_FLOAT));
1851 vswap();
1852 } else {
1853 x|=0x400000; // dvf
1854 vswap();
1855 r=fpr(gv(RC_FLOAT));
1856 vswap();
1857 r2=fpr(gv(RC_FLOAT));
1859 break;
1860 default:
1861 if(op >= TOK_ULT && op <= TOK_GT) {
1862 x|=0xd0f110; // cmfe
1863 /* bug (intention?) in Linux FPU emulator
1864 doesn't set carry if equal */
1865 switch(op) {
1866 case TOK_ULT:
1867 case TOK_UGE:
1868 case TOK_ULE:
1869 case TOK_UGT:
1870 tcc_error("unsigned comparison on floats?");
1871 break;
1872 case TOK_LT:
1873 op=TOK_Nset;
1874 break;
1875 case TOK_LE:
1876 op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1877 break;
1878 case TOK_EQ:
1879 case TOK_NE:
1880 x&=~0x400000; // cmfe -> cmf
1881 break;
1883 if(c1 && !c2) {
1884 c2=c1;
1885 vswap();
1886 switch(op) {
1887 case TOK_Nset:
1888 op=TOK_GT;
1889 break;
1890 case TOK_GE:
1891 op=TOK_ULE;
1892 break;
1893 case TOK_ULE:
1894 op=TOK_GE;
1895 break;
1896 case TOK_GT:
1897 op=TOK_Nset;
1898 break;
1901 vswap();
1902 r=fpr(gv(RC_FLOAT));
1903 vswap();
1904 if(c2) {
1905 if(c2>0xf)
1906 x|=0x200000;
1907 r2=c2&0xf;
1908 } else {
1909 r2=fpr(gv(RC_FLOAT));
1911 vtop[-1].r = VT_CMP;
1912 vtop[-1].c.i = op;
1913 } else {
1914 tcc_error("unknown fp op %x!",op);
1915 return;
1918 if(vtop[-1].r == VT_CMP)
1919 c1=15;
1920 else {
1921 c1=vtop->r;
1922 if(r2&0x8)
1923 c1=vtop[-1].r;
1924 vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1925 c1=fpr(vtop[-1].r);
1927 vtop--;
1928 o(x|(r<<16)|(c1<<12)|r2);
1930 #endif
1932 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1933 and 'long long' cases. */
1934 ST_FUNC void gen_cvt_itof1(int t)
1936 uint32_t r, r2;
1937 int bt;
1938 bt=vtop->type.t & VT_BTYPE;
1939 if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1940 #ifndef TCC_ARM_VFP
1941 uint32_t dsize = 0;
1942 #endif
1943 r=intr(gv(RC_INT));
1944 #ifdef TCC_ARM_VFP
1945 r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1946 o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1947 r2|=r2<<12;
1948 if(!(vtop->type.t & VT_UNSIGNED))
1949 r2|=0x80; /* fuitoX -> fsituX */
1950 o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1951 #else
1952 r2=fpr(vtop->r=get_reg(RC_FLOAT));
1953 if((t & VT_BTYPE) != VT_FLOAT)
1954 dsize=0x80; /* flts -> fltd */
1955 o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1956 if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1957 uint32_t off = 0;
1958 o(0xE3500000|(r<<12)); /* cmp */
1959 r=fpr(get_reg(RC_FLOAT));
1960 if(last_itod_magic) {
1961 off=ind+8-last_itod_magic;
1962 off/=4;
1963 if(off>255)
1964 off=0;
1966 o(0xBD1F0100|(r<<12)|off); /* ldflts */
1967 if(!off) {
1968 o(0xEA000000); /* b */
1969 last_itod_magic=ind;
1970 o(0x4F800000); /* 4294967296.0f */
1972 o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1974 #endif
1975 return;
1976 } else if(bt == VT_LLONG) {
1977 int func;
1978 CType *func_type = 0;
1979 if((t & VT_BTYPE) == VT_FLOAT) {
1980 func_type = &func_float_type;
1981 if(vtop->type.t & VT_UNSIGNED)
1982 func=TOK___floatundisf;
1983 else
1984 func=TOK___floatdisf;
1985 #if LDOUBLE_SIZE != 8
1986 } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1987 func_type = &func_ldouble_type;
1988 if(vtop->type.t & VT_UNSIGNED)
1989 func=TOK___floatundixf;
1990 else
1991 func=TOK___floatdixf;
1992 } else if((t & VT_BTYPE) == VT_DOUBLE) {
1993 #else
1994 } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1995 #endif
1996 func_type = &func_double_type;
1997 if(vtop->type.t & VT_UNSIGNED)
1998 func=TOK___floatundidf;
1999 else
2000 func=TOK___floatdidf;
2002 if(func_type) {
2003 vpush_global_sym(func_type, func);
2004 vswap();
2005 gfunc_call(1);
2006 vpushi(0);
2007 vtop->r=TREG_F0;
2008 return;
2011 tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
2014 /* convert fp to int 't' type */
2015 void gen_cvt_ftoi(int t)
2017 uint32_t r, r2;
2018 int u, func = 0;
2019 u=t&VT_UNSIGNED;
2020 t&=VT_BTYPE;
2021 r2=vtop->type.t & VT_BTYPE;
2022 if(t==VT_INT) {
2023 #ifdef TCC_ARM_VFP
2024 r=vfpr(gv(RC_FLOAT));
2025 u=u?0:0x10000;
2026 o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
2027 r2=intr(vtop->r=get_reg(RC_INT));
2028 o(0xEE100A10|(r<<16)|(r2<<12));
2029 return;
2030 #else
2031 if(u) {
2032 if(r2 == VT_FLOAT)
2033 func=TOK___fixunssfsi;
2034 #if LDOUBLE_SIZE != 8
2035 else if(r2 == VT_LDOUBLE)
2036 func=TOK___fixunsxfsi;
2037 else if(r2 == VT_DOUBLE)
2038 #else
2039 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2040 #endif
2041 func=TOK___fixunsdfsi;
2042 } else {
2043 r=fpr(gv(RC_FLOAT));
2044 r2=intr(vtop->r=get_reg(RC_INT));
2045 o(0xEE100170|(r2<<12)|r);
2046 return;
2048 #endif
2049 } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2050 if(r2 == VT_FLOAT)
2051 func=TOK___fixsfdi;
2052 #if LDOUBLE_SIZE != 8
2053 else if(r2 == VT_LDOUBLE)
2054 func=TOK___fixxfdi;
2055 else if(r2 == VT_DOUBLE)
2056 #else
2057 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2058 #endif
2059 func=TOK___fixdfdi;
2061 if(func) {
2062 vpush_global_sym(&func_old_type, func);
2063 vswap();
2064 gfunc_call(1);
2065 vpushi(0);
2066 if(t == VT_LLONG)
2067 vtop->r2 = REG_LRET;
2068 vtop->r = REG_IRET;
2069 return;
2071 tcc_error("unimplemented gen_cvt_ftoi!");
2074 /* convert from one floating point type to another */
2075 void gen_cvt_ftof(int t)
2077 #ifdef TCC_ARM_VFP
2078 if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2079 uint32_t r = vfpr(gv(RC_FLOAT));
2080 o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2082 #else
2083 /* all we have to do on i386 and FPA ARM is to put the float in a register */
2084 gv(RC_FLOAT);
2085 #endif
2088 /* computed goto support */
2089 void ggoto(void)
2091 gcall_or_jmp(1);
2092 vtop--;
2095 /* Save the stack pointer onto the stack and return the location of its address */
2096 ST_FUNC void gen_vla_sp_save(int addr) {
2097 tcc_error("variable length arrays unsupported for this target");
2100 /* Restore the SP from a location on the stack */
2101 ST_FUNC void gen_vla_sp_restore(int addr) {
2102 tcc_error("variable length arrays unsupported for this target");
2105 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2106 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2107 tcc_error("variable length arrays unsupported for this target");
2110 /* end of ARM code generator */
2111 /*************************************************************/
2112 #endif
2113 /*************************************************************/