Add support for struct > 4B returned via registers
[tinycc.git] / arm-gen.c
blob 6d0acd81dc53019c1bdeb141f141b8af4151d517
1 /*
2 * ARMv4 code generator for TCC
4 * Copyright (c) 2003 Daniel Glöckner
5 * Copyright (c) 2012 Thomas Preud'homme
7 * Based on i386-gen.c by Fabrice Bellard
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #ifdef TARGET_DEFS_ONLY
26 #ifdef TCC_ARM_EABI
27 #ifndef TCC_ARM_VFP /* Avoid useless warning */
28 #define TCC_ARM_VFP
29 #endif
30 #endif
32 /* number of available registers */
33 #ifdef TCC_ARM_VFP
34 #define NB_REGS 13
35 #else
36 #define NB_REGS 9
37 #endif
39 #ifndef TCC_ARM_VERSION
40 # define TCC_ARM_VERSION 5
41 #endif
43 /* a register can belong to several classes. The classes must be
44 sorted from more general to more precise (see gv2() code which makes
45 assumptions about it). */
46 #define RC_INT 0x0001 /* generic integer register */
47 #define RC_FLOAT 0x0002 /* generic float register */
48 #define RC_R0 0x0004
49 #define RC_R1 0x0008
50 #define RC_R2 0x0010
51 #define RC_R3 0x0020
52 #define RC_R12 0x0040
53 #define RC_F0 0x0080
54 #define RC_F1 0x0100
55 #define RC_F2 0x0200
56 #define RC_F3 0x0400
57 #ifdef TCC_ARM_VFP
58 #define RC_F4 0x0800
59 #define RC_F5 0x1000
60 #define RC_F6 0x2000
61 #define RC_F7 0x4000
62 #endif
63 #define RC_IRET RC_R0 /* function return: integer register */
64 #define RC_LRET RC_R1 /* function return: second integer register */
65 #define RC_FRET RC_F0 /* function return: float register */
67 /* pretty names for the registers */
68 enum {
69 TREG_R0 = 0,
70 TREG_R1,
71 TREG_R2,
72 TREG_R3,
73 TREG_R12,
74 TREG_F0,
75 TREG_F1,
76 TREG_F2,
77 TREG_F3,
78 #ifdef TCC_ARM_VFP
79 TREG_F4,
80 TREG_F5,
81 TREG_F6,
82 TREG_F7,
83 #endif
86 #ifdef TCC_ARM_VFP
87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
88 #endif
90 /* return registers for function */
91 #define REG_IRET TREG_R0 /* single word int return register */
92 #define REG_LRET TREG_R1 /* second word return register (for long long) */
93 #define REG_FRET TREG_F0 /* float return register */
95 #ifdef TCC_ARM_EABI
96 #define TOK___divdi3 TOK___aeabi_ldivmod
97 #define TOK___moddi3 TOK___aeabi_ldivmod
98 #define TOK___udivdi3 TOK___aeabi_uldivmod
99 #define TOK___umoddi3 TOK___aeabi_uldivmod
100 #endif
102 /* defined if function parameters must be evaluated in reverse order */
103 #define INVERT_FUNC_PARAMS
105 /* defined if structures are passed as pointers. Otherwise structures
106 are directly pushed on stack. */
107 /* #define FUNC_STRUCT_PARAM_AS_PTR */
109 /* pointer size, in bytes */
110 #define PTR_SIZE 4
112 /* long double size and alignment, in bytes */
113 #ifdef TCC_ARM_VFP
114 #define LDOUBLE_SIZE 8
115 #endif
117 #ifndef LDOUBLE_SIZE
118 #define LDOUBLE_SIZE 8
119 #endif
121 #ifdef TCC_ARM_EABI
122 #define LDOUBLE_ALIGN 8
123 #else
124 #define LDOUBLE_ALIGN 4
125 #endif
127 /* maximum alignment (for aligned attribute support) */
128 #define MAX_ALIGN 8
130 #define CHAR_IS_UNSIGNED
132 /******************************************************/
133 /* ELF defines */
135 #define EM_TCC_TARGET EM_ARM
137 /* relocation type for 32 bit data relocation */
138 #define R_DATA_32 R_ARM_ABS32
139 #define R_DATA_PTR R_ARM_ABS32
140 #define R_JMP_SLOT R_ARM_JUMP_SLOT
141 #define R_COPY R_ARM_COPY
143 #define ELF_START_ADDR 0x00008000
144 #define ELF_PAGE_SIZE 0x1000
146 /******************************************************/
147 #else /* ! TARGET_DEFS_ONLY */
148 /******************************************************/
149 #include "tcc.h"
151 ST_DATA const int reg_classes[NB_REGS] = {
152 /* r0 */ RC_INT | RC_R0,
153 /* r1 */ RC_INT | RC_R1,
154 /* r2 */ RC_INT | RC_R2,
155 /* r3 */ RC_INT | RC_R3,
156 /* r12 */ RC_INT | RC_R12,
157 /* f0 */ RC_FLOAT | RC_F0,
158 /* f1 */ RC_FLOAT | RC_F1,
159 /* f2 */ RC_FLOAT | RC_F2,
160 /* f3 */ RC_FLOAT | RC_F3,
161 #ifdef TCC_ARM_VFP
162 /* d4/s8 */ RC_FLOAT | RC_F4,
163 /* d5/s10 */ RC_FLOAT | RC_F5,
164 /* d6/s12 */ RC_FLOAT | RC_F6,
165 /* d7/s14 */ RC_FLOAT | RC_F7,
166 #endif
169 static int func_sub_sp_offset, last_itod_magic;
170 static int leaffunc;
172 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
173 static CType float_type, double_type, func_float_type, func_double_type;
174 ST_FUNC void arm_init_types(void)
176 float_type.t = VT_FLOAT;
177 double_type.t = VT_DOUBLE;
178 func_float_type.t = VT_FUNC;
179 func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
180 func_double_type.t = VT_FUNC;
181 func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
183 #else
184 #define func_float_type func_old_type
185 #define func_double_type func_old_type
186 #define func_ldouble_type func_old_type
187 ST_FUNC void arm_init_types(void) {}
188 #endif
190 static int two2mask(int a,int b) {
191 return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
194 static int regmask(int r) {
195 return reg_classes[r]&~(RC_INT|RC_FLOAT);
198 /******************************************************/
200 void o(uint32_t i)
202 /* this is a good place to start adding big-endian support */
203 int ind1;
205 ind1 = ind + 4;
206 if (!cur_text_section)
207 tcc_error("compiler error! This happens, for example, if the compiler\n"
208 "can't evaluate constant expressions outside of a function.");
209 if (ind1 > cur_text_section->data_allocated)
210 section_realloc(cur_text_section, ind1);
211 cur_text_section->data[ind++] = i&255;
212 i>>=8;
213 cur_text_section->data[ind++] = i&255;
214 i>>=8;
215 cur_text_section->data[ind++] = i&255;
216 i>>=8;
217 cur_text_section->data[ind++] = i;
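/* Try to encode the constant 'c' as an ARM data-processing immediate (an
   8-bit value rotated right by an even amount) and merge it into the
   instruction word 'op'. For add/sub, mov/mvn and and/bic the complementary
   opcode with the negated/complemented constant is also tried. Returns the
   completed instruction, or 0 if the constant cannot be encoded. */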
220 static uint32_t stuff_const(uint32_t op, uint32_t c)
222 int try_neg=0;
223 uint32_t nc = 0, negop = 0;
225 switch(op&0x1F00000)
227 case 0x800000: //add
228 case 0x400000: //sub
229 try_neg=1;
230 negop=op^0xC00000;
231 nc=-c;
232 break;
233 case 0x1A00000: //mov
234 case 0x1E00000: //mvn
235 try_neg=1;
236 negop=op^0x400000;
237 nc=~c;
238 break;
239 case 0x200000: //xor
240 if(c==~0)
241 return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
242 break;
243 case 0x0: //and
244 if(c==~0)
245 return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
246 case 0x1C00000: //bic
247 try_neg=1;
248 negop=op^0x1C00000;
249 nc=~c;
250 break;
251 case 0x1800000: //orr
252 if(c==~0)
253 return (op&0xFFF0FFFF)|0x1E00000;
254 break;
256 do {
257 uint32_t m;
258 int i;
259 if(c<256) /* catch undefined <<32 */
260 return op|c;
261 for(i=2;i<32;i+=2) {
262 m=(0xff>>i)|(0xff<<(32-i));
263 if(!(c&~m))
264 return op|(i<<7)|(c<<i)|(c>>(32-i));
266 op=negop;
267 c=nc;
268 } while(try_neg--);
269 return 0;
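/* Emit an arbitrary constant 'v' when stuff_const() alone cannot encode it:
   the value (or its negation) is split into up to four 8-bit chunks, each
   rotated by a multiple of two bits, and one instruction is emitted per
   chunk, the later ones accumulating into the destination register. */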
273 //only add,sub
274 void stuff_const_harder(uint32_t op, uint32_t v) {
275 uint32_t x;
276 x=stuff_const(op,v);
277 if(x)
278 o(x);
279 else {
280 uint32_t a[16], nv, no, o2, n2;
281 int i,j,k;
282 a[0]=0xff;
283 o2=(op&0xfff0ffff)|((op&0xf000)<<4);
284 for(i=1;i<16;i++)
285 a[i]=(a[i-1]>>2)|(a[i-1]<<30);
286 for(i=0;i<12;i++)
287 for(j=i<4?i+12:15;j>=i+4;j--)
288 if((v&(a[i]|a[j]))==v) {
289 o(stuff_const(op,v&a[i]));
290 o(stuff_const(o2,v&a[j]));
291 return;
293 no=op^0xC00000;
294 n2=o2^0xC00000;
295 nv=-v;
296 for(i=0;i<12;i++)
297 for(j=i<4?i+12:15;j>=i+4;j--)
298 if((nv&(a[i]|a[j]))==nv) {
299 o(stuff_const(no,nv&a[i]));
300 o(stuff_const(n2,nv&a[j]));
301 return;
303 for(i=0;i<8;i++)
304 for(j=i+4;j<12;j++)
305 for(k=i<4?i+12:15;k>=j+4;k--)
306 if((v&(a[i]|a[j]|a[k]))==v) {
307 o(stuff_const(op,v&a[i]));
308 o(stuff_const(o2,v&a[j]));
309 o(stuff_const(o2,v&a[k]));
310 return;
312 no=op^0xC00000;
313 nv=-v;
314 for(i=0;i<8;i++)
315 for(j=i+4;j<12;j++)
316 for(k=i<4?i+12:15;k>=j+4;k--)
317 if((nv&(a[i]|a[j]|a[k]))==nv) {
318 o(stuff_const(no,nv&a[i]));
319 o(stuff_const(n2,nv&a[j]));
320 o(stuff_const(n2,nv&a[k]));
321 return;
323 o(stuff_const(op,v&a[0]));
324 o(stuff_const(o2,v&a[4]));
325 o(stuff_const(o2,v&a[8]));
326 o(stuff_const(o2,v&a[12]));
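/* Encode a PC-relative branch from 'pos' to 'addr' as the 24-bit signed word
   offset of a B instruction (relative to pos+8 because of the ARM pipeline).
   If the target is out of the +/-32MB range, either error out or return 0
   depending on 'fail'. decbranch() performs the reverse operation. */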
330 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
332 addr-=pos+8;
333 addr/=4;
334 if(addr>=0x1000000 || addr<-0x1000000) {
335 if(fail)
336 tcc_error("FIXME: function bigger than 32MB");
337 return 0;
339 return 0x0A000000|(addr&0xffffff);
342 int decbranch(int pos)
344 int x;
345 x=*(uint32_t *)(cur_text_section->data + pos);
346 x&=0x00ffffff;
347 if(x&0x800000)
348 x-=0x1000000;
349 return x*4+pos+8;
352 /* output a symbol and patch all calls to it */
353 void gsym_addr(int t, int a)
355 uint32_t *x;
356 int lt;
357 while(t) {
358 x=(uint32_t *)(cur_text_section->data + t);
359 t=decbranch(lt=t);
360 if(a==lt+4)
361 *x=0xE1A00000; // nop
362 else {
363 *x &= 0xff000000;
364 *x |= encbranch(lt,a,1);
369 void gsym(int t)
371 gsym_addr(t, ind);
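/* Map TCC virtual register numbers to hardware register numbers:
   TREG_F0..TREG_F7 map to d0..d7 (VFP) or f0..f3 (FPA), TREG_R12 maps to
   r12, and 14 (lr) is accepted directly since it is used as a scratch
   register by the code generator. */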
374 #ifdef TCC_ARM_VFP
375 static uint32_t vfpr(int r)
377 if(r<TREG_F0 || r>TREG_F7)
378 tcc_error("compiler error! register %i is not a vfp register",r);
379 return r-5;
381 #else
382 static uint32_t fpr(int r)
384 if(r<TREG_F0 || r>TREG_F3)
385 tcc_error("compiler error! register %i is not an fpa register",r);
386 return r-5;
388 #endif
390 static uint32_t intr(int r)
392 if(r==4)
393 return 12;
394 if((r<0 || r>4) && r!=14)
395 tcc_error("compiler error! register %i is not an int register",r);
396 return r;
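/* Rewrite a base+offset address so that the offset fits the addressing mode:
   'maxoff' is the largest offset the addressing mode can encode and
   (1<<shift) its required alignment. When the offset is too large or
   misaligned, the excess is added to (or subtracted from) the base into lr,
   which then becomes the new base register. */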
399 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
401 if(*off>maxoff || *off&((1<<shift)-1)) {
402 uint32_t x, y;
403 x=0xE280E000;
404 if(*sgn)
405 x=0xE240E000;
406 x|=(*base)<<16;
407 *base=14; // lr
408 y=stuff_const(x,*off&~maxoff);
409 if(y) {
410 o(y);
411 *off&=maxoff;
412 return;
414 y=stuff_const(x,(*off+maxoff)&~maxoff);
415 if(y) {
416 o(y);
417 *sgn=!*sgn;
418 *off=((*off+maxoff)&~maxoff)-*off;
419 return;
421 stuff_const_harder(x,*off&~maxoff);
422 *off&=maxoff;
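/* Map a TCC comparison token to the ARM condition code field (bits 31..28)
   that predicates the following instruction. */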
426 static uint32_t mapcc(int cc)
428 switch(cc)
430 case TOK_ULT:
431 return 0x30000000; /* CC/LO */
432 case TOK_UGE:
433 return 0x20000000; /* CS/HS */
434 case TOK_EQ:
435 return 0x00000000; /* EQ */
436 case TOK_NE:
437 return 0x10000000; /* NE */
438 case TOK_ULE:
439 return 0x90000000; /* LS */
440 case TOK_UGT:
441 return 0x80000000; /* HI */
442 case TOK_Nset:
443 return 0x40000000; /* MI */
444 case TOK_Nclear:
445 return 0x50000000; /* PL */
446 case TOK_LT:
447 return 0xB0000000; /* LT */
448 case TOK_GE:
449 return 0xA0000000; /* GE */
450 case TOK_LE:
451 return 0xD0000000; /* LE */
452 case TOK_GT:
453 return 0xC0000000; /* GT */
455 tcc_error("unexpected condition code");
456 return 0xE0000000; /* AL */
459 static int negcc(int cc)
461 switch(cc)
463 case TOK_ULT:
464 return TOK_UGE;
465 case TOK_UGE:
466 return TOK_ULT;
467 case TOK_EQ:
468 return TOK_NE;
469 case TOK_NE:
470 return TOK_EQ;
471 case TOK_ULE:
472 return TOK_UGT;
473 case TOK_UGT:
474 return TOK_ULE;
475 case TOK_Nset:
476 return TOK_Nclear;
477 case TOK_Nclear:
478 return TOK_Nset;
479 case TOK_LT:
480 return TOK_GE;
481 case TOK_GE:
482 return TOK_LT;
483 case TOK_LE:
484 return TOK_GT;
485 case TOK_GT:
486 return TOK_LE;
488 tcc_error("unexpected condition code");
489 return TOK_NE;
492 /* load 'r' from value 'sv' */
493 void load(int r, SValue *sv)
495 int v, ft, fc, fr, sign;
496 uint32_t op;
497 SValue v1;
499 fr = sv->r;
500 ft = sv->type.t;
501 fc = sv->c.ul;
503 if(fc>=0)
504 sign=0;
505 else {
506 sign=1;
507 fc=-fc;
510 v = fr & VT_VALMASK;
511 if (fr & VT_LVAL) {
512 uint32_t base = 0xB; // fp
513 if(v == VT_LLOCAL) {
514 v1.type.t = VT_PTR;
515 v1.r = VT_LOCAL | VT_LVAL;
516 v1.c.ul = sv->c.ul;
517 load(base=14 /* lr */, &v1);
518 fc=sign=0;
519 v=VT_LOCAL;
520 } else if(v == VT_CONST) {
521 v1.type.t = VT_PTR;
522 v1.r = fr&~VT_LVAL;
523 v1.c.ul = sv->c.ul;
524 v1.sym=sv->sym;
525 load(base=14, &v1);
526 fc=sign=0;
527 v=VT_LOCAL;
528 } else if(v < VT_CONST) {
529 base=intr(v);
530 fc=sign=0;
531 v=VT_LOCAL;
533 if(v == VT_LOCAL) {
534 if(is_float(ft)) {
535 calcaddr(&base,&fc,&sign,1020,2);
536 #ifdef TCC_ARM_VFP
537 op=0xED100A00; /* flds */
538 if(!sign)
539 op|=0x800000;
540 if ((ft & VT_BTYPE) != VT_FLOAT)
541 op|=0x100; /* flds -> fldd */
542 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
543 #else
544 op=0xED100100;
545 if(!sign)
546 op|=0x800000;
547 #if LDOUBLE_SIZE == 8
548 if ((ft & VT_BTYPE) != VT_FLOAT)
549 op|=0x8000;
550 #else
551 if ((ft & VT_BTYPE) == VT_DOUBLE)
552 op|=0x8000;
553 else if ((ft & VT_BTYPE) == VT_LDOUBLE)
554 op|=0x400000;
555 #endif
556 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
557 #endif
558 } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
559 || (ft & VT_BTYPE) == VT_SHORT) {
560 calcaddr(&base,&fc,&sign,255,0);
561 op=0xE1500090;
562 if ((ft & VT_BTYPE) == VT_SHORT)
563 op|=0x20;
564 if ((ft & VT_UNSIGNED) == 0)
565 op|=0x40;
566 if(!sign)
567 op|=0x800000;
568 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
569 } else {
570 calcaddr(&base,&fc,&sign,4095,0);
571 op=0xE5100000;
572 if(!sign)
573 op|=0x800000;
574 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
575 op|=0x400000;
576 o(op|(intr(r)<<12)|fc|(base<<16));
578 return;
580 } else {
581 if (v == VT_CONST) {
582 op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
583 if (fr & VT_SYM || !op) {
584 o(0xE59F0000|(intr(r)<<12));
585 o(0xEA000000);
586 if(fr & VT_SYM)
587 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
588 o(sv->c.ul);
589 } else
590 o(op);
591 return;
592 } else if (v == VT_LOCAL) {
593 op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
594 if (fr & VT_SYM || !op) {
595 o(0xE59F0000|(intr(r)<<12));
596 o(0xEA000000);
597 if(fr & VT_SYM) // needed ?
598 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
599 o(sv->c.ul);
600 o(0xE08B0000|(intr(r)<<12)|intr(r));
601 } else
602 o(op);
603 return;
604 } else if(v == VT_CMP) {
605 o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
606 o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
607 return;
608 } else if (v == VT_JMP || v == VT_JMPI) {
609 int t;
610 t = v & 1;
611 o(0xE3A00000|(intr(r)<<12)|t);
612 o(0xEA000000);
613 gsym(sv->c.ul);
614 o(0xE3A00000|(intr(r)<<12)|(t^1));
615 return;
616 } else if (v < VT_CONST) {
617 if(is_float(ft))
618 #ifdef TCC_ARM_VFP
619 o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
620 #else
621 o(0xEE008180|(fpr(r)<<12)|fpr(v));
622 #endif
623 else
624 o(0xE1A00000|(intr(r)<<12)|intr(v));
625 return;
628 tcc_error("load unimplemented!");
631 /* store register 'r' in lvalue 'v' */
632 void store(int r, SValue *sv)
634 SValue v1;
635 int v, ft, fc, fr, sign;
636 uint32_t op;
638 fr = sv->r;
639 ft = sv->type.t;
640 fc = sv->c.ul;
642 if(fc>=0)
643 sign=0;
644 else {
645 sign=1;
646 fc=-fc;
649 v = fr & VT_VALMASK;
650 if (fr & VT_LVAL || fr == VT_LOCAL) {
651 uint32_t base = 0xb;
652 if(v < VT_CONST) {
653 base=intr(v);
654 v=VT_LOCAL;
655 fc=sign=0;
656 } else if(v == VT_CONST) {
657 v1.type.t = ft;
658 v1.r = fr&~VT_LVAL;
659 v1.c.ul = sv->c.ul;
660 v1.sym=sv->sym;
661 load(base=14, &v1);
662 fc=sign=0;
663 v=VT_LOCAL;
665 if(v == VT_LOCAL) {
666 if(is_float(ft)) {
667 calcaddr(&base,&fc,&sign,1020,2);
668 #ifdef TCC_ARM_VFP
669 op=0xED000A00; /* fsts */
670 if(!sign)
671 op|=0x800000;
672 if ((ft & VT_BTYPE) != VT_FLOAT)
673 op|=0x100; /* fsts -> fstd */
674 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
675 #else
676 op=0xED000100;
677 if(!sign)
678 op|=0x800000;
679 #if LDOUBLE_SIZE == 8
680 if ((ft & VT_BTYPE) != VT_FLOAT)
681 op|=0x8000;
682 #else
683 if ((ft & VT_BTYPE) == VT_DOUBLE)
684 op|=0x8000;
685 if ((ft & VT_BTYPE) == VT_LDOUBLE)
686 op|=0x400000;
687 #endif
688 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
689 #endif
690 return;
691 } else if((ft & VT_BTYPE) == VT_SHORT) {
692 calcaddr(&base,&fc,&sign,255,0);
693 op=0xE14000B0;
694 if(!sign)
695 op|=0x800000;
696 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
697 } else {
698 calcaddr(&base,&fc,&sign,4095,0);
699 op=0xE5000000;
700 if(!sign)
701 op|=0x800000;
702 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
703 op|=0x400000;
704 o(op|(intr(r)<<12)|fc|(base<<16));
706 return;
709 tcc_error("store unimplemented");
712 static void gadd_sp(int val)
714 stuff_const_harder(0xE28DD000,val);
717 /* 'is_jmp' is '1' if it is a jump */
718 static void gcall_or_jmp(int is_jmp)
720 int r;
721 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
722 uint32_t x;
723 /* constant case */
724 x=encbranch(ind,ind+vtop->c.ul,0);
725 if(x) {
726 if (vtop->r & VT_SYM) {
727 /* relocation case */
728 greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
729 } else
730 put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
731 o(x|(is_jmp?0xE0000000:0xE1000000));
732 } else {
733 if(!is_jmp)
734 o(0xE28FE004); // add lr,pc,#4
735 o(0xE51FF004); // ldr pc,[pc,#-4]
736 if (vtop->r & VT_SYM)
737 greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
738 o(vtop->c.ul);
740 } else {
741 /* otherwise, indirect call */
742 r = gv(RC_INT);
743 if(!is_jmp)
744 o(0xE1A0E00F); // mov lr,pc
745 o(0xE1A0F000|intr(r)); // mov pc,r
749 #ifdef TCC_ARM_HARDFLOAT
750 /* Return whether a structure is a homogeneous float aggregate or not.
751 The answer is true if all the elements of the structure are of the same
752 primitive float type and there are at most 4 elements.
754 type: the type corresponding to the structure to be tested */
755 static int is_hgen_float_aggr(CType *type)
757 if ((type->t & VT_BTYPE) == VT_STRUCT) {
758 struct Sym *ref;
759 int btype, nb_fields = 0;
761 ref = type->ref->next;
762 btype = ref->type.t & VT_BTYPE;
763 if (btype == VT_FLOAT || btype == VT_DOUBLE) {
764 for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
765 return !ref && nb_fields <= 4;
768 return 0;
771 struct avail_regs {
772 signed char avail[3]; /* 3 holes max with only float and double alignments */
773 int first_hole; /* first available hole */
774 int last_hole; /* last available hole (none if equal to first_hole) */
775 int first_free_reg; /* next free register in the sequence, hole excluded */
778 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
780 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
781 param) according to the rules described in the procedure call standard for
782 the ARM architecture (AAPCS). If found, the registers are assigned to this
783 VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
784 and the parameter is a single float.
786 avregs: opaque structure to keep track of available VFP co-processor regs
787 align: alignment constraints for the param, as returned by type_size()
788 size: size of the parameter, as returned by type_size() */
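/* Illustrative example: for the parameter list (float, double, float), the
   first float is assigned s0, the double needs even (double-word) alignment
   and is assigned d1 (s2/s3), leaving a hole in s1, and the second float
   back-fills s1. */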
789 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
791 int first_reg = 0;
793 if (avregs->first_free_reg == -1)
794 return -1;
795 if (align >> 3) { /* double alignment */
796 first_reg = avregs->first_free_reg;
797 /* alignment constraint not respected so use next reg and record hole */
798 if (first_reg & 1)
799 avregs->avail[avregs->last_hole++] = first_reg++;
800 } else { /* no special alignment (float or array of float) */
801 /* if single float and a hole is available, assign the param to it */
802 if (size == 4 && avregs->first_hole != avregs->last_hole)
803 return avregs->avail[avregs->first_hole++];
804 else
805 first_reg = avregs->first_free_reg;
807 if (first_reg + size / 4 <= 16) {
808 avregs->first_free_reg = first_reg + size / 4;
809 return first_reg;
811 avregs->first_free_reg = -1;
812 return -1;
814 #endif
816 /* Return the number of registers needed to return the struct, or 0 if
817 returning via struct pointer. */
818 ST_FUNC int gfunc_sret(CType *vt, CType *ret, int *ret_align) {
819 #ifdef TCC_ARM_EABI
820 int size, align;
821 size = type_size(vt, &align);
822 #ifdef TCC_ARM_HARDFLOAT
823 if (is_float(vt->t) || is_hgen_float_aggr(vt)) {
824 *ret_align = 8;
825 ret->ref = NULL;
826 ret->t = VT_DOUBLE;
827 return (size + 7) >> 3;
828 } else
829 #endif
830 if (size > 4) {
831 return 0;
832 } else {
833 *ret_align = 4;
834 ret->ref = NULL;
835 ret->t = VT_INT;
836 return 1;
838 #else
839 return 0;
840 #endif
843 /* Parameters are classified according to how they are copied to their final
844 destination for the function call. Because the copying is performed class
845 after class according to the order in the enum below, it is important that
846 some constraints about the order of the members of this enum are respected:
847 - CORE_STRUCT_CLASS must come after STACK_CLASS;
848 - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
849 VFP_STRUCT_CLASS;
850 - VFP_STRUCT_CLASS must come after VFP_CLASS.
851 See the comment for the main loop in copy_params() for the reason. */
852 enum reg_class {
853 STACK_CLASS = 0,
854 CORE_STRUCT_CLASS,
855 VFP_CLASS,
856 VFP_STRUCT_CLASS,
857 CORE_CLASS,
858 NB_CLASSES
861 struct param_plan {
862 int start; /* first reg or addr used depending on the class */
863 int end; /* last reg used or next free addr depending on the class */
864 SValue *sval; /* pointer to SValue on the value stack */
865 struct param_plan *prev; /* previous element in this class */
868 struct plan {
869 struct param_plan *pplans; /* array of all the param plans */
870 struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
873 #define add_param_plan(plan,pplan,class) \
874 do { \
875 pplan.prev = plan->clsplans[class]; \
876 plan->pplans[plan ## _nb] = pplan; \
877 plan->clsplans[class] = &plan->pplans[plan ## _nb++]; \
878 } while(0)
880 /* Assign parameters to registers and stack with alignment according to the
881 rules in the procedure call standard for the ARM architecture (AAPCS).
882 The overall assignment is recorded in an array of per parameter structures
883 called parameter plans. The parameter plans are also further organized in a
884 number of linked lists, one per class of parameter (see the comment for the
885 definition of enum reg_class).
887 nb_args: number of parameters of the function for which a call is generated
888 variadic: whether the function is a variadic function or not
889 plan: the structure where the overall assignment is recorded
890 todo: a bitmap that records which core registers hold a parameter
892 Returns the amount of stack space needed for parameter passing
894 Note: this function allocates an array in plan->pplans with tcc_malloc. It
895 is the responsibility of the caller to free this array once used (i.e. not
896 before copy_params). */
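/* Illustrative example: for a call f(int, long long, int), the first int
   goes in r0, the long long is aligned to an even register pair and goes in
   r2/r3 (r1 is skipped), and the last int is passed on the stack. */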
897 static int assign_regs(int nb_args, int variadic, struct plan *plan, int *todo)
899 int i, size, align;
900 int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
901 int plan_nb = 0;
902 struct param_plan pplan;
903 #ifdef TCC_ARM_HARDFLOAT
904 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
905 #endif
907 ncrn = nsaa = 0;
908 *todo = 0;
909 plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
910 memset(plan->clsplans, 0, sizeof(plan->clsplans));
911 for(i = nb_args; i-- ;) {
912 int j, start_vfpreg = 0;
913 size = type_size(&vtop[-i].type, &align);
914 switch(vtop[-i].type.t & VT_BTYPE) {
915 case VT_STRUCT:
916 case VT_FLOAT:
917 case VT_DOUBLE:
918 case VT_LDOUBLE:
919 #ifdef TCC_ARM_HARDFLOAT
920 if (!variadic) {
921 int is_hfa = 0; /* Homogeneous float aggregate */
923 if (is_float(vtop[-i].type.t)
924 || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
925 int end_vfpreg;
927 start_vfpreg = assign_vfpreg(&avregs, align, size);
928 end_vfpreg = start_vfpreg + ((size - 1) >> 2);
929 if (start_vfpreg >= 0) {
930 pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
931 if (is_hfa)
932 add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
933 else
934 add_param_plan(plan, pplan, VFP_CLASS);
935 continue;
936 } else
937 break;
940 #endif
941 ncrn = (ncrn + (align-1)/4) & -(align/4);
942 size = (size + 3) & -4;
943 if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
944 /* The parameter is allocated both in core register and on stack. As
945 * such, it can be of either class: it would either be the last of
946 * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
947 for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
948 *todo|=(1<<j);
949 pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
950 add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
951 ncrn += size/4;
952 if (ncrn > 4)
953 nsaa = (ncrn - 4) * 4;
954 } else {
955 ncrn = 4;
956 break;
958 continue;
959 default:
960 if (ncrn < 4) {
961 int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
963 if (is_long) {
964 ncrn = (ncrn + 1) & -2;
965 if (ncrn == 4)
966 break;
968 pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
969 ncrn++;
970 if (is_long)
971 pplan.end = ncrn++;
972 add_param_plan(plan, pplan, CORE_CLASS);
973 continue;
976 nsaa = (nsaa + (align - 1)) & ~(align - 1);
977 pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
978 add_param_plan(plan, pplan, STACK_CLASS);
979 nsaa += size; /* size already rounded up before */
981 return nsaa;
984 #undef add_param_plan
986 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
987 function call.
989 nb_args: number of parameters the function takes
990 plan: the overall assignment plan for parameters
991 todo: a bitmap indicating what core reg will hold a parameter
993 Returns the number of SValues added by this function on the value stack */
994 static int copy_params(int nb_args, struct plan *plan, int todo)
996 int size, align, r, i, nb_extra_sval = 0;
997 struct param_plan *pplan;
999 /* Several constraints require parameters to be copied in a specific order:
1000 - structures are copied to the stack before being loaded in a reg;
1001 - floats loaded to an odd numbered VFP reg are first copied to the
1002 preceding even numbered VFP reg and then moved to the next VFP reg.
1004 It is thus important that:
1005 - structures assigned to core regs must be copied after parameters
1006 assigned to the stack but before structures assigned to VFP regs because
1007 a structure can lie partly in core registers and partly on the stack;
1008 - parameters assigned to the stack and all structures be copied before
1009 parameters assigned to a core reg since copying a parameter to the stack
1010 requires using a core reg;
1011 - parameters assigned to VFP regs be copied before structures assigned to
1012 VFP regs as the copy might use an even numbered VFP reg that already
1013 holds part of a structure. */
1014 for(i = 0; i < NB_CLASSES; i++) {
1015 for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
1016 vpushv(pplan->sval);
1017 pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1018 switch(i) {
1019 case STACK_CLASS:
1020 case CORE_STRUCT_CLASS:
1021 case VFP_STRUCT_CLASS:
1022 if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1023 size = type_size(&pplan->sval->type, &align);
1024 /* align to stack align size */
1025 size = (size + 3) & ~3;
1026 if (i == STACK_CLASS && pplan->prev)
1027 size += pplan->start - pplan->prev->end; /* Add padding if any */
1028 /* allocate the necessary size on stack */
1029 gadd_sp(-size);
1030 /* generate structure store */
1031 r = get_reg(RC_INT);
1032 o(0xE1A0000D|(intr(r)<<12)); /* mov r, sp */
1033 vset(&vtop->type, r | VT_LVAL, 0);
1034 vswap();
1035 vstore(); /* memcpy to current sp */
1036 /* Homogeneous float aggregates are loaded into VFP registers
1037 immediately since there is no way of loading data in multiple
1038 non-consecutive VFP registers as is done for other
1039 structures (see the use of todo). */
1040 if (i == VFP_STRUCT_CLASS) {
1041 int first = pplan->start, nb = pplan->end - first + 1;
1042 /* vpop.32 {pplan->start, ..., pplan->end} */
1043 o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1044 /* No need to write the register used to a SValue since VFP regs
1045 cannot be used for gcall_or_jmp */
1047 } else {
1048 if (is_float(pplan->sval->type.t)) {
1049 #ifdef TCC_ARM_VFP
1050 r = vfpr(gv(RC_FLOAT)) << 12;
1051 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1052 size = 4;
1053 else {
1054 size = 8;
1055 r |= 0x101; /* vpush.32 -> vpush.64 */
1057 o(0xED2D0A01 + r); /* vpush */
1058 #else
1059 r = fpr(gv(RC_FLOAT)) << 12;
1060 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1061 size = 4;
1062 else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1063 size = 8;
1064 else
1065 size = LDOUBLE_SIZE;
1067 if (size == 12)
1068 r |= 0x400000;
1069 else if(size == 8)
1070 r|=0x8000;
1072 o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1073 #endif
1074 } else {
1075 /* simple type (currently always same size) */
1076 /* XXX: implicit cast ? */
1077 size=4;
1078 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1079 lexpand_nr();
1080 size = 8;
1081 r = gv(RC_INT);
1082 o(0xE52D0004|(intr(r)<<12)); /* push r */
1083 vtop--;
1085 r = gv(RC_INT);
1086 o(0xE52D0004|(intr(r)<<12)); /* push r */
1088 if (i == STACK_CLASS && pplan->prev)
1089 gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1091 break;
1093 case VFP_CLASS:
1094 gv(regmask(TREG_F0 + (pplan->start >> 1)));
1095 if (pplan->start & 1) { /* Must be in upper part of double register */
1096 o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1097 vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1099 break;
1101 case CORE_CLASS:
1102 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1103 lexpand_nr();
1104 gv(regmask(pplan->end));
1105 pplan->sval->r2 = vtop->r;
1106 vtop--;
1108 gv(regmask(pplan->start));
1109 /* Mark register as used so that gcall_or_jmp uses another one
1110 (regs >=4 are free as never used to pass parameters) */
1111 pplan->sval->r = vtop->r;
1112 break;
1114 vtop--;
1118 /* Manually free remaining registers since next parameters are loaded
1119 * manually, without the help of gv(int). */
1120 save_regs(nb_args);
1122 if(todo) {
1123 o(0xE8BD0000|todo); /* pop {todo} */
1124 for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1125 int r;
1126 pplan->sval->r = pplan->start;
1127 /* TODO: why adding fake param */
1128 for (r = pplan->start + 1; r <= pplan->end; r++) {
1129 if (todo & (1 << r)) {
1130 nb_extra_sval++;
1131 vpushi(0);
1132 vtop->r = r;
1137 return nb_extra_sval;
1140 /* Generate function call. The function address is pushed first, then
1141 all the parameters in call order. This functions pops all the
1142 parameters and the function address. */
1143 void gfunc_call(int nb_args)
1145 int r, args_size;
1146 int variadic;
1147 int todo;
1148 struct plan plan;
1150 variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1151 /* cpu flags cannot be preserved if other instructions are generated. Also avoid leaving
1152 VT_JMP anywhere except on the top of the stack because it would complicate
1153 the code generator. */
1154 r = vtop->r & VT_VALMASK;
1155 if (r == VT_CMP || (r & ~1) == VT_JMP)
1156 gv(RC_INT);
1158 args_size = assign_regs(nb_args, variadic, &plan, &todo);
1160 #ifdef TCC_ARM_EABI
1161 if (args_size & 7) { /* Stack must be 8-byte aligned at function call for EABI */
1162 args_size = (args_size + 7) & ~7;
1163 o(0xE24DD004); /* sub sp, sp, #4 */
1165 #endif
1167 nb_args += copy_params(nb_args, &plan, todo);
1168 tcc_free(plan.pplans);
1170 /* Move fct SValue on top as required by gcall_or_jmp */
1171 vrotb(nb_args + 1);
1172 gcall_or_jmp(0);
1173 if (args_size)
1174 gadd_sp(args_size); /* pop all parameters passed on the stack */
1175 #ifdef TCC_ARM_EABI
1176 #ifdef TCC_ARM_VFP
1177 #ifdef TCC_ARM_HARDFLOAT
1178 if(variadic && is_float(vtop->type.ref->type.t)) {
1179 #else
1180 if(is_float(vtop->type.ref->type.t)) {
1181 #endif
1182 if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1183 o(0xEE000A10); /*vmov s0, r0 */
1184 } else {
1185 o(0xEE000B10); /* vmov.32 d0[0], r0 */
1186 o(0xEE201B10); /* vmov.32 d0[1], r1 */
1189 #endif
1190 #endif
1191 vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1192 leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1195 /* generate function prolog of type 't' */
1196 void gfunc_prolog(CType *func_type)
1198 Sym *sym,*sym2;
1199 int n,nf,size,align, variadic, struct_ret = 0;
1200 #ifdef TCC_ARM_HARDFLOAT
1201 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1202 #endif
1204 sym = func_type->ref;
1205 func_vt = sym->type;
1207 n = nf = 0;
1208 variadic = (func_type->ref->c == FUNC_ELLIPSIS);
1209 if((func_vt.t & VT_BTYPE) == VT_STRUCT
1210 #ifdef TCC_ARM_HARDFLOAT
1211 && (variadic || !is_hgen_float_aggr(&func_vt))
1212 #endif
1213 && type_size(&func_vt,&align) > 4)
1215 n++;
1216 struct_ret = 1;
1217 func_vc = 12; /* Offset from fp of the place to store the result */
1219 for(sym2=sym->next;sym2 && (n<4 || nf<16);sym2=sym2->next) {
1220 size = type_size(&sym2->type, &align);
1221 #ifdef TCC_ARM_HARDFLOAT
1222 if (!variadic && (is_float(sym2->type.t)
1223 || is_hgen_float_aggr(&sym2->type))) {
1224 int tmpnf = assign_vfpreg(&avregs, align, size);
1225 tmpnf += (size + 3) / 4;
1226 nf = (tmpnf > nf) ? tmpnf : nf;
1227 } else
1228 #endif
1229 if (n < 4)
1230 n += (size + 3) / 4;
1232 o(0xE1A0C00D); /* mov ip,sp */
1233 if(variadic)
1234 n=4;
1235 if(n) {
1236 if(n>4)
1237 n=4;
1238 #ifdef TCC_ARM_EABI
1239 n=(n+1)&-2;
1240 #endif
1241 o(0xE92D0000|((1<<n)-1)); /* save r0-r3 on stack if needed */
1243 if (nf) {
1244 if (nf>16)
1245 nf=16;
1246 nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1247 o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1249 o(0xE92D5800); /* save fp, ip, lr */
1250 o(0xE1A0B00D); /* mov fp, sp */
1251 func_sub_sp_offset = ind;
1252 o(0xE1A00000); /* nop, leave space for stack adjustment in epilogue */
1254 int addr, pn = struct_ret, sn = 0; /* pn=core, sn=stack */
1256 #ifdef TCC_ARM_HARDFLOAT
1257 func_vc += nf * 4;
1258 avregs = AVAIL_REGS_INITIALIZER;
1259 #endif
1260 while ((sym = sym->next)) {
1261 CType *type;
1262 type = &sym->type;
1263 size = type_size(type, &align);
1264 size = (size + 3) >> 2;
1265 align = (align + 3) & ~3;
1266 #ifdef TCC_ARM_HARDFLOAT
1267 if (!variadic && (is_float(sym->type.t)
1268 || is_hgen_float_aggr(&sym->type))) {
1269 int fpn = assign_vfpreg(&avregs, align, size << 2);
1270 if (fpn >= 0) {
1271 addr = fpn * 4;
1272 } else
1273 goto from_stack;
1274 } else
1275 #endif
1276 if (pn < 4) {
1277 #ifdef TCC_ARM_EABI
1278 pn = (pn + (align-1)/4) & -(align/4);
1279 #endif
1280 addr = (nf + pn) * 4;
1281 pn += size;
1282 if (!sn && pn > 4)
1283 sn = (pn - 4);
1284 } else {
1285 #ifdef TCC_ARM_HARDFLOAT
1286 from_stack:
1287 #endif
1288 #ifdef TCC_ARM_EABI
1289 sn = (sn + (align-1)/4) & -(align/4);
1290 #endif
1291 addr = (n + nf + sn) * 4;
1292 sn += size;
1294 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t), addr+12);
1297 last_itod_magic=0;
1298 leaffunc = 1;
1299 loc = 0;
1302 /* generate function epilog */
1303 void gfunc_epilog(void)
1305 uint32_t x;
1306 int diff;
1307 #ifdef TCC_ARM_EABI
1308 /* Useless but harmless copy of the float result into main register(s) in case
1309 of a variadic function in the hardfloat variant */
1310 if(is_float(func_vt.t)) {
1311 if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1312 o(0xEE100A10); /* fmrs r0, s0 */
1313 else {
1314 o(0xEE100B10); /* fmrdl r0, d0 */
1315 o(0xEE301B10); /* fmrdh r1, d0 */
1318 #endif
1319 o(0xE89BA800); /* restore fp, sp, pc */
1320 diff = (-loc + 3) & -4;
1321 #ifdef TCC_ARM_EABI
1322 if(!leaffunc)
1323 diff = ((diff + 11) & -8) - 4;
1324 #endif
1325 if(diff > 0) {
1326 x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1327 if(x)
1328 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1329 else {
1330 int addr;
1331 addr=ind;
1332 o(0xE59FC004); /* ldr ip,[pc+4] */
1333 o(0xE04BD00C); /* sub sp,fp,ip */
1334 o(0xE1A0F00E); /* mov pc,lr */
1335 o(diff);
1336 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1341 /* generate a jump to a label */
1342 int gjmp(int t)
1344 int r;
1345 r=ind;
1346 o(0xE0000000|encbranch(r,t,1));
1347 return r;
1350 /* generate a jump to a fixed address */
1351 void gjmp_addr(int a)
1353 gjmp(a);
1356 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1357 int gtst(int inv, int t)
1359 int v, r;
1360 uint32_t op;
1361 v = vtop->r & VT_VALMASK;
1362 r=ind;
1363 if (v == VT_CMP) {
1364 op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1365 op|=encbranch(r,t,1);
1366 o(op);
1367 t=r;
1368 } else if (v == VT_JMP || v == VT_JMPI) {
1369 if ((v & 1) == inv) {
1370 if(!vtop->c.i)
1371 vtop->c.i=t;
1372 else {
1373 uint32_t *x;
1374 int p,lp;
1375 if(t) {
1376 p = vtop->c.i;
1377 do {
1378 p = decbranch(lp=p);
1379 } while(p);
1380 x = (uint32_t *)(cur_text_section->data + lp);
1381 *x &= 0xff000000;
1382 *x |= encbranch(lp,t,1);
1384 t = vtop->c.i;
1386 } else {
1387 t = gjmp(t);
1388 gsym(vtop->c.i);
1390 } else {
1391 if (is_float(vtop->type.t)) {
1392 r=gv(RC_FLOAT);
1393 #ifdef TCC_ARM_VFP
1394 o(0xEEB50A40|(vfpr(r)<<12)|T2CPR(vtop->type.t)); /* fcmpzX */
1395 o(0xEEF1FA10); /* fmstat */
1396 #else
1397 o(0xEE90F118|(fpr(r)<<16));
1398 #endif
1399 vtop->r = VT_CMP;
1400 vtop->c.i = TOK_NE;
1401 return gtst(inv, t);
1402 } else if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1403 /* constant jmp optimization */
1404 if ((vtop->c.i != 0) != inv)
1405 t = gjmp(t);
1406 } else {
1407 v = gv(RC_INT);
1408 o(0xE3300000|(intr(v)<<16));
1409 vtop->r = VT_CMP;
1410 vtop->c.i = TOK_NE;
1411 return gtst(inv, t);
1414 vtop--;
1415 return t;
1418 /* generate an integer binary operation */
1419 void gen_opi(int op)
1421 int c, func = 0;
1422 uint32_t opc = 0, r, fr;
1423 unsigned short retreg = REG_IRET;
1425 c=0;
1426 switch(op) {
1427 case '+':
1428 opc = 0x8;
1429 c=1;
1430 break;
1431 case TOK_ADDC1: /* add with carry generation */
1432 opc = 0x9;
1433 c=1;
1434 break;
1435 case '-':
1436 opc = 0x4;
1437 c=1;
1438 break;
1439 case TOK_SUBC1: /* sub with carry generation */
1440 opc = 0x5;
1441 c=1;
1442 break;
1443 case TOK_ADDC2: /* add with carry use */
1444 opc = 0xA;
1445 c=1;
1446 break;
1447 case TOK_SUBC2: /* sub with carry use */
1448 opc = 0xC;
1449 c=1;
1450 break;
1451 case '&':
1452 opc = 0x0;
1453 c=1;
1454 break;
1455 case '^':
1456 opc = 0x2;
1457 c=1;
1458 break;
1459 case '|':
1460 opc = 0x18;
1461 c=1;
1462 break;
1463 case '*':
1464 gv2(RC_INT, RC_INT);
1465 r = vtop[-1].r;
1466 fr = vtop[0].r;
1467 vtop--;
1468 o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1469 return;
1470 case TOK_SHL:
1471 opc = 0;
1472 c=2;
1473 break;
1474 case TOK_SHR:
1475 opc = 1;
1476 c=2;
1477 break;
1478 case TOK_SAR:
1479 opc = 2;
1480 c=2;
1481 break;
1482 case '/':
1483 case TOK_PDIV:
1484 func=TOK___divsi3;
1485 c=3;
1486 break;
1487 case TOK_UDIV:
1488 func=TOK___udivsi3;
1489 c=3;
1490 break;
1491 case '%':
1492 #ifdef TCC_ARM_EABI
1493 func=TOK___aeabi_idivmod;
1494 retreg=REG_LRET;
1495 #else
1496 func=TOK___modsi3;
1497 #endif
1498 c=3;
1499 break;
1500 case TOK_UMOD:
1501 #ifdef TCC_ARM_EABI
1502 func=TOK___aeabi_uidivmod;
1503 retreg=REG_LRET;
1504 #else
1505 func=TOK___umodsi3;
1506 #endif
1507 c=3;
1508 break;
1509 case TOK_UMULL:
1510 gv2(RC_INT, RC_INT);
1511 r=intr(vtop[-1].r2=get_reg(RC_INT));
1512 c=vtop[-1].r;
1513 vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1514 vtop--;
1515 o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1516 return;
1517 default:
1518 opc = 0x15;
1519 c=1;
1520 break;
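/* c selects how the operation is emitted: 1 = ARM data-processing
   instruction (opc is the ALU opcode), 2 = shift encoded in a MOV
   (opc is the shift type), 3 = call to a runtime helper function. */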
1522 switch(c) {
1523 case 1:
1524 if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1525 if(opc == 4 || opc == 5 || opc == 0xc) {
1526 vswap();
1527 opc|=2; // sub -> rsb
1530 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1531 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1532 gv(RC_INT);
1533 vswap();
1534 c=intr(gv(RC_INT));
1535 vswap();
1536 opc=0xE0000000|(opc<<20)|(c<<16);
1537 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1538 uint32_t x;
1539 x=stuff_const(opc|0x2000000,vtop->c.i);
1540 if(x) {
1541 r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1542 o(x|(r<<12));
1543 goto done;
1546 fr=intr(gv(RC_INT));
1547 r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1548 o(opc|(r<<12)|fr);
1549 done:
1550 vtop--;
1551 if (op >= TOK_ULT && op <= TOK_GT) {
1552 vtop->r = VT_CMP;
1553 vtop->c.i = op;
1555 break;
1556 case 2:
1557 opc=0xE1A00000|(opc<<5);
1558 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1559 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1560 gv(RC_INT);
1561 vswap();
1562 r=intr(gv(RC_INT));
1563 vswap();
1564 opc|=r;
1565 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1566 fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1567 c = vtop->c.i & 0x1f;
1568 o(opc|(c<<7)|(fr<<12));
1569 } else {
1570 fr=intr(gv(RC_INT));
1571 c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1572 o(opc|(c<<12)|(fr<<8)|0x10);
1574 vtop--;
1575 break;
1576 case 3:
1577 vpush_global_sym(&func_old_type, func);
1578 vrott(3);
1579 gfunc_call(2);
1580 vpushi(0);
1581 vtop->r = retreg;
1582 break;
1583 default:
1584 tcc_error("gen_opi %i unimplemented!",op);
1588 #ifdef TCC_ARM_VFP
1589 static int is_zero(int i)
1591 if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1592 return 0;
1593 if (vtop[i].type.t == VT_FLOAT)
1594 return (vtop[i].c.f == 0.f);
1595 else if (vtop[i].type.t == VT_DOUBLE)
1596 return (vtop[i].c.d == 0.0);
1597 return (vtop[i].c.ld == 0.l);
1600 /* generate a floating point operation 'v = t1 op t2' instruction. The
1601 * two operands are guaranteed to have the same floating point type */
1602 void gen_opf(int op)
1604 uint32_t x;
1605 int fneg=0,r;
1606 x=0xEE000A00|T2CPR(vtop->type.t);
1607 switch(op) {
1608 case '+':
1609 if(is_zero(-1))
1610 vswap();
1611 if(is_zero(0)) {
1612 vtop--;
1613 return;
1615 x|=0x300000;
1616 break;
1617 case '-':
1618 x|=0x300040;
1619 if(is_zero(0)) {
1620 vtop--;
1621 return;
1623 if(is_zero(-1)) {
1624 x|=0x810000; /* fsubX -> fnegX */
1625 vswap();
1626 vtop--;
1627 fneg=1;
1629 break;
1630 case '*':
1631 x|=0x200000;
1632 break;
1633 case '/':
1634 x|=0x800000;
1635 break;
1636 default:
1637 if(op < TOK_ULT || op > TOK_GT) {
1638 tcc_error("unknown fp op %x!",op);
1639 return;
1641 if(is_zero(-1)) {
1642 vswap();
1643 switch(op) {
1644 case TOK_LT: op=TOK_GT; break;
1645 case TOK_GE: op=TOK_ULE; break;
1646 case TOK_LE: op=TOK_GE; break;
1647 case TOK_GT: op=TOK_ULT; break;
1650 x|=0xB40040; /* fcmpX */
1651 if(op!=TOK_EQ && op!=TOK_NE)
1652 x|=0x80; /* fcmpX -> fcmpeX */
1653 if(is_zero(0)) {
1654 vtop--;
1655 o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1656 } else {
1657 x|=vfpr(gv(RC_FLOAT));
1658 vswap();
1659 o(x|(vfpr(gv(RC_FLOAT))<<12));
1660 vtop--;
1662 o(0xEEF1FA10); /* fmstat */
1664 switch(op) {
1665 case TOK_LE: op=TOK_ULE; break;
1666 case TOK_LT: op=TOK_ULT; break;
1667 case TOK_UGE: op=TOK_GE; break;
1668 case TOK_UGT: op=TOK_GT; break;
1671 vtop->r = VT_CMP;
1672 vtop->c.i = op;
1673 return;
1675 r=gv(RC_FLOAT);
1676 x|=vfpr(r);
1677 r=regmask(r);
1678 if(!fneg) {
1679 int r2;
1680 vswap();
1681 r2=gv(RC_FLOAT);
1682 x|=vfpr(r2)<<16;
1683 r|=regmask(r2);
1685 vtop->r=get_reg_ex(RC_FLOAT,r);
1686 if(!fneg)
1687 vtop--;
1688 o(x|(vfpr(vtop->r)<<12));
1691 #else
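/* FPA data-processing instructions can take one of eight floating-point
   constants (0, 1, 2, 3, 4, 5, 0.5, 10) as an immediate operand. Return 0
   if vtop is not such a constant, otherwise an encoding with bit 3 set;
   bit 4 indicates that the value had to be negated. */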
1692 static uint32_t is_fconst()
1694 long double f;
1695 uint32_t r;
1696 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1697 return 0;
1698 if (vtop->type.t == VT_FLOAT)
1699 f = vtop->c.f;
1700 else if (vtop->type.t == VT_DOUBLE)
1701 f = vtop->c.d;
1702 else
1703 f = vtop->c.ld;
1704 if(!ieee_finite(f))
1705 return 0;
1706 r=0x8;
1707 if(f<0.0) {
1708 r=0x18;
1709 f=-f;
1711 if(f==0.0)
1712 return r;
1713 if(f==1.0)
1714 return r|1;
1715 if(f==2.0)
1716 return r|2;
1717 if(f==3.0)
1718 return r|3;
1719 if(f==4.0)
1720 return r|4;
1721 if(f==5.0)
1722 return r|5;
1723 if(f==0.5)
1724 return r|6;
1725 if(f==10.0)
1726 return r|7;
1727 return 0;
1730 /* generate a floating point operation 'v = t1 op t2' instruction. The
1731 two operands are guaranteed to have the same floating point type */
1732 void gen_opf(int op)
1734 uint32_t x, r, r2, c1, c2;
1735 //fputs("gen_opf\n",stderr);
1736 vswap();
1737 c1 = is_fconst();
1738 vswap();
1739 c2 = is_fconst();
1740 x=0xEE000100;
1741 #if LDOUBLE_SIZE == 8
1742 if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1743 x|=0x80;
1744 #else
1745 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1746 x|=0x80;
1747 else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1748 x|=0x80000;
1749 #endif
1750 switch(op)
1752 case '+':
1753 if(!c2) {
1754 vswap();
1755 c2=c1;
1757 vswap();
1758 r=fpr(gv(RC_FLOAT));
1759 vswap();
1760 if(c2) {
1761 if(c2>0xf)
1762 x|=0x200000; // suf
1763 r2=c2&0xf;
1764 } else {
1765 r2=fpr(gv(RC_FLOAT));
1767 break;
1768 case '-':
1769 if(c2) {
1770 if(c2<=0xf)
1771 x|=0x200000; // suf
1772 r2=c2&0xf;
1773 vswap();
1774 r=fpr(gv(RC_FLOAT));
1775 vswap();
1776 } else if(c1 && c1<=0xf) {
1777 x|=0x300000; // rsf
1778 r2=c1;
1779 r=fpr(gv(RC_FLOAT));
1780 vswap();
1781 } else {
1782 x|=0x200000; // suf
1783 vswap();
1784 r=fpr(gv(RC_FLOAT));
1785 vswap();
1786 r2=fpr(gv(RC_FLOAT));
1788 break;
1789 case '*':
1790 if(!c2 || c2>0xf) {
1791 vswap();
1792 c2=c1;
1794 vswap();
1795 r=fpr(gv(RC_FLOAT));
1796 vswap();
1797 if(c2 && c2<=0xf)
1798 r2=c2;
1799 else
1800 r2=fpr(gv(RC_FLOAT));
1801 x|=0x100000; // muf
1802 break;
1803 case '/':
1804 if(c2 && c2<=0xf) {
1805 x|=0x400000; // dvf
1806 r2=c2;
1807 vswap();
1808 r=fpr(gv(RC_FLOAT));
1809 vswap();
1810 } else if(c1 && c1<=0xf) {
1811 x|=0x500000; // rdf
1812 r2=c1;
1813 r=fpr(gv(RC_FLOAT));
1814 vswap();
1815 } else {
1816 x|=0x400000; // dvf
1817 vswap();
1818 r=fpr(gv(RC_FLOAT));
1819 vswap();
1820 r2=fpr(gv(RC_FLOAT));
1822 break;
1823 default:
1824 if(op >= TOK_ULT && op <= TOK_GT) {
1825 x|=0xd0f110; // cmfe
1826 /* bug (intentional?) in the Linux FPU emulator:
1827 it doesn't set carry if equal */
1828 switch(op) {
1829 case TOK_ULT:
1830 case TOK_UGE:
1831 case TOK_ULE:
1832 case TOK_UGT:
1833 tcc_error("unsigned comparison on floats?");
1834 break;
1835 case TOK_LT:
1836 op=TOK_Nset;
1837 break;
1838 case TOK_LE:
1839 op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1840 break;
1841 case TOK_EQ:
1842 case TOK_NE:
1843 x&=~0x400000; // cmfe -> cmf
1844 break;
1846 if(c1 && !c2) {
1847 c2=c1;
1848 vswap();
1849 switch(op) {
1850 case TOK_Nset:
1851 op=TOK_GT;
1852 break;
1853 case TOK_GE:
1854 op=TOK_ULE;
1855 break;
1856 case TOK_ULE:
1857 op=TOK_GE;
1858 break;
1859 case TOK_GT:
1860 op=TOK_Nset;
1861 break;
1864 vswap();
1865 r=fpr(gv(RC_FLOAT));
1866 vswap();
1867 if(c2) {
1868 if(c2>0xf)
1869 x|=0x200000;
1870 r2=c2&0xf;
1871 } else {
1872 r2=fpr(gv(RC_FLOAT));
1874 vtop[-1].r = VT_CMP;
1875 vtop[-1].c.i = op;
1876 } else {
1877 tcc_error("unknown fp op %x!",op);
1878 return;
1881 if(vtop[-1].r == VT_CMP)
1882 c1=15;
1883 else {
1884 c1=vtop->r;
1885 if(r2&0x8)
1886 c1=vtop[-1].r;
1887 vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1888 c1=fpr(vtop[-1].r);
1890 vtop--;
1891 o(x|(r<<16)|(c1<<12)|r2);
1893 #endif
1895 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1896 and 'long long' cases. */
1897 ST_FUNC void gen_cvt_itof1(int t)
1899 uint32_t r, r2;
1900 int bt;
1901 bt=vtop->type.t & VT_BTYPE;
1902 if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1903 #ifndef TCC_ARM_VFP
1904 uint32_t dsize = 0;
1905 #endif
1906 r=intr(gv(RC_INT));
1907 #ifdef TCC_ARM_VFP
1908 r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1909 o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1910 r2|=r2<<12;
1911 if(!(vtop->type.t & VT_UNSIGNED))
1912 r2|=0x80; /* fuitoX -> fsituX */
1913 o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1914 #else
1915 r2=fpr(vtop->r=get_reg(RC_FLOAT));
1916 if((t & VT_BTYPE) != VT_FLOAT)
1917 dsize=0x80; /* flts -> fltd */
1918 o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
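/* The flts above treated the integer as signed; for an unsigned int whose
   sign bit was set, correct the result by conditionally adding 4294967296.0.
   last_itod_magic remembers where that constant literal was emitted so that
   later conversions can reload it with a pc-relative ldflts. */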
1919 if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1920 uint32_t off = 0;
1921 o(0xE3500000|(r<<12)); /* cmp */
1922 r=fpr(get_reg(RC_FLOAT));
1923 if(last_itod_magic) {
1924 off=ind+8-last_itod_magic;
1925 off/=4;
1926 if(off>255)
1927 off=0;
1929 o(0xBD1F0100|(r<<12)|off); /* ldflts */
1930 if(!off) {
1931 o(0xEA000000); /* b */
1932 last_itod_magic=ind;
1933 o(0x4F800000); /* 4294967296.0f */
1935 o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1937 #endif
1938 return;
1939 } else if(bt == VT_LLONG) {
1940 int func;
1941 CType *func_type = 0;
1942 if((t & VT_BTYPE) == VT_FLOAT) {
1943 func_type = &func_float_type;
1944 if(vtop->type.t & VT_UNSIGNED)
1945 func=TOK___floatundisf;
1946 else
1947 func=TOK___floatdisf;
1948 #if LDOUBLE_SIZE != 8
1949 } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1950 func_type = &func_ldouble_type;
1951 if(vtop->type.t & VT_UNSIGNED)
1952 func=TOK___floatundixf;
1953 else
1954 func=TOK___floatdixf;
1955 } else if((t & VT_BTYPE) == VT_DOUBLE) {
1956 #else
1957 } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1958 #endif
1959 func_type = &func_double_type;
1960 if(vtop->type.t & VT_UNSIGNED)
1961 func=TOK___floatundidf;
1962 else
1963 func=TOK___floatdidf;
1965 if(func_type) {
1966 vpush_global_sym(func_type, func);
1967 vswap();
1968 gfunc_call(1);
1969 vpushi(0);
1970 vtop->r=TREG_F0;
1971 return;
1974 tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1977 /* convert fp to int 't' type */
1978 void gen_cvt_ftoi(int t)
1980 uint32_t r, r2;
1981 int u, func = 0;
1982 u=t&VT_UNSIGNED;
1983 t&=VT_BTYPE;
1984 r2=vtop->type.t & VT_BTYPE;
1985 if(t==VT_INT) {
1986 #ifdef TCC_ARM_VFP
1987 r=vfpr(gv(RC_FLOAT));
1988 u=u?0:0x10000;
1989 o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
1990 r2=intr(vtop->r=get_reg(RC_INT));
1991 o(0xEE100A10|(r<<16)|(r2<<12));
1992 return;
1993 #else
1994 if(u) {
1995 if(r2 == VT_FLOAT)
1996 func=TOK___fixunssfsi;
1997 #if LDOUBLE_SIZE != 8
1998 else if(r2 == VT_LDOUBLE)
1999 func=TOK___fixunsxfsi;
2000 else if(r2 == VT_DOUBLE)
2001 #else
2002 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2003 #endif
2004 func=TOK___fixunsdfsi;
2005 } else {
2006 r=fpr(gv(RC_FLOAT));
2007 r2=intr(vtop->r=get_reg(RC_INT));
2008 o(0xEE100170|(r2<<12)|r);
2009 return;
2011 #endif
2012 } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2013 if(r2 == VT_FLOAT)
2014 func=TOK___fixsfdi;
2015 #if LDOUBLE_SIZE != 8
2016 else if(r2 == VT_LDOUBLE)
2017 func=TOK___fixxfdi;
2018 else if(r2 == VT_DOUBLE)
2019 #else
2020 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2021 #endif
2022 func=TOK___fixdfdi;
2024 if(func) {
2025 vpush_global_sym(&func_old_type, func);
2026 vswap();
2027 gfunc_call(1);
2028 vpushi(0);
2029 if(t == VT_LLONG)
2030 vtop->r2 = REG_LRET;
2031 vtop->r = REG_IRET;
2032 return;
2034 tcc_error("unimplemented gen_cvt_ftoi!");
2037 /* convert from one floating point type to another */
2038 void gen_cvt_ftof(int t)
2040 #ifdef TCC_ARM_VFP
2041 if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2042 uint32_t r = vfpr(gv(RC_FLOAT));
2043 o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2045 #else
2046 /* all we have to do on i386 and FPA ARM is to put the float in a register */
2047 gv(RC_FLOAT);
2048 #endif
2051 /* computed goto support */
2052 void ggoto(void)
2054 gcall_or_jmp(1);
2055 vtop--;
2058 /* Save the stack pointer onto the stack and return the location of its address */
2059 ST_FUNC void gen_vla_sp_save(int addr) {
2060 tcc_error("variable length arrays unsupported for this target");
2063 /* Restore the SP from a location on the stack */
2064 ST_FUNC void gen_vla_sp_restore(int addr) {
2065 tcc_error("variable length arrays unsupported for this target");
2068 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2069 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2070 tcc_error("variable length arrays unsupported for this target");
2073 /* end of ARM code generator */
2074 /*************************************************************/
2075 #endif
2076 /*************************************************************/