1 /*
2 * ARMv4 code generator for TCC
4 * Copyright (c) 2003 Daniel Glöckner
5 * Copyright (c) 2012 Thomas Preud'homme
7 * Based on i386-gen.c by Fabrice Bellard
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #ifdef TARGET_DEFS_ONLY
26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
27 #error "Currently TinyCC only supports float computation with VFP instructions"
28 #endif
30 /* number of available registers */
31 #ifdef TCC_ARM_VFP
32 #define NB_REGS 13
33 #else
34 #define NB_REGS 9
35 #endif
37 #ifndef TCC_ARM_VERSION
38 # define TCC_ARM_VERSION 5
39 #endif
41 /* a register can belong to several classes. The classes must be
42    sorted from more general to more precise (see gv2() code which makes
43    assumptions about it). */
44 #define RC_INT 0x0001 /* generic integer register */
45 #define RC_FLOAT 0x0002 /* generic float register */
46 #define RC_R0 0x0004
47 #define RC_R1 0x0008
48 #define RC_R2 0x0010
49 #define RC_R3 0x0020
50 #define RC_R12 0x0040
51 #define RC_F0 0x0080
52 #define RC_F1 0x0100
53 #define RC_F2 0x0200
54 #define RC_F3 0x0400
55 #ifdef TCC_ARM_VFP
56 #define RC_F4 0x0800
57 #define RC_F5 0x1000
58 #define RC_F6 0x2000
59 #define RC_F7 0x4000
60 #endif
61 #define RC_IRET RC_R0 /* function return: integer register */
62 #define RC_LRET RC_R1 /* function return: second integer register */
63 #define RC_FRET RC_F0 /* function return: float register */
65 /* pretty names for the registers */
66 enum {
67 TREG_R0 = 0,
68 TREG_R1,
69 TREG_R2,
70 TREG_R3,
71 TREG_R12,
72 TREG_F0,
73 TREG_F1,
74 TREG_F2,
75 TREG_F3,
76 #ifdef TCC_ARM_VFP
77 TREG_F4,
78 TREG_F5,
79 TREG_F6,
80 TREG_F7,
81 #endif
84 #ifdef TCC_ARM_VFP
85 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
86 #endif
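/* T2CPR(t) evaluates to 0x100 for anything but a single precision float;
   OR-ing it into a VFP opcode switches the instruction from its single
   precision form (coprocessor 10) to its double precision form
   (coprocessor 11). */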
88 /* return registers for function */
89 #define REG_IRET TREG_R0 /* single word int return register */
90 #define REG_LRET TREG_R1 /* second word return register (for long long) */
91 #define REG_FRET TREG_F0 /* float return register */
93 #ifdef TCC_ARM_EABI
94 #define TOK___divdi3 TOK___aeabi_ldivmod
95 #define TOK___moddi3 TOK___aeabi_ldivmod
96 #define TOK___udivdi3 TOK___aeabi_uldivmod
97 #define TOK___umoddi3 TOK___aeabi_uldivmod
98 #endif
100 /* defined if function parameters must be evaluated in reverse order */
101 #define INVERT_FUNC_PARAMS
103 /* defined if structures are passed as pointers. Otherwise structures
104 are directly pushed on stack. */
105 /* #define FUNC_STRUCT_PARAM_AS_PTR */
107 /* pointer size, in bytes */
108 #define PTR_SIZE 4
110 /* long double size and alignment, in bytes */
111 #ifdef TCC_ARM_VFP
112 #define LDOUBLE_SIZE 8
113 #endif
115 #ifndef LDOUBLE_SIZE
116 #define LDOUBLE_SIZE 8
117 #endif
119 #ifdef TCC_ARM_EABI
120 #define LDOUBLE_ALIGN 8
121 #else
122 #define LDOUBLE_ALIGN 4
123 #endif
125 /* maximum alignment (for aligned attribute support) */
126 #define MAX_ALIGN 8
128 #define CHAR_IS_UNSIGNED
130 /******************************************************/
131 /* ELF defines */
133 #define EM_TCC_TARGET EM_ARM
135 /* relocation type for 32 bit data relocation */
136 #define R_DATA_32 R_ARM_ABS32
137 #define R_DATA_PTR R_ARM_ABS32
138 #define R_JMP_SLOT R_ARM_JUMP_SLOT
139 #define R_COPY R_ARM_COPY
141 #define ELF_START_ADDR 0x00008000
142 #define ELF_PAGE_SIZE 0x1000
144 /******************************************************/
145 #else /* ! TARGET_DEFS_ONLY */
146 /******************************************************/
147 #include "tcc.h"
149 ST_DATA const int reg_classes[NB_REGS] = {
150 /* r0 */ RC_INT | RC_R0,
151 /* r1 */ RC_INT | RC_R1,
152 /* r2 */ RC_INT | RC_R2,
153 /* r3 */ RC_INT | RC_R3,
154 /* r12 */ RC_INT | RC_R12,
155 /* f0 */ RC_FLOAT | RC_F0,
156 /* f1 */ RC_FLOAT | RC_F1,
157 /* f2 */ RC_FLOAT | RC_F2,
158 /* f3 */ RC_FLOAT | RC_F3,
159 #ifdef TCC_ARM_VFP
160 /* d4/s8 */ RC_FLOAT | RC_F4,
161 /* d5/s10 */ RC_FLOAT | RC_F5,
162 /* d6/s12 */ RC_FLOAT | RC_F6,
163 /* d7/s14 */ RC_FLOAT | RC_F7,
164 #endif
167 static int func_sub_sp_offset, last_itod_magic;
168 static int leaffunc;
170 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
171 static CType float_type, double_type, func_float_type, func_double_type;
172 ST_FUNC void arm_init_types(void)
174 float_type.t = VT_FLOAT;
175 double_type.t = VT_DOUBLE;
176 func_float_type.t = VT_FUNC;
177 func_float_type.ref = sym_push(SYM_FIELD, &float_type, FUNC_CDECL, FUNC_OLD);
178 func_double_type.t = VT_FUNC;
179 func_double_type.ref = sym_push(SYM_FIELD, &double_type, FUNC_CDECL, FUNC_OLD);
181 #else
182 #define func_float_type func_old_type
183 #define func_double_type func_old_type
184 #define func_ldouble_type func_old_type
185 ST_FUNC void arm_init_types(void) {}
186 #endif
188 static int two2mask(int a,int b) {
189 return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
192 static int regmask(int r) {
193 return reg_classes[r]&~(RC_INT|RC_FLOAT);
196 /******************************************************/
198 void o(uint32_t i)
200   /* this is a good place to start adding big-endian support */
201 int ind1;
203 ind1 = ind + 4;
204 if (!cur_text_section)
205     tcc_error("compiler error! This happens, for example, if the compiler\n"
206               "can't evaluate constant expressions outside of a function.");
207 if (ind1 > cur_text_section->data_allocated)
208 section_realloc(cur_text_section, ind1);
209 cur_text_section->data[ind++] = i&255;
210 i>>=8;
211 cur_text_section->data[ind++] = i&255;
212 i>>=8;
213 cur_text_section->data[ind++] = i&255;
214 i>>=8;
215 cur_text_section->data[ind++] = i;
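/* Illustration: instructions are emitted least significant byte first, so the
   word 0xE1A0B00D (mov fp, sp) ends up in the text section as the byte
   sequence 0D B0 A0 E1. */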
218 static uint32_t stuff_const(uint32_t op, uint32_t c)
220 int try_neg=0;
221 uint32_t nc = 0, negop = 0;
223 switch(op&0x1F00000)
225 case 0x800000: //add
226 case 0x400000: //sub
227 try_neg=1;
228 negop=op^0xC00000;
229 nc=-c;
230 break;
231 case 0x1A00000: //mov
232 case 0x1E00000: //mvn
233 try_neg=1;
234 negop=op^0x400000;
235 nc=~c;
236 break;
237 case 0x200000: //xor
238 if(c==~0)
239 return (op&0xF010F000)|((op>>16)&0xF)|0x1E00000;
240 break;
241 case 0x0: //and
242 if(c==~0)
243 return (op&0xF010F000)|((op>>16)&0xF)|0x1A00000;
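      /* fall through: an AND with c can also be tried as a BIC with ~c */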
244 case 0x1C00000: //bic
245 try_neg=1;
246 negop=op^0x1C00000;
247 nc=~c;
248 break;
249 case 0x1800000: //orr
250 if(c==~0)
251 return (op&0xFFF0FFFF)|0x1E00000;
252 break;
254 do {
255 uint32_t m;
256 int i;
257 if(c<256) /* catch undefined <<32 */
258 return op|c;
259 for(i=2;i<32;i+=2) {
260 m=(0xff>>i)|(0xff<<(32-i));
261 if(!(c&~m))
262 return op|(i<<7)|(c<<i)|(c>>(32-i));
264 op=negop;
265 c=nc;
266 } while(try_neg--);
267 return 0;
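/* An ARM data-processing immediate is an 8-bit value rotated right by an even
   amount; the rotation divided by two is stored in bits 8-11 of the
   instruction. For instance 0x3FC00 is 0xFF ror 22 and is accepted by
   stuff_const(), while 0x101 spans more than 8 bits and makes it return 0,
   forcing callers to fall back on the multi-instruction path below. A minimal
   standalone sketch of the same encodability test (not used by the compiler,
   for illustration only): */
#if 0
#include <stdint.h>
static int arm_imm_encodable(uint32_t c)
{
  int rot;
  if (c < 256)                        /* rotation 0 */
    return 1;
  for (rot = 2; rot < 32; rot += 2)   /* rotating left undoes an imm8 ror rot */
    if (((c << rot) | (c >> (32 - rot))) < 256)
      return 1;
  return 0;
}
/* arm_imm_encodable(0x3FC00) == 1, arm_imm_encodable(0x101) == 0 */
#endif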
271 //only add,sub
272 void stuff_const_harder(uint32_t op, uint32_t v) {
273 uint32_t x;
274 x=stuff_const(op,v);
275 if(x)
276 o(x);
277 else {
278 uint32_t a[16], nv, no, o2, n2;
279 int i,j,k;
280 a[0]=0xff;
281 o2=(op&0xfff0ffff)|((op&0xf000)<<4);;
282 for(i=1;i<16;i++)
283 a[i]=(a[i-1]>>2)|(a[i-1]<<30);
284 for(i=0;i<12;i++)
285 for(j=i<4?i+12:15;j>=i+4;j--)
286 if((v&(a[i]|a[j]))==v) {
287 o(stuff_const(op,v&a[i]));
288 o(stuff_const(o2,v&a[j]));
289 return;
291 no=op^0xC00000;
292 n2=o2^0xC00000;
293 nv=-v;
294 for(i=0;i<12;i++)
295 for(j=i<4?i+12:15;j>=i+4;j--)
296 if((nv&(a[i]|a[j]))==nv) {
297 o(stuff_const(no,nv&a[i]));
298 o(stuff_const(n2,nv&a[j]));
299 return;
301 for(i=0;i<8;i++)
302 for(j=i+4;j<12;j++)
303 for(k=i<4?i+12:15;k>=j+4;k--)
304 if((v&(a[i]|a[j]|a[k]))==v) {
305 o(stuff_const(op,v&a[i]));
306 o(stuff_const(o2,v&a[j]));
307 o(stuff_const(o2,v&a[k]));
308 return;
310 no=op^0xC00000;
311 nv=-v;
312 for(i=0;i<8;i++)
313 for(j=i+4;j<12;j++)
314 for(k=i<4?i+12:15;k>=j+4;k--)
315 if((nv&(a[i]|a[j]|a[k]))==nv) {
316 o(stuff_const(no,nv&a[i]));
317 o(stuff_const(n2,nv&a[j]));
318 o(stuff_const(n2,nv&a[k]));
319 return;
321 o(stuff_const(op,v&a[0]));
322 o(stuff_const(o2,v&a[4]));
323 o(stuff_const(o2,v&a[8]));
324 o(stuff_const(o2,v&a[12]));
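/* Worked example: 0x10001 is not a valid rotated 8-bit immediate, but it
   splits into two pieces that both are (0x1 and 0x10000), so for
   "add rD, rN, #0x10001" the pair search above emits
       add rD, rN, #1
       add rD, rD, #0x10000
   (o2 is op with the destination substituted as first operand so that the
   partial sums chain). In the worst case four instructions are emitted. */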
328 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
330 addr-=pos+8;
331 addr/=4;
332 if(addr>=0x1000000 || addr<-0x1000000) {
333 if(fail)
334 tcc_error("FIXME: function bigger than 32MB");
335 return 0;
337 return 0x0A000000|(addr&0xffffff);
340 int decbranch(int pos)
342 int x;
343 x=*(uint32_t *)(cur_text_section->data + pos);
344 x&=0x00ffffff;
345 if(x&0x800000)
346 x-=0x1000000;
347 return x*4+pos+8;
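/* Worked example: a branch at offset 0x100 of the text section targeting
   offset 0x200 encodes the word displacement (0x200 - (0x100 + 8)) / 4 = 0x3e,
   so encbranch() returns 0x0A00003E and the caller ORs in the condition
   (0xE0000000 for "always"), giving 0xEA00003E. decbranch() reverses this:
   0x3e * 4 + 0x100 + 8 = 0x200. */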
350 /* output a symbol and patch all calls to it */
351 void gsym_addr(int t, int a)
353 uint32_t *x;
354 int lt;
355 while(t) {
356 x=(uint32_t *)(cur_text_section->data + t);
357 t=decbranch(lt=t);
358 if(a==lt+4)
359 *x=0xE1A00000; // nop
360 else {
361 *x &= 0xff000000;
362 *x |= encbranch(lt,a,1);
367 void gsym(int t)
369 gsym_addr(t, ind);
372 #ifdef TCC_ARM_VFP
373 static uint32_t vfpr(int r)
375 if(r<TREG_F0 || r>TREG_F7)
376     tcc_error("compiler error! register %i is not a VFP register",r);
377 return r-5;
379 #else
380 static uint32_t fpr(int r)
382 if(r<TREG_F0 || r>TREG_F3)
383     tcc_error("compiler error! register %i is not an FPA register",r);
384 return r-5;
386 #endif
388 static uint32_t intr(int r)
390 if(r==4)
391 return 12;
392 if((r<0 || r>4) && r!=14)
393     tcc_error("compiler error! register %i is not an integer register",r);
394 return r;
397 static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
399 if(*off>maxoff || *off&((1<<shift)-1)) {
400 uint32_t x, y;
401 x=0xE280E000;
402 if(*sgn)
403 x=0xE240E000;
404 x|=(*base)<<16;
405 *base=14; // lr
406 y=stuff_const(x,*off&~maxoff);
407 if(y) {
408 o(y);
409 *off&=maxoff;
410 return;
412 y=stuff_const(x,(*off+maxoff)&~maxoff);
413 if(y) {
414 o(y);
415 *sgn=!*sgn;
416 *off=((*off+maxoff)&~maxoff)-*off;
417 return;
419 stuff_const_harder(x,*off&~maxoff);
420 *off&=maxoff;
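/* Worked example: VFP loads/stores only accept an offset of at most 1020, in
   multiples of 4 (maxoff=1020, shift=2). For a local at fp-1060, calcaddr()
   therefore emits "sub lr, fp, #1024" and rewrites the access to use base lr
   with the remaining (still negative) offset 36, which the caller can then
   encode directly. */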
424 static uint32_t mapcc(int cc)
426 switch(cc)
428 case TOK_ULT:
429 return 0x30000000; /* CC/LO */
430 case TOK_UGE:
431 return 0x20000000; /* CS/HS */
432 case TOK_EQ:
433 return 0x00000000; /* EQ */
434 case TOK_NE:
435 return 0x10000000; /* NE */
436 case TOK_ULE:
437 return 0x90000000; /* LS */
438 case TOK_UGT:
439 return 0x80000000; /* HI */
440 case TOK_Nset:
441 return 0x40000000; /* MI */
442 case TOK_Nclear:
443 return 0x50000000; /* PL */
444 case TOK_LT:
445 return 0xB0000000; /* LT */
446 case TOK_GE:
447 return 0xA0000000; /* GE */
448 case TOK_LE:
449 return 0xD0000000; /* LE */
450 case TOK_GT:
451 return 0xC0000000; /* GT */
453 tcc_error("unexpected condition code");
454 return 0xE0000000; /* AL */
457 static int negcc(int cc)
459 switch(cc)
461 case TOK_ULT:
462 return TOK_UGE;
463 case TOK_UGE:
464 return TOK_ULT;
465 case TOK_EQ:
466 return TOK_NE;
467 case TOK_NE:
468 return TOK_EQ;
469 case TOK_ULE:
470 return TOK_UGT;
471 case TOK_UGT:
472 return TOK_ULE;
473 case TOK_Nset:
474 return TOK_Nclear;
475 case TOK_Nclear:
476 return TOK_Nset;
477 case TOK_LT:
478 return TOK_GE;
479 case TOK_GE:
480 return TOK_LT;
481 case TOK_LE:
482 return TOK_GT;
483 case TOK_GT:
484 return TOK_LE;
486 tcc_error("unexpected condition code");
487 return TOK_NE;
490 /* load 'r' from value 'sv' */
491 void load(int r, SValue *sv)
493 int v, ft, fc, fr, sign;
494 uint32_t op;
495 SValue v1;
497 fr = sv->r;
498 ft = sv->type.t;
499 fc = sv->c.ul;
501 if(fc>=0)
502 sign=0;
503 else {
504 sign=1;
505 fc=-fc;
508 v = fr & VT_VALMASK;
509 if (fr & VT_LVAL) {
510 uint32_t base = 0xB; // fp
511 if(v == VT_LLOCAL) {
512 v1.type.t = VT_PTR;
513 v1.r = VT_LOCAL | VT_LVAL;
514 v1.c.ul = sv->c.ul;
515 load(base=14 /* lr */, &v1);
516 fc=sign=0;
517 v=VT_LOCAL;
518 } else if(v == VT_CONST) {
519 v1.type.t = VT_PTR;
520 v1.r = fr&~VT_LVAL;
521 v1.c.ul = sv->c.ul;
522 v1.sym=sv->sym;
523 load(base=14, &v1);
524 fc=sign=0;
525 v=VT_LOCAL;
526 } else if(v < VT_CONST) {
527 base=intr(v);
528 fc=sign=0;
529 v=VT_LOCAL;
531 if(v == VT_LOCAL) {
532 if(is_float(ft)) {
533 calcaddr(&base,&fc,&sign,1020,2);
534 #ifdef TCC_ARM_VFP
535 op=0xED100A00; /* flds */
536 if(!sign)
537 op|=0x800000;
538 if ((ft & VT_BTYPE) != VT_FLOAT)
539 op|=0x100; /* flds -> fldd */
540 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
541 #else
542 op=0xED100100;
543 if(!sign)
544 op|=0x800000;
545 #if LDOUBLE_SIZE == 8
546 if ((ft & VT_BTYPE) != VT_FLOAT)
547 op|=0x8000;
548 #else
549 if ((ft & VT_BTYPE) == VT_DOUBLE)
550 op|=0x8000;
551 else if ((ft & VT_BTYPE) == VT_LDOUBLE)
552 op|=0x400000;
553 #endif
554 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
555 #endif
556 } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
557 || (ft & VT_BTYPE) == VT_SHORT) {
558 calcaddr(&base,&fc,&sign,255,0);
559 op=0xE1500090;
560 if ((ft & VT_BTYPE) == VT_SHORT)
561 op|=0x20;
562 if ((ft & VT_UNSIGNED) == 0)
563 op|=0x40;
564 if(!sign)
565 op|=0x800000;
566 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
567 } else {
568 calcaddr(&base,&fc,&sign,4095,0);
569 op=0xE5100000;
570 if(!sign)
571 op|=0x800000;
572 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
573 op|=0x400000;
574 o(op|(intr(r)<<12)|fc|(base<<16));
576 return;
578 } else {
579 if (v == VT_CONST) {
580 op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
581 if (fr & VT_SYM || !op) {
582 o(0xE59F0000|(intr(r)<<12));
583 o(0xEA000000);
584 if(fr & VT_SYM)
585 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
586 o(sv->c.ul);
587 } else
588 o(op);
589 return;
590 } else if (v == VT_LOCAL) {
591 op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
592 if (fr & VT_SYM || !op) {
593 o(0xE59F0000|(intr(r)<<12));
594 o(0xEA000000);
595 if(fr & VT_SYM) // needed ?
596 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
597 o(sv->c.ul);
598 o(0xE08B0000|(intr(r)<<12)|intr(r));
599 } else
600 o(op);
601 return;
602 } else if(v == VT_CMP) {
603 o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
604 o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
605 return;
606 } else if (v == VT_JMP || v == VT_JMPI) {
607 int t;
608 t = v & 1;
609 o(0xE3A00000|(intr(r)<<12)|t);
610 o(0xEA000000);
611 gsym(sv->c.ul);
612 o(0xE3A00000|(intr(r)<<12)|(t^1));
613 return;
614 } else if (v < VT_CONST) {
615 if(is_float(ft))
616 #ifdef TCC_ARM_VFP
617 o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
618 #else
619 o(0xEE008180|(fpr(r)<<12)|fpr(v));
620 #endif
621 else
622 o(0xE1A00000|(intr(r)<<12)|intr(v));
623 return;
626 tcc_error("load unimplemented!");
629 /* store register 'r' in lvalue 'v' */
630 void store(int r, SValue *sv)
632 SValue v1;
633 int v, ft, fc, fr, sign;
634 uint32_t op;
636 fr = sv->r;
637 ft = sv->type.t;
638 fc = sv->c.ul;
640 if(fc>=0)
641 sign=0;
642 else {
643 sign=1;
644 fc=-fc;
647 v = fr & VT_VALMASK;
648 if (fr & VT_LVAL || fr == VT_LOCAL) {
649 uint32_t base = 0xb;
650 if(v < VT_CONST) {
651 base=intr(v);
652 v=VT_LOCAL;
653 fc=sign=0;
654 } else if(v == VT_CONST) {
655 v1.type.t = ft;
656 v1.r = fr&~VT_LVAL;
657 v1.c.ul = sv->c.ul;
658 v1.sym=sv->sym;
659 load(base=14, &v1);
660 fc=sign=0;
661 v=VT_LOCAL;
663 if(v == VT_LOCAL) {
664 if(is_float(ft)) {
665 calcaddr(&base,&fc,&sign,1020,2);
666 #ifdef TCC_ARM_VFP
667 op=0xED000A00; /* fsts */
668 if(!sign)
669 op|=0x800000;
670 if ((ft & VT_BTYPE) != VT_FLOAT)
671 op|=0x100; /* fsts -> fstd */
672 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
673 #else
674 op=0xED000100;
675 if(!sign)
676 op|=0x800000;
677 #if LDOUBLE_SIZE == 8
678 if ((ft & VT_BTYPE) != VT_FLOAT)
679 op|=0x8000;
680 #else
681 if ((ft & VT_BTYPE) == VT_DOUBLE)
682 op|=0x8000;
683 if ((ft & VT_BTYPE) == VT_LDOUBLE)
684 op|=0x400000;
685 #endif
686 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
687 #endif
688 return;
689 } else if((ft & VT_BTYPE) == VT_SHORT) {
690 calcaddr(&base,&fc,&sign,255,0);
691 op=0xE14000B0;
692 if(!sign)
693 op|=0x800000;
694 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
695 } else {
696 calcaddr(&base,&fc,&sign,4095,0);
697 op=0xE5000000;
698 if(!sign)
699 op|=0x800000;
700 if ((ft & VT_BTYPE) == VT_BYTE || (ft & VT_BTYPE) == VT_BOOL)
701 op|=0x400000;
702 o(op|(intr(r)<<12)|fc|(base<<16));
704 return;
707 tcc_error("store unimplemented");
710 static void gadd_sp(int val)
712 stuff_const_harder(0xE28DD000,val);
715 /* 'is_jmp' is '1' if it is a jump */
716 static void gcall_or_jmp(int is_jmp)
718 int r;
719 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
720 uint32_t x;
721 /* constant case */
722 x=encbranch(ind,ind+vtop->c.ul,0);
723 if(x) {
724 if (vtop->r & VT_SYM) {
725 /* relocation case */
726 greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
727 } else
728 put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
729 o(x|(is_jmp?0xE0000000:0xE1000000));
730 } else {
731 if(!is_jmp)
732 o(0xE28FE004); // add lr,pc,#4
733 o(0xE51FF004); // ldr pc,[pc,#-4]
734 if (vtop->r & VT_SYM)
735 greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
736 o(vtop->c.ul);
738 } else {
739 /* otherwise, indirect call */
740 r = gv(RC_INT);
741 if(!is_jmp)
742 o(0xE1A0E00F); // mov lr,pc
743 o(0xE1A0F000|intr(r)); // mov pc,r
747 /* Return whether a structure is a homogeneous float aggregate or not.
748    The answer is true if all the elements of the structure are of the same
749    primitive float type and there are at most 4 elements.
751 type: the type corresponding to the structure to be tested */
752 static int is_hgen_float_aggr(CType *type)
754 if ((type->t & VT_BTYPE) == VT_STRUCT) {
755 struct Sym *ref;
756 int btype, nb_fields = 0;
758 ref = type->ref->next;
759 btype = ref->type.t & VT_BTYPE;
760 if (btype == VT_FLOAT || btype == VT_DOUBLE) {
761 for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
762 return !ref && nb_fields <= 4;
765 return 0;
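/* Examples of the rule above: "struct { float x, y; }" and
   "struct { double a, b, c, d; }" are homogeneous float aggregates, whereas
   "struct { float x; double y; }" (mixed base types) and a structure with
   five float members (more than 4 fields) are not. */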
768 struct avail_regs {
769 signed char avail[3]; /* 3 holes max with only float and double alignments */
770 int first_hole; /* first available hole */
771 int last_hole; /* last available hole (none if equal to first_hole) */
772 int first_free_reg; /* next free register in the sequence, hole excluded */
775 #define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }
777 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
778 param) according to the rules described in the procedure call standard for
779 the ARM architecture (AAPCS). If found, the registers are assigned to this
780 VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
781 and the parameter is a single float.
783 avregs: opaque structure to keep track of available VFP co-processor regs
784    align: alignment constraints for the param, as returned by type_size()
785 size: size of the parameter, as returned by type_size() */
786 int assign_vfpreg(struct avail_regs *avregs, int align, int size)
788 int first_reg = 0;
790 if (avregs->first_free_reg == -1)
791 return -1;
792 if (align >> 3) { /* double alignment */
793 first_reg = avregs->first_free_reg;
794     /* alignment constraint not respected so use next reg and record hole */
795 if (first_reg & 1)
796 avregs->avail[avregs->last_hole++] = first_reg++;
797 } else { /* no special alignment (float or array of float) */
798 /* if single float and a hole is available, assign the param to it */
799 if (size == 4 && avregs->first_hole != avregs->last_hole)
800 return avregs->avail[avregs->first_hole++];
801 else
802 first_reg = avregs->first_free_reg;
804 if (first_reg + size / 4 <= 16) {
805 avregs->first_free_reg = first_reg + size / 4;
806 return first_reg;
808 avregs->first_free_reg = -1;
809 return -1;
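/* Worked example of the back-filling rule: for f(float a, double b, float c),
   'a' gets s0; 'b' needs double alignment, so s1 is recorded as a hole and 'b'
   gets d1 (s2/s3); 'c' is a single float and fills the hole, getting s1. Once
   a parameter overflows the 16 single precision registers, first_free_reg is
   set to -1 and every later VFP candidate goes on the stack. */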
812 /* Returns whether all params need to be passed in core registers or not.
813    This is the case for functions that are part of the runtime ABI. */
814 int floats_in_core_regs(SValue *sval)
816 if (!sval->sym)
817 return 0;
819 switch (sval->sym->v) {
820 case TOK___floatundisf:
821 case TOK___floatundidf:
822 case TOK___fixunssfdi:
823 case TOK___fixunsdfdi:
824 #ifndef TCC_ARM_VFP
825 case TOK___fixunsxfdi:
826 #endif
827 case TOK___floatdisf:
828 case TOK___floatdidf:
829 case TOK___fixsfdi:
830 case TOK___fixdfdi:
831 return 1;
833 default:
834 return 0;
838 /* Return the number of registers needed to return the struct, or 0 if
839 returning via struct pointer. */
840 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align) {
841 #ifdef TCC_ARM_EABI
842 int size, align;
843 size = type_size(vt, &align);
844 #ifdef TCC_ARM_HARDFLOAT
845 if (!variadic && (is_float(vt->t) || is_hgen_float_aggr(vt))) {
846 *ret_align = 8;
847 ret->ref = NULL;
848 ret->t = VT_DOUBLE;
849 return (size + 7) >> 3;
850 } else
851 #endif
852 if (size > 4) {
853 return 0;
854 } else {
855 *ret_align = 4;
856 ret->ref = NULL;
857 ret->t = VT_INT;
858 return 1;
860 #else
861 return 0;
862 #endif
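/* Examples: with the base EABI, "struct { short a, b; }" (4 bytes) is
   returned in r0 as if it were an int, while "struct { int a, b; }" (8 bytes)
   makes gfunc_sret() return 0 and the result is written through a pointer
   passed by the caller. With TCC_ARM_HARDFLOAT, a non-variadic function
   returning a homogeneous float aggregate returns it in VFP registers, one
   double slot per 8 bytes. */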
865 /* Parameters are classified according to how they are copied to their final
866 destination for the function call. Because the copying is performed class
867    after class according to the order in the enum below, it is important that
868    some constraints about the order of the members of this enum are respected:
869 - CORE_STRUCT_CLASS must come after STACK_CLASS;
870 - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
871 VFP_STRUCT_CLASS;
872 - VFP_STRUCT_CLASS must come after VFP_CLASS.
873 See the comment for the main loop in copy_params() for the reason. */
874 enum reg_class {
875 STACK_CLASS = 0,
876 CORE_STRUCT_CLASS,
877 VFP_CLASS,
878 VFP_STRUCT_CLASS,
879 CORE_CLASS,
880 NB_CLASSES
883 struct param_plan {
884 int start; /* first reg or addr used depending on the class */
885 int end; /* last reg used or next free addr depending on the class */
886 SValue *sval; /* pointer to SValue on the value stack */
887 struct param_plan *prev; /* previous element in this class */
890 struct plan {
891 struct param_plan *pplans; /* array of all the param plans */
892 struct param_plan *clsplans[NB_CLASSES]; /* per class lists of param plans */
895 #define add_param_plan(plan,pplan,class) \
896 do { \
897 pplan.prev = plan->clsplans[class]; \
898 plan->pplans[plan ## _nb] = pplan; \
899 plan->clsplans[class] = &plan->pplans[plan ## _nb++]; \
900 } while(0)
902 /* Assign parameters to registers and stack with alignment according to the
903 rules in the procedure call standard for the ARM architecture (AAPCS).
904 The overall assignment is recorded in an array of per parameter structures
905 called parameter plans. The parameter plans are also further organized in a
906 number of linked lists, one per class of parameter (see the comment for the
907    definition of enum reg_class).
909 nb_args: number of parameters of the function for which a call is generated
910 corefloat: whether to pass float via core registers or not
911 plan: the structure where the overall assignment is recorded
912 todo: a bitmap that record which core registers hold a parameter
914 Returns the amount of stack space needed for parameter passing
916    Note: this function allocates an array in plan->pplans with tcc_malloc. It
917    is the responsibility of the caller to free this array once used (i.e. not
918 before copy_params). */
919 static int assign_regs(int nb_args, int corefloat, struct plan *plan, int *todo)
921 int i, size, align;
922 int ncrn /* next core register number */, nsaa /* next stacked argument address*/;
923 int plan_nb = 0;
924 struct param_plan pplan;
925 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
927 ncrn = nsaa = 0;
928 *todo = 0;
929 plan->pplans = tcc_malloc(nb_args * sizeof(*plan->pplans));
930 memset(plan->clsplans, 0, sizeof(plan->clsplans));
931 for(i = nb_args; i-- ;) {
932 int j, start_vfpreg = 0;
933 size = type_size(&vtop[-i].type, &align);
934 switch(vtop[-i].type.t & VT_BTYPE) {
935 case VT_STRUCT:
936 case VT_FLOAT:
937 case VT_DOUBLE:
938 case VT_LDOUBLE:
939 if (!corefloat) {
940 int is_hfa = 0; /* Homogeneous float aggregate */
942 if (is_float(vtop[-i].type.t)
943 || (is_hfa = is_hgen_float_aggr(&vtop[-i].type))) {
944 int end_vfpreg;
946 start_vfpreg = assign_vfpreg(&avregs, align, size);
947 end_vfpreg = start_vfpreg + ((size - 1) >> 2);
948 if (start_vfpreg >= 0) {
949 pplan = (struct param_plan) {start_vfpreg, end_vfpreg, &vtop[-i]};
950 if (is_hfa)
951 add_param_plan(plan, pplan, VFP_STRUCT_CLASS);
952 else
953 add_param_plan(plan, pplan, VFP_CLASS);
954 continue;
955 } else
956 break;
959 ncrn = (ncrn + (align-1)/4) & -(align/4);
960 size = (size + 3) & -4;
961 if (ncrn + size/4 <= 4 || (ncrn < 4 && start_vfpreg != -1)) {
962 /* The parameter is allocated both in core register and on stack. As
963 * such, it can be of either class: it would either be the last of
964 * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
965 for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
966 *todo|=(1<<j);
967 pplan = (struct param_plan) {ncrn, j, &vtop[-i]};
968 add_param_plan(plan, pplan, CORE_STRUCT_CLASS);
969 ncrn += size/4;
970 if (ncrn > 4)
971 nsaa = (ncrn - 4) * 4;
972 } else {
973 ncrn = 4;
974 break;
976 continue;
977 default:
978 if (ncrn < 4) {
979 int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
981 if (is_long) {
982 ncrn = (ncrn + 1) & -2;
983 if (ncrn == 4)
984 break;
986 pplan = (struct param_plan) {ncrn, ncrn, &vtop[-i]};
987 ncrn++;
988 if (is_long)
989 pplan.end = ncrn++;
990 add_param_plan(plan, pplan, CORE_CLASS);
991 continue;
994 nsaa = (nsaa + (align - 1)) & ~(align - 1);
995 pplan = (struct param_plan) {nsaa, nsaa + size, &vtop[-i]};
996 add_param_plan(plan, pplan, STACK_CLASS);
997 nsaa += size; /* size already rounded up before */
999 return nsaa;
1002 #undef add_param_plan
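/* Worked example of the core register rules: for f(int a, long long b, int c),
   'a' is assigned r0; 'b' must start on an even register, so r1 is skipped and
   'b' takes r2/r3; 'c' no longer fits in core registers (ncrn == 4) and gets
   the first stack slot (nsaa == 0). Unlike VFP registers, skipped core
   registers are never back-filled. */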
1004 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1005 function call.
1007    nb_args: number of parameters the function takes
1008 plan: the overall assignment plan for parameters
1009 todo: a bitmap indicating what core reg will hold a parameter
1011 Returns the number of SValue added by this function on the value stack */
1012 static int copy_params(int nb_args, struct plan *plan, int todo)
1014 int size, align, r, i, nb_extra_sval = 0;
1015 struct param_plan *pplan;
1017 /* Several constraints require parameters to be copied in a specific order:
1018 - structures are copied to the stack before being loaded in a reg;
1019 - floats loaded to an odd numbered VFP reg are first copied to the
1020 preceding even numbered VFP reg and then moved to the next VFP reg.
1022 It is thus important that:
1023 - structures assigned to core regs must be copied after parameters
1024 assigned to the stack but before structures assigned to VFP regs because
1025 a structure can lie partly in core registers and partly on the stack;
1026 - parameters assigned to the stack and all structures be copied before
1027 parameters assigned to a core reg since copying a parameter to the stack
1028      requires using a core reg;
1029 - parameters assigned to VFP regs be copied before structures assigned to
1030 VFP regs as the copy might use an even numbered VFP reg that already
1031 holds part of a structure. */
1032 for(i = 0; i < NB_CLASSES; i++) {
1033 for(pplan = plan->clsplans[i]; pplan; pplan = pplan->prev) {
1034 vpushv(pplan->sval);
1035 pplan->sval->r = pplan->sval->r2 = VT_CONST; /* disable entry */
1036 switch(i) {
1037 case STACK_CLASS:
1038 case CORE_STRUCT_CLASS:
1039 case VFP_STRUCT_CLASS:
1040 if ((pplan->sval->type.t & VT_BTYPE) == VT_STRUCT) {
1041 int padding = 0;
1042 size = type_size(&pplan->sval->type, &align);
1043 /* align to stack align size */
1044 size = (size + 3) & ~3;
1045 if (i == STACK_CLASS && pplan->prev)
1046 padding = pplan->start - pplan->prev->end;
1047 size += padding; /* Add padding if any */
1048 /* allocate the necessary size on stack */
1049 gadd_sp(-size);
1050 /* generate structure store */
1051 r = get_reg(RC_INT);
1052 o(0xE28D0000|(intr(r)<<12)|padding); /* add r, sp, padding */
1053 vset(&vtop->type, r | VT_LVAL, 0);
1054 vswap();
1055 vstore(); /* memcpy to current sp + potential padding */
1057        /* Homogeneous float aggregates are loaded into VFP registers
1058           immediately since there is no way of loading data into multiple
1059           non-consecutive VFP registers, as is done for other
1060 structures (see the use of todo). */
1061 if (i == VFP_STRUCT_CLASS) {
1062 int first = pplan->start, nb = pplan->end - first + 1;
1063 /* vpop.32 {pplan->start, ..., pplan->end} */
1064 o(0xECBD0A00|(first&1)<<22|(first>>1)<<12|nb);
1065 /* No need to write the register used to a SValue since VFP regs
1066 cannot be used for gcall_or_jmp */
1068 } else {
1069 if (is_float(pplan->sval->type.t)) {
1070 #ifdef TCC_ARM_VFP
1071 r = vfpr(gv(RC_FLOAT)) << 12;
1072 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1073 size = 4;
1074 else {
1075 size = 8;
1076 r |= 0x101; /* vpush.32 -> vpush.64 */
1078 o(0xED2D0A01 + r); /* vpush */
1079 #else
1080 r = fpr(gv(RC_FLOAT)) << 12;
1081 if ((pplan->sval->type.t & VT_BTYPE) == VT_FLOAT)
1082 size = 4;
1083 else if ((pplan->sval->type.t & VT_BTYPE) == VT_DOUBLE)
1084 size = 8;
1085 else
1086 size = LDOUBLE_SIZE;
1088 if (size == 12)
1089 r |= 0x400000;
1090 else if(size == 8)
1091 r|=0x8000;
1093 o(0xED2D0100|r|(size>>2)); /* some kind of vpush for FPA */
1094 #endif
1095 } else {
1096 /* simple type (currently always same size) */
1097 /* XXX: implicit cast ? */
1098 size=4;
1099 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1100 lexpand_nr();
1101 size = 8;
1102 r = gv(RC_INT);
1103 o(0xE52D0004|(intr(r)<<12)); /* push r */
1104 vtop--;
1106 r = gv(RC_INT);
1107 o(0xE52D0004|(intr(r)<<12)); /* push r */
1109 if (i == STACK_CLASS && pplan->prev)
1110 gadd_sp(pplan->prev->end - pplan->start); /* Add padding if any */
1112 break;
1114 case VFP_CLASS:
1115 gv(regmask(TREG_F0 + (pplan->start >> 1)));
1116 if (pplan->start & 1) { /* Must be in upper part of double register */
1117 o(0xEEF00A40|((pplan->start>>1)<<12)|(pplan->start>>1)); /* vmov.f32 s(n+1), sn */
1118 vtop->r = VT_CONST; /* avoid being saved on stack by gv for next float */
1120 break;
1122 case CORE_CLASS:
1123 if ((pplan->sval->type.t & VT_BTYPE) == VT_LLONG) {
1124 lexpand_nr();
1125 gv(regmask(pplan->end));
1126 pplan->sval->r2 = vtop->r;
1127 vtop--;
1129 gv(regmask(pplan->start));
1130 /* Mark register as used so that gcall_or_jmp use another one
1131 (regs >=4 are free as never used to pass parameters) */
1132 pplan->sval->r = vtop->r;
1133 break;
1135 vtop--;
1139 /* Manually free remaining registers since next parameters are loaded
1140 * manually, without the help of gv(int). */
1141 save_regs(nb_args);
1143 if(todo) {
1144 o(0xE8BD0000|todo); /* pop {todo} */
1145 for(pplan = plan->clsplans[CORE_STRUCT_CLASS]; pplan; pplan = pplan->prev) {
1146 int r;
1147 pplan->sval->r = pplan->start;
1148 /* An SValue can only pin 2 registers at best (r and r2) but a structure
1149         can occupy more than 2 registers. Thus, we need to push some fake
1150         parameters on the value stack to have one SValue for each register used
1151 by a structure (r2 is not used). */
1152 for (r = pplan->start + 1; r <= pplan->end; r++) {
1153 if (todo & (1 << r)) {
1154 nb_extra_sval++;
1155 vpushi(0);
1156 vtop->r = r;
1161 return nb_extra_sval;
1164 /* Generate function call. The function address is pushed first, then
1165 all the parameters in call order. This functions pops all the
1166 parameters and the function address. */
1167 void gfunc_call(int nb_args)
1169 int r, args_size;
1170 int variadic, corefloat = 1;
1171 int todo;
1172 struct plan plan;
1174 #ifdef TCC_ARM_HARDFLOAT
1175 variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
1176 corefloat = variadic || floats_in_core_regs(&vtop[-nb_args]);
1177 #endif
1178   /* cannot leave the result in the cpu flags if other instructions are
1179      generated. Also avoid leaving VT_JMP anywhere except on the top of the
1180      stack because it would complicate the code generator. */
1181 r = vtop->r & VT_VALMASK;
1182 if (r == VT_CMP || (r & ~1) == VT_JMP)
1183 gv(RC_INT);
1185 args_size = assign_regs(nb_args, corefloat, &plan, &todo);
1187 #ifdef TCC_ARM_EABI
1188 if (args_size & 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1189 args_size = (args_size + 7) & ~7;
1190 o(0xE24DD004); /* sub sp, sp, #4 */
1192 #endif
1194 nb_args += copy_params(nb_args, &plan, todo);
1195 tcc_free(plan.pplans);
1197 /* Move fct SValue on top as required by gcall_or_jmp */
1198 vrotb(nb_args + 1);
1199 gcall_or_jmp(0);
1200 if (args_size)
1201 gadd_sp(args_size); /* pop all parameters passed on the stack */
1202 #ifdef TCC_ARM_EABI
1203 #ifdef TCC_ARM_VFP
1204 if(corefloat && is_float(vtop->type.ref->type.t)) {
1205 if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1206 o(0xEE000A10); /*vmov s0, r0 */
1207 } else {
1208 o(0xEE000B10); /* vmov.32 d0[0], r0 */
1209 o(0xEE201B10); /* vmov.32 d0[1], r1 */
1212 #endif
1213 #endif
1214 vtop -= nb_args + 1; /* Pop all params and fct address from value stack */
1215 leaffunc = 0; /* we are calling a function, so we aren't in a leaf function */
1218 /* generate function prolog of type 't' */
1219 void gfunc_prolog(CType *func_type)
1221 Sym *sym,*sym2;
1222 int n, nf, size, align, struct_ret = 0;
1223 #ifdef TCC_ARM_HARDFLOAT
1224 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1225 #endif
1226 CType ret_type;
1228 sym = func_type->ref;
1229 func_vt = sym->type;
1230 func_var = (func_type->ref->c == FUNC_ELLIPSIS);
1232 n = nf = 0;
1233 if ((func_vt.t & VT_BTYPE) == VT_STRUCT &&
1234 !gfunc_sret(&func_vt, func_var, &ret_type, &align))
1236 n++;
1237 struct_ret = 1;
1238 func_vc = 12; /* Offset from fp of the place to store the result */
1240 for(sym2=sym->next;sym2 && (n<4 || nf<16);sym2=sym2->next) {
1241 size = type_size(&sym2->type, &align);
1242 #ifdef TCC_ARM_HARDFLOAT
1243 if (!func_var && (is_float(sym2->type.t)
1244 || is_hgen_float_aggr(&sym2->type))) {
1245 int tmpnf = assign_vfpreg(&avregs, align, size);
1246 tmpnf += (size + 3) / 4;
1247 nf = (tmpnf > nf) ? tmpnf : nf;
1248 } else
1249 #endif
1250 if (n < 4)
1251 n += (size + 3) / 4;
1253 o(0xE1A0C00D); /* mov ip,sp */
1254 if (func_var)
1255 n=4;
1256 if (n) {
1257 if(n>4)
1258 n=4;
1259 #ifdef TCC_ARM_EABI
1260 n=(n+1)&-2;
1261 #endif
1262 o(0xE92D0000|((1<<n)-1)); /* save r0-r4 on stack if needed */
1264 if (nf) {
1265 if (nf>16)
1266 nf=16;
1267 nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1268 o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1270 o(0xE92D5800); /* save fp, ip, lr */
1271 o(0xE1A0B00D); /* mov fp, sp */
1272 func_sub_sp_offset = ind;
1273 o(0xE1A00000); /* nop, leave space for stack adjustment in epilogue */
1275 int addr, pn = struct_ret, sn = 0; /* pn=core, sn=stack */
1277 #ifdef TCC_ARM_HARDFLOAT
1278 func_vc += nf * 4;
1279 avregs = AVAIL_REGS_INITIALIZER;
1280 #endif
1281 while ((sym = sym->next)) {
1282 CType *type;
1283 type = &sym->type;
1284 size = type_size(type, &align);
1285 size = (size + 3) >> 2;
1286 align = (align + 3) & ~3;
1287 #ifdef TCC_ARM_HARDFLOAT
1288 if (!func_var && (is_float(sym->type.t)
1289 || is_hgen_float_aggr(&sym->type))) {
1290 int fpn = assign_vfpreg(&avregs, align, size << 2);
1291 if (fpn >= 0) {
1292 addr = fpn * 4;
1293 } else
1294 goto from_stack;
1295 } else
1296 #endif
1297 if (pn < 4) {
1298 #ifdef TCC_ARM_EABI
1299 pn = (pn + (align-1)/4) & -(align/4);
1300 #endif
1301 addr = (nf + pn) * 4;
1302 pn += size;
1303 if (!sn && pn > 4)
1304 sn = (pn - 4);
1305 } else {
1306 #ifdef TCC_ARM_HARDFLOAT
1307 from_stack:
1308 #endif
1309 #ifdef TCC_ARM_EABI
1310 sn = (sn + (align-1)/4) & -(align/4);
1311 #endif
1312 addr = (n + nf + sn) * 4;
1313 sn += size;
1315 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t), addr+12);
1318 last_itod_magic=0;
1319 leaffunc = 1;
1320 loc = 0;
1323 /* generate function epilog */
1324 void gfunc_epilog(void)
1326 uint32_t x;
1327 int diff;
1328 /* Copy float return value to core register if base standard is used and
1329 float computation is made with VFP */
1330 #ifdef TCC_ARM_EABI
1331 if (
1332 #ifdef TCC_ARM_HARDFLOAT
1333 func_var &&
1334 #endif
1335 is_float(func_vt.t)) {
1336 if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1337 o(0xEE100A10); /* fmrs r0, s0 */
1338 else {
1339 o(0xEE100B10); /* fmrdl r0, d0 */
1340 o(0xEE301B10); /* fmrdh r1, d0 */
1343 #endif
1344 o(0xE89BA800); /* restore fp, sp, pc */
1345 diff = (-loc + 3) & -4;
1346 #ifdef TCC_ARM_EABI
1347 if(!leaffunc)
1348 diff = ((diff + 11) & -8) - 4;
1349 #endif
1350 if(diff > 0) {
1351 x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1352 if(x)
1353 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1354 else {
1355 int addr;
1356 addr=ind;
1357 o(0xE59FC004); /* ldr ip,[pc+4] */
1358 o(0xE04BD00C); /* sub sp,fp,ip */
1359 o(0xE1A0F00E); /* mov pc,lr */
1360 o(diff);
1361 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
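/* For reference, a small function taking one int argument is bracketed by
   roughly the following sequence (the "sub sp" slot is emitted as a nop by
   gfunc_prolog() and patched here once the size of the locals is known):
       mov   ip, sp
       stmfd sp!, {r0, r1}      @ EABI keeps the save area 8-byte aligned
       stmfd sp!, {fp, ip, lr}
       mov   fp, sp
       sub   sp, fp, #<locals>  @ placeholder patched by gfunc_epilog()
       ...
       ldm   fp, {fp, sp, pc}   @ 0xE89BA800: restore fp/sp and return */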
1366 /* generate a jump to a label */
1367 int gjmp(int t)
1369 int r;
1370 r=ind;
1371 o(0xE0000000|encbranch(r,t,1));
1372 return r;
1375 /* generate a jump to a fixed address */
1376 void gjmp_addr(int a)
1378 gjmp(a);
1381 /* generate a test. set 'inv' to invert test. Stack entry is popped */
1382 int gtst(int inv, int t)
1384 int v, r;
1385 uint32_t op;
1386 v = vtop->r & VT_VALMASK;
1387 r=ind;
1388 if (v == VT_CMP) {
1389 op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1390 op|=encbranch(r,t,1);
1391 o(op);
1392 t=r;
1393 } else { /* VT_JMP || VT_JMPI */
1394 if ((v & 1) == inv) {
1395 if(!vtop->c.i)
1396 vtop->c.i=t;
1397 else {
1398 uint32_t *x;
1399 int p,lp;
1400 if(t) {
1401 p = vtop->c.i;
1402 do {
1403 p = decbranch(lp=p);
1404 } while(p);
1405 x = (uint32_t *)(cur_text_section->data + lp);
1406 *x &= 0xff000000;
1407 *x |= encbranch(lp,t,1);
1409 t = vtop->c.i;
1411 } else {
1412 t = gjmp(t);
1413 gsym(vtop->c.i);
1416 vtop--;
1417 return t;
1420 /* generate an integer binary operation */
1421 void gen_opi(int op)
1423 int c, func = 0;
1424 uint32_t opc = 0, r, fr;
1425 unsigned short retreg = REG_IRET;
1427 c=0;
1428 switch(op) {
1429 case '+':
1430 opc = 0x8;
1431 c=1;
1432 break;
1433 case TOK_ADDC1: /* add with carry generation */
1434 opc = 0x9;
1435 c=1;
1436 break;
1437 case '-':
1438 opc = 0x4;
1439 c=1;
1440 break;
1441 case TOK_SUBC1: /* sub with carry generation */
1442 opc = 0x5;
1443 c=1;
1444 break;
1445 case TOK_ADDC2: /* add with carry use */
1446 opc = 0xA;
1447 c=1;
1448 break;
1449 case TOK_SUBC2: /* sub with carry use */
1450 opc = 0xC;
1451 c=1;
1452 break;
1453 case '&':
1454 opc = 0x0;
1455 c=1;
1456 break;
1457 case '^':
1458 opc = 0x2;
1459 c=1;
1460 break;
1461 case '|':
1462 opc = 0x18;
1463 c=1;
1464 break;
1465 case '*':
1466 gv2(RC_INT, RC_INT);
1467 r = vtop[-1].r;
1468 fr = vtop[0].r;
1469 vtop--;
1470 o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1471 return;
1472 case TOK_SHL:
1473 opc = 0;
1474 c=2;
1475 break;
1476 case TOK_SHR:
1477 opc = 1;
1478 c=2;
1479 break;
1480 case TOK_SAR:
1481 opc = 2;
1482 c=2;
1483 break;
1484 case '/':
1485 case TOK_PDIV:
1486 func=TOK___divsi3;
1487 c=3;
1488 break;
1489 case TOK_UDIV:
1490 func=TOK___udivsi3;
1491 c=3;
1492 break;
1493 case '%':
1494 #ifdef TCC_ARM_EABI
1495 func=TOK___aeabi_idivmod;
1496 retreg=REG_LRET;
1497 #else
1498 func=TOK___modsi3;
1499 #endif
1500 c=3;
1501 break;
1502 case TOK_UMOD:
1503 #ifdef TCC_ARM_EABI
1504 func=TOK___aeabi_uidivmod;
1505 retreg=REG_LRET;
1506 #else
1507 func=TOK___umodsi3;
1508 #endif
1509 c=3;
1510 break;
1511 case TOK_UMULL:
1512 gv2(RC_INT, RC_INT);
1513 r=intr(vtop[-1].r2=get_reg(RC_INT));
1514 c=vtop[-1].r;
1515 vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1516 vtop--;
1517 o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1518 return;
1519 default:
1520 opc = 0x15;
1521 c=1;
1522 break;
1524 switch(c) {
1525 case 1:
1526 if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1527 if(opc == 4 || opc == 5 || opc == 0xc) {
1528 vswap();
1529 opc|=2; // sub -> rsb
1532 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1533 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1534 gv(RC_INT);
1535 vswap();
1536 c=intr(gv(RC_INT));
1537 vswap();
1538 opc=0xE0000000|(opc<<20)|(c<<16);
1539 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1540 uint32_t x;
1541 x=stuff_const(opc|0x2000000,vtop->c.i);
1542 if(x) {
1543 r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1544 o(x|(r<<12));
1545 goto done;
1548 fr=intr(gv(RC_INT));
1549 r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1550 o(opc|(r<<12)|fr);
1551 done:
1552 vtop--;
1553 if (op >= TOK_ULT && op <= TOK_GT) {
1554 vtop->r = VT_CMP;
1555 vtop->c.i = op;
1557 break;
1558 case 2:
1559 opc=0xE1A00000|(opc<<5);
1560 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1561 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1562 gv(RC_INT);
1563 vswap();
1564 r=intr(gv(RC_INT));
1565 vswap();
1566 opc|=r;
1567 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1568 fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1569 c = vtop->c.i & 0x1f;
1570 o(opc|(c<<7)|(fr<<12));
1571 } else {
1572 fr=intr(gv(RC_INT));
1573 c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1574 o(opc|(c<<12)|(fr<<8)|0x10);
1576 vtop--;
1577 break;
1578 case 3:
1579 vpush_global_sym(&func_old_type, func);
1580 vrott(3);
1581 gfunc_call(2);
1582 vpushi(0);
1583 vtop->r = retreg;
1584 break;
1585 default:
1586 tcc_error("gen_opi %i unimplemented!",op);
1590 #ifdef TCC_ARM_VFP
1591 static int is_zero(int i)
1593 if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1594 return 0;
1595 if (vtop[i].type.t == VT_FLOAT)
1596 return (vtop[i].c.f == 0.f);
1597 else if (vtop[i].type.t == VT_DOUBLE)
1598 return (vtop[i].c.d == 0.0);
1599 return (vtop[i].c.ld == 0.l);
1602 /* generate a floating point operation 'v = t1 op t2' instruction. The
1603  * two operands are guaranteed to have the same floating point type */
1604 void gen_opf(int op)
1606 uint32_t x;
1607 int fneg=0,r;
1608 x=0xEE000A00|T2CPR(vtop->type.t);
1609 switch(op) {
1610 case '+':
1611 if(is_zero(-1))
1612 vswap();
1613 if(is_zero(0)) {
1614 vtop--;
1615 return;
1617 x|=0x300000;
1618 break;
1619 case '-':
1620 x|=0x300040;
1621 if(is_zero(0)) {
1622 vtop--;
1623 return;
1625 if(is_zero(-1)) {
1626 x|=0x810000; /* fsubX -> fnegX */
1627 vswap();
1628 vtop--;
1629 fneg=1;
1631 break;
1632 case '*':
1633 x|=0x200000;
1634 break;
1635 case '/':
1636 x|=0x800000;
1637 break;
1638 default:
1639 if(op < TOK_ULT || op > TOK_GT) {
1640 tcc_error("unknown fp op %x!",op);
1641 return;
1643 if(is_zero(-1)) {
1644 vswap();
1645 switch(op) {
1646 case TOK_LT: op=TOK_GT; break;
1647 case TOK_GE: op=TOK_ULE; break;
1648 case TOK_LE: op=TOK_GE; break;
1649 case TOK_GT: op=TOK_ULT; break;
1652 x|=0xB40040; /* fcmpX */
1653 if(op!=TOK_EQ && op!=TOK_NE)
1654 x|=0x80; /* fcmpX -> fcmpeX */
1655 if(is_zero(0)) {
1656 vtop--;
1657 o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1658 } else {
1659 x|=vfpr(gv(RC_FLOAT));
1660 vswap();
1661 o(x|(vfpr(gv(RC_FLOAT))<<12));
1662 vtop--;
1664 o(0xEEF1FA10); /* fmstat */
1666 switch(op) {
1667 case TOK_LE: op=TOK_ULE; break;
1668 case TOK_LT: op=TOK_ULT; break;
1669 case TOK_UGE: op=TOK_GE; break;
1670 case TOK_UGT: op=TOK_GT; break;
1673 vtop->r = VT_CMP;
1674 vtop->c.i = op;
1675 return;
1677 r=gv(RC_FLOAT);
1678 x|=vfpr(r);
1679 r=regmask(r);
1680 if(!fneg) {
1681 int r2;
1682 vswap();
1683 r2=gv(RC_FLOAT);
1684 x|=vfpr(r2)<<16;
1685 r|=regmask(r2);
1687 vtop->r=get_reg_ex(RC_FLOAT,r);
1688 if(!fneg)
1689 vtop--;
1690 o(x|(vfpr(vtop->r)<<12));
1693 #else
1694 static uint32_t is_fconst()
1696 long double f;
1697 uint32_t r;
1698 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1699 return 0;
1700 if (vtop->type.t == VT_FLOAT)
1701 f = vtop->c.f;
1702 else if (vtop->type.t == VT_DOUBLE)
1703 f = vtop->c.d;
1704 else
1705 f = vtop->c.ld;
1706 if(!ieee_finite(f))
1707 return 0;
1708 r=0x8;
1709 if(f<0.0) {
1710 r=0x18;
1711 f=-f;
1713 if(f==0.0)
1714 return r;
1715 if(f==1.0)
1716 return r|1;
1717 if(f==2.0)
1718 return r|2;
1719 if(f==3.0)
1720 return r|3;
1721 if(f==4.0)
1722 return r|4;
1723 if(f==5.0)
1724 return r|5;
1725 if(f==0.5)
1726 return r|6;
1727 if(f==10.0)
1728 return r|7;
1729 return 0;
1732 /* generate a floating point operation 'v = t1 op t2' instruction. The
1733    two operands are guaranteed to have the same floating point type */
1734 void gen_opf(int op)
1736 uint32_t x, r, r2, c1, c2;
1737 //fputs("gen_opf\n",stderr);
1738 vswap();
1739 c1 = is_fconst();
1740 vswap();
1741 c2 = is_fconst();
1742 x=0xEE000100;
1743 #if LDOUBLE_SIZE == 8
1744 if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1745 x|=0x80;
1746 #else
1747 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1748 x|=0x80;
1749 else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1750 x|=0x80000;
1751 #endif
1752 switch(op)
1754 case '+':
1755 if(!c2) {
1756 vswap();
1757 c2=c1;
1759 vswap();
1760 r=fpr(gv(RC_FLOAT));
1761 vswap();
1762 if(c2) {
1763 if(c2>0xf)
1764 x|=0x200000; // suf
1765 r2=c2&0xf;
1766 } else {
1767 r2=fpr(gv(RC_FLOAT));
1769 break;
1770 case '-':
1771 if(c2) {
1772 if(c2<=0xf)
1773 x|=0x200000; // suf
1774 r2=c2&0xf;
1775 vswap();
1776 r=fpr(gv(RC_FLOAT));
1777 vswap();
1778 } else if(c1 && c1<=0xf) {
1779 x|=0x300000; // rsf
1780 r2=c1;
1781 r=fpr(gv(RC_FLOAT));
1782 vswap();
1783 } else {
1784 x|=0x200000; // suf
1785 vswap();
1786 r=fpr(gv(RC_FLOAT));
1787 vswap();
1788 r2=fpr(gv(RC_FLOAT));
1790 break;
1791 case '*':
1792 if(!c2 || c2>0xf) {
1793 vswap();
1794 c2=c1;
1796 vswap();
1797 r=fpr(gv(RC_FLOAT));
1798 vswap();
1799 if(c2 && c2<=0xf)
1800 r2=c2;
1801 else
1802 r2=fpr(gv(RC_FLOAT));
1803 x|=0x100000; // muf
1804 break;
1805 case '/':
1806 if(c2 && c2<=0xf) {
1807 x|=0x400000; // dvf
1808 r2=c2;
1809 vswap();
1810 r=fpr(gv(RC_FLOAT));
1811 vswap();
1812 } else if(c1 && c1<=0xf) {
1813 x|=0x500000; // rdf
1814 r2=c1;
1815 r=fpr(gv(RC_FLOAT));
1816 vswap();
1817 } else {
1818 x|=0x400000; // dvf
1819 vswap();
1820 r=fpr(gv(RC_FLOAT));
1821 vswap();
1822 r2=fpr(gv(RC_FLOAT));
1824 break;
1825 default:
1826 if(op >= TOK_ULT && op <= TOK_GT) {
1827 x|=0xd0f110; // cmfe
1828      /* bug (intentional?) in the Linux FPU emulator:
1829         it doesn't set carry if equal */
1830 switch(op) {
1831 case TOK_ULT:
1832 case TOK_UGE:
1833 case TOK_ULE:
1834 case TOK_UGT:
1835          tcc_error("unsigned comparison on floats?");
1836 break;
1837 case TOK_LT:
1838 op=TOK_Nset;
1839 break;
1840 case TOK_LE:
1841 op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1842 break;
1843 case TOK_EQ:
1844 case TOK_NE:
1845 x&=~0x400000; // cmfe -> cmf
1846 break;
1848 if(c1 && !c2) {
1849 c2=c1;
1850 vswap();
1851 switch(op) {
1852 case TOK_Nset:
1853 op=TOK_GT;
1854 break;
1855 case TOK_GE:
1856 op=TOK_ULE;
1857 break;
1858 case TOK_ULE:
1859 op=TOK_GE;
1860 break;
1861 case TOK_GT:
1862 op=TOK_Nset;
1863 break;
1866 vswap();
1867 r=fpr(gv(RC_FLOAT));
1868 vswap();
1869 if(c2) {
1870 if(c2>0xf)
1871 x|=0x200000;
1872 r2=c2&0xf;
1873 } else {
1874 r2=fpr(gv(RC_FLOAT));
1876 vtop[-1].r = VT_CMP;
1877 vtop[-1].c.i = op;
1878 } else {
1879 tcc_error("unknown fp op %x!",op);
1880 return;
1883 if(vtop[-1].r == VT_CMP)
1884 c1=15;
1885 else {
1886 c1=vtop->r;
1887 if(r2&0x8)
1888 c1=vtop[-1].r;
1889 vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1890 c1=fpr(vtop[-1].r);
1892 vtop--;
1893 o(x|(r<<16)|(c1<<12)|r2);
1895 #endif
1897 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1898 and 'long long' cases. */
1899 ST_FUNC void gen_cvt_itof1(int t)
1901 uint32_t r, r2;
1902 int bt;
1903 bt=vtop->type.t & VT_BTYPE;
1904 if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1905 #ifndef TCC_ARM_VFP
1906 uint32_t dsize = 0;
1907 #endif
1908 r=intr(gv(RC_INT));
1909 #ifdef TCC_ARM_VFP
1910 r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1911 o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1912 r2|=r2<<12;
1913 if(!(vtop->type.t & VT_UNSIGNED))
1914 r2|=0x80; /* fuitoX -> fsituX */
1915 o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1916 #else
1917 r2=fpr(vtop->r=get_reg(RC_FLOAT));
1918 if((t & VT_BTYPE) != VT_FLOAT)
1919 dsize=0x80; /* flts -> fltd */
1920 o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1921 if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1922 uint32_t off = 0;
1923 o(0xE3500000|(r<<12)); /* cmp */
1924 r=fpr(get_reg(RC_FLOAT));
1925 if(last_itod_magic) {
1926 off=ind+8-last_itod_magic;
1927 off/=4;
1928 if(off>255)
1929 off=0;
1931 o(0xBD1F0100|(r<<12)|off); /* ldflts */
1932 if(!off) {
1933 o(0xEA000000); /* b */
1934 last_itod_magic=ind;
1935 o(0x4F800000); /* 4294967296.0f */
1937 o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1939 #endif
1940 return;
1941 } else if(bt == VT_LLONG) {
1942 int func;
1943 CType *func_type = 0;
1944 if((t & VT_BTYPE) == VT_FLOAT) {
1945 func_type = &func_float_type;
1946 if(vtop->type.t & VT_UNSIGNED)
1947 func=TOK___floatundisf;
1948 else
1949 func=TOK___floatdisf;
1950 #if LDOUBLE_SIZE != 8
1951 } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1952 func_type = &func_ldouble_type;
1953 if(vtop->type.t & VT_UNSIGNED)
1954 func=TOK___floatundixf;
1955 else
1956 func=TOK___floatdixf;
1957 } else if((t & VT_BTYPE) == VT_DOUBLE) {
1958 #else
1959 } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1960 #endif
1961 func_type = &func_double_type;
1962 if(vtop->type.t & VT_UNSIGNED)
1963 func=TOK___floatundidf;
1964 else
1965 func=TOK___floatdidf;
1967 if(func_type) {
1968 vpush_global_sym(func_type, func);
1969 vswap();
1970 gfunc_call(1);
1971 vpushi(0);
1972 vtop->r=TREG_F0;
1973 return;
1976 tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1979 /* convert fp to int 't' type */
1980 void gen_cvt_ftoi(int t)
1982 uint32_t r, r2;
1983 int u, func = 0;
1984 u=t&VT_UNSIGNED;
1985 t&=VT_BTYPE;
1986 r2=vtop->type.t & VT_BTYPE;
1987 if(t==VT_INT) {
1988 #ifdef TCC_ARM_VFP
1989 r=vfpr(gv(RC_FLOAT));
1990 u=u?0:0x10000;
1991 o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
1992 r2=intr(vtop->r=get_reg(RC_INT));
1993 o(0xEE100A10|(r<<16)|(r2<<12));
1994 return;
1995 #else
1996 if(u) {
1997 if(r2 == VT_FLOAT)
1998 func=TOK___fixunssfsi;
1999 #if LDOUBLE_SIZE != 8
2000 else if(r2 == VT_LDOUBLE)
2001 func=TOK___fixunsxfsi;
2002 else if(r2 == VT_DOUBLE)
2003 #else
2004 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2005 #endif
2006 func=TOK___fixunsdfsi;
2007 } else {
2008 r=fpr(gv(RC_FLOAT));
2009 r2=intr(vtop->r=get_reg(RC_INT));
2010 o(0xEE100170|(r2<<12)|r);
2011 return;
2013 #endif
2014 } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
2015 if(r2 == VT_FLOAT)
2016 func=TOK___fixsfdi;
2017 #if LDOUBLE_SIZE != 8
2018 else if(r2 == VT_LDOUBLE)
2019 func=TOK___fixxfdi;
2020 else if(r2 == VT_DOUBLE)
2021 #else
2022 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
2023 #endif
2024 func=TOK___fixdfdi;
2026 if(func) {
2027 vpush_global_sym(&func_old_type, func);
2028 vswap();
2029 gfunc_call(1);
2030 vpushi(0);
2031 if(t == VT_LLONG)
2032 vtop->r2 = REG_LRET;
2033 vtop->r = REG_IRET;
2034 return;
2036 tcc_error("unimplemented gen_cvt_ftoi!");
2039 /* convert from one floating point type to another */
2040 void gen_cvt_ftof(int t)
2042 #ifdef TCC_ARM_VFP
2043 if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
2044 uint32_t r = vfpr(gv(RC_FLOAT));
2045 o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
2047 #else
2048   /* with FPA, all we have to do is to put the float in a register */
2049 gv(RC_FLOAT);
2050 #endif
2053 /* computed goto support */
2054 void ggoto(void)
2056 gcall_or_jmp(1);
2057 vtop--;
2060 /* Save the stack pointer onto the stack and return the location of its address */
2061 ST_FUNC void gen_vla_sp_save(int addr) {
2062 tcc_error("variable length arrays unsupported for this target");
2065 /* Restore the SP from a location on the stack */
2066 ST_FUNC void gen_vla_sp_restore(int addr) {
2067 tcc_error("variable length arrays unsupported for this target");
2070 /* Subtract from the stack pointer, and push the resulting value onto the stack */
2071 ST_FUNC void gen_vla_alloc(CType *type, int align) {
2072 tcc_error("variable length arrays unsupported for this target");
2075 /* end of ARM code generator */
2076 /*************************************************************/
2077 #endif
2078 /*************************************************************/