Makefile: allow CONFIG_LDDIR=lib64 configuration
[tinycc.git] / arm-gen.c
blob03b5fd77978dae5a69050ff14270322c1079cc60
1 /*
2 * ARMv4 code generator for TCC
3 *
4 * Copyright (c) 2003 Daniel Glöckner
5 * Copyright (c) 2012 Thomas Preud'homme
7 * Based on i386-gen.c by Fabrice Bellard
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #ifdef TARGET_DEFS_ONLY
26 #ifdef TCC_ARM_EABI
27 #ifndef TCC_ARM_VFP // Avoid useless warning
28 #define TCC_ARM_VFP
29 #endif
30 #endif
32 /* number of available registers */
33 #ifdef TCC_ARM_VFP
34 #define NB_REGS 13
35 #else
36 #define NB_REGS 9
37 #endif
39 #ifndef TCC_ARM_VERSION
40 # define TCC_ARM_VERSION 5
41 #endif
43 /* A register can belong to several classes. The classes must be
44 sorted from more general to more precise (see the gv2() code, which
45 makes assumptions about this ordering). */
46 #define RC_INT 0x0001 /* generic integer register */
47 #define RC_FLOAT 0x0002 /* generic float register */
48 #define RC_R0 0x0004
49 #define RC_R1 0x0008
50 #define RC_R2 0x0010
51 #define RC_R3 0x0020
52 #define RC_R12 0x0040
53 #define RC_F0 0x0080
54 #define RC_F1 0x0100
55 #define RC_F2 0x0200
56 #define RC_F3 0x0400
57 #ifdef TCC_ARM_VFP
58 #define RC_F4 0x0800
59 #define RC_F5 0x1000
60 #define RC_F6 0x2000
61 #define RC_F7 0x4000
62 #endif
63 #define RC_IRET RC_R0 /* function return: integer register */
64 #define RC_LRET RC_R1 /* function return: second integer register */
65 #define RC_FRET RC_F0 /* function return: float register */
/* Pretty names for the registers.  The values are the register indices
   used by the code generator (and the indices into reg_classes[]):
   0..4 are the integer registers (index 4 stands for r12, see intr()),
   5 and up are the floating point registers. */
enum {
    TREG_R0  = 0,
    TREG_R1  = 1,
    TREG_R2  = 2,
    TREG_R3  = 3,
    TREG_R12 = 4,
    TREG_F0  = 5,
    TREG_F1  = 6,
    TREG_F2  = 7,
    TREG_F3  = 8,
#ifdef TCC_ARM_VFP
    /* only available when targeting VFP */
    TREG_F4  = 9,
    TREG_F5  = 10,
    TREG_F6  = 11,
    TREG_F7  = 12,
#endif
};
86 #ifdef TCC_ARM_VFP
87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
88 #endif
90 /* return registers for function */
91 #define REG_IRET TREG_R0 /* single word int return register */
92 #define REG_LRET TREG_R1 /* second word return register (for long long) */
93 #define REG_FRET TREG_F0 /* float return register */
95 #ifdef TCC_ARM_EABI
96 #define TOK___divdi3 TOK___aeabi_ldivmod
97 #define TOK___moddi3 TOK___aeabi_ldivmod
98 #define TOK___udivdi3 TOK___aeabi_uldivmod
99 #define TOK___umoddi3 TOK___aeabi_uldivmod
100 #endif
102 /* defined if function parameters must be evaluated in reverse order */
103 #define INVERT_FUNC_PARAMS
105 /* defined if structures are passed as pointers. Otherwise structures
106 are directly pushed on stack. */
107 //#define FUNC_STRUCT_PARAM_AS_PTR
109 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
110 ST_DATA CType float_type, double_type, func_float_type, func_double_type;
111 #define func_ldouble_type func_double_type
112 #else
113 #define func_float_type func_old_type
114 #define func_double_type func_old_type
115 #define func_ldouble_type func_old_type
116 #endif
118 /* pointer size, in bytes */
119 #define PTR_SIZE 4
121 /* long double size and alignment, in bytes */
122 #ifdef TCC_ARM_VFP
123 #define LDOUBLE_SIZE 8
124 #endif
126 #ifndef LDOUBLE_SIZE
127 #define LDOUBLE_SIZE 8
128 #endif
130 #ifdef TCC_ARM_EABI
131 #define LDOUBLE_ALIGN 8
132 #else
133 #define LDOUBLE_ALIGN 4
134 #endif
136 /* maximum alignment (for aligned attribute support) */
137 #define MAX_ALIGN 8
139 #define CHAR_IS_UNSIGNED
141 /******************************************************/
142 /* ELF defines */
144 #define EM_TCC_TARGET EM_ARM
146 /* relocation type for 32 bit data relocation */
147 #define R_DATA_32 R_ARM_ABS32
148 #define R_DATA_PTR R_ARM_ABS32
149 #define R_JMP_SLOT R_ARM_JUMP_SLOT
150 #define R_COPY R_ARM_COPY
152 #define ELF_START_ADDR 0x00008000
153 #define ELF_PAGE_SIZE 0x1000
155 /******************************************************/
156 #else /* ! TARGET_DEFS_ONLY */
157 /******************************************************/
158 #include "tcc.h"
160 ST_DATA const int reg_classes[NB_REGS] = {
161 /* r0 */ RC_INT | RC_R0,
162 /* r1 */ RC_INT | RC_R1,
163 /* r2 */ RC_INT | RC_R2,
164 /* r3 */ RC_INT | RC_R3,
165 /* r12 */ RC_INT | RC_R12,
166 /* f0 */ RC_FLOAT | RC_F0,
167 /* f1 */ RC_FLOAT | RC_F1,
168 /* f2 */ RC_FLOAT | RC_F2,
169 /* f3 */ RC_FLOAT | RC_F3,
170 #ifdef TCC_ARM_VFP
171 /* d4/s8 */ RC_FLOAT | RC_F4,
172 /* d5/s10 */ RC_FLOAT | RC_F5,
173 /* d6/s12 */ RC_FLOAT | RC_F6,
174 /* d7/s14 */ RC_FLOAT | RC_F7,
175 #endif
178 /* keep in sync with the identical declaration in the TARGET_DEFS_ONLY section above */
179 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
180 ST_DATA CType float_type, double_type, func_float_type, func_double_type;
181 #endif
183 static int func_sub_sp_offset, last_itod_magic;
184 static int leaffunc;
186 static int two2mask(int a,int b) {
187 return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
190 static int regmask(int r) {
191 return reg_classes[r]&~(RC_INT|RC_FLOAT);
194 /******************************************************/
196 void o(uint32_t i)
198 /* this is a good place to start adding big-endian support*/
199 int ind1;
201 ind1 = ind + 4;
202 if (!cur_text_section)
203 tcc_error("compiler error! This happens f.ex. if the compiler\n"
204 "can't evaluate constant expressions outside of a function.");
205 if (ind1 > cur_text_section->data_allocated)
206 section_realloc(cur_text_section, ind1);
207 cur_text_section->data[ind++] = i&255;
208 i>>=8;
209 cur_text_section->data[ind++] = i&255;
210 i>>=8;
211 cur_text_section->data[ind++] = i&255;
212 i>>=8;
213 cur_text_section->data[ind++] = i;
/* Try to fold the constant 'c' into the immediate field of the
   data-processing instruction 'op' (an 8-bit value rotated right by an
   even amount).
   If 'c' itself cannot be encoded, the complementary instruction is tried
   with the transformed constant: add<->sub with -c, mov<->mvn and
   and<->bic with ~c.  xor/and/orr with an all-ones constant are rewritten
   to the equivalent mvn/mov form directly.
   Returns the complete instruction word, or 0 when no encoding exists. */
static uint32_t stuff_const(uint32_t op, uint32_t c)
{
    const uint32_t all_ones = 0xFFFFFFFFu;
    int try_neg = 0;
    uint32_t nc = 0, negop = 0;

    switch (op & 0x1F00000) {
    case 0x800000: /* add */
    case 0x400000: /* sub */
        try_neg = 1;
        negop = op ^ 0xC00000;
        nc = 0u - c;
        break;
    case 0x1A00000: /* mov */
    case 0x1E00000: /* mvn */
        try_neg = 1;
        negop = op ^ 0x400000;
        nc = ~c;
        break;
    case 0x200000: /* xor */
        if (c == all_ones)
            return (op & 0xF010F000) | ((op >> 16) & 0xF) | 0x1E00000;
        break;
    case 0x0: /* and */
        if (c == all_ones)
            return (op & 0xF010F000) | ((op >> 16) & 0xF) | 0x1A00000;
        /* fall through: and shares the complement handling with bic */
    case 0x1C00000: /* bic */
        try_neg = 1;
        negop = op ^ 0x1C00000;
        nc = ~c;
        break;
    case 0x1800000: /* orr */
        if (c == all_ones)
            return (op & 0xFFF0FFFF) | 0x1E00000;
        break;
    default:
        break;
    }
    do {
        int i;

        if (c < 256) /* no rotation needed; also keeps the shifts below < 32 */
            return op | c;
        for (i = 2; i < 32; i += 2) {
            /* mask of the bits reachable with rotation i; computed in
               uint32_t because shifting the int 0xff into the sign bit
               is undefined behaviour */
            uint32_t m = ((uint32_t)0xff >> i) | ((uint32_t)0xff << (32 - i));

            if (!(c & ~m))
                return op | ((uint32_t)i << 7) | (c << i) | (c >> (32 - i));
        }
        /* second round (if any): complementary opcode and constant */
        op = negop;
        c = nc;
    } while (try_neg--);
    return 0;
}
/* only add,sub */
/* Emit 'op' (an add or sub with immediate) for an arbitrary 32-bit
   constant 'v', using as few instructions as possible:
   one instruction if stuff_const() can encode it, otherwise two or three
   instructions on 8-bit chunks of v (or of -v with the opposite
   operation), and four instructions as the last resort.
   Every follow-up instruction uses the destination register of 'op' as
   its source operand. */
void stuff_const_harder(uint32_t op, uint32_t v)
{
    uint32_t a[16], nv, no, o2, n2, x;
    int i, j, k;

    x = stuff_const(op, v);
    if (x) {
        o(x);
        return;
    }

    /* a[i] is the byte mask 0xff rotated right by 2*i bits */
    a[0] = 0xff;
    for (i = 1; i < 16; i++)
        a[i] = (a[i-1] >> 2) | (a[i-1] << 30);

    /* follow-up instruction: same opcode, Rn replaced by Rd of 'op' */
    o2 = (op & 0xfff0ffff) | ((op & 0xf000) << 4);
    /* opposite operation (add <-> sub) working on the negated constant */
    no = op ^ 0xC00000;
    n2 = o2 ^ 0xC00000;
    nv = 0u - v;

    /* two instructions, v */
    for (i = 0; i < 12; i++)
        for (j = i < 4 ? i + 12 : 15; j >= i + 4; j--)
            if ((v & (a[i] | a[j])) == v) {
                o(stuff_const(op, v & a[i]));
                o(stuff_const(o2, v & a[j]));
                return;
            }
    /* two instructions, -v */
    for (i = 0; i < 12; i++)
        for (j = i < 4 ? i + 12 : 15; j >= i + 4; j--)
            if ((nv & (a[i] | a[j])) == nv) {
                o(stuff_const(no, nv & a[i]));
                o(stuff_const(n2, nv & a[j]));
                return;
            }
    /* three instructions, v */
    for (i = 0; i < 8; i++)
        for (j = i + 4; j < 12; j++)
            for (k = i < 4 ? i + 12 : 15; k >= j + 4; k--)
                if ((v & (a[i] | a[j] | a[k])) == v) {
                    o(stuff_const(op, v & a[i]));
                    o(stuff_const(o2, v & a[j]));
                    o(stuff_const(o2, v & a[k]));
                    return;
                }
    /* three instructions, -v */
    for (i = 0; i < 8; i++)
        for (j = i + 4; j < 12; j++)
            for (k = i < 4 ? i + 12 : 15; k >= j + 4; k--)
                if ((nv & (a[i] | a[j] | a[k])) == nv) {
                    o(stuff_const(no, nv & a[i]));
                    o(stuff_const(n2, nv & a[j]));
                    o(stuff_const(n2, nv & a[k]));
                    return;
                }
    /* last resort: one instruction per byte of v */
    o(stuff_const(op, v & a[0]));
    o(stuff_const(o2, v & a[4]));
    o(stuff_const(o2, v & a[8]));
    o(stuff_const(o2, v & a[12]));
}
326 ST_FUNC uint32_t encbranch(int pos, int addr, int fail)
328 addr-=pos+8;
329 addr/=4;
330 if(addr>=0x1000000 || addr<-0x1000000) {
331 if(fail)
332 tcc_error("FIXME: function bigger than 32MB");
333 return 0;
335 return 0x0A000000|(addr&0xffffff);
338 int decbranch(int pos)
340 int x;
341 x=*(uint32_t *)(cur_text_section->data + pos);
342 x&=0x00ffffff;
343 if(x&0x800000)
344 x-=0x1000000;
345 return x*4+pos+8;
348 /* output a symbol and patch all calls to it */
349 void gsym_addr(int t, int a)
351 uint32_t *x;
352 int lt;
353 while(t) {
354 x=(uint32_t *)(cur_text_section->data + t);
355 t=decbranch(lt=t);
356 if(a==lt+4)
357 *x=0xE1A00000; // nop
358 else {
359 *x &= 0xff000000;
360 *x |= encbranch(lt,a,1);
365 void gsym(int t)
367 gsym_addr(t, ind);
370 #ifdef TCC_ARM_VFP
371 static uint32_t vfpr(int r)
373 if(r<TREG_F0 || r>TREG_F7)
374 tcc_error("compiler error! register %i is no vfp register",r);
375 return r-5;
377 #else
378 static uint32_t fpr(int r)
380 if(r<TREG_F0 || r>TREG_F3)
381 tcc_error("compiler error! register %i is no fpa register",r);
382 return r-5;
384 #endif
386 static uint32_t intr(int r)
388 if(r==4)
389 return 12;
390 if((r<0 || r>4) && r!=14)
391 tcc_error("compiler error! register %i is no int register",r);
392 return r;
/* Make the address (*base register, offset *off, direction *sgn)
   usable by a load/store whose offset field holds at most 'maxoff' and
   requires the low 'shift' bits of the offset to be zero.
   If the offset does not fit, an add/sub into lr is emitted that
   absorbs the excess; *base becomes lr (14) and *off (possibly *sgn)
   is updated to the remaining offset. */
static void calcaddr(uint32_t *base, int *off, int *sgn, int maxoff, unsigned shift)
{
    uint32_t insn, enc;
    int rounded;

    if (*off <= maxoff && !(*off & ((1 << shift) - 1)))
        return;                                /* already encodable */

    insn = *sgn ? 0xE240E000 : 0xE280E000;     /* sub / add lr, base, # */
    insn |= (*base) << 16;
    *base = 14; /* lr */

    /* first choice: move the high part of the offset into lr */
    enc = stuff_const(insn, *off & ~maxoff);
    if (enc) {
        o(enc);
        *off &= maxoff;
        return;
    }
    /* second choice: overshoot and reach back with the opposite sign */
    rounded = (*off + maxoff) & ~maxoff;
    enc = stuff_const(insn, rounded);
    if (enc) {
        o(enc);
        *sgn = !*sgn;
        *off = rounded - *off;
        return;
    }
    /* otherwise build the high part with several instructions */
    stuff_const_harder(insn, *off & ~maxoff);
    *off &= maxoff;
}
422 static uint32_t mapcc(int cc)
424 switch(cc)
426 case TOK_ULT:
427 return 0x30000000; /* CC/LO */
428 case TOK_UGE:
429 return 0x20000000; /* CS/HS */
430 case TOK_EQ:
431 return 0x00000000; /* EQ */
432 case TOK_NE:
433 return 0x10000000; /* NE */
434 case TOK_ULE:
435 return 0x90000000; /* LS */
436 case TOK_UGT:
437 return 0x80000000; /* HI */
438 case TOK_Nset:
439 return 0x40000000; /* MI */
440 case TOK_Nclear:
441 return 0x50000000; /* PL */
442 case TOK_LT:
443 return 0xB0000000; /* LT */
444 case TOK_GE:
445 return 0xA0000000; /* GE */
446 case TOK_LE:
447 return 0xD0000000; /* LE */
448 case TOK_GT:
449 return 0xC0000000; /* GT */
451 tcc_error("unexpected condition code");
452 return 0xE0000000; /* AL */
455 static int negcc(int cc)
457 switch(cc)
459 case TOK_ULT:
460 return TOK_UGE;
461 case TOK_UGE:
462 return TOK_ULT;
463 case TOK_EQ:
464 return TOK_NE;
465 case TOK_NE:
466 return TOK_EQ;
467 case TOK_ULE:
468 return TOK_UGT;
469 case TOK_UGT:
470 return TOK_ULE;
471 case TOK_Nset:
472 return TOK_Nclear;
473 case TOK_Nclear:
474 return TOK_Nset;
475 case TOK_LT:
476 return TOK_GE;
477 case TOK_GE:
478 return TOK_LT;
479 case TOK_LE:
480 return TOK_GT;
481 case TOK_GT:
482 return TOK_LE;
484 tcc_error("unexpected condition code");
485 return TOK_NE;
488 /* Load the value described by 'sv' into register 'r'.
   sv->r gives the storage kind (constant, local, register, comparison
   result, jump chain) and, with VT_LVAL, says that the value must be
   fetched from memory at that location; sv->type.t selects the kind and
   width of the access; sv->c.ul is the constant, frame offset, condition
   token or jump chain.
   Combinations that are not handled end in
   tcc_error("load unimplemented!"). */
489 void load(int r, SValue *sv)
491 int v, ft, fc, fr, sign;
492 uint32_t op;
493 SValue v1;
495 fr = sv->r;
496 ft = sv->type.t;
497 fc = sv->c.ul;
/* split the offset into a magnitude (fc) and a direction flag (sign) */
499 if(fc>=0)
500 sign=0;
501 else {
502 sign=1;
503 fc=-fc;
506 v = fr & VT_VALMASK;
/* ---- memory operand: compute base register, then emit the load ---- */
507 if (fr & VT_LVAL) {
508 uint32_t base = 0xB; // fp
509 if(v == VT_LLOCAL) {
/* the address itself lives in a local slot: fetch it into lr first */
510 v1.type.t = VT_PTR;
511 v1.r = VT_LOCAL | VT_LVAL;
512 v1.c.ul = sv->c.ul;
513 load(base=14 /* lr */, &v1);
514 fc=sign=0;
515 v=VT_LOCAL;
516 } else if(v == VT_CONST) {
/* absolute/symbolic address: materialize it in lr first */
517 v1.type.t = VT_PTR;
518 v1.r = fr&~VT_LVAL;
519 v1.c.ul = sv->c.ul;
520 v1.sym=sv->sym;
521 load(base=14, &v1);
522 fc=sign=0;
523 v=VT_LOCAL;
524 } else if(v < VT_CONST) {
/* the address is already in register v */
525 base=intr(v);
526 fc=sign=0;
527 v=VT_LOCAL;
529 if(v == VT_LOCAL) {
530 if(is_float(ft)) {
/* floating point load: word-aligned offset of at most 1020 */
531 calcaddr(&base,&fc,&sign,1020,2);
532 #ifdef TCC_ARM_VFP
533 op=0xED100A00; /* flds */
534 if(!sign)
535 op|=0x800000;
536 if ((ft & VT_BTYPE) != VT_FLOAT)
537 op|=0x100; /* flds -> fldd */
538 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
539 #else
540 op=0xED100100;
541 if(!sign)
542 op|=0x800000;
543 #if LDOUBLE_SIZE == 8
544 if ((ft & VT_BTYPE) != VT_FLOAT)
545 op|=0x8000;
546 #else
547 if ((ft & VT_BTYPE) == VT_DOUBLE)
548 op|=0x8000;
549 else if ((ft & VT_BTYPE) == VT_LDOUBLE)
550 op|=0x400000;
551 #endif
552 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
553 #endif
554 } else if((ft & (VT_BTYPE|VT_UNSIGNED)) == VT_BYTE
555 || (ft & VT_BTYPE) == VT_SHORT) {
/* signed byte and (un)signed short: 8-bit offset, split into two nibbles */
556 calcaddr(&base,&fc,&sign,255,0);
557 op=0xE1500090;
558 if ((ft & VT_BTYPE) == VT_SHORT)
559 op|=0x20;
560 if ((ft & VT_UNSIGNED) == 0)
561 op|=0x40;
562 if(!sign)
563 op|=0x800000;
564 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
565 } else {
/* word or unsigned byte: 12-bit offset */
566 calcaddr(&base,&fc,&sign,4095,0);
567 op=0xE5100000;
568 if(!sign)
569 op|=0x800000;
570 if ((ft & VT_BTYPE) == VT_BYTE)
571 op|=0x400000;
572 o(op|(intr(r)<<12)|fc|(base<<16));
574 return;
/* ---- non-memory operand ---- */
576 } else {
577 if (v == VT_CONST) {
/* constant: a single instruction if encodable, otherwise (or when a
   symbol needs relocating) a pc-relative load of a literal word that is
   placed in the code and jumped over */
578 op=stuff_const(0xE3A00000|(intr(r)<<12),sv->c.ul);
579 if (fr & VT_SYM || !op) {
580 o(0xE59F0000|(intr(r)<<12));
581 o(0xEA000000);
582 if(fr & VT_SYM)
583 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
584 o(sv->c.ul);
585 } else
586 o(op);
587 return;
588 } else if (v == VT_LOCAL) {
/* address of a local: fp + offset, with the same literal fallback
   followed by an add of fp */
589 op=stuff_const(0xE28B0000|(intr(r)<<12),sv->c.ul);
590 if (fr & VT_SYM || !op) {
591 o(0xE59F0000|(intr(r)<<12));
592 o(0xEA000000);
593 if(fr & VT_SYM) // needed ?
594 greloc(cur_text_section, sv->sym, ind, R_ARM_ABS32);
595 o(sv->c.ul);
596 o(0xE08B0000|(intr(r)<<12)|intr(r));
597 } else
598 o(op);
599 return;
600 } else if(v == VT_CMP) {
/* comparison result: r = 1 under the condition, r = 0 under its negation */
601 o(mapcc(sv->c.ul)|0x3A00001|(intr(r)<<12));
602 o(mapcc(negcc(sv->c.ul))|0x3A00000|(intr(r)<<12));
603 return;
604 } else if (v == VT_JMP || v == VT_JMPI) {
/* pending jump chain: fall-through path sets r to t and skips the next
   instruction, the jump targets land on it and set r to !t */
605 int t;
606 t = v & 1;
607 o(0xE3A00000|(intr(r)<<12)|t);
608 o(0xEA000000);
609 gsym(sv->c.ul);
610 o(0xE3A00000|(intr(r)<<12)|(t^1));
611 return;
612 } else if (v < VT_CONST) {
/* register to register copy */
613 if(is_float(ft))
614 #ifdef TCC_ARM_VFP
615 o(0xEEB00A40|(vfpr(r)<<12)|vfpr(v)|T2CPR(ft)); /* fcpyX */
616 #else
617 o(0xEE008180|(fpr(r)<<12)|fpr(v));
618 #endif
619 else
620 o(0xE1A00000|(intr(r)<<12)|intr(v));
621 return;
624 tcc_error("load unimplemented!");
627 /* Store register 'r' into the lvalue described by 'sv'.
   sv->r says where the destination is (frame slot, address held in a
   register, or constant/symbolic address), sv->type.t selects the kind
   and width of the store and sv->c.ul is the offset or address.
   Destinations that are not handled end in
   tcc_error("store unimplemented"). */
628 void store(int r, SValue *sv)
630 SValue v1;
631 int v, ft, fc, fr, sign;
632 uint32_t op;
634 fr = sv->r;
635 ft = sv->type.t;
636 fc = sv->c.ul;
/* split the offset into a magnitude (fc) and a direction flag (sign) */
638 if(fc>=0)
639 sign=0;
640 else {
641 sign=1;
642 fc=-fc;
645 v = fr & VT_VALMASK;
646 if (fr & VT_LVAL || fr == VT_LOCAL) {
647 uint32_t base = 0xb;
648 if(v < VT_CONST) {
/* the destination address is in register v */
649 base=intr(v);
650 v=VT_LOCAL;
651 fc=sign=0;
652 } else if(v == VT_CONST) {
/* constant/symbolic address: load it into lr (14) and store through it */
653 v1.type.t = ft;
654 v1.r = fr&~VT_LVAL;
655 v1.c.ul = sv->c.ul;
656 v1.sym=sv->sym;
657 load(base=14, &v1);
658 fc=sign=0;
659 v=VT_LOCAL;
661 if(v == VT_LOCAL) {
662 if(is_float(ft)) {
/* floating point store: word-aligned offset of at most 1020 */
663 calcaddr(&base,&fc,&sign,1020,2);
664 #ifdef TCC_ARM_VFP
665 op=0xED000A00; /* fsts */
666 if(!sign)
667 op|=0x800000;
668 if ((ft & VT_BTYPE) != VT_FLOAT)
669 op|=0x100; /* fsts -> fstd */
670 o(op|(vfpr(r)<<12)|(fc>>2)|(base<<16));
671 #else
672 op=0xED000100;
673 if(!sign)
674 op|=0x800000;
675 #if LDOUBLE_SIZE == 8
676 if ((ft & VT_BTYPE) != VT_FLOAT)
677 op|=0x8000;
678 #else
679 if ((ft & VT_BTYPE) == VT_DOUBLE)
680 op|=0x8000;
681 if ((ft & VT_BTYPE) == VT_LDOUBLE)
682 op|=0x400000;
683 #endif
684 o(op|(fpr(r)<<12)|(fc>>2)|(base<<16));
685 #endif
686 return;
687 } else if((ft & VT_BTYPE) == VT_SHORT) {
/* halfword store: 8-bit offset, split into two nibbles */
688 calcaddr(&base,&fc,&sign,255,0);
689 op=0xE14000B0;
690 if(!sign)
691 op|=0x800000;
692 o(op|(intr(r)<<12)|(base<<16)|((fc&0xf0)<<4)|(fc&0xf));
693 } else {
/* word or byte store: 12-bit offset */
694 calcaddr(&base,&fc,&sign,4095,0);
695 op=0xE5000000;
696 if(!sign)
697 op|=0x800000;
698 if ((ft & VT_BTYPE) == VT_BYTE)
699 op|=0x400000;
700 o(op|(intr(r)<<12)|fc|(base<<16));
702 return;
705 tcc_error("store unimplemented");
/* Emit code adding the (possibly negative) constant 'val' to sp. */
static void gadd_sp(int val)
{
    const uint32_t add_sp_sp_imm = 0xE28DD000; /* add sp,sp,# */

    stuff_const_harder(add_sp_sp_imm, val);
}
713 /* 'is_jmp' is '1' if it is a jump */
714 static void gcall_or_jmp(int is_jmp)
716 int r;
717 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
718 uint32_t x;
719 /* constant case */
720 x=encbranch(ind,ind+vtop->c.ul,0);
721 if(x) {
722 if (vtop->r & VT_SYM) {
723 /* relocation case */
724 greloc(cur_text_section, vtop->sym, ind, R_ARM_PC24);
725 } else
726 put_elf_reloc(symtab_section, cur_text_section, ind, R_ARM_PC24, 0);
727 o(x|(is_jmp?0xE0000000:0xE1000000));
728 } else {
729 if(!is_jmp)
730 o(0xE28FE004); // add lr,pc,#4
731 o(0xE51FF004); // ldr pc,[pc,#-4]
732 if (vtop->r & VT_SYM)
733 greloc(cur_text_section, vtop->sym, ind, R_ARM_ABS32);
734 o(vtop->c.ul);
736 } else {
737 /* otherwise, indirect call */
738 r = gv(RC_INT);
739 if(!is_jmp)
740 o(0xE1A0E00F); // mov lr,pc
741 o(0xE1A0F000|intr(r)); // mov pc,r
745 #ifdef TCC_ARM_HARDFLOAT
746 static int is_float_hgen_aggr(CType *type)
748 if ((type->t & VT_BTYPE) == VT_STRUCT) {
749 struct Sym *ref;
750 int btype, nb_fields = 0;
752 ref = type->ref;
753 btype = ref->type.t & VT_BTYPE;
754 if (btype == VT_FLOAT || btype == VT_DOUBLE) {
755 for(; ref && btype == (ref->type.t & VT_BTYPE); ref = ref->next, nb_fields++);
756 return !ref && nb_fields <= 4;
759 return 0;
/* Allocation state of the 16 single precision VFP argument registers. */
struct avail_regs {
    /* worst case: f(float, double, 3 float struct, double, 3 float struct, double) */
    signed char avail[3];   /* single registers skipped for alignment */
    int first_hole;         /* index in avail[] of the next hole to reuse */
    int last_hole;          /* index in avail[] one past the last hole */
    int first_free_reg;     /* next never-used register, -1 once exhausted */
};

#define AVAIL_REGS_INITIALIZER (struct avail_regs) { { 0, 0, 0}, 0, 0, 0 }

/* Assign a register for a CPRC param with correct size and alignment
 * size and align are in bytes, as returned by type_size.
 * Returns the index of the first single precision register assigned,
 * or -1 when the parameter does not fit; from then on every further
 * request fails as well. */
int assign_fpreg(struct avail_regs *avregs, int align, int size)
{
    int first_reg;
    int nregs = size / 4;

    if (avregs->first_free_reg == -1)
        return -1;

    if (align >> 3) {
        /* alignment needed (base type: double): start on an even
           register and remember a skipped odd one as a hole */
        first_reg = avregs->first_free_reg;
        if (first_reg & 1)
            avregs->avail[avregs->last_hole++] = first_reg++;
    } else if (size == 4 && avregs->first_hole != avregs->last_hole) {
        /* a lone float back-fills the oldest hole */
        return avregs->avail[avregs->first_hole++];
    } else {
        first_reg = avregs->first_free_reg;
    }

    if (first_reg + nregs > 16) {
        avregs->first_free_reg = -1;
        return -1;
    }
    avregs->first_free_reg = first_reg + nregs;
    return first_reg;
}
797 #endif
799 /* Generate function call. The function address is pushed first, then
800 all the parameters in call order. This function pops all the
801 parameters and the function address.
   Overview of the phases below:
   1. decide for every argument whether it goes to core registers
      (plan[]), to VFP registers (vfp_plan[], hard-float only) or to the
      stack, reordering structure arguments on the value stack;
   2. walk the arguments, pushing stack arguments and structures onto the
      machine stack and keeping register arguments on the value stack
      (their target registers are recorded in plan2[]);
   3. load the kept values into their registers, pop structure words
      that were assigned to registers, emit the call and release the
      stack space used. */
802 void gfunc_call(int nb_args)
804 int size, align, r, args_size, i, ncrn, ncprn, argno, vfp_argno;
805 signed char plan[4][2]={{-1,-1},{-1,-1},{-1,-1},{-1,-1}};
806 SValue *before_stack = NULL; /* SValue before first on stack argument */
807 SValue *before_vfpreg_hfa = NULL; /* SValue before first in VFP reg hfa argument */
808 #ifdef TCC_ARM_HARDFLOAT
809 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
810 signed char vfp_plan[16];
811 int plan2[4+16];
812 int variadic;
813 #else
814 int plan2[4]={0,0,0,0};
815 #endif
816 int vfp_todo=0;
817 int todo=0, keep;
819 #ifdef TCC_ARM_HARDFLOAT
820 memset(vfp_plan, -1, sizeof(vfp_plan));
821 memset(plan2, 0, sizeof(plan2));
822 variadic = (vtop[-nb_args].type.ref->c == FUNC_ELLIPSIS);
823 #endif
/* a pending comparison/jump on top of the stack is turned into a value */
824 r = vtop->r & VT_VALMASK;
825 if (r == VT_CMP || (r & ~1) == VT_JMP)
826 gv(RC_INT);
827 #ifdef TCC_ARM_EABI
/* function returning a structure of at most 4 bytes: swap the function
   with the entry above it and take that entry out of the argument count */
828 if((vtop[-nb_args].type.ref->type.t & VT_BTYPE) == VT_STRUCT
829 && type_size(&vtop[-nb_args].type.ref->type, &align) <= 4) {
830 SValue tmp;
831 tmp=vtop[-nb_args];
832 vtop[-nb_args]=vtop[-nb_args+1];
833 vtop[-nb_args+1]=tmp;
834 --nb_args;
/* extra VT_LLONG entry pushed for the planning loop only; it is dropped
   again right after that loop */
837 vpushi(0), nb_args++;
838 vtop->type.t = VT_LLONG;
839 #endif
840 ncrn = ncprn = argno = vfp_argno = args_size = 0;
841 /* Assign argument to registers and stack with alignment.
842 If, considering alignment constraints, enough registers of the correct type
843 (core or VFP) are free for the current argument, assign them to it, else
844 allocate on stack with correct alignment. Whenever a structure is allocated
845 in registers or on stack, it is always put on the stack at this stage. The
846 stack is divided in 3 zones. The zones are, from low addresses to high
847 addresses: structures to be loaded in core registers, structures to be
848 loaded in VFP registers, arguments allocated to stack. SValues representing
849 structures in the first zone are moved just after the SValue pointed to by
850 before_vfpreg_hfa. SValues representing structures in the second zone are
851 moved just after the SValue pointed to by before_stack. */
852 for(i = nb_args; i-- ;) {
853 int j, assigned_vfpreg = 0;
854 size = type_size(&vtop[-i].type, &align);
855 switch(vtop[-i].type.t & VT_BTYPE) {
856 case VT_STRUCT:
857 case VT_FLOAT:
858 case VT_DOUBLE:
859 case VT_LDOUBLE:
860 #ifdef TCC_ARM_HARDFLOAT
861 if (!variadic) {
862 int hfa = 0; /* Homogeneous float aggregate */
864 if (is_float(vtop[-i].type.t)
865 || (hfa = is_float_hgen_aggr(&vtop[-i].type))) {
866 int end_reg;
868 assigned_vfpreg = assign_fpreg(&avregs, align, size);
869 end_reg = assigned_vfpreg + (size - 1) / 4;
870 if (assigned_vfpreg >= 0) {
871 vfp_plan[vfp_argno++]=TREG_F0 + assigned_vfpreg/2;
872 if (hfa) {
873 /* before_stack can only have been set because all core registers
874 are assigned, so no need to care about before_vfpreg_hfa if
875 before_stack is set */
876 if (before_stack) {
877 vrote(&vtop[-i], &vtop[-i] - before_stack);
878 before_stack++;
879 } else if (!before_vfpreg_hfa)
880 before_vfpreg_hfa = &vtop[-i-1];
881 for (j = assigned_vfpreg; j <= end_reg; j++)
882 vfp_todo|=(1<<j);
884 continue;
885 } else {
886 if (!hfa)
887 vfp_argno++;
888 /* No need to update before_stack as no more hfa can be allocated in
889 VFP regs */
890 if (!before_vfpreg_hfa)
891 before_vfpreg_hfa = &vtop[-i-1];
892 break;
896 #endif
/* core register candidate: align the next core register number, then
   check whether the (word-rounded) argument still starts in r0-r3 */
897 ncrn = (ncrn + (align-1)/4) & -(align/4);
898 size = (size + 3) & -4;
899 if (ncrn + size/4 <= 4 || (ncrn < 4 && assigned_vfpreg != -1)) {
900 /* Either there is HFA in VFP registers, or there is arguments on stack,
901 it cannot be both. Hence either before_stack already points after
902 the slot where the vtop[-i] SValue is moved, or before_stack will not
903 be used */
904 if (before_vfpreg_hfa) {
905 vrote(&vtop[-i], &vtop[-i] - before_vfpreg_hfa);
906 before_vfpreg_hfa++;
908 for (j = ncrn; j < 4 && j < ncrn + size / 4; j++)
909 todo|=(1<<j);
910 ncrn+=size/4;
911 if (ncrn > 4) {
912 args_size = (ncrn - 4) * 4;
913 if (!before_stack)
914 before_stack = &vtop[-i-1];
917 else {
918 ncrn = 4;
919 /* No need to set before_vfpreg_hfa if not set since there will no
920 longer be any structure assigned to core registers */
921 if (!before_stack)
922 before_stack = &vtop[-i-1];
923 break;
925 continue;
926 default:
927 #ifdef TCC_ARM_EABI
928 if (!i) {
929 break;
931 #endif
932 if (ncrn < 4) {
933 int is_long = (vtop[-i].type.t & VT_BTYPE) == VT_LLONG;
935 if (is_long) {
/* 64-bit values start on an even core register */
936 ncrn = (ncrn + 1) & -2;
937 if (ncrn == 4) {
938 argno++;
939 break;
942 plan[argno++][0]=ncrn++;
943 if (is_long) {
944 plan[argno-1][1]=ncrn++;
946 continue;
948 argno++;
950 #ifdef TCC_ARM_EABI
/* stack argument needing more alignment than the current stack size
   provides: insert a VT_VOID padding entry */
951 if(args_size & (align-1)) {
952 vpushi(0);
953 vtop->type.t = VT_VOID; /* padding */
954 vrott(i+2);
955 args_size += 4;
956 nb_args++;
957 argno++;
959 #endif
960 args_size += (size + 3) & -4;
962 #ifdef TCC_ARM_EABI
963 vtop--, nb_args--;
964 #endif
/* second pass: push stack arguments, keep register arguments */
965 args_size = keep = 0;
966 for(i = 0;i < nb_args; i++) {
967 vrotb(keep+1);
968 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) {
969 size = type_size(&vtop->type, &align);
970 /* align to stack align size */
971 size = (size + 3) & -4;
972 /* allocate the necessary size on stack */
973 gadd_sp(-size);
974 /* generate structure store */
975 r = get_reg(RC_INT);
976 o(0xE1A0000D|(intr(r)<<12));
977 vset(&vtop->type, r | VT_LVAL, 0);
978 vswap();
979 vstore();
980 vtop--;
981 args_size += size;
982 } else if (is_float(vtop->type.t)) {
983 #ifdef TCC_ARM_HARDFLOAT
984 if (!variadic && --vfp_argno<16 && vfp_plan[vfp_argno]!=-1) {
985 plan2[keep++]=vfp_plan[vfp_argno];
986 continue;
988 #endif
989 #ifdef TCC_ARM_VFP
990 r=vfpr(gv(RC_FLOAT))<<12;
991 size=4;
992 if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
994 size=8;
995 r|=0x101; /* fstms -> fstmd */
997 o(0xED2D0A01+r);
998 #else
999 r=fpr(gv(RC_FLOAT))<<12;
1000 if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
1001 size = 4;
1002 else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1003 size = 8;
1004 else
1005 size = LDOUBLE_SIZE;
1007 if (size == 12)
1008 r|=0x400000;
1009 else if(size == 8)
1010 r|=0x8000;
1012 o(0xED2D0100|r|(size>>2));
1013 #endif
1014 vtop--;
1015 args_size += size;
1016 } else {
1017 int s;
1018 /* simple type (currently always same size) */
1019 /* XXX: implicit cast ? */
1020 size=4;
1021 if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
/* handle the second word of a 64-bit value first */
1022 lexpand_nr();
1023 s=-1;
1024 if(--argno<4 && plan[argno][1]!=-1)
1025 s=plan[argno][1];
1026 argno++;
1027 size = 8;
1028 if(s==-1) {
1029 r = gv(RC_INT);
1030 o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
1031 vtop--;
1032 } else {
1033 size=0;
1034 plan2[keep]=s;
1035 keep++;
1036 vswap();
1039 s=-1;
1040 if(--argno<4 && plan[argno][0]!=-1)
1041 s=plan[argno][0];
1042 #ifdef TCC_ARM_EABI
1043 if(vtop->type.t == VT_VOID) {
1044 if(s == -1)
1045 o(0xE24DD004); /* sub sp,sp,#4 */
1046 vtop--;
1047 } else
1048 #endif
1049 if(s == -1) {
1050 r = gv(RC_INT);
1051 o(0xE52D0004|(intr(r)<<12)); /* str r,[sp,#-4]! */
1052 vtop--;
1053 } else {
1054 size=0;
1055 plan2[keep]=s;
1056 keep++;
1058 args_size += size;
/* load the kept values into the registers recorded in plan2[] */
1061 for(i = 0; i < keep; i++) {
1062 vrotb(keep);
1063 gv(regmask(plan2[i]));
1064 #ifdef TCC_ARM_HARDFLOAT
1065 /* arg is in s(2d+1): plan2[i]<plan2[i+1] => alignment occurred (ex f,d,f) */
1066 if (i < keep - 1 && is_float(vtop->type.t) && (plan2[i] <= plan2[i + 1])) {
1067 o(0xEEF00A40|(vfpr(plan2[i])<<12)|vfpr(plan2[i]));
1069 #endif
1071 save_regs(keep); /* save used temporary registers */
1072 keep++;
/* pop the structure words that were assigned to core registers */
1073 if(ncrn) {
1074 int nb_regs=0;
1075 if (ncrn>4)
1076 ncrn=4;
1077 todo&=((1<<ncrn)-1);
1078 if(todo) {
1079 int i;
1080 o(0xE8BD0000|todo);
1081 for(i=0;i<4;i++)
1082 if(todo&(1<<i)) {
1083 vpushi(0);
1084 vtop->r=i;
1085 keep++;
1086 nb_regs++;
1089 args_size-=nb_regs*4;
/* load the structure words that were assigned to VFP registers */
1091 if(vfp_todo) {
1092 int nb_fregs=0;
1094 for(i=0;i<16;i++)
1095 if(vfp_todo&(1<<i)) {
1096 o(0xED9D0A00|(i&1)<<22|(i>>1)<<12|nb_fregs);
1097 vpushi(0);
1098 /* There might be 2 floats in a double VFP reg but that doesn't seem
1099 to matter */
1100 if (!(i%2))
1101 vtop->r=TREG_F0+i/2;
1102 keep++;
1103 nb_fregs++;
1105 if (nb_fregs) {
1106 gadd_sp(nb_fregs*4);
1107 args_size-=nb_fregs*4;
/* bring the function to the top, call it and free the argument area */
1110 vrotb(keep);
1111 gcall_or_jmp(0);
1112 if (args_size)
1113 gadd_sp(args_size);
1114 #ifdef TCC_ARM_EABI
/* small structure result: store r0 into the entry set aside at the start */
1115 if((vtop->type.ref->type.t & VT_BTYPE) == VT_STRUCT
1116 && type_size(&vtop->type.ref->type, &align) <= 4)
1118 store(REG_IRET,vtop-keep);
1119 ++keep;
1121 #ifdef TCC_ARM_VFP
1122 #ifdef TCC_ARM_HARDFLOAT
1123 else if(variadic && is_float(vtop->type.ref->type.t)) {
1124 #else
1125 else if(is_float(vtop->type.ref->type.t)) {
1126 #endif
/* float result returned in core registers: move it to s0/d0 */
1127 if((vtop->type.ref->type.t & VT_BTYPE) == VT_FLOAT) {
1128 o(0xEE000A10); /* fmsr s0,r0 */
1129 } else {
1130 o(0xEE000B10); /* fmdlr d0,r0 */
1131 o(0xEE201B10); /* fmdhr d0,r1 */
1134 #endif
1135 #endif
1136 vtop-=keep;
/* the function being compiled makes a call, so it is no leaf function */
1137 leaffunc = 0;
1140 /* Generate the prolog of a function of type 'func_type':
   count the core (n) and VFP (nf) registers holding incoming
   parameters, emit the instructions that save them and fp/ip/lr on the
   stack, reserve one instruction slot for the stack adjustment that
   gfunc_epilog() patches in, and declare every parameter as a local
   variable at its frame offset. */
1141 void gfunc_prolog(CType *func_type)
1143 Sym *sym,*sym2;
1144 int n,nf,size,align, variadic, struct_ret = 0;
1145 #ifdef TCC_ARM_HARDFLOAT
1146 struct avail_regs avregs = AVAIL_REGS_INITIALIZER;
1147 #endif
1149 sym = func_type->ref;
1150 func_vt = sym->type;
1152 n = nf = 0;
1153 variadic = (func_type->ref->c == FUNC_ELLIPSIS);
/* a structure result larger than 4 bytes takes one extra core
   register slot */
1154 if((func_vt.t & VT_BTYPE) == VT_STRUCT
1155 && type_size(&func_vt,&align) > 4)
1157 n++;
1158 struct_ret = 1;
1159 func_vc = 12; /* Offset from fp of the place to store the result */
/* first pass: how many core / VFP registers carry parameters */
1161 for(sym2=sym->next;sym2 && (n<4 || nf<16);sym2=sym2->next) {
1162 size = type_size(&sym2->type, &align);
1163 #ifdef TCC_ARM_HARDFLOAT
1164 if (!variadic && (is_float(sym2->type.t)
1165 || is_float_hgen_aggr(&sym2->type))) {
1166 int tmpnf = assign_fpreg(&avregs, align, size) + 1;
1167 nf = (tmpnf > nf) ? tmpnf : nf;
1168 } else
1169 #endif
1170 if (n < 4)
1171 n += (size + 3) / 4;
1173 o(0xE1A0C00D); /* mov ip,sp */
1174 if(variadic)
1175 n=4;
1176 if(n) {
1177 if(n>4)
1178 n=4;
1179 #ifdef TCC_ARM_EABI
/* round up to an even number of registers */
1180 n=(n+1)&-2;
1181 #endif
1182 o(0xE92D0000|((1<<n)-1)); /* save r0-r3 on stack if needed */
1184 if (nf) {
1185 if (nf>16)
1186 nf=16;
1187 nf=(nf+1)&-2; /* nf => HARDFLOAT => EABI */
1188 o(0xED2D0A00|nf); /* save s0-s15 on stack if needed */
1190 o(0xE92D5800); /* save fp, ip, lr */
1191 o(0xE1A0B00D); /* mov fp, sp */
1192 func_sub_sp_offset = ind;
1193 o(0xE1A00000); /* nop, leave space for stack adjustment in epilogue */
/* second pass: give every parameter its frame offset */
1195 int addr, pn = struct_ret, sn = 0; /* pn=core, sn=stack */
1197 #ifdef TCC_ARM_HARDFLOAT
1198 avregs = AVAIL_REGS_INITIALIZER;
1199 #endif
1200 while ((sym = sym->next)) {
1201 CType *type;
1202 type = &sym->type;
1203 size = type_size(type, &align);
/* size in words from here on */
1204 size = (size + 3) >> 2;
1205 #ifdef TCC_ARM_HARDFLOAT
1206 if (!variadic && (is_float(sym->type.t)
1207 || is_float_hgen_aggr(&sym->type))) {
1208 int fpn = assign_fpreg(&avregs, align, size << 2);
1209 if (fpn >= 0) {
1210 addr = fpn * 4;
1211 } else
1212 goto from_stack;
1213 } else
1214 #endif
1215 if (pn < 4) {
1216 #ifdef TCC_ARM_EABI
1217 pn = (pn + (align-1)/4) & -(align/4);
1218 #endif
1219 addr = (nf + pn) * 4;
1220 pn += size;
/* parameter split between core registers and the stack */
1221 if (!sn && pn > 4)
1222 sn = (pn - 4);
1223 } else {
1224 #ifdef TCC_ARM_HARDFLOAT
1225 from_stack:
1226 #endif
1227 #ifdef TCC_ARM_EABI
1228 sn = (sn + (align-1)/4) & -(align/4);
1229 #endif
1230 addr = (n + nf + sn) * 4;
1231 sn += size;
/* the offset is relative to fp, past the three words saved by the
   "save fp, ip, lr" instruction above (hence the + 12) */
1233 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | lvalue_type(type->t), addr+12);
1236 last_itod_magic=0;
1237 leaffunc = 1;
1238 loc = 0;
1241 /* generate function epilog */
1242 void gfunc_epilog(void)
1244 uint32_t x;
1245 int diff;
1246 #ifdef TCC_ARM_EABI
1247 /* Useless but harmless copy of the float result into main register(s) in case
1248 of variadic function in the hardfloat variant */
1249 if(is_float(func_vt.t)) {
1250 if((func_vt.t & VT_BTYPE) == VT_FLOAT)
1251 o(0xEE100A10); /* fmrs r0, s0 */
1252 else {
1253 o(0xEE100B10); /* fmrdl r0, d0 */
1254 o(0xEE301B10); /* fmrdh r1, d0 */
1257 #endif
1258 o(0xE89BA800); /* restore fp, sp, pc */
1259 diff = (-loc + 3) & -4;
1260 #ifdef TCC_ARM_EABI
1261 if(!leaffunc)
1262 diff = ((diff + 11) & -8) - 4;
1263 #endif
1264 if(diff > 0) {
1265 x=stuff_const(0xE24BD000, diff); /* sub sp,fp,# */
1266 if(x)
1267 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = x;
1268 else {
1269 int addr;
1270 addr=ind;
1271 o(0xE59FC004); /* ldr ip,[pc+4] */
1272 o(0xE04BD00C); /* sub sp,fp,ip */
1273 o(0xE1A0F00E); /* mov pc,lr */
1274 o(diff);
1275 *(uint32_t *)(cur_text_section->data + func_sub_sp_offset) = 0xE1000000|encbranch(func_sub_sp_offset,addr,1);
1280 /* generate a jump to a label */
1281 int gjmp(int t)
1283 int r;
1284 r=ind;
1285 o(0xE0000000|encbranch(r,t,1));
1286 return r;
/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    /* same as gjmp(), the position of the branch is not needed */
    (void)gjmp(a);
}
1295 /* Generate a test. Set 'inv' to invert the test. The stack entry is
   popped.  't' is the head of the jump chain the new branch is added
   to; the (possibly new) head of the chain is returned. */
1296 int gtst(int inv, int t)
1298 int v, r;
1299 uint32_t op;
1300 v = vtop->r & VT_VALMASK;
1301 r=ind;
1302 if (v == VT_CMP) {
/* flags already set: emit a conditional branch linked to chain t */
1303 op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
1304 op|=encbranch(r,t,1);
1305 o(op);
1306 t=r;
1307 } else if (v == VT_JMP || v == VT_JMPI) {
1308 if ((v & 1) == inv) {
/* the pending jumps already go the wanted way: merge both chains */
1309 if(!vtop->c.i)
1310 vtop->c.i=t;
1311 else {
1312 uint32_t *x;
1313 int p,lp;
1314 if(t) {
/* walk to the last branch of the pending chain and link it to t */
1315 p = vtop->c.i;
1316 do {
1317 p = decbranch(lp=p);
1318 } while(p);
1319 x = (uint32_t *)(cur_text_section->data + lp);
1320 *x &= 0xff000000;
1321 *x |= encbranch(lp,t,1);
1323 t = vtop->c.i;
1325 } else {
/* opposite sense: jump to t here and let the pending jumps land behind
   that jump */
1326 t = gjmp(t);
1327 gsym(vtop->c.i);
1329 } else {
1330 if (is_float(vtop->type.t)) {
/* compare the float against zero, then retry as a VT_CMP test */
1331 r=gv(RC_FLOAT);
1332 #ifdef TCC_ARM_VFP
1333 o(0xEEB50A40|(vfpr(r)<<12)|T2CPR(vtop->type.t)); /* fcmpzX */
1334 o(0xEEF1FA10); /* fmstat */
1335 #else
1336 o(0xEE90F118|(fpr(r)<<16));
1337 #endif
1338 vtop->r = VT_CMP;
1339 vtop->c.i = TOK_NE;
1340 return gtst(inv, t);
1341 } else if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1342 /* constant jmp optimization */
1343 if ((vtop->c.i != 0) != inv)
1344 t = gjmp(t);
1345 } else {
/* integer value: set the flags from it, then retry as a VT_CMP test */
1346 v = gv(RC_INT);
1347 o(0xE3300000|(intr(v)<<16));
1348 vtop->r = VT_CMP;
1349 vtop->c.i = TOK_NE;
1350 return gtst(inv, t);
1353 vtop--;
1354 return t;
1357 /* generate an integer binary operation */
1358 void gen_opi(int op)
1360 int c, func = 0;
1361 uint32_t opc = 0, r, fr;
1362 unsigned short retreg = REG_IRET;
1364 c=0;
1365 switch(op) {
1366 case '+':
1367 opc = 0x8;
1368 c=1;
1369 break;
1370 case TOK_ADDC1: /* add with carry generation */
1371 opc = 0x9;
1372 c=1;
1373 break;
1374 case '-':
1375 opc = 0x4;
1376 c=1;
1377 break;
1378 case TOK_SUBC1: /* sub with carry generation */
1379 opc = 0x5;
1380 c=1;
1381 break;
1382 case TOK_ADDC2: /* add with carry use */
1383 opc = 0xA;
1384 c=1;
1385 break;
1386 case TOK_SUBC2: /* sub with carry use */
1387 opc = 0xC;
1388 c=1;
1389 break;
1390 case '&':
1391 opc = 0x0;
1392 c=1;
1393 break;
1394 case '^':
1395 opc = 0x2;
1396 c=1;
1397 break;
1398 case '|':
1399 opc = 0x18;
1400 c=1;
1401 break;
1402 case '*':
1403 gv2(RC_INT, RC_INT);
1404 r = vtop[-1].r;
1405 fr = vtop[0].r;
1406 vtop--;
1407 o(0xE0000090|(intr(r)<<16)|(intr(r)<<8)|intr(fr));
1408 return;
1409 case TOK_SHL:
1410 opc = 0;
1411 c=2;
1412 break;
1413 case TOK_SHR:
1414 opc = 1;
1415 c=2;
1416 break;
1417 case TOK_SAR:
1418 opc = 2;
1419 c=2;
1420 break;
1421 case '/':
1422 case TOK_PDIV:
1423 func=TOK___divsi3;
1424 c=3;
1425 break;
1426 case TOK_UDIV:
1427 func=TOK___udivsi3;
1428 c=3;
1429 break;
1430 case '%':
1431 #ifdef TCC_ARM_EABI
1432 func=TOK___aeabi_idivmod;
1433 retreg=REG_LRET;
1434 #else
1435 func=TOK___modsi3;
1436 #endif
1437 c=3;
1438 break;
1439 case TOK_UMOD:
1440 #ifdef TCC_ARM_EABI
1441 func=TOK___aeabi_uidivmod;
1442 retreg=REG_LRET;
1443 #else
1444 func=TOK___umodsi3;
1445 #endif
1446 c=3;
1447 break;
1448 case TOK_UMULL:
1449 gv2(RC_INT, RC_INT);
1450 r=intr(vtop[-1].r2=get_reg(RC_INT));
1451 c=vtop[-1].r;
1452 vtop[-1].r=get_reg_ex(RC_INT,regmask(c));
1453 vtop--;
1454 o(0xE0800090|(r<<16)|(intr(vtop->r)<<12)|(intr(c)<<8)|intr(vtop[1].r));
1455 return;
1456 default:
1457 opc = 0x15;
1458 c=1;
1459 break;
1461 switch(c) {
1462 case 1:
1463 if((vtop[-1].r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1464 if(opc == 4 || opc == 5 || opc == 0xc) {
1465 vswap();
1466 opc|=2; // sub -> rsb
1469 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1470 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1471 gv(RC_INT);
1472 vswap();
1473 c=intr(gv(RC_INT));
1474 vswap();
1475 opc=0xE0000000|(opc<<20)|(c<<16);
1476 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1477 uint32_t x;
1478 x=stuff_const(opc|0x2000000,vtop->c.i);
1479 if(x) {
1480 r=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1481 o(x|(r<<12));
1482 goto done;
1485 fr=intr(gv(RC_INT));
1486 r=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1487 o(opc|(r<<12)|fr);
1488 done:
1489 vtop--;
1490 if (op >= TOK_ULT && op <= TOK_GT) {
1491 vtop->r = VT_CMP;
1492 vtop->c.i = op;
1494 break;
1495 case 2:
1496 opc=0xE1A00000|(opc<<5);
1497 if ((vtop->r & VT_VALMASK) == VT_CMP ||
1498 (vtop->r & (VT_VALMASK & ~1)) == VT_JMP)
1499 gv(RC_INT);
1500 vswap();
1501 r=intr(gv(RC_INT));
1502 vswap();
1503 opc|=r;
1504 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
1505 fr=intr(vtop[-1].r=get_reg_ex(RC_INT,regmask(vtop[-1].r)));
1506 c = vtop->c.i & 0x1f;
1507 o(opc|(c<<7)|(fr<<12));
1508 } else {
1509 fr=intr(gv(RC_INT));
1510 c=intr(vtop[-1].r=get_reg_ex(RC_INT,two2mask(vtop->r,vtop[-1].r)));
1511 o(opc|(c<<12)|(fr<<8)|0x10);
1513 vtop--;
1514 break;
1515 case 3:
1516 vpush_global_sym(&func_old_type, func);
1517 vrott(3);
1518 gfunc_call(2);
1519 vpushi(0);
1520 vtop->r = retreg;
1521 break;
1522 default:
1523 tcc_error("gen_opi %i unimplemented!",op);
1527 #ifdef TCC_ARM_VFP
1528 static int is_zero(int i)
1530 if((vtop[i].r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1531 return 0;
1532 if (vtop[i].type.t == VT_FLOAT)
1533 return (vtop[i].c.f == 0.f);
1534 else if (vtop[i].type.t == VT_DOUBLE)
1535 return (vtop[i].c.d == 0.0);
1536 return (vtop[i].c.ld == 0.l);
1539 /* generate a floating point operation 'v = t1 op t2' instruction. The
1540 * two operands are guaranted to have the same floating point type */
1541 void gen_opf(int op)
1543 uint32_t x;
1544 int fneg=0,r;
1545 x=0xEE000A00|T2CPR(vtop->type.t);
1546 switch(op) {
1547 case '+':
1548 if(is_zero(-1))
1549 vswap();
1550 if(is_zero(0)) {
1551 vtop--;
1552 return;
1554 x|=0x300000;
1555 break;
1556 case '-':
1557 x|=0x300040;
1558 if(is_zero(0)) {
1559 vtop--;
1560 return;
1562 if(is_zero(-1)) {
1563 x|=0x810000; /* fsubX -> fnegX */
1564 vswap();
1565 vtop--;
1566 fneg=1;
1568 break;
1569 case '*':
1570 x|=0x200000;
1571 break;
1572 case '/':
1573 x|=0x800000;
1574 break;
1575 default:
1576 if(op < TOK_ULT || op > TOK_GT) {
1577 tcc_error("unknown fp op %x!",op);
1578 return;
1580 if(is_zero(-1)) {
1581 vswap();
1582 switch(op) {
1583 case TOK_LT: op=TOK_GT; break;
1584 case TOK_GE: op=TOK_ULE; break;
1585 case TOK_LE: op=TOK_GE; break;
1586 case TOK_GT: op=TOK_ULT; break;
1589 x|=0xB40040; /* fcmpX */
1590 if(op!=TOK_EQ && op!=TOK_NE)
1591 x|=0x80; /* fcmpX -> fcmpeX */
1592 if(is_zero(0)) {
1593 vtop--;
1594 o(x|0x10000|(vfpr(gv(RC_FLOAT))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1595 } else {
1596 x|=vfpr(gv(RC_FLOAT));
1597 vswap();
1598 o(x|(vfpr(gv(RC_FLOAT))<<12));
1599 vtop--;
1601 o(0xEEF1FA10); /* fmstat */
1603 switch(op) {
1604 case TOK_LE: op=TOK_ULE; break;
1605 case TOK_LT: op=TOK_ULT; break;
1606 case TOK_UGE: op=TOK_GE; break;
1607 case TOK_UGT: op=TOK_GT; break;
1610 vtop->r = VT_CMP;
1611 vtop->c.i = op;
1612 return;
1614 r=gv(RC_FLOAT);
1615 x|=vfpr(r);
1616 r=regmask(r);
1617 if(!fneg) {
1618 int r2;
1619 vswap();
1620 r2=gv(RC_FLOAT);
1621 x|=vfpr(r2)<<16;
1622 r|=regmask(r2);
1624 vtop->r=get_reg_ex(RC_FLOAT,r);
1625 if(!fneg)
1626 vtop--;
1627 o(x|(vfpr(vtop->r)<<12));
1630 #else
1631 static uint32_t is_fconst()
1633 long double f;
1634 uint32_t r;
1635 if((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) != VT_CONST)
1636 return 0;
1637 if (vtop->type.t == VT_FLOAT)
1638 f = vtop->c.f;
1639 else if (vtop->type.t == VT_DOUBLE)
1640 f = vtop->c.d;
1641 else
1642 f = vtop->c.ld;
1643 if(!ieee_finite(f))
1644 return 0;
1645 r=0x8;
1646 if(f<0.0) {
1647 r=0x18;
1648 f=-f;
1650 if(f==0.0)
1651 return r;
1652 if(f==1.0)
1653 return r|1;
1654 if(f==2.0)
1655 return r|2;
1656 if(f==3.0)
1657 return r|3;
1658 if(f==4.0)
1659 return r|4;
1660 if(f==5.0)
1661 return r|5;
1662 if(f==0.5)
1663 return r|6;
1664 if(f==10.0)
1665 return r|7;
1666 return 0;
1669 /* generate a floating point operation 'v = t1 op t2' instruction. The
1670 two operands are guaranted to have the same floating point type */
1671 void gen_opf(int op)
1673 uint32_t x, r, r2, c1, c2;
1674 //fputs("gen_opf\n",stderr);
1675 vswap();
1676 c1 = is_fconst();
1677 vswap();
1678 c2 = is_fconst();
1679 x=0xEE000100;
1680 #if LDOUBLE_SIZE == 8
1681 if ((vtop->type.t & VT_BTYPE) != VT_FLOAT)
1682 x|=0x80;
1683 #else
1684 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
1685 x|=0x80;
1686 else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE)
1687 x|=0x80000;
1688 #endif
1689 switch(op)
1691 case '+':
1692 if(!c2) {
1693 vswap();
1694 c2=c1;
1696 vswap();
1697 r=fpr(gv(RC_FLOAT));
1698 vswap();
1699 if(c2) {
1700 if(c2>0xf)
1701 x|=0x200000; // suf
1702 r2=c2&0xf;
1703 } else {
1704 r2=fpr(gv(RC_FLOAT));
1706 break;
1707 case '-':
1708 if(c2) {
1709 if(c2<=0xf)
1710 x|=0x200000; // suf
1711 r2=c2&0xf;
1712 vswap();
1713 r=fpr(gv(RC_FLOAT));
1714 vswap();
1715 } else if(c1 && c1<=0xf) {
1716 x|=0x300000; // rsf
1717 r2=c1;
1718 r=fpr(gv(RC_FLOAT));
1719 vswap();
1720 } else {
1721 x|=0x200000; // suf
1722 vswap();
1723 r=fpr(gv(RC_FLOAT));
1724 vswap();
1725 r2=fpr(gv(RC_FLOAT));
1727 break;
1728 case '*':
1729 if(!c2 || c2>0xf) {
1730 vswap();
1731 c2=c1;
1733 vswap();
1734 r=fpr(gv(RC_FLOAT));
1735 vswap();
1736 if(c2 && c2<=0xf)
1737 r2=c2;
1738 else
1739 r2=fpr(gv(RC_FLOAT));
1740 x|=0x100000; // muf
1741 break;
1742 case '/':
1743 if(c2 && c2<=0xf) {
1744 x|=0x400000; // dvf
1745 r2=c2;
1746 vswap();
1747 r=fpr(gv(RC_FLOAT));
1748 vswap();
1749 } else if(c1 && c1<=0xf) {
1750 x|=0x500000; // rdf
1751 r2=c1;
1752 r=fpr(gv(RC_FLOAT));
1753 vswap();
1754 } else {
1755 x|=0x400000; // dvf
1756 vswap();
1757 r=fpr(gv(RC_FLOAT));
1758 vswap();
1759 r2=fpr(gv(RC_FLOAT));
1761 break;
1762 default:
1763 if(op >= TOK_ULT && op <= TOK_GT) {
1764 x|=0xd0f110; // cmfe
1765 /* bug (intention?) in Linux FPU emulator
1766 doesn't set carry if equal */
1767 switch(op) {
1768 case TOK_ULT:
1769 case TOK_UGE:
1770 case TOK_ULE:
1771 case TOK_UGT:
1772 tcc_error("unsigned comparision on floats?");
1773 break;
1774 case TOK_LT:
1775 op=TOK_Nset;
1776 break;
1777 case TOK_LE:
1778 op=TOK_ULE; /* correct in unordered case only if AC bit in FPSR set */
1779 break;
1780 case TOK_EQ:
1781 case TOK_NE:
1782 x&=~0x400000; // cmfe -> cmf
1783 break;
1785 if(c1 && !c2) {
1786 c2=c1;
1787 vswap();
1788 switch(op) {
1789 case TOK_Nset:
1790 op=TOK_GT;
1791 break;
1792 case TOK_GE:
1793 op=TOK_ULE;
1794 break;
1795 case TOK_ULE:
1796 op=TOK_GE;
1797 break;
1798 case TOK_GT:
1799 op=TOK_Nset;
1800 break;
1803 vswap();
1804 r=fpr(gv(RC_FLOAT));
1805 vswap();
1806 if(c2) {
1807 if(c2>0xf)
1808 x|=0x200000;
1809 r2=c2&0xf;
1810 } else {
1811 r2=fpr(gv(RC_FLOAT));
1813 vtop[-1].r = VT_CMP;
1814 vtop[-1].c.i = op;
1815 } else {
1816 tcc_error("unknown fp op %x!",op);
1817 return;
1820 if(vtop[-1].r == VT_CMP)
1821 c1=15;
1822 else {
1823 c1=vtop->r;
1824 if(r2&0x8)
1825 c1=vtop[-1].r;
1826 vtop[-1].r=get_reg_ex(RC_FLOAT,two2mask(vtop[-1].r,c1));
1827 c1=fpr(vtop[-1].r);
1829 vtop--;
1830 o(x|(r<<16)|(c1<<12)|r2);
1832 #endif
1834 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
1835 and 'long long' cases. */
1836 ST_FUNC void gen_cvt_itof1(int t)
1838 uint32_t r, r2;
1839 int bt;
1840 bt=vtop->type.t & VT_BTYPE;
1841 if(bt == VT_INT || bt == VT_SHORT || bt == VT_BYTE) {
1842 #ifndef TCC_ARM_VFP
1843 uint32_t dsize = 0;
1844 #endif
1845 r=intr(gv(RC_INT));
1846 #ifdef TCC_ARM_VFP
1847 r2=vfpr(vtop->r=get_reg(RC_FLOAT));
1848 o(0xEE000A10|(r<<12)|(r2<<16)); /* fmsr */
1849 r2|=r2<<12;
1850 if(!(vtop->type.t & VT_UNSIGNED))
1851 r2|=0x80; /* fuitoX -> fsituX */
1852 o(0xEEB80A40|r2|T2CPR(t)); /* fYitoX*/
1853 #else
1854 r2=fpr(vtop->r=get_reg(RC_FLOAT));
1855 if((t & VT_BTYPE) != VT_FLOAT)
1856 dsize=0x80; /* flts -> fltd */
1857 o(0xEE000110|dsize|(r2<<16)|(r<<12)); /* flts */
1858 if((vtop->type.t & (VT_UNSIGNED|VT_BTYPE)) == (VT_UNSIGNED|VT_INT)) {
1859 uint32_t off = 0;
1860 o(0xE3500000|(r<<12)); /* cmp */
1861 r=fpr(get_reg(RC_FLOAT));
1862 if(last_itod_magic) {
1863 off=ind+8-last_itod_magic;
1864 off/=4;
1865 if(off>255)
1866 off=0;
1868 o(0xBD1F0100|(r<<12)|off); /* ldflts */
1869 if(!off) {
1870 o(0xEA000000); /* b */
1871 last_itod_magic=ind;
1872 o(0x4F800000); /* 4294967296.0f */
1874 o(0xBE000100|dsize|(r2<<16)|(r2<<12)|r); /* adflt */
1876 #endif
1877 return;
1878 } else if(bt == VT_LLONG) {
1879 int func;
1880 CType *func_type = 0;
1881 if((t & VT_BTYPE) == VT_FLOAT) {
1882 func_type = &func_float_type;
1883 if(vtop->type.t & VT_UNSIGNED)
1884 func=TOK___floatundisf;
1885 else
1886 func=TOK___floatdisf;
1887 #if LDOUBLE_SIZE != 8
1888 } else if((t & VT_BTYPE) == VT_LDOUBLE) {
1889 func_type = &func_ldouble_type;
1890 if(vtop->type.t & VT_UNSIGNED)
1891 func=TOK___floatundixf;
1892 else
1893 func=TOK___floatdixf;
1894 } else if((t & VT_BTYPE) == VT_DOUBLE) {
1895 #else
1896 } else if((t & VT_BTYPE) == VT_DOUBLE || (t & VT_BTYPE) == VT_LDOUBLE) {
1897 #endif
1898 func_type = &func_double_type;
1899 if(vtop->type.t & VT_UNSIGNED)
1900 func=TOK___floatundidf;
1901 else
1902 func=TOK___floatdidf;
1904 if(func_type) {
1905 vpush_global_sym(func_type, func);
1906 vswap();
1907 gfunc_call(1);
1908 vpushi(0);
1909 vtop->r=TREG_F0;
1910 return;
1913 tcc_error("unimplemented gen_cvt_itof %x!",vtop->type.t);
1916 /* convert fp to int 't' type */
1917 void gen_cvt_ftoi(int t)
1919 uint32_t r, r2;
1920 int u, func = 0;
1921 u=t&VT_UNSIGNED;
1922 t&=VT_BTYPE;
1923 r2=vtop->type.t & VT_BTYPE;
1924 if(t==VT_INT) {
1925 #ifdef TCC_ARM_VFP
1926 r=vfpr(gv(RC_FLOAT));
1927 u=u?0:0x10000;
1928 o(0xEEBC0AC0|(r<<12)|r|T2CPR(r2)|u); /* ftoXizY */
1929 r2=intr(vtop->r=get_reg(RC_INT));
1930 o(0xEE100A10|(r<<16)|(r2<<12));
1931 return;
1932 #else
1933 if(u) {
1934 if(r2 == VT_FLOAT)
1935 func=TOK___fixunssfsi;
1936 #if LDOUBLE_SIZE != 8
1937 else if(r2 == VT_LDOUBLE)
1938 func=TOK___fixunsxfsi;
1939 else if(r2 == VT_DOUBLE)
1940 #else
1941 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1942 #endif
1943 func=TOK___fixunsdfsi;
1944 } else {
1945 r=fpr(gv(RC_FLOAT));
1946 r2=intr(vtop->r=get_reg(RC_INT));
1947 o(0xEE100170|(r2<<12)|r);
1948 return;
1950 #endif
1951 } else if(t == VT_LLONG) { // unsigned handled in gen_cvt_ftoi1
1952 if(r2 == VT_FLOAT)
1953 func=TOK___fixsfdi;
1954 #if LDOUBLE_SIZE != 8
1955 else if(r2 == VT_LDOUBLE)
1956 func=TOK___fixxfdi;
1957 else if(r2 == VT_DOUBLE)
1958 #else
1959 else if(r2 == VT_LDOUBLE || r2 == VT_DOUBLE)
1960 #endif
1961 func=TOK___fixdfdi;
1963 if(func) {
1964 vpush_global_sym(&func_old_type, func);
1965 vswap();
1966 gfunc_call(1);
1967 vpushi(0);
1968 if(t == VT_LLONG)
1969 vtop->r2 = REG_LRET;
1970 vtop->r = REG_IRET;
1971 return;
1973 tcc_error("unimplemented gen_cvt_ftoi!");
1976 /* convert from one floating point type to another */
1977 void gen_cvt_ftof(int t)
1979 #ifdef TCC_ARM_VFP
1980 if(((vtop->type.t & VT_BTYPE) == VT_FLOAT) != ((t & VT_BTYPE) == VT_FLOAT)) {
1981 uint32_t r = vfpr(gv(RC_FLOAT));
1982 o(0xEEB70AC0|(r<<12)|r|T2CPR(vtop->type.t));
1984 #else
1985 /* all we have to do on i386 and FPA ARM is to put the float in a register */
1986 gv(RC_FLOAT);
1987 #endif
1990 /* computed goto support */
1991 void ggoto(void)
1993 gcall_or_jmp(1);
1994 vtop--;
1997 /* end of ARM code generator */
1998 /*************************************************************/
1999 #endif
2000 /*************************************************************/