/* TCC runtime library.
   Parts of this code are (c) 2002 Fabrice Bellard

   Copyright (C) 1987, 1988, 1992, 1994, 1995 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
*/
/* Word/double-word types used by the libgcc-derived 64-bit helpers below.
   A "word" is 32 bits here (this code path is only built for 32-bit
   targets, see the #if further down). */
#define W_TYPE_SIZE   32
#define BITS_PER_UNIT 8

typedef int Wtype;                    /* signed word */
typedef unsigned int UWtype;          /* unsigned word */
typedef unsigned int USItype;         /* 32-bit unsigned, used in asm constraints */
typedef long long DWtype;             /* signed double-word */
typedef unsigned long long UDWtype;   /* unsigned double-word */

/* Low word first: matches little-endian layout so that DWunion.ll and
   DWunion.s alias the same 64-bit value. */
struct DWstruct {
    Wtype low, high;
};

/* View a 64-bit integer either as one long long or as two 32-bit halves. */
typedef union
{
    struct DWstruct s;
    DWtype ll;
} DWunion;

typedef long double XFtype;
#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
#define HIGH_WORD_COEFF (((UDWtype) 1) << WORD_SIZE)

/* the following deal with IEEE single-precision numbers */
#define EXCESS          126               /* exponent bias minus 1 (mantissa read as integer) */
#define SIGNBIT         0x80000000
#define HIDDEN          (1 << 23)         /* implicit leading mantissa bit */
#define SIGN(fp)        ((fp) & SIGNBIT)
#define EXP(fp)         (((fp) >> 23) & 0xFF)
#define MANT(fp)        (((fp) & 0x7FFFFF) | HIDDEN)
#define PACK(s,e,m)     ((s) | ((e) << 23) | (m))

/* the following deal with IEEE double-precision numbers */
#define EXCESSD         1022
#define HIDDEND         (1 << 20)
#define EXPD(fp)        (((fp.l.upper) >> 20) & 0x7FF)
#define SIGND(fp)       ((fp.l.upper) & SIGNBIT)
#define MANTD(fp)       (((((fp.l.upper) & 0xFFFFF) | HIDDEND) << 10) | \
                         (fp.l.lower >> 22))
#define HIDDEND_LL      ((long long)1 << 52)
#define MANTD_LL(fp)    ((fp.ll & (HIDDEND_LL-1)) | HIDDEND_LL)
#define PACKD_LL(s,e,m) (((long long)((s)+((e)<<20))<<32)|(m))

/* the following deal with x86 long double-precision numbers */
#define EXCESSLD        16382
#define EXPLD(fp)       (fp.l.upper & 0x7fff)
#define SIGNLD(fp)      ((fp.l.upper) & 0x8000)

/* only for x86: 80-bit extended precision, 64-bit explicit mantissa in
   'lower', sign+15-bit exponent in 'upper'. */
union ldouble_long {
    long double ld;
    struct {
        unsigned long long lower;
        unsigned short upper;
    } l;
};

union double_long {
    double d;
#if 1
    struct {
        unsigned int lower;   /* little-endian layout */
        int upper;
    } l;
#else
    struct {
        int upper;            /* big-endian layout (unused) */
        unsigned int lower;
    } l;
#endif
    long long ll;
};

union float_long {
    float f;
    unsigned int l;
};
109 /* XXX: we don't support several builtin supports for now */
110 #if !defined(TCC_TARGET_X86_64) && !defined(TCC_TARGET_ARM)
112 /* XXX: use gcc/tcc intrinsic ? */
113 #if defined(TCC_TARGET_I386)
114 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
115 __asm__ ("subl %5,%1\n\tsbbl %3,%0" \
116 : "=r" ((USItype) (sh)), \
117 "=&r" ((USItype) (sl)) \
118 : "0" ((USItype) (ah)), \
119 "g" ((USItype) (bh)), \
120 "1" ((USItype) (al)), \
121 "g" ((USItype) (bl)))
122 #define umul_ppmm(w1, w0, u, v) \
123 __asm__ ("mull %3" \
124 : "=a" ((USItype) (w0)), \
125 "=d" ((USItype) (w1)) \
126 : "%0" ((USItype) (u)), \
127 "rm" ((USItype) (v)))
128 #define udiv_qrnnd(q, r, n1, n0, dv) \
129 __asm__ ("divl %4" \
130 : "=a" ((USItype) (q)), \
131 "=d" ((USItype) (r)) \
132 : "0" ((USItype) (n0)), \
133 "1" ((USItype) (n1)), \
134 "rm" ((USItype) (dv)))
135 #define count_leading_zeros(count, x) \
136 do { \
137 USItype __cbtmp; \
138 __asm__ ("bsrl %1,%0" \
139 : "=r" (__cbtmp) : "rm" ((USItype) (x))); \
140 (count) = __cbtmp ^ 31; \
141 } while (0)
142 #else
143 #error unsupported CPU type
144 #endif
146 /* most of this code is taken from libgcc2.c from gcc */
148 static UDWtype __udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
150 DWunion ww;
151 DWunion nn, dd;
152 DWunion rr;
153 UWtype d0, d1, n0, n1, n2;
154 UWtype q0, q1;
155 UWtype b, bm;
157 nn.ll = n;
158 dd.ll = d;
160 d0 = dd.s.low;
161 d1 = dd.s.high;
162 n0 = nn.s.low;
163 n1 = nn.s.high;
165 #if !defined(UDIV_NEEDS_NORMALIZATION)
166 if (d1 == 0)
168 if (d0 > n1)
170 /* 0q = nn / 0D */
172 udiv_qrnnd (q0, n0, n1, n0, d0);
173 q1 = 0;
175 /* Remainder in n0. */
177 else
179 /* qq = NN / 0d */
181 if (d0 == 0)
182 d0 = 1 / d0; /* Divide intentionally by zero. */
184 udiv_qrnnd (q1, n1, 0, n1, d0);
185 udiv_qrnnd (q0, n0, n1, n0, d0);
187 /* Remainder in n0. */
190 if (rp != 0)
192 rr.s.low = n0;
193 rr.s.high = 0;
194 *rp = rr.ll;
198 #else /* UDIV_NEEDS_NORMALIZATION */
200 if (d1 == 0)
202 if (d0 > n1)
204 /* 0q = nn / 0D */
206 count_leading_zeros (bm, d0);
208 if (bm != 0)
210 /* Normalize, i.e. make the most significant bit of the
211 denominator set. */
213 d0 = d0 << bm;
214 n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
215 n0 = n0 << bm;
218 udiv_qrnnd (q0, n0, n1, n0, d0);
219 q1 = 0;
221 /* Remainder in n0 >> bm. */
223 else
225 /* qq = NN / 0d */
227 if (d0 == 0)
228 d0 = 1 / d0; /* Divide intentionally by zero. */
230 count_leading_zeros (bm, d0);
232 if (bm == 0)
234 /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
235 conclude (the most significant bit of n1 is set) /\ (the
236 leading quotient digit q1 = 1).
238 This special case is necessary, not an optimization.
239 (Shifts counts of W_TYPE_SIZE are undefined.) */
241 n1 -= d0;
242 q1 = 1;
244 else
246 /* Normalize. */
248 b = W_TYPE_SIZE - bm;
250 d0 = d0 << bm;
251 n2 = n1 >> b;
252 n1 = (n1 << bm) | (n0 >> b);
253 n0 = n0 << bm;
255 udiv_qrnnd (q1, n1, n2, n1, d0);
258 /* n1 != d0... */
260 udiv_qrnnd (q0, n0, n1, n0, d0);
262 /* Remainder in n0 >> bm. */
265 if (rp != 0)
267 rr.s.low = n0 >> bm;
268 rr.s.high = 0;
269 *rp = rr.ll;
272 #endif /* UDIV_NEEDS_NORMALIZATION */
274 else
276 if (d1 > n1)
278 /* 00 = nn / DD */
280 q0 = 0;
281 q1 = 0;
283 /* Remainder in n1n0. */
284 if (rp != 0)
286 rr.s.low = n0;
287 rr.s.high = n1;
288 *rp = rr.ll;
291 else
293 /* 0q = NN / dd */
295 count_leading_zeros (bm, d1);
296 if (bm == 0)
298 /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
299 conclude (the most significant bit of n1 is set) /\ (the
300 quotient digit q0 = 0 or 1).
302 This special case is necessary, not an optimization. */
304 /* The condition on the next line takes advantage of that
305 n1 >= d1 (true due to program flow). */
306 if (n1 > d1 || n0 >= d0)
308 q0 = 1;
309 sub_ddmmss (n1, n0, n1, n0, d1, d0);
311 else
312 q0 = 0;
314 q1 = 0;
316 if (rp != 0)
318 rr.s.low = n0;
319 rr.s.high = n1;
320 *rp = rr.ll;
323 else
325 UWtype m1, m0;
326 /* Normalize. */
328 b = W_TYPE_SIZE - bm;
330 d1 = (d1 << bm) | (d0 >> b);
331 d0 = d0 << bm;
332 n2 = n1 >> b;
333 n1 = (n1 << bm) | (n0 >> b);
334 n0 = n0 << bm;
336 udiv_qrnnd (q0, n1, n2, n1, d1);
337 umul_ppmm (m1, m0, q0, d0);
339 if (m1 > n1 || (m1 == n1 && m0 > n0))
341 q0--;
342 sub_ddmmss (m1, m0, m1, m0, d1, d0);
345 q1 = 0;
347 /* Remainder in (n1n0 - m1m0) >> bm. */
348 if (rp != 0)
350 sub_ddmmss (n1, n0, n1, n0, m1, m0);
351 rr.s.low = (n1 << b) | (n0 >> bm);
352 rr.s.high = n1 >> bm;
353 *rp = rr.ll;
359 ww.s.low = q0;
360 ww.s.high = q1;
361 return ww.ll;
364 #define __negdi2(a) (-(a))
366 long long __divdi3(long long u, long long v)
368 int c = 0;
369 DWunion uu, vv;
370 DWtype w;
372 uu.ll = u;
373 vv.ll = v;
375 if (uu.s.high < 0) {
376 c = ~c;
377 uu.ll = __negdi2 (uu.ll);
379 if (vv.s.high < 0) {
380 c = ~c;
381 vv.ll = __negdi2 (vv.ll);
383 w = __udivmoddi4 (uu.ll, vv.ll, (UDWtype *) 0);
384 if (c)
385 w = __negdi2 (w);
386 return w;
389 long long __moddi3(long long u, long long v)
391 int c = 0;
392 DWunion uu, vv;
393 DWtype w;
395 uu.ll = u;
396 vv.ll = v;
398 if (uu.s.high < 0) {
399 c = ~c;
400 uu.ll = __negdi2 (uu.ll);
402 if (vv.s.high < 0)
403 vv.ll = __negdi2 (vv.ll);
405 __udivmoddi4 (uu.ll, vv.ll, (UDWtype *) &w);
406 if (c)
407 w = __negdi2 (w);
408 return w;
411 unsigned long long __udivdi3(unsigned long long u, unsigned long long v)
413 return __udivmoddi4 (u, v, (UDWtype *) 0);
416 unsigned long long __umoddi3(unsigned long long u, unsigned long long v)
418 UDWtype w;
420 __udivmoddi4 (u, v, &w);
421 return w;
/* XXX: fix tcc's code generator to do this instead */
/* Arithmetic (sign-extending) right shift of a 64-bit value by b bits
   (0 <= b < 64).  When self-compiled with tcc, 64-bit shifts are built
   from 32-bit halves; any other compiler handles "a >> b" natively. */
long long __ashrdi3(long long a, int b)
{
#ifdef __TINYC__
    DWunion u;
    u.ll = a;
    if (b >= 32) {
        u.s.low = u.s.high >> (b - 32);
        u.s.high = u.s.high >> 31;     /* replicate the sign bit */
    } else if (b != 0) {
        /* low half gets bits shifted down from the high half */
        u.s.low = ((unsigned)u.s.low >> b) | (u.s.high << (32 - b));
        u.s.high = u.s.high >> b;
    }
    return u.ll;
#else
    return a >> b;
#endif
}
/* XXX: fix tcc's code generator to do this instead */
/* Logical (zero-filling) right shift of a 64-bit value by b bits
   (0 <= b < 64).  32-bit half implementation for tcc self-compilation;
   native shift otherwise. */
unsigned long long __lshrdi3(unsigned long long a, int b)
{
#ifdef __TINYC__
    DWunion u;
    u.ll = a;
    if (b >= 32) {
        u.s.low = (unsigned)u.s.high >> (b - 32);
        u.s.high = 0;                  /* zero fill, unlike __ashrdi3 */
    } else if (b != 0) {
        u.s.low = ((unsigned)u.s.low >> b) | (u.s.high << (32 - b));
        u.s.high = (unsigned)u.s.high >> b;
    }
    return u.ll;
#else
    return a >> b;
#endif
}
/* XXX: fix tcc's code generator to do this instead */
/* Left shift of a 64-bit value by b bits (0 <= b < 64).  32-bit half
   implementation for tcc self-compilation; native shift otherwise. */
long long __ashldi3(long long a, int b)
{
#ifdef __TINYC__
    DWunion u;
    u.ll = a;
    if (b >= 32) {
        u.s.high = (unsigned)u.s.low << (b - 32);
        u.s.low = 0;
    } else if (b != 0) {
        /* high half receives bits carried up from the low half */
        u.s.high = ((unsigned)u.s.high << b) | ((unsigned)u.s.low >> (32 - b));
        u.s.low = (unsigned)u.s.low << b;
    }
    return u.ll;
#else
    return a << b;
#endif
}
481 long long __tcc_cvt_ftol(long double x)
483 unsigned c0, c1;
484 long long ret;
485 __asm__ __volatile__ ("fnstcw %0" : "=m" (c0));
486 c1 = c0 | 0x0C00;
487 __asm__ __volatile__ ("fldcw %0" : : "m" (c1));
488 __asm__ __volatile__ ("fistpll %0" : "=m" (ret));
489 __asm__ __volatile__ ("fldcw %0" : : "m" (c0));
490 return ret;
493 #endif /* !__x86_64__ */
495 /* XXX: fix tcc's code generator to do this instead */
496 float __floatundisf(unsigned long long a)
498 DWunion uu;
499 XFtype r;
501 uu.ll = a;
502 if (uu.s.high >= 0) {
503 return (float)uu.ll;
504 } else {
505 r = (XFtype)uu.ll;
506 r += 18446744073709551616.0;
507 return (float)r;
511 double __floatundidf(unsigned long long a)
513 DWunion uu;
514 XFtype r;
516 uu.ll = a;
517 if (uu.s.high >= 0) {
518 return (double)uu.ll;
519 } else {
520 r = (XFtype)uu.ll;
521 r += 18446744073709551616.0;
522 return (double)r;
526 long double __floatundixf(unsigned long long a)
528 DWunion uu;
529 XFtype r;
531 uu.ll = a;
532 if (uu.s.high >= 0) {
533 return (long double)uu.ll;
534 } else {
535 r = (XFtype)uu.ll;
536 r += 18446744073709551616.0;
537 return (long double)r;
541 unsigned long long __fixunssfdi (float a1)
543 register union float_long fl1;
544 register int exp;
545 register unsigned long l;
547 fl1.f = a1;
549 if (fl1.l == 0)
550 return (0);
552 exp = EXP (fl1.l) - EXCESS - 24;
554 l = MANT(fl1.l);
555 if (exp >= 41)
556 return (unsigned long long)-1;
557 else if (exp >= 0)
558 return (unsigned long long)l << exp;
559 else if (exp >= -23)
560 return l >> -exp;
561 else
562 return 0;
565 unsigned long long __fixunsdfdi (double a1)
567 register union double_long dl1;
568 register int exp;
569 register unsigned long long l;
571 dl1.d = a1;
573 if (dl1.ll == 0)
574 return (0);
576 exp = EXPD (dl1) - EXCESSD - 53;
578 l = MANTD_LL(dl1);
580 if (exp >= 12)
581 return (unsigned long long)-1;
582 else if (exp >= 0)
583 return l << exp;
584 else if (exp >= -52)
585 return l >> -exp;
586 else
587 return 0;
590 unsigned long long __fixunsxfdi (long double a1)
592 register union ldouble_long dl1;
593 register int exp;
594 register unsigned long long l;
596 dl1.ld = a1;
598 if (dl1.l.lower == 0 && dl1.l.upper == 0)
599 return (0);
601 exp = EXPLD (dl1) - EXCESSLD - 64;
603 l = dl1.l.lower;
605 if (exp > 0)
606 return (unsigned long long)-1;
607 else if (exp >= -63)
608 return l >> -exp;
609 else
610 return 0;
/* Signed float -> long long: convert the magnitude with the unsigned
   helper, then restore the sign. */
long long __fixsfdi (float a1)
{
    long long ret; int s;
    ret = __fixunssfdi((s = a1 >= 0) ? a1 : -a1);
    return s ? ret : -ret;
}
/* Signed double -> long long: convert the magnitude with the unsigned
   helper, then restore the sign. */
long long __fixdfdi (double a1)
{
    long long ret; int s;
    ret = __fixunsdfdi((s = a1 >= 0) ? a1 : -a1);
    return s ? ret : -ret;
}
/* Signed long double -> long long: convert the magnitude with the
   unsigned helper, then restore the sign. */
long long __fixxfdi (long double a1)
{
    long long ret; int s;
    ret = __fixunsxfdi((s = a1 >= 0) ? a1 : -a1);
    return s ? ret : -ret;
}
634 #if defined(TCC_TARGET_X86_64) && !defined(_WIN64)
#ifndef __TINYC__
# include <stdlib.h>
# include <stdio.h>
# include <string.h>
# undef __va_start
# undef __va_arg
# undef __va_copy
# undef __va_end
#else
/* Avoid include files, they may not be available when cross compiling */
extern void *memset(void *s, int c, __SIZE_TYPE__ n);
extern void abort(void);
#endif

/* This should be in sync with our include/stdarg.h */
enum __va_arg_type {
    __va_gen_reg, __va_float_reg, __va_stack
};

/* GCC compatible definition of va_list (System V x86-64 ABI layout). */
typedef struct {
    unsigned int gp_offset;       /* bytes consumed of the GP register save area */
    unsigned int fp_offset;       /* bytes consumed of the FP register save area */
    union {
        unsigned int overflow_offset;  /* before __va_start: offset from frame */
        char *overflow_arg_area;       /* after __va_start: next stack argument */
    };
    char *reg_save_area;          /* start of the register save area */
} __va_list_struct;
666 void __va_start(__va_list_struct *ap, void *fp)
668 memset(ap, 0, sizeof(__va_list_struct));
669 *ap = *(__va_list_struct *)((char *)fp - 16);
670 ap->overflow_arg_area = (char *)fp + ap->overflow_offset;
671 ap->reg_save_area = (char *)fp - 176 - 16;
674 void *__va_arg(__va_list_struct *ap,
675 enum __va_arg_type arg_type,
676 int size, int align)
678 size = (size + 7) & ~7;
679 align = (align + 7) & ~7;
680 switch (arg_type) {
681 case __va_gen_reg:
682 if (ap->gp_offset + size <= 48) {
683 ap->gp_offset += size;
684 return ap->reg_save_area + ap->gp_offset - size;
686 goto use_overflow_area;
688 case __va_float_reg:
689 if (ap->fp_offset < 128 + 48) {
690 ap->fp_offset += 16;
691 return ap->reg_save_area + ap->fp_offset - 16;
693 size = 8;
694 goto use_overflow_area;
696 case __va_stack:
697 use_overflow_area:
698 ap->overflow_arg_area += size;
699 ap->overflow_arg_area = (char*)((long long)(ap->overflow_arg_area + align - 1) & -align);
700 return ap->overflow_arg_area - size;
702 default: /* should never happen */
703 #ifndef __TINYC__
704 fprintf(stderr, "unknown ABI type for __va_arg\n");
705 #endif
706 abort();
709 #endif /* __x86_64__ */
#ifdef TCC_TARGET_ARM
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <stdio.h>

/* Flushing for tccrun */
/* Invalidate the instruction cache for [beginning, end) so code that
   tccrun just generated can be executed safely on ARM. */
void __clear_cache(void *beginning, void *end)
{
/* __ARM_NR_cacheflush is kernel private and should not be used in user space.
 * However, there is no ARM asm parser in tcc so we use it for now */
#if 1
    syscall(__ARM_NR_cacheflush, beginning, end, 0);
#else
    __asm__ ("push {r7}\n\t"
             "mov r7, #0xf0002\n\t"
             "mov r2, #0\n\t"
             "swi 0\n\t"
             "pop {r7}\n\t"
             "ret");
#endif
}
#endif /* arm */