/* Commit: Sorted out CMake on x86-64 and fixed silly XMM# bug introduced when working on Win64...
   [tinycc.git] / lib / libtcc1.c
   blob 159c401c1bf3af48f45dd8d640196992319dc26e */
/* TCC runtime library.
   Parts of this code are (c) 2002 Fabrice Bellard

   Copyright (C) 1987, 1988, 1992, 1994, 1995 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
*/

/* Word-size parameters for the double-word arithmetic in this file:
   a word (Wtype) is W_TYPE_SIZE bits and a byte is BITS_PER_UNIT bits. */
#define W_TYPE_SIZE   32
#define BITS_PER_UNIT 8

typedef int Wtype;                  /* signed word */
typedef unsigned int UWtype;        /* unsigned word */
typedef unsigned int USItype;       /* unsigned word type used in the asm helper casts */
typedef long long DWtype;           /* signed double word */
typedef unsigned long long UDWtype; /* unsigned double word */

/* A double word split into two words.  The low half is declared first,
   which matches the in-memory layout of DWtype only on little-endian
   targets. */
struct DWstruct {
    Wtype low, high;
};

/* Overlay giving access to a double word either as a whole (ll) or as
   its two halves (s.low / s.high). */
typedef union
{
  struct DWstruct s;
  DWtype ll;
} DWunion;

typedef long double XFtype;
#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
#define HIGH_WORD_COEFF (((UDWtype) 1) << WORD_SIZE)

/* the following deal with IEEE single-precision numbers
   (fp is the raw 32-bit pattern as an integer) */
#define EXCESS		126
#define SIGNBIT		0x80000000
#define HIDDEN		(1 << 23)
#define SIGN(fp)	((fp) & SIGNBIT)
#define EXP(fp)		(((fp) >> 23) & 0xFF)
#define MANT(fp)	(((fp) & 0x7FFFFF) | HIDDEN)
#define PACK(s,e,m)	((s) | ((e) << 23) | (m))

/* the following deal with IEEE double-precision numbers
   (fp is an lvalue with .l.upper / .l.lower / .ll members,
   e.g. a union double_long) */
#define EXCESSD		1022
#define HIDDEND		(1 << 20)
#define EXPD(fp)	((((fp).l.upper) >> 20) & 0x7FF)
#define SIGND(fp)	(((fp).l.upper) & SIGNBIT)
#define MANTD(fp)	((((((fp).l.upper) & 0xFFFFF) | HIDDEND) << 10) | \
				((fp).l.lower >> 22))
#define HIDDEND_LL	((long long)1 << 52)
#define MANTD_LL(fp)	(((fp).ll & (HIDDEND_LL-1)) | HIDDEND_LL)
#define PACKD_LL(s,e,m)	(((long long)((s)+((e)<<20))<<32)|(m))

/* the following deal with x86 long double-precision numbers
   (fp is an lvalue with an .l.upper member, e.g. a union ldouble_long) */
#define EXCESSLD	16382
#define EXPLD(fp)	((fp).l.upper & 0x7fff)
#define SIGNLD(fp)	(((fp).l.upper) & 0x8000)

/* only for x86 */
/* Overlay of a long double with its raw fields: `lower` covers the first
   8 bytes and `upper` the following 2 bytes.  EXPLD/SIGNLD read the
   exponent and sign out of `upper`. */
union ldouble_long {
    long double ld;
    struct {
        unsigned long long lower;
        unsigned short upper;
    } l;
};

/* Overlay of a double with its raw 64-bit pattern, either as one
   long long (ll) or as two 32-bit halves (l.lower / l.upper).
   The `#if 1` branch places the low half first in memory; the disabled
   branch is the opposite ordering. */
union double_long {
    double d;
#if 1
    struct {
        unsigned int lower;
        int upper;
    } l;
#else
    struct {
        int upper;
        unsigned int lower;
    } l;
#endif
    long long ll;
};

/* Overlay of a float with its raw bit pattern.
   `l` is an unsigned int so that it has the same width as `f`; a `long`
   is 64 bits on LP64 targets and would expose bytes that storing `f`
   never writes. */
union float_long {
    float f;
    unsigned int l;
};

/* XXX: we don't support several builtin supports for now */
#ifndef __x86_64__

/* XXX: use gcc/tcc intrinsic ? */
113 #if defined(__i386__)
114 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
115 __asm__ ("subl %5,%1\n\tsbbl %3,%0" \
116 : "=r" ((USItype) (sh)), \
117 "=&r" ((USItype) (sl)) \
118 : "0" ((USItype) (ah)), \
119 "g" ((USItype) (bh)), \
120 "1" ((USItype) (al)), \
121 "g" ((USItype) (bl)))
122 #define umul_ppmm(w1, w0, u, v) \
123 __asm__ ("mull %3" \
124 : "=a" ((USItype) (w0)), \
125 "=d" ((USItype) (w1)) \
126 : "%0" ((USItype) (u)), \
127 "rm" ((USItype) (v)))
128 #define udiv_qrnnd(q, r, n1, n0, dv) \
129 __asm__ ("divl %4" \
130 : "=a" ((USItype) (q)), \
131 "=d" ((USItype) (r)) \
132 : "0" ((USItype) (n0)), \
133 "1" ((USItype) (n1)), \
134 "rm" ((USItype) (dv)))
135 #define count_leading_zeros(count, x) \
136 do { \
137 USItype __cbtmp; \
138 __asm__ ("bsrl %1,%0" \
139 : "=r" (__cbtmp) : "rm" ((USItype) (x))); \
140 (count) = __cbtmp ^ 31; \
141 } while (0)
142 #else
143 #error unsupported CPU type
144 #endif
146 /* most of this code is taken from libgcc2.c from gcc */
148 static UDWtype __udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
150 DWunion ww;
151 DWunion nn, dd;
152 DWunion rr;
153 UWtype d0, d1, n0, n1, n2;
154 UWtype q0, q1;
155 UWtype b, bm;
157 nn.ll = n;
158 dd.ll = d;
160 d0 = dd.s.low;
161 d1 = dd.s.high;
162 n0 = nn.s.low;
163 n1 = nn.s.high;
165 #if !defined(UDIV_NEEDS_NORMALIZATION)
166 if (d1 == 0)
168 if (d0 > n1)
170 /* 0q = nn / 0D */
172 udiv_qrnnd (q0, n0, n1, n0, d0);
173 q1 = 0;
175 /* Remainder in n0. */
177 else
179 /* qq = NN / 0d */
181 if (d0 == 0)
182 d0 = 1 / d0; /* Divide intentionally by zero. */
184 udiv_qrnnd (q1, n1, 0, n1, d0);
185 udiv_qrnnd (q0, n0, n1, n0, d0);
187 /* Remainder in n0. */
190 if (rp != 0)
192 rr.s.low = n0;
193 rr.s.high = 0;
194 *rp = rr.ll;
198 #else /* UDIV_NEEDS_NORMALIZATION */
200 if (d1 == 0)
202 if (d0 > n1)
204 /* 0q = nn / 0D */
206 count_leading_zeros (bm, d0);
208 if (bm != 0)
210 /* Normalize, i.e. make the most significant bit of the
211 denominator set. */
213 d0 = d0 << bm;
214 n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
215 n0 = n0 << bm;
218 udiv_qrnnd (q0, n0, n1, n0, d0);
219 q1 = 0;
221 /* Remainder in n0 >> bm. */
223 else
225 /* qq = NN / 0d */
227 if (d0 == 0)
228 d0 = 1 / d0; /* Divide intentionally by zero. */
230 count_leading_zeros (bm, d0);
232 if (bm == 0)
234 /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
235 conclude (the most significant bit of n1 is set) /\ (the
236 leading quotient digit q1 = 1).
238 This special case is necessary, not an optimization.
239 (Shifts counts of W_TYPE_SIZE are undefined.) */
241 n1 -= d0;
242 q1 = 1;
244 else
246 /* Normalize. */
248 b = W_TYPE_SIZE - bm;
250 d0 = d0 << bm;
251 n2 = n1 >> b;
252 n1 = (n1 << bm) | (n0 >> b);
253 n0 = n0 << bm;
255 udiv_qrnnd (q1, n1, n2, n1, d0);
258 /* n1 != d0... */
260 udiv_qrnnd (q0, n0, n1, n0, d0);
262 /* Remainder in n0 >> bm. */
265 if (rp != 0)
267 rr.s.low = n0 >> bm;
268 rr.s.high = 0;
269 *rp = rr.ll;
272 #endif /* UDIV_NEEDS_NORMALIZATION */
274 else
276 if (d1 > n1)
278 /* 00 = nn / DD */
280 q0 = 0;
281 q1 = 0;
283 /* Remainder in n1n0. */
284 if (rp != 0)
286 rr.s.low = n0;
287 rr.s.high = n1;
288 *rp = rr.ll;
291 else
293 /* 0q = NN / dd */
295 count_leading_zeros (bm, d1);
296 if (bm == 0)
298 /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
299 conclude (the most significant bit of n1 is set) /\ (the
300 quotient digit q0 = 0 or 1).
302 This special case is necessary, not an optimization. */
304 /* The condition on the next line takes advantage of that
305 n1 >= d1 (true due to program flow). */
306 if (n1 > d1 || n0 >= d0)
308 q0 = 1;
309 sub_ddmmss (n1, n0, n1, n0, d1, d0);
311 else
312 q0 = 0;
314 q1 = 0;
316 if (rp != 0)
318 rr.s.low = n0;
319 rr.s.high = n1;
320 *rp = rr.ll;
323 else
325 UWtype m1, m0;
326 /* Normalize. */
328 b = W_TYPE_SIZE - bm;
330 d1 = (d1 << bm) | (d0 >> b);
331 d0 = d0 << bm;
332 n2 = n1 >> b;
333 n1 = (n1 << bm) | (n0 >> b);
334 n0 = n0 << bm;
336 udiv_qrnnd (q0, n1, n2, n1, d1);
337 umul_ppmm (m1, m0, q0, d0);
339 if (m1 > n1 || (m1 == n1 && m0 > n0))
341 q0--;
342 sub_ddmmss (m1, m0, m1, m0, d1, d0);
345 q1 = 0;
347 /* Remainder in (n1n0 - m1m0) >> bm. */
348 if (rp != 0)
350 sub_ddmmss (n1, n0, n1, n0, m1, m0);
351 rr.s.low = (n1 << b) | (n0 >> bm);
352 rr.s.high = n1 >> bm;
353 *rp = rr.ll;
359 ww.s.low = q0;
360 ww.s.high = q1;
361 return ww.ll;
/* Negate a double-word value. */
#define __negdi2(a) (-(a))

366 long long __divdi3(long long u, long long v)
368 int c = 0;
369 DWunion uu, vv;
370 DWtype w;
372 uu.ll = u;
373 vv.ll = v;
375 if (uu.s.high < 0) {
376 c = ~c;
377 uu.ll = __negdi2 (uu.ll);
379 if (vv.s.high < 0) {
380 c = ~c;
381 vv.ll = __negdi2 (vv.ll);
383 w = __udivmoddi4 (uu.ll, vv.ll, (UDWtype *) 0);
384 if (c)
385 w = __negdi2 (w);
386 return w;
389 long long __moddi3(long long u, long long v)
391 int c = 0;
392 DWunion uu, vv;
393 DWtype w;
395 uu.ll = u;
396 vv.ll = v;
398 if (uu.s.high < 0) {
399 c = ~c;
400 uu.ll = __negdi2 (uu.ll);
402 if (vv.s.high < 0)
403 vv.ll = __negdi2 (vv.ll);
405 __udivmoddi4 (uu.ll, vv.ll, (UDWtype *) &w);
406 if (c)
407 w = __negdi2 (w);
408 return w;
411 unsigned long long __udivdi3(unsigned long long u, unsigned long long v)
413 return __udivmoddi4 (u, v, (UDWtype *) 0);
416 unsigned long long __umoddi3(unsigned long long u, unsigned long long v)
418 UDWtype w;
420 __udivmoddi4 (u, v, &w);
421 return w;
/* XXX: fix tcc's code generator to do this instead */
/* Arithmetic (sign-propagating) right shift of a 64-bit value by b bits.
   When built with TCC the shift is assembled from 32-bit operations on
   the two halves; other compilers use the native operator. */
long long __ashrdi3(long long a, int b)
{
#ifdef __TINYC__
    DWunion u;
    u.ll = a;
    if (b >= 32) {
        /* the low word comes entirely from the high word; the high word
           becomes copies of the sign bit */
        u.s.low = u.s.high >> (b - 32);
        u.s.high = u.s.high >> 31;
    } else if (b != 0) {
        /* shift the high word's bits as unsigned: left-shifting a
           negative int is undefined */
        u.s.low = ((unsigned)u.s.low >> b) | ((unsigned)u.s.high << (32 - b));
        u.s.high = u.s.high >> b;
    }
    return u.ll;
#else
    return a >> b;
#endif
}

/* XXX: fix tcc's code generator to do this instead */
/* Logical (zero-filling) right shift of a 64-bit value by b bits.
   When built with TCC the shift is assembled from 32-bit operations on
   the two halves; other compilers use the native operator. */
unsigned long long __lshrdi3(unsigned long long a, int b)
{
#ifdef __TINYC__
    DWunion u;
    u.ll = a;
    if (b >= 32) {
        u.s.low = (unsigned)u.s.high >> (b - 32);
        u.s.high = 0;
    } else if (b != 0) {
        /* shift the high word's bits as unsigned: left-shifting a
           negative int is undefined */
        u.s.low = ((unsigned)u.s.low >> b) | ((unsigned)u.s.high << (32 - b));
        u.s.high = (unsigned)u.s.high >> b;
    }
    return u.ll;
#else
    return a >> b;
#endif
}

/* XXX: fix tcc's code generator to do this instead */
/* Left shift of a 64-bit value by b bits.
   When built with TCC the shift is assembled from 32-bit operations on
   the two halves; other compilers use the native operator. */
long long __ashldi3(long long a, int b)
{
#ifdef __TINYC__
    DWunion u;
    u.ll = a;
    if (b >= 32) {
        /* the high word comes entirely from the low word */
        u.s.high = (unsigned)u.s.low << (b - 32);
        u.s.low = 0;
    } else if (b != 0) {
        u.s.high = ((unsigned)u.s.high << b) | ((unsigned)u.s.low >> (32 - b));
        u.s.low = (unsigned)u.s.low << b;
    }
    return u.ll;
#else
    return a << b;
#endif
}

#if defined(__i386__)
/* FPU control word for rounding to nearest mode */
unsigned short __tcc_fpu_control = 0x137f;
/* FPU control word for round to zero mode for int conversion
   (the same word with bits 0x0c00 set) */
unsigned short __tcc_int_fpu_control = 0x137f | 0x0c00;
#endif

#endif /* !__x86_64__ */

/* XXX: fix tcc's code generator to do this instead */
/* Convert an unsigned 64-bit integer to float using only signed
   integer-to-float conversions.
   Values with the top bit clear are converted directly.  Otherwise the
   signed reinterpretation equals a - 2^64, so it is converted to long
   double, 2^64 is added back, and the sum is narrowed to float. */
float __floatundisf(unsigned long long a)
{
    long long sa = (long long)a;
    long double r;

    if (sa >= 0)
        return (float)sa;

    r = (long double)sa;
    r += 18446744073709551616.0;    /* 2^64 */
    return (float)r;
}

/* Convert an unsigned 64-bit integer to double using only signed
   integer-to-float conversions.
   Values with the top bit clear are converted directly.  Otherwise the
   signed reinterpretation equals a - 2^64, so it is converted to long
   double, 2^64 is added back, and the sum is narrowed to double. */
double __floatundidf(unsigned long long a)
{
    long long sa = (long long)a;
    long double r;

    if (sa >= 0)
        return (double)sa;

    r = (long double)sa;
    r += 18446744073709551616.0;    /* 2^64 */
    return (double)r;
}

/* Convert an unsigned 64-bit integer to long double using only signed
   integer-to-float conversions.
   Values with the top bit clear are converted directly.  Otherwise the
   signed reinterpretation equals a - 2^64, so it is converted and 2^64
   is added back. */
long double __floatundixf(unsigned long long a)
{
    long long sa = (long long)a;
    long double r;

    if (sa >= 0)
        return (long double)sa;

    r = (long double)sa;
    r += 18446744073709551616.0;    /* 2^64 */
    return (long double)r;
}

536 unsigned long long __fixunssfdi (float a1)
538 register union float_long fl1;
539 register int exp;
540 register unsigned long l;
542 fl1.f = a1;
544 if (fl1.l == 0)
545 return (0);
547 exp = EXP (fl1.l) - EXCESS - 24;
549 l = MANT(fl1.l);
550 if (exp >= 41)
551 return (unsigned long long)-1;
552 else if (exp >= 0)
553 return (unsigned long long)l << exp;
554 else if (exp >= -23)
555 return l >> -exp;
556 else
557 return 0;
560 unsigned long long __fixunsdfdi (double a1)
562 register union double_long dl1;
563 register int exp;
564 register unsigned long long l;
566 dl1.d = a1;
568 if (dl1.ll == 0)
569 return (0);
571 exp = EXPD (dl1) - EXCESSD - 53;
573 l = MANTD_LL(dl1);
575 if (exp >= 12)
576 return (unsigned long long)-1;
577 else if (exp >= 0)
578 return l << exp;
579 else if (exp >= -52)
580 return l >> -exp;
581 else
582 return 0;
585 unsigned long long __fixunsxfdi (long double a1)
587 register union ldouble_long dl1;
588 register int exp;
589 register unsigned long long l;
591 dl1.ld = a1;
593 if (dl1.l.lower == 0 && dl1.l.upper == 0)
594 return (0);
596 exp = EXPLD (dl1) - EXCESSLD - 64;
598 l = dl1.l.lower;
600 if (exp > 0)
601 return (unsigned long long)-1;
602 else if (exp >= -63)
603 return l >> -exp;
604 else
605 return 0;
#if defined(__x86_64__) && !defined(_WIN64)

/* C library routines used by the va_list helpers. */
#ifndef __TINYC__
#include <stdlib.h>
#include <stdio.h>
#else
/* Avoid including stdlib.h because it is not easily available when
   cross compiling */
extern void *malloc(unsigned long long);
extern void free(void*);
extern void abort(void);
#endif

/* Tells __va_arg where to fetch the next argument from: the
   general-purpose register save slots, the floating-point register
   save slots, or the stack overflow area. */
enum __va_arg_type {
    __va_gen_reg, __va_float_reg, __va_stack
};

/* GCC compatible definition of va_list. */
struct __va_list_struct {
    unsigned int gp_offset;     /* offset of the next general-purpose slot in reg_save_area */
    unsigned int fp_offset;     /* offset of the next floating-point slot in reg_save_area */
    union {
        /* __va_start reads overflow_offset from the frame image and
           then replaces it with the resolved pointer */
        unsigned int overflow_offset;
        char *overflow_arg_area;
    };
    char *reg_save_area;        /* base address of the saved argument registers */
};

636 void *__va_start(void *fp)
638 struct __va_list_struct *ap =
639 (struct __va_list_struct *)malloc(sizeof(struct __va_list_struct));
640 *ap = *(struct __va_list_struct *)((char *)fp - 16);
641 ap->overflow_arg_area = (char *)fp + ap->overflow_offset;
642 ap->reg_save_area = (char *)fp - 176 - 16;
643 return ap;
646 void *__va_arg(struct __va_list_struct *ap,
647 enum __va_arg_type arg_type,
648 int size)
650 size = (size + 7) & ~7;
651 switch (arg_type) {
652 case __va_gen_reg:
653 if (ap->gp_offset < 48) {
654 ap->gp_offset += 8;
655 return ap->reg_save_area + ap->gp_offset - 8;
657 size = 8;
658 goto use_overflow_area;
660 case __va_float_reg:
661 if (ap->fp_offset < 128 + 48) {
662 ap->fp_offset += 16;
663 return ap->reg_save_area + ap->fp_offset - 16;
665 size = 8;
666 goto use_overflow_area;
668 case __va_stack:
669 use_overflow_area:
670 ap->overflow_arg_area += size;
671 return ap->overflow_arg_area - size;
673 default:
674 #ifndef __TINYC__
675 fprintf(stderr, "unknown ABI type for __va_arg\n");
676 #endif
677 abort();
681 void *__va_copy(struct __va_list_struct *src)
683 struct __va_list_struct *dest =
684 (struct __va_list_struct *)malloc(sizeof(struct __va_list_struct));
685 *dest = *src;
686 return dest;
/* Release a va_list obtained from __va_start or __va_copy. */
void __va_end(struct __va_list_struct *ap)
{
    free(ap);
}

#endif /* __x86_64__ */

/* Flushing for tccrun */
#if defined(__x86_64__) || defined(__i386__)

/* Intentionally empty on x86 / x86-64: nothing is done for the range
   [beginning, end). */
void __clear_cache(char *beginning, char *end)
{
}

#else
#warning __clear_cache not defined for this architecture, avoid using tcc -run
#endif