/*
 * TCC runtime library for arm64.
 *
 * Copyright (c) 2015 Edmund Grimley Evans
 *
 * Copying and distribution of this file, with or without modification,
 * are permitted in any medium without royalty provided the copyright
 * notice and this notice are preserved.  This file is offered as-is,
 * without any warranty.
 */
13 typedef signed char int8_t;
14 typedef unsigned char uint8_t;
15 typedef short int16_t;
16 typedef unsigned short uint16_t;
18 typedef unsigned uint32_t;
19 typedef long long int64_t;
20 typedef unsigned long long uint64_t;
21 void *memcpy(void*,void*,__SIZE_TYPE__
);
// Cache-maintenance entry point: hands the address range [beg, end)
// straight to __arm64_clear_cache, which is not defined in this block.
// NOTE(review): the half-open interpretation of the range follows the usual
// __clear_cache convention; confirm against __arm64_clear_cache.
void __clear_cache(void *beg, void *end)
{
    __arm64_clear_cache(beg, end);
}
36 static long double f3_zero(int sgn
)
39 u128_t x
= { 0, (uint64_t)sgn
<< 63 };
44 static long double f3_infinity(int sgn
)
47 u128_t x
= { 0, (uint64_t)sgn
<< 63 | 0x7fff000000000000 };
52 static long double f3_NaN(void)
56 // ARM's default NaN usually has just the top fraction bit set:
57 u128_t x
= { 0, 0x7fff800000000000 };
59 // GCC's library sets all fraction bits:
60 u128_t x
= { -1, 0x7fffffffffffffff };
66 static int fp3_convert_NaN(long double *f
, int sgn
, u128_t mnt
)
69 mnt
.x1
| 0x7fff800000000000 | (uint64_t)sgn
<< 63 };
74 static int fp3_detect_NaNs(long double *f
,
75 int a_sgn
, int a_exp
, u128_t a
,
76 int b_sgn
, int b_exp
, u128_t b
)
78 // Detect signalling NaNs:
79 if (a_exp
== 32767 && (a
.x0
| a
.x1
<< 16) && !(a
.x1
>> 47 & 1))
80 return fp3_convert_NaN(f
, a_sgn
, a
);
81 if (b_exp
== 32767 && (b
.x0
| b
.x1
<< 16) && !(b
.x1
>> 47 & 1))
82 return fp3_convert_NaN(f
, b_sgn
, b
);
85 if (a_exp
== 32767 && (a
.x0
| a
.x1
<< 16))
86 return fp3_convert_NaN(f
, a_sgn
, a
);
87 if (b_exp
== 32767 && (b
.x0
| b
.x1
<< 16))
88 return fp3_convert_NaN(f
, b_sgn
, b
);
93 static void f3_unpack(int *sgn
, int32_t *exp
, u128_t
*mnt
, long double f
)
98 *exp
= x
.x1
>> 48 & 32767;
99 x
.x1
= x
.x1
<< 16 >> 16;
101 x
.x1
|= (uint64_t)1 << 48;
107 static u128_t
f3_normalise(int32_t *exp
, u128_t mnt
)
110 if (!(mnt
.x0
| mnt
.x1
))
117 for (sh
= 32; sh
; sh
>>= 1) {
118 if (!(mnt
.x1
>> (64 - sh
))) {
119 mnt
.x1
= mnt
.x1
<< sh
| mnt
.x0
>> (64 - sh
);
120 mnt
.x0
= mnt
.x0
<< sh
;
127 static u128_t
f3_sticky_shift(int32_t sh
, u128_t x
)
130 x
.x0
= !!(x
.x0
| x
.x1
);
135 x
.x0
= x
.x1
| !!x
.x0
;
140 x
.x0
= x
.x0
>> sh
| x
.x1
<< (64 - sh
) | !!(x
.x0
<< (64 - sh
));
146 static long double f3_round(int sgn
, int32_t exp
, u128_t x
)
152 x
= f3_sticky_shift(13, x
);
155 x
= f3_sticky_shift(14 - exp
, x
);
160 x
.x0
= x
.x0
>> 2 | x
.x1
<< 62;
163 if (error
== 3 || ((error
== 2) & (x
.x0
& 1))) {
166 if (x
.x1
== (uint64_t)1 << 48)
168 else if (x
.x1
== (uint64_t)1 << 49) {
170 x
.x0
= x
.x0
>> 1 | x
.x1
<< 63;
177 return f3_infinity(sgn
);
179 x
.x1
= x
.x1
<< 16 >> 16 | (uint64_t)exp
<< 48 | (uint64_t)sgn
<< 63;
184 static long double f3_add(long double fa
, long double fb
, int neg
)
187 int32_t a_exp
, b_exp
, x_exp
;
188 int a_sgn
, b_sgn
, x_sgn
;
191 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
192 f3_unpack(&b_sgn
, &b_exp
, &b
, fb
);
194 if (fp3_detect_NaNs(&fx
, a_sgn
, a_exp
, a
, b_sgn
, b_exp
, b
))
199 // Handle infinities and zeroes:
200 if (a_exp
== 32767 && b_exp
== 32767 && a_sgn
!= b_sgn
)
203 return f3_infinity(a_sgn
);
205 return f3_infinity(b_sgn
);
206 if (!(a
.x0
| a
.x1
| b
.x0
| b
.x1
))
207 return f3_zero(a_sgn
& b_sgn
);
209 a
.x1
= a
.x1
<< 3 | a
.x0
>> 61;
211 b
.x1
= b
.x1
<< 3 | b
.x0
>> 61;
214 if (a_exp
<= b_exp
) {
215 a
= f3_sticky_shift(b_exp
- a_exp
, a
);
219 b
= f3_sticky_shift(a_exp
- b_exp
, b
);
225 if (a_sgn
== b_sgn
) {
227 x
.x1
= a
.x1
+ b
.x1
+ (x
.x0
< a
.x0
);
231 x
.x1
= a
.x1
- b
.x1
- (x
.x0
> a
.x0
);
235 x
.x1
= -x
.x1
- !!x
.x0
;
242 x
= f3_normalise(&x_exp
, x
);
244 return f3_round(x_sgn
, x_exp
+ 12, x
);
// Soft-float addition of two long double values.
// Delegates to the shared add/subtract helper f3_add with neg = 0
// (the subtraction entry point __subtf3 passes neg = 1 instead).
// Returns whatever f3_add produces for a + b.
long double __addtf3(long double a, long double b)
{
    return f3_add(a, b, 0);
}
// Soft-float subtraction of two long double values.
// Delegates to the shared add/subtract helper f3_add with neg = 1
// (the addition entry point __addtf3 passes neg = 0 instead).
// Returns whatever f3_add produces for a - b.
long double __subtf3(long double a, long double b)
{
    return f3_add(a, b, 1);
}
257 long double __multf3(long double fa
, long double fb
)
260 int32_t a_exp
, b_exp
, x_exp
;
261 int a_sgn
, b_sgn
, x_sgn
;
264 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
265 f3_unpack(&b_sgn
, &b_exp
, &b
, fb
);
267 if (fp3_detect_NaNs(&fx
, a_sgn
, a_exp
, a
, b_sgn
, b_exp
, b
))
270 // Handle infinities and zeroes:
271 if ((a_exp
== 32767 && !(b
.x0
| b
.x1
)) ||
272 (b_exp
== 32767 && !(a
.x0
| a
.x1
)))
274 if (a_exp
== 32767 || b_exp
== 32767)
275 return f3_infinity(a_sgn
^ b_sgn
);
276 if (!(a
.x0
| a
.x1
) || !(b
.x0
| b
.x1
))
277 return f3_zero(a_sgn
^ b_sgn
);
279 a
= f3_normalise(&a_exp
, a
);
280 b
= f3_normalise(&b_exp
, b
);
282 x_sgn
= a_sgn
^ b_sgn
;
283 x_exp
= a_exp
+ b_exp
- 16352;
286 // Convert to base (1 << 30), discarding bottom 6 bits, which are zero,
287 // so there are (32, 30, 30, 30) bits in (a3, a2, a1, a0):
288 uint64_t a0
= a
.x0
<< 28 >> 34;
289 uint64_t b0
= b
.x0
<< 28 >> 34;
290 uint64_t a1
= a
.x0
>> 36 | a
.x1
<< 62 >> 34;
291 uint64_t b1
= b
.x0
>> 36 | b
.x1
<< 62 >> 34;
292 uint64_t a2
= a
.x1
<< 32 >> 34;
293 uint64_t b2
= b
.x1
<< 32 >> 34;
294 uint64_t a3
= a
.x1
>> 32;
295 uint64_t b3
= b
.x1
>> 32;
296 // Use 16 small multiplications and additions that do not overflow:
297 uint64_t x0
= a0
* b0
;
298 uint64_t x1
= (x0
>> 30) + a0
* b1
+ a1
* b0
;
299 uint64_t x2
= (x1
>> 30) + a0
* b2
+ a1
* b1
+ a2
* b0
;
300 uint64_t x3
= (x2
>> 30) + a0
* b3
+ a1
* b2
+ a2
* b1
+ a3
* b0
;
301 uint64_t x4
= (x3
>> 30) + a1
* b3
+ a2
* b2
+ a3
* b1
;
302 uint64_t x5
= (x4
>> 30) + a2
* b3
+ a3
* b2
;
303 uint64_t x6
= (x5
>> 30) + a3
* b3
;
304 // We now have (64, 30, 30, ...) bits in (x6, x5, x4, ...).
305 // Take the top 128 bits, setting bottom bit if any lower bits were set:
306 uint64_t y0
= (x5
<< 34 | x4
<< 34 >> 30 | x3
<< 34 >> 60 |
307 !!(x3
<< 38 | (x2
| x1
| x0
) << 34));
309 // Top bit may be zero. Renormalise:
311 y1
= y1
<< 1 | y0
>> 63;
319 return f3_round(x_sgn
, x_exp
, x
);
322 long double __divtf3(long double fa
, long double fb
)
325 int32_t a_exp
, b_exp
, x_exp
;
326 int a_sgn
, b_sgn
, x_sgn
, i
;
329 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
330 f3_unpack(&b_sgn
, &b_exp
, &b
, fb
);
332 if (fp3_detect_NaNs(&fx
, a_sgn
, a_exp
, a
, b_sgn
, b_exp
, b
))
335 // Handle infinities and zeroes:
336 if ((a_exp
== 32767 && b_exp
== 32767) ||
337 (!(a
.x0
| a
.x1
) && !(b
.x0
| b
.x1
)))
339 if (a_exp
== 32767 || !(b
.x0
| b
.x1
))
340 return f3_infinity(a_sgn
^ b_sgn
);
341 if (!(a
.x0
| a
.x1
) || b_exp
== 32767)
342 return f3_zero(a_sgn
^ b_sgn
);
344 a
= f3_normalise(&a_exp
, a
);
345 b
= f3_normalise(&b_exp
, b
);
347 x_sgn
= a_sgn
^ b_sgn
;
348 x_exp
= a_exp
- b_exp
+ 16395;
350 a
.x0
= a
.x0
>> 1 | a
.x1
<< 63;
352 b
.x0
= b
.x0
>> 1 | b
.x1
<< 63;
356 for (i
= 0; i
< 116; i
++) {
357 x
.x1
= x
.x1
<< 1 | x
.x0
>> 63;
359 if (a
.x1
> b
.x1
|| (a
.x1
== b
.x1
&& a
.x0
>= b
.x0
)) {
360 a
.x1
= a
.x1
- b
.x1
- (a
.x0
< b
.x0
);
364 a
.x1
= a
.x1
<< 1 | a
.x0
>> 63;
367 x
.x0
|= !!(a
.x0
| a
.x1
);
369 x
= f3_normalise(&x_exp
, x
);
371 return f3_round(x_sgn
, x_exp
, x
);
374 long double __extendsftf2(float f
)
385 else if (a
<< 1 >> 24 == 255)
386 x
.x1
= (0x7fff000000000000 | aa
>> 31 << 63 | aa
<< 41 >> 16 |
387 (uint64_t)!!(a
<< 9) << 47);
389 x
.x1
= (aa
>> 31 << 63 | ((aa
>> 23 & 255) + 16256) << 48 |
395 long double __extenddftf2(double f
)
404 else if (a
<< 1 >> 53 == 2047)
405 x
.x1
= (0x7fff000000000000 | a
>> 63 << 63 | a
<< 12 >> 16 |
406 (uint64_t)!!(a
<< 12) << 47);
408 x
.x1
= a
>> 63 << 63 | ((a
>> 52 & 2047) + 15360) << 48 | a
<< 12 >> 16;
413 float __trunctfsf2(long double f
)
421 f3_unpack(&sgn
, &exp
, &mnt
, f
);
423 if (exp
== 32767 && (mnt
.x0
| mnt
.x1
<< 16))
424 x
= 0x7fc00000 | (uint32_t)sgn
<< 31 | (mnt
.x1
>> 25 & 0x007fffff);
425 else if (exp
> 16510)
426 x
= 0x7f800000 | (uint32_t)sgn
<< 31;
427 else if (exp
< 16233)
428 x
= (uint32_t)sgn
<< 31;
431 x
= mnt
.x1
>> 23 | !!(mnt
.x0
| mnt
.x1
<< 41);
433 x
= x
>> -exp
| !!(x
<< (32 + exp
));
436 if ((x
& 3) == 3 || (x
& 7) == 6)
438 x
= ((x
>> 2) + (exp
<< 23)) | (uint32_t)sgn
<< 31;
444 double __trunctfdf2(long double f
)
452 f3_unpack(&sgn
, &exp
, &mnt
, f
);
454 if (exp
== 32767 && (mnt
.x0
| mnt
.x1
<< 16))
455 x
= (0x7ff8000000000000 | (uint64_t)sgn
<< 63 |
456 mnt
.x1
<< 16 >> 12 | mnt
.x0
>> 60);
457 else if (exp
> 17406)
458 x
= 0x7ff0000000000000 | (uint64_t)sgn
<< 63;
459 else if (exp
< 15308)
460 x
= (uint64_t)sgn
<< 63;
463 x
= mnt
.x1
<< 6 | mnt
.x0
>> 58 | !!(mnt
.x0
<< 6);
465 x
= x
>> -exp
| !!(x
<< (64 + exp
));
468 if ((x
& 3) == 3 || (x
& 7) == 6)
470 x
= ((x
>> 2) + ((uint64_t)exp
<< 52)) | (uint64_t)sgn
<< 63;
476 int32_t __fixtfsi(long double fa
)
482 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
486 return a_sgn
? -0x80000000 : 0x7fffffff;
487 x
= a
.x1
>> (16431 - a_exp
);
488 return a_sgn
? -x
: x
;
491 int64_t __fixtfdi(long double fa
)
497 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
501 return a_sgn
? -0x8000000000000000 : 0x7fffffffffffffff;
502 x
= (a
.x1
<< 15 | a
.x0
>> 49) >> (16446 - a_exp
);
503 return a_sgn
? -x
: x
;
506 uint32_t __fixunstfsi(long double fa
)
511 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
512 if (a_sgn
|| a_exp
< 16369)
516 return a
.x1
>> (16431 - a_exp
);
519 uint64_t __fixunstfdi(long double fa
)
524 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
525 if (a_sgn
|| a_exp
< 16383)
529 return (a
.x1
<< 15 | a
.x0
>> 49) >> (16446 - a_exp
);
532 long double __floatsitf(int32_t a
)
545 for (i
= 16; i
; i
>>= 1)
546 if (!(mnt
>> (32 - i
))) {
550 x
.x1
= ((uint64_t)sgn
<< 63 | (uint64_t)exp
<< 48 |
551 (uint64_t)(mnt
<< 1) << 16);
557 long double __floatditf(int64_t a
)
570 for (i
= 32; i
; i
>>= 1)
571 if (!(mnt
>> (64 - i
))) {
576 x
.x1
= (uint64_t)sgn
<< 63 | (uint64_t)exp
<< 48 | mnt
<< 1 >> 16;
582 long double __floatunsitf(uint32_t a
)
590 for (i
= 16; i
; i
>>= 1)
591 if (!(mnt
>> (32 - i
))) {
595 x
.x1
= (uint64_t)exp
<< 48 | (uint64_t)(mnt
<< 1) << 16;
601 long double __floatunditf(uint64_t a
)
609 for (i
= 32; i
; i
>>= 1)
610 if (!(mnt
>> (64 - i
))) {
615 x
.x1
= (uint64_t)exp
<< 48 | mnt
<< 1 >> 16;
621 static int f3_cmp(long double fa
, long double fb
)
626 return (!(a
.x0
| a
.x1
<< 1 | b
.x0
| b
.x1
<< 1) ? 0 :
627 ((a
.x1
<< 1 >> 49 == 0x7fff && (a
.x0
| a
.x1
<< 16)) ||
628 (b
.x1
<< 1 >> 49 == 0x7fff && (b
.x0
| b
.x1
<< 16))) ? 2 :
629 a
.x1
>> 63 != b
.x1
>> 63 ? (int)(b
.x1
>> 63) - (int)(a
.x1
>> 63) :
630 a
.x1
< b
.x1
? (int)(a
.x1
>> 63 << 1) - 1 :
631 a
.x1
> b
.x1
? 1 - (int)(a
.x1
>> 63 << 1) :
632 a
.x0
< b
.x0
? (int)(a
.x1
>> 63 << 1) - 1 :
633 b
.x0
< a
.x0
? 1 - (int)(a
.x1
>> 63 << 1) : 0);
// Equality comparison helper for long double.
// f3_cmp yields 0 when the operands compare equal (both zeroes count as
// equal regardless of sign), 2 when either operand is a NaN, and +1/-1
// otherwise.  The double negation collapses that to:
//   0 -> operands are equal
//   1 -> operands differ or are unordered
int __eqtf2(long double a, long double b)
{
    return !!f3_cmp(a, b);
}
// Inequality comparison helper for long double.
// f3_cmp yields 0 for equal operands, 2 when either operand is a NaN, and
// +1/-1 otherwise, so the double negation returns:
//   1 -> operands differ or are unordered
//   0 -> operands are equal
int __netf2(long double a, long double b)
{
    return !!f3_cmp(a, b);
}
646 int __lttf2(long double a
, long double b
)
651 int __letf2(long double a
, long double b
)
// Greater-than comparison helper for long double.
// The operands are swapped and the result negated: for ordered inputs this
// gives the same -1/0/+1 as f3_cmp(a, b), but the unordered marker 2 that
// f3_cmp returns for a NaN becomes -2.  A caller that tests the result with
// "> 0" (the libgcc soft-float convention) therefore sees false for NaN.
int __gttf2(long double a, long double b)
{
    return -f3_cmp(b, a);
}
// Greater-or-equal comparison helper for long double.
// The operands are swapped and the result negated: for ordered inputs this
// gives the same -1/0/+1 as f3_cmp(a, b), but the unordered marker 2 that
// f3_cmp returns for a NaN becomes -2.  A caller that tests the result with
// ">= 0" (the libgcc soft-float convention) therefore sees false for NaN.
int __getf2(long double a, long double b)
{
    return -f3_cmp(b, a);
}