2 * TCC runtime library for arm64.
4 * Copyright (c) 2015 Edmund Grimley Evans
6 * Copying and distribution of this file, with or without modification,
7 * are permitted in any medium without royalty provided the copyright
8 * notice and this notice are preserved. This file is offered as-is,
9 * without any warranty.
13 typedef signed char int8_t;
14 typedef unsigned char uint8_t;
15 typedef short int16_t;
16 typedef unsigned short uint16_t;
18 typedef unsigned uint32_t;
19 typedef long long int64_t;
20 typedef unsigned long long uint64_t;
21 void *memcpy(void*,void*,__SIZE_TYPE__
);
27 #if !defined __riscv && !defined __APPLE__
28 void __clear_cache(void *beg
, void *end
)
30 __arm64_clear_cache(beg
, end
);
38 static long double f3_zero(int sgn
)
41 u128_t x
= { 0, (uint64_t)sgn
<< 63 };
46 static long double f3_infinity(int sgn
)
49 u128_t x
= { 0, (uint64_t)sgn
<< 63 | 0x7fff000000000000 };
54 static long double f3_NaN(void)
58 // ARM's default NaN usually has just the top fraction bit set:
59 u128_t x
= { 0, 0x7fff800000000000 };
61 // GCC's library sets all fraction bits:
62 u128_t x
= { -1, 0x7fffffffffffffff };
68 static int fp3_convert_NaN(long double *f
, int sgn
, u128_t mnt
)
71 mnt
.x1
| 0x7fff800000000000 | (uint64_t)sgn
<< 63 };
76 static int fp3_detect_NaNs(long double *f
,
77 int a_sgn
, int a_exp
, u128_t a
,
78 int b_sgn
, int b_exp
, u128_t b
)
80 // Detect signalling NaNs:
81 if (a_exp
== 32767 && (a
.x0
| a
.x1
<< 16) && !(a
.x1
>> 47 & 1))
82 return fp3_convert_NaN(f
, a_sgn
, a
);
83 if (b_exp
== 32767 && (b
.x0
| b
.x1
<< 16) && !(b
.x1
>> 47 & 1))
84 return fp3_convert_NaN(f
, b_sgn
, b
);
87 if (a_exp
== 32767 && (a
.x0
| a
.x1
<< 16))
88 return fp3_convert_NaN(f
, a_sgn
, a
);
89 if (b_exp
== 32767 && (b
.x0
| b
.x1
<< 16))
90 return fp3_convert_NaN(f
, b_sgn
, b
);
95 static void f3_unpack(int *sgn
, int32_t *exp
, u128_t
*mnt
, long double f
)
100 *exp
= x
.x1
>> 48 & 32767;
101 x
.x1
= x
.x1
<< 16 >> 16;
103 x
.x1
|= (uint64_t)1 << 48;
109 static u128_t
f3_normalise(int32_t *exp
, u128_t mnt
)
112 if (!(mnt
.x0
| mnt
.x1
))
119 for (sh
= 32; sh
; sh
>>= 1) {
120 if (!(mnt
.x1
>> (64 - sh
))) {
121 mnt
.x1
= mnt
.x1
<< sh
| mnt
.x0
>> (64 - sh
);
122 mnt
.x0
= mnt
.x0
<< sh
;
129 static u128_t
f3_sticky_shift(int32_t sh
, u128_t x
)
132 x
.x0
= !!(x
.x0
| x
.x1
);
137 x
.x0
= x
.x1
| !!x
.x0
;
142 x
.x0
= x
.x0
>> sh
| x
.x1
<< (64 - sh
) | !!(x
.x0
<< (64 - sh
));
148 static long double f3_round(int sgn
, int32_t exp
, u128_t x
)
154 x
= f3_sticky_shift(13, x
);
157 x
= f3_sticky_shift(14 - exp
, x
);
162 x
.x0
= x
.x0
>> 2 | x
.x1
<< 62;
165 if (error
== 3 || ((error
== 2) & (x
.x0
& 1))) {
168 if (x
.x1
== (uint64_t)1 << 48)
170 else if (x
.x1
== (uint64_t)1 << 49) {
172 x
.x0
= x
.x0
>> 1 | x
.x1
<< 63;
179 return f3_infinity(sgn
);
181 x
.x1
= x
.x1
<< 16 >> 16 | (uint64_t)exp
<< 48 | (uint64_t)sgn
<< 63;
186 static long double f3_add(long double fa
, long double fb
, int neg
)
189 int32_t a_exp
, b_exp
, x_exp
;
190 int a_sgn
, b_sgn
, x_sgn
;
193 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
194 f3_unpack(&b_sgn
, &b_exp
, &b
, fb
);
196 if (fp3_detect_NaNs(&fx
, a_sgn
, a_exp
, a
, b_sgn
, b_exp
, b
))
201 // Handle infinities and zeroes:
202 if (a_exp
== 32767 && b_exp
== 32767 && a_sgn
!= b_sgn
)
205 return f3_infinity(a_sgn
);
207 return f3_infinity(b_sgn
);
208 if (!(a
.x0
| a
.x1
| b
.x0
| b
.x1
))
209 return f3_zero(a_sgn
& b_sgn
);
211 a
.x1
= a
.x1
<< 3 | a
.x0
>> 61;
213 b
.x1
= b
.x1
<< 3 | b
.x0
>> 61;
216 if (a_exp
<= b_exp
) {
217 a
= f3_sticky_shift(b_exp
- a_exp
, a
);
221 b
= f3_sticky_shift(a_exp
- b_exp
, b
);
227 if (a_sgn
== b_sgn
) {
229 x
.x1
= a
.x1
+ b
.x1
+ (x
.x0
< a
.x0
);
233 x
.x1
= a
.x1
- b
.x1
- (x
.x0
> a
.x0
);
237 x
.x1
= -x
.x1
- !!x
.x0
;
244 x
= f3_normalise(&x_exp
, x
);
246 return f3_round(x_sgn
, x_exp
+ 12, x
);
249 long double __addtf3(long double a
, long double b
)
251 return f3_add(a
, b
, 0);
254 long double __subtf3(long double a
, long double b
)
256 return f3_add(a
, b
, 1);
259 long double __multf3(long double fa
, long double fb
)
262 int32_t a_exp
, b_exp
, x_exp
;
263 int a_sgn
, b_sgn
, x_sgn
;
266 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
267 f3_unpack(&b_sgn
, &b_exp
, &b
, fb
);
269 if (fp3_detect_NaNs(&fx
, a_sgn
, a_exp
, a
, b_sgn
, b_exp
, b
))
272 // Handle infinities and zeroes:
273 if ((a_exp
== 32767 && !(b
.x0
| b
.x1
)) ||
274 (b_exp
== 32767 && !(a
.x0
| a
.x1
)))
276 if (a_exp
== 32767 || b_exp
== 32767)
277 return f3_infinity(a_sgn
^ b_sgn
);
278 if (!(a
.x0
| a
.x1
) || !(b
.x0
| b
.x1
))
279 return f3_zero(a_sgn
^ b_sgn
);
281 a
= f3_normalise(&a_exp
, a
);
282 b
= f3_normalise(&b_exp
, b
);
284 x_sgn
= a_sgn
^ b_sgn
;
285 x_exp
= a_exp
+ b_exp
- 16352;
288 // Convert to base (1 << 30), discarding bottom 6 bits, which are zero,
289 // so there are (32, 30, 30, 30) bits in (a3, a2, a1, a0):
290 uint64_t a0
= a
.x0
<< 28 >> 34;
291 uint64_t b0
= b
.x0
<< 28 >> 34;
292 uint64_t a1
= a
.x0
>> 36 | a
.x1
<< 62 >> 34;
293 uint64_t b1
= b
.x0
>> 36 | b
.x1
<< 62 >> 34;
294 uint64_t a2
= a
.x1
<< 32 >> 34;
295 uint64_t b2
= b
.x1
<< 32 >> 34;
296 uint64_t a3
= a
.x1
>> 32;
297 uint64_t b3
= b
.x1
>> 32;
298 // Use 16 small multiplications and additions that do not overflow:
299 uint64_t x0
= a0
* b0
;
300 uint64_t x1
= (x0
>> 30) + a0
* b1
+ a1
* b0
;
301 uint64_t x2
= (x1
>> 30) + a0
* b2
+ a1
* b1
+ a2
* b0
;
302 uint64_t x3
= (x2
>> 30) + a0
* b3
+ a1
* b2
+ a2
* b1
+ a3
* b0
;
303 uint64_t x4
= (x3
>> 30) + a1
* b3
+ a2
* b2
+ a3
* b1
;
304 uint64_t x5
= (x4
>> 30) + a2
* b3
+ a3
* b2
;
305 uint64_t x6
= (x5
>> 30) + a3
* b3
;
306 // We now have (64, 30, 30, ...) bits in (x6, x5, x4, ...).
307 // Take the top 128 bits, setting bottom bit if any lower bits were set:
308 uint64_t y0
= (x5
<< 34 | x4
<< 34 >> 30 | x3
<< 34 >> 60 |
309 !!(x3
<< 38 | (x2
| x1
| x0
) << 34));
311 // Top bit may be zero. Renormalise:
313 y1
= y1
<< 1 | y0
>> 63;
321 return f3_round(x_sgn
, x_exp
, x
);
324 long double __divtf3(long double fa
, long double fb
)
327 int32_t a_exp
, b_exp
, x_exp
;
328 int a_sgn
, b_sgn
, x_sgn
, i
;
331 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
332 f3_unpack(&b_sgn
, &b_exp
, &b
, fb
);
334 if (fp3_detect_NaNs(&fx
, a_sgn
, a_exp
, a
, b_sgn
, b_exp
, b
))
337 // Handle infinities and zeroes:
338 if ((a_exp
== 32767 && b_exp
== 32767) ||
339 (!(a
.x0
| a
.x1
) && !(b
.x0
| b
.x1
)))
341 if (a_exp
== 32767 || !(b
.x0
| b
.x1
))
342 return f3_infinity(a_sgn
^ b_sgn
);
343 if (!(a
.x0
| a
.x1
) || b_exp
== 32767)
344 return f3_zero(a_sgn
^ b_sgn
);
346 a
= f3_normalise(&a_exp
, a
);
347 b
= f3_normalise(&b_exp
, b
);
349 x_sgn
= a_sgn
^ b_sgn
;
350 x_exp
= a_exp
- b_exp
+ 16395;
352 a
.x0
= a
.x0
>> 1 | a
.x1
<< 63;
354 b
.x0
= b
.x0
>> 1 | b
.x1
<< 63;
358 for (i
= 0; i
< 116; i
++) {
359 x
.x1
= x
.x1
<< 1 | x
.x0
>> 63;
361 if (a
.x1
> b
.x1
|| (a
.x1
== b
.x1
&& a
.x0
>= b
.x0
)) {
362 a
.x1
= a
.x1
- b
.x1
- (a
.x0
< b
.x0
);
366 a
.x1
= a
.x1
<< 1 | a
.x0
>> 63;
369 x
.x0
|= !!(a
.x0
| a
.x1
);
371 x
= f3_normalise(&x_exp
, x
);
373 return f3_round(x_sgn
, x_exp
, x
);
376 long double __extendsftf2(float f
)
387 else if (a
<< 1 >> 24 == 255)
388 x
.x1
= (0x7fff000000000000 | aa
>> 31 << 63 | aa
<< 41 >> 16 |
389 (uint64_t)!!(a
<< 9) << 47);
390 else if (a
<< 1 >> 24 == 0) {
392 while (!(a
<< 1 >> 1 >> (23 - adj
)))
394 x
.x1
= aa
>> 31 << 63 | (16256 - adj
+ 1) << 48 | aa
<< adj
<< 41 >> 16;
396 x
.x1
= (aa
>> 31 << 63 | ((aa
>> 23 & 255) + 16256) << 48 |
402 long double __extenddftf2(double f
)
411 else if (a
<< 1 >> 53 == 2047)
412 x
.x1
= (0x7fff000000000000 | a
>> 63 << 63 | a
<< 12 >> 16 |
413 (uint64_t)!!(a
<< 12) << 47);
414 else if (a
<< 1 >> 53 == 0) {
416 while (!(a
<< 1 >> 1 >> (52 - adj
)))
419 x
.x1
= a
>> 63 << 63 | (15360 - adj
+ 1) << 48 | a
<< adj
<< 12 >> 16;
421 x
.x1
= a
>> 63 << 63 | ((a
>> 52 & 2047) + 15360) << 48 | a
<< 12 >> 16;
426 float __trunctfsf2(long double f
)
434 f3_unpack(&sgn
, &exp
, &mnt
, f
);
436 if (exp
== 32767 && (mnt
.x0
| mnt
.x1
<< 16))
437 x
= 0x7fc00000 | (uint32_t)sgn
<< 31 | (mnt
.x1
>> 25 & 0x007fffff);
438 else if (exp
> 16510)
439 x
= 0x7f800000 | (uint32_t)sgn
<< 31;
440 else if (exp
< 16233)
441 x
= (uint32_t)sgn
<< 31;
444 x
= mnt
.x1
>> 23 | !!(mnt
.x0
| mnt
.x1
<< 41);
446 x
= x
>> -exp
| !!(x
<< (32 + exp
));
449 if ((x
& 3) == 3 || (x
& 7) == 6)
451 x
= ((x
>> 2) + (exp
<< 23)) | (uint32_t)sgn
<< 31;
457 double __trunctfdf2(long double f
)
465 f3_unpack(&sgn
, &exp
, &mnt
, f
);
467 if (exp
== 32767 && (mnt
.x0
| mnt
.x1
<< 16))
468 x
= (0x7ff8000000000000 | (uint64_t)sgn
<< 63 |
469 mnt
.x1
<< 16 >> 12 | mnt
.x0
>> 60);
470 else if (exp
> 17406)
471 x
= 0x7ff0000000000000 | (uint64_t)sgn
<< 63;
472 else if (exp
< 15308)
473 x
= (uint64_t)sgn
<< 63;
476 x
= mnt
.x1
<< 6 | mnt
.x0
>> 58 | !!(mnt
.x0
<< 6);
478 x
= x
>> -exp
| !!(x
<< (64 + exp
));
481 if ((x
& 3) == 3 || (x
& 7) == 6)
483 x
= ((x
>> 2) + ((uint64_t)exp
<< 52)) | (uint64_t)sgn
<< 63;
489 int32_t __fixtfsi(long double fa
)
495 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
499 return a_sgn
? -0x80000000 : 0x7fffffff;
500 x
= a
.x1
>> (16431 - a_exp
);
501 return a_sgn
? -x
: x
;
504 int64_t __fixtfdi(long double fa
)
510 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
514 return a_sgn
? -0x8000000000000000 : 0x7fffffffffffffff;
515 x
= (a
.x1
<< 15 | a
.x0
>> 49) >> (16446 - a_exp
);
516 return a_sgn
? -x
: x
;
519 uint32_t __fixunstfsi(long double fa
)
524 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
525 if (a_sgn
|| a_exp
< 16369)
529 return a
.x1
>> (16431 - a_exp
);
532 uint64_t __fixunstfdi(long double fa
)
537 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
538 if (a_sgn
|| a_exp
< 16383)
542 return (a
.x1
<< 15 | a
.x0
>> 49) >> (16446 - a_exp
);
545 long double __floatsitf(int32_t a
)
558 for (i
= 16; i
; i
>>= 1)
559 if (!(mnt
>> (32 - i
))) {
563 x
.x1
= ((uint64_t)sgn
<< 63 | (uint64_t)exp
<< 48 |
564 (uint64_t)(mnt
<< 1) << 16);
570 long double __floatditf(int64_t a
)
583 for (i
= 32; i
; i
>>= 1)
584 if (!(mnt
>> (64 - i
))) {
589 x
.x1
= (uint64_t)sgn
<< 63 | (uint64_t)exp
<< 48 | mnt
<< 1 >> 16;
595 long double __floatunsitf(uint32_t a
)
603 for (i
= 16; i
; i
>>= 1)
604 if (!(mnt
>> (32 - i
))) {
608 x
.x1
= (uint64_t)exp
<< 48 | (uint64_t)(mnt
<< 1) << 16;
614 long double __floatunditf(uint64_t a
)
622 for (i
= 32; i
; i
>>= 1)
623 if (!(mnt
>> (64 - i
))) {
628 x
.x1
= (uint64_t)exp
<< 48 | mnt
<< 1 >> 16;
634 static int f3_cmp(long double fa
, long double fb
)
639 return (!(a
.x0
| a
.x1
<< 1 | b
.x0
| b
.x1
<< 1) ? 0 :
640 ((a
.x1
<< 1 >> 49 == 0x7fff && (a
.x0
| a
.x1
<< 16)) ||
641 (b
.x1
<< 1 >> 49 == 0x7fff && (b
.x0
| b
.x1
<< 16))) ? 2 :
642 a
.x1
>> 63 != b
.x1
>> 63 ? (int)(b
.x1
>> 63) - (int)(a
.x1
>> 63) :
643 a
.x1
< b
.x1
? (int)(a
.x1
>> 63 << 1) - 1 :
644 a
.x1
> b
.x1
? 1 - (int)(a
.x1
>> 63 << 1) :
645 a
.x0
< b
.x0
? (int)(a
.x1
>> 63 << 1) - 1 :
646 b
.x0
< a
.x0
? 1 - (int)(a
.x1
>> 63 << 1) : 0);
649 int __eqtf2(long double a
, long double b
)
651 return !!f3_cmp(a
, b
);
654 int __netf2(long double a
, long double b
)
656 return !!f3_cmp(a
, b
);
659 int __lttf2(long double a
, long double b
)
664 int __letf2(long double a
, long double b
)
669 int __gttf2(long double a
, long double b
)
671 return -f3_cmp(b
, a
);
674 int __getf2(long double a
, long double b
)
676 return -f3_cmp(b
, a
);