2 * TCC runtime library for arm64.
4 * Copyright (c) 2015 Edmund Grimley Evans
6 * Copying and distribution of this file, with or without modification,
7 * are permitted in any medium without royalty provided the copyright
8 * notice and this notice are preserved. This file is offered as-is,
9 * without any warranty.
15 void __clear_cache(char *beg
, char *end
)
17 __arm64_clear_cache(beg
, end
);
24 static long double f3_zero(int sgn
)
27 u128_t x
= { 0, (uint64_t)sgn
<< 63 };
32 static long double f3_infinity(int sgn
)
35 u128_t x
= { 0, (uint64_t)sgn
<< 63 | 0x7fff000000000000 };
40 static long double f3_NaN(void)
44 // ARM's default NaN usually has just the top fraction bit set:
45 u128_t x
= { 0, 0x7fff800000000000 };
47 // GCC's library sets all fraction bits:
48 u128_t x
= { -1, 0x7fffffffffffffff };
54 static int fp3_convert_NaN(long double *f
, int sgn
, u128_t mnt
)
57 mnt
.x1
| 0x7fff800000000000 | (uint64_t)sgn
<< 63 };
62 static int fp3_detect_NaNs(long double *f
,
63 int a_sgn
, int a_exp
, u128_t a
,
64 int b_sgn
, int b_exp
, u128_t b
)
66 // Detect signalling NaNs:
67 if (a_exp
== 32767 && (a
.x0
| a
.x1
<< 16) && !(a
.x1
>> 47 & 1))
68 return fp3_convert_NaN(f
, a_sgn
, a
);
69 if (b_exp
== 32767 && (b
.x0
| b
.x1
<< 16) && !(b
.x1
>> 47 & 1))
70 return fp3_convert_NaN(f
, b_sgn
, b
);
73 if (a_exp
== 32767 && (a
.x0
| a
.x1
<< 16))
74 return fp3_convert_NaN(f
, a_sgn
, a
);
75 if (b_exp
== 32767 && (b
.x0
| b
.x1
<< 16))
76 return fp3_convert_NaN(f
, b_sgn
, b
);
81 static void f3_unpack(int *sgn
, int32_t *exp
, u128_t
*mnt
, long double f
)
86 *exp
= x
.x1
>> 48 & 32767;
87 x
.x1
= x
.x1
<< 16 >> 16;
89 x
.x1
|= (uint64_t)1 << 48;
95 static u128_t
f3_normalise(int32_t *exp
, u128_t mnt
)
98 if (!(mnt
.x0
| mnt
.x1
))
105 for (sh
= 32; sh
; sh
>>= 1) {
106 if (!(mnt
.x1
>> (64 - sh
))) {
107 mnt
.x1
= mnt
.x1
<< sh
| mnt
.x0
>> (64 - sh
);
108 mnt
.x0
= mnt
.x0
<< sh
;
115 static u128_t
f3_sticky_shift(int32_t sh
, u128_t x
)
118 x
.x0
= !!(x
.x0
| x
.x1
);
123 x
.x0
= x
.x1
| !!x
.x0
;
128 x
.x0
= x
.x0
>> sh
| x
.x1
<< (64 - sh
) | !!(x
.x0
<< (64 - sh
));
134 static long double f3_round(int sgn
, int32_t exp
, u128_t x
)
140 x
= f3_sticky_shift(13, x
);
143 x
= f3_sticky_shift(14 - exp
, x
);
148 x
.x0
= x
.x0
>> 2 | x
.x1
<< 62;
151 if (error
== 3 || ((error
== 2) & (x
.x0
& 1))) {
154 if (x
.x1
== (uint64_t)1 << 48)
156 else if (x
.x1
== (uint64_t)1 << 49) {
158 x
.x0
= x
.x0
>> 1 | x
.x1
<< 63;
165 return f3_infinity(sgn
);
167 x
.x1
= x
.x1
<< 16 >> 16 | (uint64_t)exp
<< 48 | (uint64_t)sgn
<< 63;
172 static long double f3_add(long double fa
, long double fb
, int neg
)
175 int32_t a_exp
, b_exp
, x_exp
;
176 int a_sgn
, b_sgn
, x_sgn
;
179 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
180 f3_unpack(&b_sgn
, &b_exp
, &b
, fb
);
182 if (fp3_detect_NaNs(&fx
, a_sgn
, a_exp
, a
, b_sgn
, b_exp
, b
))
187 // Handle infinities and zeroes:
188 if (a_exp
== 32767 && b_exp
== 32767 && a_sgn
!= b_sgn
)
191 return f3_infinity(a_sgn
);
193 return f3_infinity(b_sgn
);
194 if (!(a
.x0
| a
.x1
| b
.x0
| b
.x1
))
195 return f3_zero(a_sgn
& b_sgn
);
197 a
.x1
= a
.x1
<< 3 | a
.x0
>> 61;
199 b
.x1
= b
.x1
<< 3 | b
.x0
>> 61;
202 if (a_exp
<= b_exp
) {
203 a
= f3_sticky_shift(b_exp
- a_exp
, a
);
207 b
= f3_sticky_shift(a_exp
- b_exp
, b
);
213 if (a_sgn
== b_sgn
) {
215 x
.x1
= a
.x1
+ b
.x1
+ (x
.x0
< a
.x0
);
219 x
.x1
= a
.x1
- b
.x1
- (x
.x0
> a
.x0
);
223 x
.x1
= -x
.x1
- !!x
.x0
;
230 x
= f3_normalise(&x_exp
, x
);
232 return f3_round(x_sgn
, x_exp
+ 12, x
);
235 long double __addtf3(long double a
, long double b
)
237 return f3_add(a
, b
, 0);
240 long double __subtf3(long double a
, long double b
)
242 return f3_add(a
, b
, 1);
245 long double __multf3(long double fa
, long double fb
)
248 int32_t a_exp
, b_exp
, x_exp
;
249 int a_sgn
, b_sgn
, x_sgn
;
252 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
253 f3_unpack(&b_sgn
, &b_exp
, &b
, fb
);
255 if (fp3_detect_NaNs(&fx
, a_sgn
, a_exp
, a
, b_sgn
, b_exp
, b
))
258 // Handle infinities and zeroes:
259 if ((a_exp
== 32767 && !(b
.x0
| b
.x1
)) ||
260 (b_exp
== 32767 && !(a
.x0
| a
.x1
)))
262 if (a_exp
== 32767 || b_exp
== 32767)
263 return f3_infinity(a_sgn
^ b_sgn
);
264 if (!(a
.x0
| a
.x1
) || !(b
.x0
| b
.x1
))
265 return f3_zero(a_sgn
^ b_sgn
);
267 a
= f3_normalise(&a_exp
, a
);
268 b
= f3_normalise(&b_exp
, b
);
270 x_sgn
= a_sgn
^ b_sgn
;
271 x_exp
= a_exp
+ b_exp
- 16352;
274 // Convert to base (1 << 30), discarding bottom 6 bits, which are zero,
275 // so there are (32, 30, 30, 30) bits in (a3, a2, a1, a0):
276 uint64_t a0
= a
.x0
<< 28 >> 34;
277 uint64_t b0
= b
.x0
<< 28 >> 34;
278 uint64_t a1
= a
.x0
>> 36 | a
.x1
<< 62 >> 34;
279 uint64_t b1
= b
.x0
>> 36 | b
.x1
<< 62 >> 34;
280 uint64_t a2
= a
.x1
<< 32 >> 34;
281 uint64_t b2
= b
.x1
<< 32 >> 34;
282 uint64_t a3
= a
.x1
>> 32;
283 uint64_t b3
= b
.x1
>> 32;
284 // Use 16 small multiplications and additions that do not overflow:
285 uint64_t x0
= a0
* b0
;
286 uint64_t x1
= (x0
>> 30) + a0
* b1
+ a1
* b0
;
287 uint64_t x2
= (x1
>> 30) + a0
* b2
+ a1
* b1
+ a2
* b0
;
288 uint64_t x3
= (x2
>> 30) + a0
* b3
+ a1
* b2
+ a2
* b1
+ a3
* b0
;
289 uint64_t x4
= (x3
>> 30) + a1
* b3
+ a2
* b2
+ a3
* b1
;
290 uint64_t x5
= (x4
>> 30) + a2
* b3
+ a3
* b2
;
291 uint64_t x6
= (x5
>> 30) + a3
* b3
;
292 // We now have (64, 30, 30, ...) bits in (x6, x5, x4, ...).
293 // Take the top 128 bits, setting bottom bit if any lower bits were set:
294 uint64_t y0
= (x5
<< 34 | x4
<< 34 >> 30 | x3
<< 34 >> 60 |
295 !!(x3
<< 38 | (x2
| x1
| x0
) << 34));
297 // Top bit may be zero. Renormalise:
299 y1
= y1
<< 1 | y0
>> 63;
307 return f3_round(x_sgn
, x_exp
, x
);
310 long double __divtf3(long double fa
, long double fb
)
313 int32_t a_exp
, b_exp
, x_exp
;
314 int a_sgn
, b_sgn
, x_sgn
, i
;
317 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
318 f3_unpack(&b_sgn
, &b_exp
, &b
, fb
);
320 if (fp3_detect_NaNs(&fx
, a_sgn
, a_exp
, a
, b_sgn
, b_exp
, b
))
323 // Handle infinities and zeroes:
324 if ((a_exp
== 32767 && b_exp
== 32767) ||
325 (!(a
.x0
| a
.x1
) && !(b
.x0
| b
.x1
)))
327 if (a_exp
== 32767 || !(b
.x0
| b
.x1
))
328 return f3_infinity(a_sgn
^ b_sgn
);
329 if (!(a
.x0
| a
.x1
) || b_exp
== 32767)
330 return f3_zero(a_sgn
^ b_sgn
);
332 a
= f3_normalise(&a_exp
, a
);
333 b
= f3_normalise(&b_exp
, b
);
335 x_sgn
= a_sgn
^ b_sgn
;
336 x_exp
= a_exp
- b_exp
+ 16395;
338 a
.x0
= a
.x0
>> 1 | a
.x1
<< 63;
340 b
.x0
= b
.x0
>> 1 | b
.x1
<< 63;
344 for (i
= 0; i
< 116; i
++) {
345 x
.x1
= x
.x1
<< 1 | x
.x0
>> 63;
347 if (a
.x1
> b
.x1
|| (a
.x1
== b
.x1
&& a
.x0
>= b
.x0
)) {
348 a
.x1
= a
.x1
- b
.x1
- (a
.x0
< b
.x0
);
352 a
.x1
= a
.x1
<< 1 | a
.x0
>> 63;
355 x
.x0
|= !!(a
.x0
| a
.x1
);
357 x
= f3_normalise(&x_exp
, x
);
359 return f3_round(x_sgn
, x_exp
, x
);
362 long double __extendsftf2(float f
)
373 else if (a
<< 1 >> 24 == 255)
374 x
.x1
= (0x7fff000000000000 | aa
>> 31 << 63 | aa
<< 41 >> 16 |
375 (uint64_t)!!(a
<< 9) << 47);
377 x
.x1
= (aa
>> 31 << 63 | ((aa
>> 23 & 255) + 16256) << 48 |
383 long double __extenddftf2(double f
)
392 else if (a
<< 1 >> 53 == 2047)
393 x
.x1
= (0x7fff000000000000 | a
>> 63 << 63 | a
<< 12 >> 16 |
394 (uint64_t)!!(a
<< 12) << 47);
396 x
.x1
= a
>> 63 << 63 | ((a
>> 52 & 2047) + 15360) << 48 | a
<< 12 >> 16;
401 float __trunctfsf2(long double f
)
409 f3_unpack(&sgn
, &exp
, &mnt
, f
);
411 if (exp
== 32767 && (mnt
.x0
| mnt
.x1
<< 16))
412 x
= 0x7fc00000 | (uint32_t)sgn
<< 31 | (mnt
.x1
>> 25 & 0x007fffff);
413 else if (exp
> 16510)
414 x
= 0x7f800000 | (uint32_t)sgn
<< 31;
415 else if (exp
< 16233)
416 x
= (uint32_t)sgn
<< 31;
419 x
= mnt
.x1
>> 23 | !!(mnt
.x0
| mnt
.x1
<< 41);
421 x
= x
>> -exp
| !!(x
<< (32 + exp
));
424 if ((x
& 3) == 3 || (x
& 7) == 6)
426 x
= ((x
>> 2) + (exp
<< 23)) | (uint32_t)sgn
<< 31;
432 double __trunctfdf2(long double f
)
440 f3_unpack(&sgn
, &exp
, &mnt
, f
);
442 if (exp
== 32767 && (mnt
.x0
| mnt
.x1
<< 16))
443 x
= (0x7ff8000000000000 | (uint64_t)sgn
<< 63 |
444 mnt
.x1
<< 16 >> 12 | mnt
.x0
>> 60);
445 else if (exp
> 17406)
446 x
= 0x7ff0000000000000 | (uint64_t)sgn
<< 63;
447 else if (exp
< 15308)
448 x
= (uint64_t)sgn
<< 63;
451 x
= mnt
.x1
<< 6 | mnt
.x0
>> 58 | !!(mnt
.x0
<< 6);
453 x
= x
>> -exp
| !!(x
<< (64 + exp
));
456 if ((x
& 3) == 3 || (x
& 7) == 6)
458 x
= ((x
>> 2) + ((uint64_t)exp
<< 52)) | (uint64_t)sgn
<< 63;
464 int32_t __fixtfsi(long double fa
)
470 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
474 return a_sgn
? -0x80000000 : 0x7fffffff;
475 x
= a
.x1
>> (16431 - a_exp
);
476 return a_sgn
? -x
: x
;
479 int64_t __fixtfdi(long double fa
)
485 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
489 return a_sgn
? -0x8000000000000000 : 0x7fffffffffffffff;
490 x
= (a
.x1
<< 15 | a
.x0
>> 49) >> (16446 - a_exp
);
491 return a_sgn
? -x
: x
;
494 uint32_t __fixunstfsi(long double fa
)
499 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
500 if (a_sgn
|| a_exp
< 16369)
504 return a
.x1
>> (16431 - a_exp
);
507 uint64_t __fixunstfdi(long double fa
)
512 f3_unpack(&a_sgn
, &a_exp
, &a
, fa
);
513 if (a_sgn
|| a_exp
< 16383)
517 return (a
.x1
<< 15 | a
.x0
>> 49) >> (16446 - a_exp
);
520 long double __floatsitf(int32_t a
)
533 for (i
= 16; i
; i
>>= 1)
534 if (!(mnt
>> (32 - i
))) {
538 x
.x1
= ((uint64_t)sgn
<< 63 | (uint64_t)exp
<< 48 |
539 (uint64_t)(mnt
<< 1) << 16);
545 long double __floatditf(int64_t a
)
558 for (i
= 32; i
; i
>>= 1)
559 if (!(mnt
>> (64 - i
))) {
564 x
.x1
= (uint64_t)sgn
<< 63 | (uint64_t)exp
<< 48 | mnt
<< 1 >> 16;
570 long double __floatunsitf(uint32_t a
)
578 for (i
= 16; i
; i
>>= 1)
579 if (!(mnt
>> (32 - i
))) {
583 x
.x1
= (uint64_t)exp
<< 48 | (uint64_t)(mnt
<< 1) << 16;
589 long double __floatunditf(uint64_t a
)
597 for (i
= 32; i
; i
>>= 1)
598 if (!(mnt
>> (64 - i
))) {
603 x
.x1
= (uint64_t)exp
<< 48 | mnt
<< 1 >> 16;
609 static int f3_cmp(long double fa
, long double fb
)
614 return (!(a
.x0
| a
.x1
<< 1 | b
.x0
| b
.x1
<< 1) ? 0 :
615 ((a
.x1
<< 1 >> 49 == 0x7fff && (a
.x0
| a
.x1
<< 16)) ||
616 (b
.x1
<< 1 >> 49 == 0x7fff && (b
.x0
| b
.x1
<< 16))) ? 2 :
617 a
.x1
>> 63 != b
.x1
>> 63 ? (int)(b
.x1
>> 63) - (int)(a
.x1
>> 63) :
618 a
.x1
< b
.x1
? (int)(a
.x1
>> 63 << 1) - 1 :
619 a
.x1
> b
.x1
? 1 - (int)(a
.x1
>> 63 << 1) :
620 a
.x0
< b
.x0
? (int)(a
.x1
>> 63 << 1) - 1 :
621 b
.x0
< a
.x0
? 1 - (int)(a
.x1
>> 63 << 1) : 0);
624 int __eqtf2(long double a
, long double b
)
626 return !!f3_cmp(a
, b
);
629 int __netf2(long double a
, long double b
)
631 return !!f3_cmp(a
, b
);
634 int __lttf2(long double a
, long double b
)
639 int __letf2(long double a
, long double b
)
644 int __gttf2(long double a
, long double b
)
646 return -f3_cmp(b
, a
);
649 int __getf2(long double a
, long double b
)
651 return -f3_cmp(b
, a
);