riscv: asm: Add branch to label
[tinycc.git] / lib / lib-arm64.c
blob226827e99fd5a293d6d2c90bef07befd27ce1597
/*
 *  TCC runtime library for arm64.
 *
 *  Copyright (c) 2015 Edmund Grimley Evans
 *
 * Copying and distribution of this file, with or without modification,
 * are permitted in any medium without royalty provided the copyright
 * notice and this notice are preserved.  This file is offered as-is,
 * without any warranty.
 */
/*
 * Fixed-width integer types and memcpy().
 *
 * When compiled by TinyCC (__TINYC__ defined) the names are declared by
 * hand instead of including headers -- presumably so the runtime library
 * builds without system headers; confirm against the build setup.  Any
 * other compiler takes them from <stdint.h> and <string.h>.
 */
#ifdef __TINYC__
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef short int16_t;
typedef unsigned short uint16_t;
typedef int int32_t;
typedef unsigned uint32_t;
typedef long long int64_t;
typedef unsigned long long uint64_t;
/* Matches the ISO C prototype: the source buffer is only read. */
void *memcpy(void *dst, const void *src, __SIZE_TYPE__ n);
#else
#include <stdint.h>
#include <string.h>
#endif
/*
 * __clear_cache: hands both pointers unchanged to __arm64_clear_cache,
 * which is not declared in this file.  The wrapper is compiled out on
 * RISC-V and Apple targets.
 */
#if !(defined __riscv || defined __APPLE__)
void __clear_cache(void *beg, void *end)
{
    __arm64_clear_cache(beg, end);
}
#endif
/*
 * Raw 128-bit value stored as two 64-bit words.  The routines in this file
 * copy a long double into it with memcpy and read the sign from bit 63 of
 * x1, so x1 is the more significant word and x0 the less significant one.
 */
typedef struct {
    uint64_t x0;
    uint64_t x1;
} u128_t;
38 static long double f3_zero(int sgn)
40 long double f;
41 u128_t x = { 0, (uint64_t)sgn << 63 };
42 memcpy(&f, &x, 16);
43 return f;
46 static long double f3_infinity(int sgn)
48 long double f;
49 u128_t x = { 0, (uint64_t)sgn << 63 | 0x7fff000000000000 };
50 memcpy(&f, &x, 16);
51 return f;
54 static long double f3_NaN(void)
56 long double f;
57 #if 0
58 // ARM's default NaN usually has just the top fraction bit set:
59 u128_t x = { 0, 0x7fff800000000000 };
60 #else
61 // GCC's library sets all fraction bits:
62 u128_t x = { -1, 0x7fffffffffffffff };
63 #endif
64 memcpy(&f, &x, 16);
65 return f;
68 static int fp3_convert_NaN(long double *f, int sgn, u128_t mnt)
70 u128_t x = { mnt.x0,
71 mnt.x1 | 0x7fff800000000000 | (uint64_t)sgn << 63 };
72 memcpy(f, &x, 16);
73 return 1;
76 static int fp3_detect_NaNs(long double *f,
77 int a_sgn, int a_exp, u128_t a,
78 int b_sgn, int b_exp, u128_t b)
80 // Detect signalling NaNs:
81 if (a_exp == 32767 && (a.x0 | a.x1 << 16) && !(a.x1 >> 47 & 1))
82 return fp3_convert_NaN(f, a_sgn, a);
83 if (b_exp == 32767 && (b.x0 | b.x1 << 16) && !(b.x1 >> 47 & 1))
84 return fp3_convert_NaN(f, b_sgn, b);
86 // Detect quiet NaNs:
87 if (a_exp == 32767 && (a.x0 | a.x1 << 16))
88 return fp3_convert_NaN(f, a_sgn, a);
89 if (b_exp == 32767 && (b.x0 | b.x1 << 16))
90 return fp3_convert_NaN(f, b_sgn, b);
92 return 0;
95 static void f3_unpack(int *sgn, int32_t *exp, u128_t *mnt, long double f)
97 u128_t x;
98 memcpy(&x, &f, 16);
99 *sgn = x.x1 >> 63;
100 *exp = x.x1 >> 48 & 32767;
101 x.x1 = x.x1 << 16 >> 16;
102 if (*exp)
103 x.x1 |= (uint64_t)1 << 48;
104 else
105 *exp = 1;
106 *mnt = x;
109 static u128_t f3_normalise(int32_t *exp, u128_t mnt)
111 int sh;
112 if (!(mnt.x0 | mnt.x1))
113 return mnt;
114 if (!mnt.x1) {
115 mnt.x1 = mnt.x0;
116 mnt.x0 = 0;
117 *exp -= 64;
119 for (sh = 32; sh; sh >>= 1) {
120 if (!(mnt.x1 >> (64 - sh))) {
121 mnt.x1 = mnt.x1 << sh | mnt.x0 >> (64 - sh);
122 mnt.x0 = mnt.x0 << sh;
123 *exp -= sh;
126 return mnt;
129 static u128_t f3_sticky_shift(int32_t sh, u128_t x)
131 if (sh >= 128) {
132 x.x0 = !!(x.x0 | x.x1);
133 x.x1 = 0;
134 return x;
136 if (sh >= 64) {
137 x.x0 = x.x1 | !!x.x0;
138 x.x1 = 0;
139 sh -= 64;
141 if (sh > 0) {
142 x.x0 = x.x0 >> sh | x.x1 << (64 - sh) | !!(x.x0 << (64 - sh));
143 x.x1 = x.x1 >> sh;
145 return x;
148 static long double f3_round(int sgn, int32_t exp, u128_t x)
150 long double f;
151 int error;
153 if (exp > 0) {
154 x = f3_sticky_shift(13, x);
156 else {
157 x = f3_sticky_shift(14 - exp, x);
158 exp = 0;
161 error = x.x0 & 3;
162 x.x0 = x.x0 >> 2 | x.x1 << 62;
163 x.x1 = x.x1 >> 2;
165 if (error == 3 || ((error == 2) & (x.x0 & 1))) {
166 if (!++x.x0) {
167 ++x.x1;
168 if (x.x1 == (uint64_t)1 << 48)
169 exp = 1;
170 else if (x.x1 == (uint64_t)1 << 49) {
171 ++exp;
172 x.x0 = x.x0 >> 1 | x.x1 << 63;
173 x.x1 = x.x1 >> 1;
178 if (exp >= 32767)
179 return f3_infinity(sgn);
181 x.x1 = x.x1 << 16 >> 16 | (uint64_t)exp << 48 | (uint64_t)sgn << 63;
182 memcpy(&f, &x, 16);
183 return f;
186 static long double f3_add(long double fa, long double fb, int neg)
188 u128_t a, b, x;
189 int32_t a_exp, b_exp, x_exp;
190 int a_sgn, b_sgn, x_sgn;
191 long double fx;
193 f3_unpack(&a_sgn, &a_exp, &a, fa);
194 f3_unpack(&b_sgn, &b_exp, &b, fb);
196 if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
197 return fx;
199 b_sgn ^= neg;
201 // Handle infinities and zeroes:
202 if (a_exp == 32767 && b_exp == 32767 && a_sgn != b_sgn)
203 return f3_NaN();
204 if (a_exp == 32767)
205 return f3_infinity(a_sgn);
206 if (b_exp == 32767)
207 return f3_infinity(b_sgn);
208 if (!(a.x0 | a.x1 | b.x0 | b.x1))
209 return f3_zero(a_sgn & b_sgn);
211 a.x1 = a.x1 << 3 | a.x0 >> 61;
212 a.x0 = a.x0 << 3;
213 b.x1 = b.x1 << 3 | b.x0 >> 61;
214 b.x0 = b.x0 << 3;
216 if (a_exp <= b_exp) {
217 a = f3_sticky_shift(b_exp - a_exp, a);
218 a_exp = b_exp;
220 else {
221 b = f3_sticky_shift(a_exp - b_exp, b);
222 b_exp = a_exp;
225 x_sgn = a_sgn;
226 x_exp = a_exp;
227 if (a_sgn == b_sgn) {
228 x.x0 = a.x0 + b.x0;
229 x.x1 = a.x1 + b.x1 + (x.x0 < a.x0);
231 else {
232 x.x0 = a.x0 - b.x0;
233 x.x1 = a.x1 - b.x1 - (x.x0 > a.x0);
234 if (x.x1 >> 63) {
235 x_sgn ^= 1;
236 x.x0 = -x.x0;
237 x.x1 = -x.x1 - !!x.x0;
241 if (!(x.x0 | x.x1))
242 return f3_zero(0);
244 x = f3_normalise(&x_exp, x);
246 return f3_round(x_sgn, x_exp + 12, x);
/* Return a + b, computed by f3_add with neg = 0. */
long double __addtf3(long double a, long double b)
{
    long double sum = f3_add(a, b, 0);

    return sum;
}
/* Return a - b, computed by f3_add with neg = 1 (sign of b flipped). */
long double __subtf3(long double a, long double b)
{
    long double difference = f3_add(a, b, 1);

    return difference;
}
259 long double __multf3(long double fa, long double fb)
261 u128_t a, b, x;
262 int32_t a_exp, b_exp, x_exp;
263 int a_sgn, b_sgn, x_sgn;
264 long double fx;
266 f3_unpack(&a_sgn, &a_exp, &a, fa);
267 f3_unpack(&b_sgn, &b_exp, &b, fb);
269 if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
270 return fx;
272 // Handle infinities and zeroes:
273 if ((a_exp == 32767 && !(b.x0 | b.x1)) ||
274 (b_exp == 32767 && !(a.x0 | a.x1)))
275 return f3_NaN();
276 if (a_exp == 32767 || b_exp == 32767)
277 return f3_infinity(a_sgn ^ b_sgn);
278 if (!(a.x0 | a.x1) || !(b.x0 | b.x1))
279 return f3_zero(a_sgn ^ b_sgn);
281 a = f3_normalise(&a_exp, a);
282 b = f3_normalise(&b_exp, b);
284 x_sgn = a_sgn ^ b_sgn;
285 x_exp = a_exp + b_exp - 16352;
288 // Convert to base (1 << 30), discarding bottom 6 bits, which are zero,
289 // so there are (32, 30, 30, 30) bits in (a3, a2, a1, a0):
290 uint64_t a0 = a.x0 << 28 >> 34;
291 uint64_t b0 = b.x0 << 28 >> 34;
292 uint64_t a1 = a.x0 >> 36 | a.x1 << 62 >> 34;
293 uint64_t b1 = b.x0 >> 36 | b.x1 << 62 >> 34;
294 uint64_t a2 = a.x1 << 32 >> 34;
295 uint64_t b2 = b.x1 << 32 >> 34;
296 uint64_t a3 = a.x1 >> 32;
297 uint64_t b3 = b.x1 >> 32;
298 // Use 16 small multiplications and additions that do not overflow:
299 uint64_t x0 = a0 * b0;
300 uint64_t x1 = (x0 >> 30) + a0 * b1 + a1 * b0;
301 uint64_t x2 = (x1 >> 30) + a0 * b2 + a1 * b1 + a2 * b0;
302 uint64_t x3 = (x2 >> 30) + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
303 uint64_t x4 = (x3 >> 30) + a1 * b3 + a2 * b2 + a3 * b1;
304 uint64_t x5 = (x4 >> 30) + a2 * b3 + a3 * b2;
305 uint64_t x6 = (x5 >> 30) + a3 * b3;
306 // We now have (64, 30, 30, ...) bits in (x6, x5, x4, ...).
307 // Take the top 128 bits, setting bottom bit if any lower bits were set:
308 uint64_t y0 = (x5 << 34 | x4 << 34 >> 30 | x3 << 34 >> 60 |
309 !!(x3 << 38 | (x2 | x1 | x0) << 34));
310 uint64_t y1 = x6;
311 // Top bit may be zero. Renormalise:
312 if (!(y1 >> 63)) {
313 y1 = y1 << 1 | y0 >> 63;
314 y0 = y0 << 1;
315 --x_exp;
317 x.x0 = y0;
318 x.x1 = y1;
321 return f3_round(x_sgn, x_exp, x);
324 long double __divtf3(long double fa, long double fb)
326 u128_t a, b, x;
327 int32_t a_exp, b_exp, x_exp;
328 int a_sgn, b_sgn, x_sgn, i;
329 long double fx;
331 f3_unpack(&a_sgn, &a_exp, &a, fa);
332 f3_unpack(&b_sgn, &b_exp, &b, fb);
334 if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
335 return fx;
337 // Handle infinities and zeroes:
338 if ((a_exp == 32767 && b_exp == 32767) ||
339 (!(a.x0 | a.x1) && !(b.x0 | b.x1)))
340 return f3_NaN();
341 if (a_exp == 32767 || !(b.x0 | b.x1))
342 return f3_infinity(a_sgn ^ b_sgn);
343 if (!(a.x0 | a.x1) || b_exp == 32767)
344 return f3_zero(a_sgn ^ b_sgn);
346 a = f3_normalise(&a_exp, a);
347 b = f3_normalise(&b_exp, b);
349 x_sgn = a_sgn ^ b_sgn;
350 x_exp = a_exp - b_exp + 16395;
352 a.x0 = a.x0 >> 1 | a.x1 << 63;
353 a.x1 = a.x1 >> 1;
354 b.x0 = b.x0 >> 1 | b.x1 << 63;
355 b.x1 = b.x1 >> 1;
356 x.x0 = 0;
357 x.x1 = 0;
358 for (i = 0; i < 116; i++) {
359 x.x1 = x.x1 << 1 | x.x0 >> 63;
360 x.x0 = x.x0 << 1;
361 if (a.x1 > b.x1 || (a.x1 == b.x1 && a.x0 >= b.x0)) {
362 a.x1 = a.x1 - b.x1 - (a.x0 < b.x0);
363 a.x0 = a.x0 - b.x0;
364 x.x0 |= 1;
366 a.x1 = a.x1 << 1 | a.x0 >> 63;
367 a.x0 = a.x0 << 1;
369 x.x0 |= !!(a.x0 | a.x1);
371 x = f3_normalise(&x_exp, x);
373 return f3_round(x_sgn, x_exp, x);
376 long double __extendsftf2(float f)
378 long double fx;
379 u128_t x;
380 uint32_t a;
381 uint64_t aa;
382 memcpy(&a, &f, 4);
383 aa = a;
384 x.x0 = 0;
385 if (!(a << 1))
386 x.x1 = aa << 32;
387 else if (a << 1 >> 24 == 255)
388 x.x1 = (0x7fff000000000000 | aa >> 31 << 63 | aa << 41 >> 16 |
389 (uint64_t)!!(a << 9) << 47);
390 else if (a << 1 >> 24 == 0) {
391 uint64_t adj = 0;
392 while (!(a << 1 >> 1 >> (23 - adj)))
393 adj++;
394 x.x1 = aa >> 31 << 63 | (16256 - adj + 1) << 48 | aa << adj << 41 >> 16;
395 } else
396 x.x1 = (aa >> 31 << 63 | ((aa >> 23 & 255) + 16256) << 48 |
397 aa << 41 >> 16);
398 memcpy(&fx, &x, 16);
399 return fx;
402 long double __extenddftf2(double f)
404 long double fx;
405 u128_t x;
406 uint64_t a;
407 memcpy(&a, &f, 8);
408 x.x0 = a << 60;
409 if (!(a << 1))
410 x.x1 = a;
411 else if (a << 1 >> 53 == 2047)
412 x.x1 = (0x7fff000000000000 | a >> 63 << 63 | a << 12 >> 16 |
413 (uint64_t)!!(a << 12) << 47);
414 else if (a << 1 >> 53 == 0) {
415 uint64_t adj = 0;
416 while (!(a << 1 >> 1 >> (52 - adj)))
417 adj++;
418 x.x0 <<= adj;
419 x.x1 = a >> 63 << 63 | (15360 - adj + 1) << 48 | a << adj << 12 >> 16;
420 } else
421 x.x1 = a >> 63 << 63 | ((a >> 52 & 2047) + 15360) << 48 | a << 12 >> 16;
422 memcpy(&fx, &x, 16);
423 return fx;
426 float __trunctfsf2(long double f)
428 u128_t mnt;
429 int32_t exp;
430 int sgn;
431 uint32_t x;
432 float fx;
434 f3_unpack(&sgn, &exp, &mnt, f);
436 if (exp == 32767 && (mnt.x0 | mnt.x1 << 16))
437 x = 0x7fc00000 | (uint32_t)sgn << 31 | (mnt.x1 >> 25 & 0x007fffff);
438 else if (exp > 16510)
439 x = 0x7f800000 | (uint32_t)sgn << 31;
440 else if (exp < 16233)
441 x = (uint32_t)sgn << 31;
442 else {
443 exp -= 16257;
444 x = mnt.x1 >> 23 | !!(mnt.x0 | mnt.x1 << 41);
445 if (exp < 0) {
446 x = x >> -exp | !!(x << (32 + exp));
447 exp = 0;
449 if ((x & 3) == 3 || (x & 7) == 6)
450 x += 4;
451 x = ((x >> 2) + (exp << 23)) | (uint32_t)sgn << 31;
453 memcpy(&fx, &x, 4);
454 return fx;
457 double __trunctfdf2(long double f)
459 u128_t mnt;
460 int32_t exp;
461 int sgn;
462 uint64_t x;
463 double fx;
465 f3_unpack(&sgn, &exp, &mnt, f);
467 if (exp == 32767 && (mnt.x0 | mnt.x1 << 16))
468 x = (0x7ff8000000000000 | (uint64_t)sgn << 63 |
469 mnt.x1 << 16 >> 12 | mnt.x0 >> 60);
470 else if (exp > 17406)
471 x = 0x7ff0000000000000 | (uint64_t)sgn << 63;
472 else if (exp < 15308)
473 x = (uint64_t)sgn << 63;
474 else {
475 exp -= 15361;
476 x = mnt.x1 << 6 | mnt.x0 >> 58 | !!(mnt.x0 << 6);
477 if (exp < 0) {
478 x = x >> -exp | !!(x << (64 + exp));
479 exp = 0;
481 if ((x & 3) == 3 || (x & 7) == 6)
482 x += 4;
483 x = ((x >> 2) + ((uint64_t)exp << 52)) | (uint64_t)sgn << 63;
485 memcpy(&fx, &x, 8);
486 return fx;
489 int32_t __fixtfsi(long double fa)
491 u128_t a;
492 int32_t a_exp;
493 int a_sgn;
494 int32_t x;
495 f3_unpack(&a_sgn, &a_exp, &a, fa);
496 if (a_exp < 16369)
497 return 0;
498 if (a_exp > 16413)
499 return a_sgn ? -0x80000000 : 0x7fffffff;
500 x = a.x1 >> (16431 - a_exp);
501 return a_sgn ? -x : x;
504 int64_t __fixtfdi(long double fa)
506 u128_t a;
507 int32_t a_exp;
508 int a_sgn;
509 int64_t x;
510 f3_unpack(&a_sgn, &a_exp, &a, fa);
511 if (a_exp < 16383)
512 return 0;
513 if (a_exp > 16445)
514 return a_sgn ? -0x8000000000000000 : 0x7fffffffffffffff;
515 x = (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp);
516 return a_sgn ? -x : x;
519 uint32_t __fixunstfsi(long double fa)
521 u128_t a;
522 int32_t a_exp;
523 int a_sgn;
524 f3_unpack(&a_sgn, &a_exp, &a, fa);
525 if (a_sgn || a_exp < 16369)
526 return 0;
527 if (a_exp > 16414)
528 return -1;
529 return a.x1 >> (16431 - a_exp);
532 uint64_t __fixunstfdi(long double fa)
534 u128_t a;
535 int32_t a_exp;
536 int a_sgn;
537 f3_unpack(&a_sgn, &a_exp, &a, fa);
538 if (a_sgn || a_exp < 16383)
539 return 0;
540 if (a_exp > 16446)
541 return -1;
542 return (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp);
545 long double __floatsitf(int32_t a)
547 int sgn = 0;
548 int exp = 16414;
549 uint32_t mnt = a;
550 u128_t x = { 0, 0 };
551 long double f;
552 int i;
553 if (a) {
554 if (a < 0) {
555 sgn = 1;
556 mnt = -mnt;
558 for (i = 16; i; i >>= 1)
559 if (!(mnt >> (32 - i))) {
560 mnt <<= i;
561 exp -= i;
563 x.x1 = ((uint64_t)sgn << 63 | (uint64_t)exp << 48 |
564 (uint64_t)(mnt << 1) << 16);
566 memcpy(&f, &x, 16);
567 return f;
570 long double __floatditf(int64_t a)
572 int sgn = 0;
573 int exp = 16446;
574 uint64_t mnt = a;
575 u128_t x = { 0, 0 };
576 long double f;
577 int i;
578 if (a) {
579 if (a < 0) {
580 sgn = 1;
581 mnt = -mnt;
583 for (i = 32; i; i >>= 1)
584 if (!(mnt >> (64 - i))) {
585 mnt <<= i;
586 exp -= i;
588 x.x0 = mnt << 49;
589 x.x1 = (uint64_t)sgn << 63 | (uint64_t)exp << 48 | mnt << 1 >> 16;
591 memcpy(&f, &x, 16);
592 return f;
595 long double __floatunsitf(uint32_t a)
597 int exp = 16414;
598 uint32_t mnt = a;
599 u128_t x = { 0, 0 };
600 long double f;
601 int i;
602 if (a) {
603 for (i = 16; i; i >>= 1)
604 if (!(mnt >> (32 - i))) {
605 mnt <<= i;
606 exp -= i;
608 x.x1 = (uint64_t)exp << 48 | (uint64_t)(mnt << 1) << 16;
610 memcpy(&f, &x, 16);
611 return f;
614 long double __floatunditf(uint64_t a)
616 int exp = 16446;
617 uint64_t mnt = a;
618 u128_t x = { 0, 0 };
619 long double f;
620 int i;
621 if (a) {
622 for (i = 32; i; i >>= 1)
623 if (!(mnt >> (64 - i))) {
624 mnt <<= i;
625 exp -= i;
627 x.x0 = mnt << 49;
628 x.x1 = (uint64_t)exp << 48 | mnt << 1 >> 16;
630 memcpy(&f, &x, 16);
631 return f;
634 static int f3_cmp(long double fa, long double fb)
636 u128_t a, b;
637 memcpy(&a, &fa, 16);
638 memcpy(&b, &fb, 16);
639 return (!(a.x0 | a.x1 << 1 | b.x0 | b.x1 << 1) ? 0 :
640 ((a.x1 << 1 >> 49 == 0x7fff && (a.x0 | a.x1 << 16)) ||
641 (b.x1 << 1 >> 49 == 0x7fff && (b.x0 | b.x1 << 16))) ? 2 :
642 a.x1 >> 63 != b.x1 >> 63 ? (int)(b.x1 >> 63) - (int)(a.x1 >> 63) :
643 a.x1 < b.x1 ? (int)(a.x1 >> 63 << 1) - 1 :
644 a.x1 > b.x1 ? 1 - (int)(a.x1 >> 63 << 1) :
645 a.x0 < b.x0 ? (int)(a.x1 >> 63 << 1) - 1 :
646 b.x0 < a.x0 ? 1 - (int)(a.x1 >> 63 << 1) : 0);
/*
 * Equality test: returns 0 when a == b and 1 otherwise (including when
 * either operand is a NaN).
 */
int __eqtf2(long double a, long double b)
{
    return f3_cmp(a, b) != 0;
}
/*
 * Inequality test: returns 1 when a != b (including when either operand
 * is a NaN) and 0 when they are equal.
 */
int __netf2(long double a, long double b)
{
    return f3_cmp(a, b) != 0;
}
/*
 * Less-than helper: the result is negative exactly when a < b.
 * It is the raw f3_cmp value (-1, 0, 1, or 2 when a NaN is involved).
 */
int __lttf2(long double a, long double b)
{
    int order = f3_cmp(a, b);

    return order;
}
/*
 * Less-or-equal helper: the result is <= 0 exactly when a <= b.
 * It is the raw f3_cmp value (-1, 0, 1, or 2 when a NaN is involved).
 */
int __letf2(long double a, long double b)
{
    int order = f3_cmp(a, b);

    return order;
}
/*
 * Greater-than helper: the result is positive exactly when a > b.
 * Computed as the negated comparison of (b, a), so a NaN operand
 * yields -2.
 */
int __gttf2(long double a, long double b)
{
    int swapped = f3_cmp(b, a);

    return -swapped;
}
/*
 * Greater-or-equal helper: the result is >= 0 exactly when a >= b.
 * Computed as the negated comparison of (b, a), so a NaN operand
 * yields -2.
 */
int __getf2(long double a, long double b)
{
    int swapped = f3_cmp(b, a);

    return -swapped;
}