riscv: More insns, operands and arg slots
[tinycc.git] / lib / lib-arm64.c
blobb8fd9e85a70fcf17f2b8fff7b0d20f0f283338a1
/*
 * TCC runtime library for arm64.
 *
 * Copyright (c) 2015 Edmund Grimley Evans
 *
 * Copying and distribution of this file, with or without modification,
 * are permitted in any medium without royalty provided the copyright
 * notice and this notice are preserved.  This file is offered as-is,
 * without any warranty.
 */
/*
 * Fixed-width integer types and memcpy.
 *
 * When compiled by TCC (__TINYC__ defined) the names are declared by hand
 * instead of pulling in <stdint.h> / <string.h>; any other compiler uses
 * the standard headers.
 */
#ifdef __TINYC__
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef short int16_t;
typedef unsigned short uint16_t;
typedef int int32_t;
typedef unsigned uint32_t;
typedef long long int64_t;
typedef unsigned long long uint64_t;
/* The source pointer is const-qualified to match the ISO C prototype. */
void *memcpy(void *, const void *, __SIZE_TYPE__);
#else
#include <stdint.h>
#include <string.h>
#endif
/*
 * Cache-clearing entry point for the address range [beg, end).
 *
 * Simply forwards both arguments to __arm64_clear_cache, which is not
 * defined in this file (presumably supplied by the compiler -- confirm).
 */
void __clear_cache(void *beg, void *end)
{
    __arm64_clear_cache(beg, end);
}
/*
 * Raw 128-bit container used to build and pick apart long double values.
 * Field order matters: positional initialisers and 16-byte memcpy calls
 * throughout this file depend on x0 coming first.
 */
typedef struct {
    uint64_t x0;    /* less significant 64 bits */
    uint64_t x1;    /* more significant 64 bits (sign and exponent live here) */
} u128_t;
36 static long double f3_zero(int sgn)
38 long double f;
39 u128_t x = { 0, (uint64_t)sgn << 63 };
40 memcpy(&f, &x, 16);
41 return f;
44 static long double f3_infinity(int sgn)
46 long double f;
47 u128_t x = { 0, (uint64_t)sgn << 63 | 0x7fff000000000000 };
48 memcpy(&f, &x, 16);
49 return f;
52 static long double f3_NaN(void)
54 long double f;
55 #if 0
56 // ARM's default NaN usually has just the top fraction bit set:
57 u128_t x = { 0, 0x7fff800000000000 };
58 #else
59 // GCC's library sets all fraction bits:
60 u128_t x = { -1, 0x7fffffffffffffff };
61 #endif
62 memcpy(&f, &x, 16);
63 return f;
66 static int fp3_convert_NaN(long double *f, int sgn, u128_t mnt)
68 u128_t x = { mnt.x0,
69 mnt.x1 | 0x7fff800000000000 | (uint64_t)sgn << 63 };
70 memcpy(f, &x, 16);
71 return 1;
74 static int fp3_detect_NaNs(long double *f,
75 int a_sgn, int a_exp, u128_t a,
76 int b_sgn, int b_exp, u128_t b)
78 // Detect signalling NaNs:
79 if (a_exp == 32767 && (a.x0 | a.x1 << 16) && !(a.x1 >> 47 & 1))
80 return fp3_convert_NaN(f, a_sgn, a);
81 if (b_exp == 32767 && (b.x0 | b.x1 << 16) && !(b.x1 >> 47 & 1))
82 return fp3_convert_NaN(f, b_sgn, b);
84 // Detect quiet NaNs:
85 if (a_exp == 32767 && (a.x0 | a.x1 << 16))
86 return fp3_convert_NaN(f, a_sgn, a);
87 if (b_exp == 32767 && (b.x0 | b.x1 << 16))
88 return fp3_convert_NaN(f, b_sgn, b);
90 return 0;
93 static void f3_unpack(int *sgn, int32_t *exp, u128_t *mnt, long double f)
95 u128_t x;
96 memcpy(&x, &f, 16);
97 *sgn = x.x1 >> 63;
98 *exp = x.x1 >> 48 & 32767;
99 x.x1 = x.x1 << 16 >> 16;
100 if (*exp)
101 x.x1 |= (uint64_t)1 << 48;
102 else
103 *exp = 1;
104 *mnt = x;
107 static u128_t f3_normalise(int32_t *exp, u128_t mnt)
109 int sh;
110 if (!(mnt.x0 | mnt.x1))
111 return mnt;
112 if (!mnt.x1) {
113 mnt.x1 = mnt.x0;
114 mnt.x0 = 0;
115 *exp -= 64;
117 for (sh = 32; sh; sh >>= 1) {
118 if (!(mnt.x1 >> (64 - sh))) {
119 mnt.x1 = mnt.x1 << sh | mnt.x0 >> (64 - sh);
120 mnt.x0 = mnt.x0 << sh;
121 *exp -= sh;
124 return mnt;
127 static u128_t f3_sticky_shift(int32_t sh, u128_t x)
129 if (sh >= 128) {
130 x.x0 = !!(x.x0 | x.x1);
131 x.x1 = 0;
132 return x;
134 if (sh >= 64) {
135 x.x0 = x.x1 | !!x.x0;
136 x.x1 = 0;
137 sh -= 64;
139 if (sh > 0) {
140 x.x0 = x.x0 >> sh | x.x1 << (64 - sh) | !!(x.x0 << (64 - sh));
141 x.x1 = x.x1 >> sh;
143 return x;
146 static long double f3_round(int sgn, int32_t exp, u128_t x)
148 long double f;
149 int error;
151 if (exp > 0) {
152 x = f3_sticky_shift(13, x);
154 else {
155 x = f3_sticky_shift(14 - exp, x);
156 exp = 0;
159 error = x.x0 & 3;
160 x.x0 = x.x0 >> 2 | x.x1 << 62;
161 x.x1 = x.x1 >> 2;
163 if (error == 3 || ((error == 2) & (x.x0 & 1))) {
164 if (!++x.x0) {
165 ++x.x1;
166 if (x.x1 == (uint64_t)1 << 48)
167 exp = 1;
168 else if (x.x1 == (uint64_t)1 << 49) {
169 ++exp;
170 x.x0 = x.x0 >> 1 | x.x1 << 63;
171 x.x1 = x.x1 >> 1;
176 if (exp >= 32767)
177 return f3_infinity(sgn);
179 x.x1 = x.x1 << 16 >> 16 | (uint64_t)exp << 48 | (uint64_t)sgn << 63;
180 memcpy(&f, &x, 16);
181 return f;
184 static long double f3_add(long double fa, long double fb, int neg)
186 u128_t a, b, x;
187 int32_t a_exp, b_exp, x_exp;
188 int a_sgn, b_sgn, x_sgn;
189 long double fx;
191 f3_unpack(&a_sgn, &a_exp, &a, fa);
192 f3_unpack(&b_sgn, &b_exp, &b, fb);
194 if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
195 return fx;
197 b_sgn ^= neg;
199 // Handle infinities and zeroes:
200 if (a_exp == 32767 && b_exp == 32767 && a_sgn != b_sgn)
201 return f3_NaN();
202 if (a_exp == 32767)
203 return f3_infinity(a_sgn);
204 if (b_exp == 32767)
205 return f3_infinity(b_sgn);
206 if (!(a.x0 | a.x1 | b.x0 | b.x1))
207 return f3_zero(a_sgn & b_sgn);
209 a.x1 = a.x1 << 3 | a.x0 >> 61;
210 a.x0 = a.x0 << 3;
211 b.x1 = b.x1 << 3 | b.x0 >> 61;
212 b.x0 = b.x0 << 3;
214 if (a_exp <= b_exp) {
215 a = f3_sticky_shift(b_exp - a_exp, a);
216 a_exp = b_exp;
218 else {
219 b = f3_sticky_shift(a_exp - b_exp, b);
220 b_exp = a_exp;
223 x_sgn = a_sgn;
224 x_exp = a_exp;
225 if (a_sgn == b_sgn) {
226 x.x0 = a.x0 + b.x0;
227 x.x1 = a.x1 + b.x1 + (x.x0 < a.x0);
229 else {
230 x.x0 = a.x0 - b.x0;
231 x.x1 = a.x1 - b.x1 - (x.x0 > a.x0);
232 if (x.x1 >> 63) {
233 x_sgn ^= 1;
234 x.x0 = -x.x0;
235 x.x1 = -x.x1 - !!x.x0;
239 if (!(x.x0 | x.x1))
240 return f3_zero(0);
242 x = f3_normalise(&x_exp, x);
244 return f3_round(x_sgn, x_exp + 12, x);
/* Long double addition: returns a + b, computed by f3_add with neg = 0. */
long double __addtf3(long double a, long double b)
{
    long double sum = f3_add(a, b, 0);
    return sum;
}
/* Long double subtraction: returns a - b, computed by f3_add with neg = 1. */
long double __subtf3(long double a, long double b)
{
    long double difference = f3_add(a, b, 1);
    return difference;
}
257 long double __multf3(long double fa, long double fb)
259 u128_t a, b, x;
260 int32_t a_exp, b_exp, x_exp;
261 int a_sgn, b_sgn, x_sgn;
262 long double fx;
264 f3_unpack(&a_sgn, &a_exp, &a, fa);
265 f3_unpack(&b_sgn, &b_exp, &b, fb);
267 if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
268 return fx;
270 // Handle infinities and zeroes:
271 if ((a_exp == 32767 && !(b.x0 | b.x1)) ||
272 (b_exp == 32767 && !(a.x0 | a.x1)))
273 return f3_NaN();
274 if (a_exp == 32767 || b_exp == 32767)
275 return f3_infinity(a_sgn ^ b_sgn);
276 if (!(a.x0 | a.x1) || !(b.x0 | b.x1))
277 return f3_zero(a_sgn ^ b_sgn);
279 a = f3_normalise(&a_exp, a);
280 b = f3_normalise(&b_exp, b);
282 x_sgn = a_sgn ^ b_sgn;
283 x_exp = a_exp + b_exp - 16352;
286 // Convert to base (1 << 30), discarding bottom 6 bits, which are zero,
287 // so there are (32, 30, 30, 30) bits in (a3, a2, a1, a0):
288 uint64_t a0 = a.x0 << 28 >> 34;
289 uint64_t b0 = b.x0 << 28 >> 34;
290 uint64_t a1 = a.x0 >> 36 | a.x1 << 62 >> 34;
291 uint64_t b1 = b.x0 >> 36 | b.x1 << 62 >> 34;
292 uint64_t a2 = a.x1 << 32 >> 34;
293 uint64_t b2 = b.x1 << 32 >> 34;
294 uint64_t a3 = a.x1 >> 32;
295 uint64_t b3 = b.x1 >> 32;
296 // Use 16 small multiplications and additions that do not overflow:
297 uint64_t x0 = a0 * b0;
298 uint64_t x1 = (x0 >> 30) + a0 * b1 + a1 * b0;
299 uint64_t x2 = (x1 >> 30) + a0 * b2 + a1 * b1 + a2 * b0;
300 uint64_t x3 = (x2 >> 30) + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
301 uint64_t x4 = (x3 >> 30) + a1 * b3 + a2 * b2 + a3 * b1;
302 uint64_t x5 = (x4 >> 30) + a2 * b3 + a3 * b2;
303 uint64_t x6 = (x5 >> 30) + a3 * b3;
304 // We now have (64, 30, 30, ...) bits in (x6, x5, x4, ...).
305 // Take the top 128 bits, setting bottom bit if any lower bits were set:
306 uint64_t y0 = (x5 << 34 | x4 << 34 >> 30 | x3 << 34 >> 60 |
307 !!(x3 << 38 | (x2 | x1 | x0) << 34));
308 uint64_t y1 = x6;
309 // Top bit may be zero. Renormalise:
310 if (!(y1 >> 63)) {
311 y1 = y1 << 1 | y0 >> 63;
312 y0 = y0 << 1;
313 --x_exp;
315 x.x0 = y0;
316 x.x1 = y1;
319 return f3_round(x_sgn, x_exp, x);
322 long double __divtf3(long double fa, long double fb)
324 u128_t a, b, x;
325 int32_t a_exp, b_exp, x_exp;
326 int a_sgn, b_sgn, x_sgn, i;
327 long double fx;
329 f3_unpack(&a_sgn, &a_exp, &a, fa);
330 f3_unpack(&b_sgn, &b_exp, &b, fb);
332 if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
333 return fx;
335 // Handle infinities and zeroes:
336 if ((a_exp == 32767 && b_exp == 32767) ||
337 (!(a.x0 | a.x1) && !(b.x0 | b.x1)))
338 return f3_NaN();
339 if (a_exp == 32767 || !(b.x0 | b.x1))
340 return f3_infinity(a_sgn ^ b_sgn);
341 if (!(a.x0 | a.x1) || b_exp == 32767)
342 return f3_zero(a_sgn ^ b_sgn);
344 a = f3_normalise(&a_exp, a);
345 b = f3_normalise(&b_exp, b);
347 x_sgn = a_sgn ^ b_sgn;
348 x_exp = a_exp - b_exp + 16395;
350 a.x0 = a.x0 >> 1 | a.x1 << 63;
351 a.x1 = a.x1 >> 1;
352 b.x0 = b.x0 >> 1 | b.x1 << 63;
353 b.x1 = b.x1 >> 1;
354 x.x0 = 0;
355 x.x1 = 0;
356 for (i = 0; i < 116; i++) {
357 x.x1 = x.x1 << 1 | x.x0 >> 63;
358 x.x0 = x.x0 << 1;
359 if (a.x1 > b.x1 || (a.x1 == b.x1 && a.x0 >= b.x0)) {
360 a.x1 = a.x1 - b.x1 - (a.x0 < b.x0);
361 a.x0 = a.x0 - b.x0;
362 x.x0 |= 1;
364 a.x1 = a.x1 << 1 | a.x0 >> 63;
365 a.x0 = a.x0 << 1;
367 x.x0 |= !!(a.x0 | a.x1);
369 x = f3_normalise(&x_exp, x);
371 return f3_round(x_sgn, x_exp, x);
374 long double __extendsftf2(float f)
376 long double fx;
377 u128_t x;
378 uint32_t a;
379 uint64_t aa;
380 memcpy(&a, &f, 4);
381 aa = a;
382 x.x0 = 0;
383 if (!(a << 1))
384 x.x1 = aa << 32;
385 else if (a << 1 >> 24 == 255)
386 x.x1 = (0x7fff000000000000 | aa >> 31 << 63 | aa << 41 >> 16 |
387 (uint64_t)!!(a << 9) << 47);
388 else
389 x.x1 = (aa >> 31 << 63 | ((aa >> 23 & 255) + 16256) << 48 |
390 aa << 41 >> 16);
391 memcpy(&fx, &x, 16);
392 return fx;
395 long double __extenddftf2(double f)
397 long double fx;
398 u128_t x;
399 uint64_t a;
400 memcpy(&a, &f, 8);
401 x.x0 = a << 60;
402 if (!(a << 1))
403 x.x1 = a;
404 else if (a << 1 >> 53 == 2047)
405 x.x1 = (0x7fff000000000000 | a >> 63 << 63 | a << 12 >> 16 |
406 (uint64_t)!!(a << 12) << 47);
407 else
408 x.x1 = a >> 63 << 63 | ((a >> 52 & 2047) + 15360) << 48 | a << 12 >> 16;
409 memcpy(&fx, &x, 16);
410 return fx;
413 float __trunctfsf2(long double f)
415 u128_t mnt;
416 int32_t exp;
417 int sgn;
418 uint32_t x;
419 float fx;
421 f3_unpack(&sgn, &exp, &mnt, f);
423 if (exp == 32767 && (mnt.x0 | mnt.x1 << 16))
424 x = 0x7fc00000 | (uint32_t)sgn << 31 | (mnt.x1 >> 25 & 0x007fffff);
425 else if (exp > 16510)
426 x = 0x7f800000 | (uint32_t)sgn << 31;
427 else if (exp < 16233)
428 x = (uint32_t)sgn << 31;
429 else {
430 exp -= 16257;
431 x = mnt.x1 >> 23 | !!(mnt.x0 | mnt.x1 << 41);
432 if (exp < 0) {
433 x = x >> -exp | !!(x << (32 + exp));
434 exp = 0;
436 if ((x & 3) == 3 || (x & 7) == 6)
437 x += 4;
438 x = ((x >> 2) + (exp << 23)) | (uint32_t)sgn << 31;
440 memcpy(&fx, &x, 4);
441 return fx;
444 double __trunctfdf2(long double f)
446 u128_t mnt;
447 int32_t exp;
448 int sgn;
449 uint64_t x;
450 double fx;
452 f3_unpack(&sgn, &exp, &mnt, f);
454 if (exp == 32767 && (mnt.x0 | mnt.x1 << 16))
455 x = (0x7ff8000000000000 | (uint64_t)sgn << 63 |
456 mnt.x1 << 16 >> 12 | mnt.x0 >> 60);
457 else if (exp > 17406)
458 x = 0x7ff0000000000000 | (uint64_t)sgn << 63;
459 else if (exp < 15308)
460 x = (uint64_t)sgn << 63;
461 else {
462 exp -= 15361;
463 x = mnt.x1 << 6 | mnt.x0 >> 58 | !!(mnt.x0 << 6);
464 if (exp < 0) {
465 x = x >> -exp | !!(x << (64 + exp));
466 exp = 0;
468 if ((x & 3) == 3 || (x & 7) == 6)
469 x += 4;
470 x = ((x >> 2) + ((uint64_t)exp << 52)) | (uint64_t)sgn << 63;
472 memcpy(&fx, &x, 8);
473 return fx;
476 int32_t __fixtfsi(long double fa)
478 u128_t a;
479 int32_t a_exp;
480 int a_sgn;
481 int32_t x;
482 f3_unpack(&a_sgn, &a_exp, &a, fa);
483 if (a_exp < 16369)
484 return 0;
485 if (a_exp > 16413)
486 return a_sgn ? -0x80000000 : 0x7fffffff;
487 x = a.x1 >> (16431 - a_exp);
488 return a_sgn ? -x : x;
491 int64_t __fixtfdi(long double fa)
493 u128_t a;
494 int32_t a_exp;
495 int a_sgn;
496 int64_t x;
497 f3_unpack(&a_sgn, &a_exp, &a, fa);
498 if (a_exp < 16383)
499 return 0;
500 if (a_exp > 16445)
501 return a_sgn ? -0x8000000000000000 : 0x7fffffffffffffff;
502 x = (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp);
503 return a_sgn ? -x : x;
506 uint32_t __fixunstfsi(long double fa)
508 u128_t a;
509 int32_t a_exp;
510 int a_sgn;
511 f3_unpack(&a_sgn, &a_exp, &a, fa);
512 if (a_sgn || a_exp < 16369)
513 return 0;
514 if (a_exp > 16414)
515 return -1;
516 return a.x1 >> (16431 - a_exp);
519 uint64_t __fixunstfdi(long double fa)
521 u128_t a;
522 int32_t a_exp;
523 int a_sgn;
524 f3_unpack(&a_sgn, &a_exp, &a, fa);
525 if (a_sgn || a_exp < 16383)
526 return 0;
527 if (a_exp > 16446)
528 return -1;
529 return (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp);
532 long double __floatsitf(int32_t a)
534 int sgn = 0;
535 int exp = 16414;
536 uint32_t mnt = a;
537 u128_t x = { 0, 0 };
538 long double f;
539 int i;
540 if (a) {
541 if (a < 0) {
542 sgn = 1;
543 mnt = -mnt;
545 for (i = 16; i; i >>= 1)
546 if (!(mnt >> (32 - i))) {
547 mnt <<= i;
548 exp -= i;
550 x.x1 = ((uint64_t)sgn << 63 | (uint64_t)exp << 48 |
551 (uint64_t)(mnt << 1) << 16);
553 memcpy(&f, &x, 16);
554 return f;
557 long double __floatditf(int64_t a)
559 int sgn = 0;
560 int exp = 16446;
561 uint64_t mnt = a;
562 u128_t x = { 0, 0 };
563 long double f;
564 int i;
565 if (a) {
566 if (a < 0) {
567 sgn = 1;
568 mnt = -mnt;
570 for (i = 32; i; i >>= 1)
571 if (!(mnt >> (64 - i))) {
572 mnt <<= i;
573 exp -= i;
575 x.x0 = mnt << 49;
576 x.x1 = (uint64_t)sgn << 63 | (uint64_t)exp << 48 | mnt << 1 >> 16;
578 memcpy(&f, &x, 16);
579 return f;
582 long double __floatunsitf(uint32_t a)
584 int exp = 16414;
585 uint32_t mnt = a;
586 u128_t x = { 0, 0 };
587 long double f;
588 int i;
589 if (a) {
590 for (i = 16; i; i >>= 1)
591 if (!(mnt >> (32 - i))) {
592 mnt <<= i;
593 exp -= i;
595 x.x1 = (uint64_t)exp << 48 | (uint64_t)(mnt << 1) << 16;
597 memcpy(&f, &x, 16);
598 return f;
601 long double __floatunditf(uint64_t a)
603 int exp = 16446;
604 uint64_t mnt = a;
605 u128_t x = { 0, 0 };
606 long double f;
607 int i;
608 if (a) {
609 for (i = 32; i; i >>= 1)
610 if (!(mnt >> (64 - i))) {
611 mnt <<= i;
612 exp -= i;
614 x.x0 = mnt << 49;
615 x.x1 = (uint64_t)exp << 48 | mnt << 1 >> 16;
617 memcpy(&f, &x, 16);
618 return f;
621 static int f3_cmp(long double fa, long double fb)
623 u128_t a, b;
624 memcpy(&a, &fa, 16);
625 memcpy(&b, &fb, 16);
626 return (!(a.x0 | a.x1 << 1 | b.x0 | b.x1 << 1) ? 0 :
627 ((a.x1 << 1 >> 49 == 0x7fff && (a.x0 | a.x1 << 16)) ||
628 (b.x1 << 1 >> 49 == 0x7fff && (b.x0 | b.x1 << 16))) ? 2 :
629 a.x1 >> 63 != b.x1 >> 63 ? (int)(b.x1 >> 63) - (int)(a.x1 >> 63) :
630 a.x1 < b.x1 ? (int)(a.x1 >> 63 << 1) - 1 :
631 a.x1 > b.x1 ? 1 - (int)(a.x1 >> 63 << 1) :
632 a.x0 < b.x0 ? (int)(a.x1 >> 63 << 1) - 1 :
633 b.x0 < a.x0 ? 1 - (int)(a.x1 >> 63 << 1) : 0);
/*
 * Equality test: returns 0 when f3_cmp reports a and b equal, and 1 for
 * every other outcome, including the unordered (NaN) case.
 */
int __eqtf2(long double a, long double b)
{
    int order = f3_cmp(a, b);
    return order != 0;
}
/*
 * Inequality test: returns 0 when f3_cmp reports a and b equal, and 1 for
 * every other outcome, including the unordered (NaN) case.
 */
int __netf2(long double a, long double b)
{
    int order = f3_cmp(a, b);
    return order != 0;
}
/*
 * Less-than test: returns f3_cmp(a, b) unchanged, so the result is
 * negative only when a < b; the unordered (NaN) case yields 2.
 */
int __lttf2(long double a, long double b)
{
    int order = f3_cmp(a, b);
    return order;
}
/*
 * Less-or-equal test: returns f3_cmp(a, b) unchanged, so the result is
 * zero or negative only when a <= b; the unordered (NaN) case yields 2.
 */
int __letf2(long double a, long double b)
{
    int order = f3_cmp(a, b);
    return order;
}
/*
 * Greater-than test: returns the negation of f3_cmp(b, a), so the result
 * is positive only when a > b; the unordered (NaN) case yields -2.
 */
int __gttf2(long double a, long double b)
{
    int swapped = f3_cmp(b, a);
    return -swapped;
}
/*
 * Greater-or-equal test: returns the negation of f3_cmp(b, a), so the
 * result is zero or positive only when a >= b; the unordered (NaN) case
 * yields -2.
 */
int __getf2(long double a, long double b)
{
    int swapped = f3_cmp(b, a);
    return -swapped;
}