win32/include/winapi changes from https://github.com/run4flat/tinycc.git
[tinycc.git] / lib / lib-arm64.c
blobbff1d91a72b4456b54b436931bf3cedce1c43e2c
1 /*
2 * TCC runtime library for arm64.
4 * Copyright (c) 2015 Edmund Grimley Evans
6 * Copying and distribution of this file, with or without modification,
7 * are permitted in any medium without royalty provided the copyright
8 * notice and this notice are preserved. This file is offered as-is,
9 * without any warranty.
12 #include <stdint.h>
13 #include <string.h>
// Public cache-maintenance entry point.  Simply forwards the range
// delimited by beg and end to __arm64_clear_cache, which is not defined
// in this file (presumably supplied by the compiler -- confirm).
void __clear_cache(char *beg, char *end)
{
    __arm64_clear_cache(beg, end);
}
// Raw 128-bit image of a long double, split into two 64-bit words.
// x0 is stored first and is used throughout this file as the low half;
// x1 is the high half (sign, exponent and top fraction bits).
typedef struct {
    uint64_t x0;
    uint64_t x1;
} u128_t;
24 static long double f3_zero(int sgn)
26 long double f;
27 u128_t x = { 0, (uint64_t)sgn << 63 };
28 memcpy(&f, &x, 16);
29 return f;
32 static long double f3_infinity(int sgn)
34 long double f;
35 u128_t x = { 0, (uint64_t)sgn << 63 | 0x7fff000000000000 };
36 memcpy(&f, &x, 16);
37 return f;
40 static long double f3_NaN(void)
42 long double f;
43 #if 0
44 // ARM's default NaN usually has just the top fraction bit set:
45 u128_t x = { 0, 0x7fff800000000000 };
46 #else
47 // GCC's library sets all fraction bits:
48 u128_t x = { -1, 0x7fffffffffffffff };
49 #endif
50 memcpy(&f, &x, 16);
51 return f;
54 static int fp3_convert_NaN(long double *f, int sgn, u128_t mnt)
56 u128_t x = { mnt.x0,
57 mnt.x1 | 0x7fff800000000000 | (uint64_t)sgn << 63 };
58 memcpy(f, &x, 16);
59 return 1;
62 static int fp3_detect_NaNs(long double *f,
63 int a_sgn, int a_exp, u128_t a,
64 int b_sgn, int b_exp, u128_t b)
66 // Detect signalling NaNs:
67 if (a_exp == 32767 && (a.x0 | a.x1 << 16) && !(a.x1 >> 47 & 1))
68 return fp3_convert_NaN(f, a_sgn, a);
69 if (b_exp == 32767 && (b.x0 | b.x1 << 16) && !(b.x1 >> 47 & 1))
70 return fp3_convert_NaN(f, b_sgn, b);
72 // Detect quiet NaNs:
73 if (a_exp == 32767 && (a.x0 | a.x1 << 16))
74 return fp3_convert_NaN(f, a_sgn, a);
75 if (b_exp == 32767 && (b.x0 | b.x1 << 16))
76 return fp3_convert_NaN(f, b_sgn, b);
78 return 0;
81 static void f3_unpack(int *sgn, int32_t *exp, u128_t *mnt, long double f)
83 u128_t x;
84 memcpy(&x, &f, 16);
85 *sgn = x.x1 >> 63;
86 *exp = x.x1 >> 48 & 32767;
87 x.x1 = x.x1 << 16 >> 16;
88 if (*exp)
89 x.x1 |= (uint64_t)1 << 48;
90 else
91 *exp = 1;
92 *mnt = x;
95 static u128_t f3_normalise(int32_t *exp, u128_t mnt)
97 int sh;
98 if (!(mnt.x0 | mnt.x1))
99 return mnt;
100 if (!mnt.x1) {
101 mnt.x1 = mnt.x0;
102 mnt.x0 = 0;
103 *exp -= 64;
105 for (sh = 32; sh; sh >>= 1) {
106 if (!(mnt.x1 >> (64 - sh))) {
107 mnt.x1 = mnt.x1 << sh | mnt.x0 >> (64 - sh);
108 mnt.x0 = mnt.x0 << sh;
109 *exp -= sh;
112 return mnt;
115 static u128_t f3_sticky_shift(int32_t sh, u128_t x)
117 if (sh >= 128) {
118 x.x0 = !!(x.x0 | x.x1);
119 x.x1 = 0;
120 return x;
122 if (sh >= 64) {
123 x.x0 = x.x1 | !!x.x0;
124 x.x1 = 0;
125 sh -= 64;
127 if (sh > 0) {
128 x.x0 = x.x0 >> sh | x.x1 << (64 - sh) | !!(x.x0 << (64 - sh));
129 x.x1 = x.x1 >> sh;
131 return x;
134 static long double f3_round(int sgn, int32_t exp, u128_t x)
136 long double f;
137 int error;
139 if (exp > 0) {
140 x = f3_sticky_shift(13, x);
142 else {
143 x = f3_sticky_shift(14 - exp, x);
144 exp = 0;
147 error = x.x0 & 3;
148 x.x0 = x.x0 >> 2 | x.x1 << 62;
149 x.x1 = x.x1 >> 2;
151 if (error == 3 || ((error == 2) & (x.x0 & 1))) {
152 if (!++x.x0) {
153 ++x.x1;
154 if (x.x1 == (uint64_t)1 << 48)
155 exp = 1;
156 else if (x.x1 == (uint64_t)1 << 49) {
157 ++exp;
158 x.x0 = x.x0 >> 1 | x.x1 << 63;
159 x.x1 = x.x1 >> 1;
164 if (exp >= 32767)
165 return f3_infinity(sgn);
167 x.x1 = x.x1 << 16 >> 16 | (uint64_t)exp << 48 | (uint64_t)sgn << 63;
168 memcpy(&f, &x, 16);
169 return f;
172 static long double f3_add(long double fa, long double fb, int neg)
174 u128_t a, b, x;
175 int32_t a_exp, b_exp, x_exp;
176 int a_sgn, b_sgn, x_sgn;
177 long double fx;
179 f3_unpack(&a_sgn, &a_exp, &a, fa);
180 f3_unpack(&b_sgn, &b_exp, &b, fb);
182 if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
183 return fx;
185 b_sgn ^= neg;
187 // Handle infinities and zeroes:
188 if (a_exp == 32767 && b_exp == 32767 && a_sgn != b_sgn)
189 return f3_NaN();
190 if (a_exp == 32767)
191 return f3_infinity(a_sgn);
192 if (b_exp == 32767)
193 return f3_infinity(b_sgn);
194 if (!(a.x0 | a.x1 | b.x0 | b.x1))
195 return f3_zero(a_sgn & b_sgn);
197 a.x1 = a.x1 << 3 | a.x0 >> 61;
198 a.x0 = a.x0 << 3;
199 b.x1 = b.x1 << 3 | b.x0 >> 61;
200 b.x0 = b.x0 << 3;
202 if (a_exp <= b_exp) {
203 a = f3_sticky_shift(b_exp - a_exp, a);
204 a_exp = b_exp;
206 else {
207 b = f3_sticky_shift(a_exp - b_exp, b);
208 b_exp = a_exp;
211 x_sgn = a_sgn;
212 x_exp = a_exp;
213 if (a_sgn == b_sgn) {
214 x.x0 = a.x0 + b.x0;
215 x.x1 = a.x1 + b.x1 + (x.x0 < a.x0);
217 else {
218 x.x0 = a.x0 - b.x0;
219 x.x1 = a.x1 - b.x1 - (x.x0 > a.x0);
220 if (x.x1 >> 63) {
221 x_sgn ^= 1;
222 x.x0 = -x.x0;
223 x.x1 = -x.x1 - !!x.x0;
227 if (!(x.x0 | x.x1))
228 return f3_zero(0);
230 x = f3_normalise(&x_exp, x);
232 return f3_round(x_sgn, x_exp + 12, x);
// Soft-float long double addition: returns a + b.
long double __addtf3(long double a, long double b)
{
    const int negate_b = 0;

    return f3_add(a, b, negate_b);
}
// Soft-float long double subtraction: returns a - b, implemented as an
// addition with the sign of b inverted.
long double __subtf3(long double a, long double b)
{
    const int negate_b = 1;

    return f3_add(a, b, negate_b);
}
245 long double __multf3(long double fa, long double fb)
247 u128_t a, b, x;
248 int32_t a_exp, b_exp, x_exp;
249 int a_sgn, b_sgn, x_sgn;
250 long double fx;
252 f3_unpack(&a_sgn, &a_exp, &a, fa);
253 f3_unpack(&b_sgn, &b_exp, &b, fb);
255 if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
256 return fx;
258 // Handle infinities and zeroes:
259 if ((a_exp == 32767 && !(b.x0 | b.x1)) ||
260 (b_exp == 32767 && !(a.x0 | a.x1)))
261 return f3_NaN();
262 if (a_exp == 32767 || b_exp == 32767)
263 return f3_infinity(a_sgn ^ b_sgn);
264 if (!(a.x0 | a.x1) || !(b.x0 | b.x1))
265 return f3_zero(a_sgn ^ b_sgn);
267 a = f3_normalise(&a_exp, a);
268 b = f3_normalise(&b_exp, b);
270 x_sgn = a_sgn ^ b_sgn;
271 x_exp = a_exp + b_exp - 16352;
274 // Convert to base (1 << 30), discarding bottom 6 bits, which are zero,
275 // so there are (32, 30, 30, 30) bits in (a3, a2, a1, a0):
276 uint64_t a0 = a.x0 << 28 >> 34;
277 uint64_t b0 = b.x0 << 28 >> 34;
278 uint64_t a1 = a.x0 >> 36 | a.x1 << 62 >> 34;
279 uint64_t b1 = b.x0 >> 36 | b.x1 << 62 >> 34;
280 uint64_t a2 = a.x1 << 32 >> 34;
281 uint64_t b2 = b.x1 << 32 >> 34;
282 uint64_t a3 = a.x1 >> 32;
283 uint64_t b3 = b.x1 >> 32;
284 // Use 16 small multiplications and additions that do not overflow:
285 uint64_t x0 = a0 * b0;
286 uint64_t x1 = (x0 >> 30) + a0 * b1 + a1 * b0;
287 uint64_t x2 = (x1 >> 30) + a0 * b2 + a1 * b1 + a2 * b0;
288 uint64_t x3 = (x2 >> 30) + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
289 uint64_t x4 = (x3 >> 30) + a1 * b3 + a2 * b2 + a3 * b1;
290 uint64_t x5 = (x4 >> 30) + a2 * b3 + a3 * b2;
291 uint64_t x6 = (x5 >> 30) + a3 * b3;
292 // We now have (64, 30, 30, ...) bits in (x6, x5, x4, ...).
293 // Take the top 128 bits, setting bottom bit if any lower bits were set:
294 uint64_t y0 = (x5 << 34 | x4 << 34 >> 30 | x3 << 34 >> 60 |
295 !!(x3 << 38 | (x2 | x1 | x0) << 34));
296 uint64_t y1 = x6;
297 // Top bit may be zero. Renormalise:
298 if (!(y1 >> 63)) {
299 y1 = y1 << 1 | y0 >> 63;
300 y0 = y0 << 1;
301 --x_exp;
303 x.x0 = y0;
304 x.x1 = y1;
307 return f3_round(x_sgn, x_exp, x);
310 long double __divtf3(long double fa, long double fb)
312 u128_t a, b, x;
313 int32_t a_exp, b_exp, x_exp;
314 int a_sgn, b_sgn, x_sgn, i;
315 long double fx;
317 f3_unpack(&a_sgn, &a_exp, &a, fa);
318 f3_unpack(&b_sgn, &b_exp, &b, fb);
320 if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
321 return fx;
323 // Handle infinities and zeroes:
324 if ((a_exp == 32767 && b_exp == 32767) ||
325 (!(a.x0 | a.x1) && !(b.x0 | b.x1)))
326 return f3_NaN();
327 if (a_exp == 32767 || !(b.x0 | b.x1))
328 return f3_infinity(a_sgn ^ b_sgn);
329 if (!(a.x0 | a.x1) || b_exp == 32767)
330 return f3_zero(a_sgn ^ b_sgn);
332 a = f3_normalise(&a_exp, a);
333 b = f3_normalise(&b_exp, b);
335 x_sgn = a_sgn ^ b_sgn;
336 x_exp = a_exp - b_exp + 16395;
338 a.x0 = a.x0 >> 1 | a.x1 << 63;
339 a.x1 = a.x1 >> 1;
340 b.x0 = b.x0 >> 1 | b.x1 << 63;
341 b.x1 = b.x1 >> 1;
342 x.x0 = 0;
343 x.x1 = 0;
344 for (i = 0; i < 116; i++) {
345 x.x1 = x.x1 << 1 | x.x0 >> 63;
346 x.x0 = x.x0 << 1;
347 if (a.x1 > b.x1 || (a.x1 == b.x1 && a.x0 >= b.x0)) {
348 a.x1 = a.x1 - b.x1 - (a.x0 < b.x0);
349 a.x0 = a.x0 - b.x0;
350 x.x0 |= 1;
352 a.x1 = a.x1 << 1 | a.x0 >> 63;
353 a.x0 = a.x0 << 1;
355 x.x0 |= !!(a.x0 | a.x1);
357 x = f3_normalise(&x_exp, x);
359 return f3_round(x_sgn, x_exp, x);
// Widen a single-precision float to a 128-bit long double.  The
// conversion is exact for every input:
//   - zeroes keep their sign;
//   - infinities stay infinities; NaNs keep their payload and are quieted;
//   - normal numbers are re-biased (16383 - 127 = 16256);
//   - subnormal floats are normalised first, because every one of them is
//     a normal number in the wider format.  (Previously they were encoded
//     as if they had an implicit leading one, giving a wrong value.)
long double __extendsftf2(float f)
{
    // Result image, low word first -- the same layout as u128_t.
    uint64_t words[2];
    long double fx;
    uint32_t a;
    uint64_t sign, frac;
    int32_t exp;

    memcpy(&a, &f, 4);
    sign = (uint64_t)(a >> 31) << 63;
    exp = a >> 23 & 255;
    frac = a & 0x007fffff;

    // The 23 fraction bits always fit in the high word (bits 47..25).
    words[0] = 0;

    if (exp == 255) {
        // Infinity or NaN; set the top fraction bit for any NaN.
        words[1] = (sign | 0x7fff000000000000 | frac << 25 |
                    (uint64_t)(frac != 0) << 47);
    } else if (exp == 0 && frac == 0) {
        words[1] = sign;
    } else {
        if (exp == 0) {
            // Subnormal: the value is frac * 2^-149.  Shift the leading
            // one up to the implicit-bit position, lowering the exponent
            // by one per step (a subnormal's effective exponent is 1).
            exp = 1;
            while (!(frac & 0x00800000)) {
                frac <<= 1;
                exp--;
            }
            frac &= 0x007fffff;
        }
        words[1] = sign | (uint64_t)(exp + 16256) << 48 | frac << 25;
    }

    memcpy(&fx, words, 16);
    return fx;
}
// Widen a double to a 128-bit long double.  The conversion is exact for
// every input:
//   - zeroes keep their sign;
//   - infinities stay infinities; NaNs keep their payload and are quieted;
//   - normal numbers are re-biased (16383 - 1023 = 15360);
//   - subnormal doubles are normalised first, because every one of them
//     is a normal number in the wider format.  (Previously they were
//     encoded as if they had an implicit leading one, giving a wrong
//     value.)
long double __extenddftf2(double f)
{
    // Result image, low word first -- the same layout as u128_t.
    uint64_t words[2];
    long double fx;
    uint64_t a, sign, frac;
    int32_t exp;

    memcpy(&a, &f, 8);
    sign = a >> 63 << 63;
    exp = a >> 52 & 2047;
    frac = a & 0x000fffffffffffff;

    if (exp == 2047) {
        // Infinity or NaN; set the top fraction bit for any NaN.
        words[0] = frac << 60;
        words[1] = (sign | 0x7fff000000000000 | frac >> 4 |
                    (uint64_t)(frac != 0) << 47);
    } else if (exp == 0 && frac == 0) {
        words[0] = 0;
        words[1] = sign;
    } else {
        if (exp == 0) {
            // Subnormal: the value is frac * 2^-1074.  Shift the leading
            // one up to the implicit-bit position, lowering the exponent
            // by one per step (a subnormal's effective exponent is 1).
            exp = 1;
            while (!(frac & 0x0010000000000000)) {
                frac <<= 1;
                exp--;
            }
            frac &= 0x000fffffffffffff;
        }
        // 52 fraction bits: the top 48 go in the high word, the low 4
        // at the top of the low word.
        words[0] = frac << 60;
        words[1] = sign | (uint64_t)(exp + 15360) << 48 | frac >> 4;
    }

    memcpy(&fx, words, 16);
    return fx;
}
401 float __trunctfsf2(long double f)
403 u128_t mnt;
404 int32_t exp;
405 int sgn;
406 uint32_t x;
407 float fx;
409 f3_unpack(&sgn, &exp, &mnt, f);
411 if (exp == 32767 && (mnt.x0 | mnt.x1 << 16))
412 x = 0x7fc00000 | (uint32_t)sgn << 31 | (mnt.x1 >> 25 & 0x007fffff);
413 else if (exp > 16510)
414 x = 0x7f800000 | (uint32_t)sgn << 31;
415 else if (exp < 16233)
416 x = (uint32_t)sgn << 31;
417 else {
418 exp -= 16257;
419 x = mnt.x1 >> 23 | !!(mnt.x0 | mnt.x1 << 41);
420 if (exp < 0) {
421 x = x >> -exp | !!(x << (32 + exp));
422 exp = 0;
424 if ((x & 3) == 3 || (x & 7) == 6)
425 x += 4;
426 x = ((x >> 2) + (exp << 23)) | (uint32_t)sgn << 31;
428 memcpy(&fx, &x, 4);
429 return fx;
432 double __trunctfdf2(long double f)
434 u128_t mnt;
435 int32_t exp;
436 int sgn;
437 uint64_t x;
438 double fx;
440 f3_unpack(&sgn, &exp, &mnt, f);
442 if (exp == 32767 && (mnt.x0 | mnt.x1 << 16))
443 x = (0x7ff8000000000000 | (uint64_t)sgn << 63 |
444 mnt.x1 << 16 >> 12 | mnt.x0 >> 60);
445 else if (exp > 17406)
446 x = 0x7ff0000000000000 | (uint64_t)sgn << 63;
447 else if (exp < 15308)
448 x = (uint64_t)sgn << 63;
449 else {
450 exp -= 15361;
451 x = mnt.x1 << 6 | mnt.x0 >> 58 | !!(mnt.x0 << 6);
452 if (exp < 0) {
453 x = x >> -exp | !!(x << (64 + exp));
454 exp = 0;
456 if ((x & 3) == 3 || (x & 7) == 6)
457 x += 4;
458 x = ((x >> 2) + ((uint64_t)exp << 52)) | (uint64_t)sgn << 63;
460 memcpy(&fx, &x, 8);
461 return fx;
464 int32_t __fixtfsi(long double fa)
466 u128_t a;
467 int32_t a_exp;
468 int a_sgn;
469 int32_t x;
470 f3_unpack(&a_sgn, &a_exp, &a, fa);
471 if (a_exp < 16369)
472 return 0;
473 if (a_exp > 16413)
474 return a_sgn ? -0x80000000 : 0x7fffffff;
475 x = a.x1 >> (16431 - a_exp);
476 return a_sgn ? -x : x;
479 int64_t __fixtfdi(long double fa)
481 u128_t a;
482 int32_t a_exp;
483 int a_sgn;
484 int64_t x;
485 f3_unpack(&a_sgn, &a_exp, &a, fa);
486 if (a_exp < 16383)
487 return 0;
488 if (a_exp > 16445)
489 return a_sgn ? -0x8000000000000000 : 0x7fffffffffffffff;
490 x = (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp);
491 return a_sgn ? -x : x;
494 uint32_t __fixunstfsi(long double fa)
496 u128_t a;
497 int32_t a_exp;
498 int a_sgn;
499 f3_unpack(&a_sgn, &a_exp, &a, fa);
500 if (a_sgn || a_exp < 16369)
501 return 0;
502 if (a_exp > 16414)
503 return -1;
504 return a.x1 >> (16431 - a_exp);
507 uint64_t __fixunstfdi(long double fa)
509 u128_t a;
510 int32_t a_exp;
511 int a_sgn;
512 f3_unpack(&a_sgn, &a_exp, &a, fa);
513 if (a_sgn || a_exp < 16383)
514 return 0;
515 if (a_exp > 16446)
516 return -1;
517 return (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp);
520 long double __floatsitf(int32_t a)
522 int sgn = 0;
523 int exp = 16414;
524 uint32_t mnt = a;
525 u128_t x = { 0, 0 };
526 long double f;
527 int i;
528 if (a) {
529 if (a < 0) {
530 sgn = 1;
531 mnt = -mnt;
533 for (i = 16; i; i >>= 1)
534 if (!(mnt >> (32 - i))) {
535 mnt <<= i;
536 exp -= i;
538 x.x1 = ((uint64_t)sgn << 63 | (uint64_t)exp << 48 |
539 (uint64_t)(mnt << 1) << 16);
541 memcpy(&f, &x, 16);
542 return f;
545 long double __floatditf(int64_t a)
547 int sgn = 0;
548 int exp = 16446;
549 uint64_t mnt = a;
550 u128_t x = { 0, 0 };
551 long double f;
552 int i;
553 if (a) {
554 if (a < 0) {
555 sgn = 1;
556 mnt = -mnt;
558 for (i = 32; i; i >>= 1)
559 if (!(mnt >> (64 - i))) {
560 mnt <<= i;
561 exp -= i;
563 x.x0 = mnt << 49;
564 x.x1 = (uint64_t)sgn << 63 | (uint64_t)exp << 48 | mnt << 1 >> 16;
566 memcpy(&f, &x, 16);
567 return f;
570 long double __floatunsitf(uint32_t a)
572 int exp = 16414;
573 uint32_t mnt = a;
574 u128_t x = { 0, 0 };
575 long double f;
576 int i;
577 if (a) {
578 for (i = 16; i; i >>= 1)
579 if (!(mnt >> (32 - i))) {
580 mnt <<= i;
581 exp -= i;
583 x.x1 = (uint64_t)exp << 48 | (uint64_t)(mnt << 1) << 16;
585 memcpy(&f, &x, 16);
586 return f;
589 long double __floatunditf(uint64_t a)
591 int exp = 16446;
592 uint64_t mnt = a;
593 u128_t x = { 0, 0 };
594 long double f;
595 int i;
596 if (a) {
597 for (i = 32; i; i >>= 1)
598 if (!(mnt >> (64 - i))) {
599 mnt <<= i;
600 exp -= i;
602 x.x0 = mnt << 49;
603 x.x1 = (uint64_t)exp << 48 | mnt << 1 >> 16;
605 memcpy(&f, &x, 16);
606 return f;
609 static int f3_cmp(long double fa, long double fb)
611 u128_t a, b;
612 memcpy(&a, &fa, 16);
613 memcpy(&b, &fb, 16);
614 return (!(a.x0 | a.x1 << 1 | b.x0 | b.x1 << 1) ? 0 :
615 ((a.x1 << 1 >> 49 == 0x7fff && (a.x0 | a.x1 << 16)) ||
616 (b.x1 << 1 >> 49 == 0x7fff && (b.x0 | b.x1 << 16))) ? 2 :
617 a.x1 >> 63 != b.x1 >> 63 ? (int)(b.x1 >> 63) - (int)(a.x1 >> 63) :
618 a.x1 < b.x1 ? (int)(a.x1 >> 63 << 1) - 1 :
619 a.x1 > b.x1 ? 1 - (int)(a.x1 >> 63 << 1) :
620 a.x0 < b.x0 ? (int)(a.x1 >> 63 << 1) - 1 :
621 b.x0 < a.x0 ? 1 - (int)(a.x1 >> 63 << 1) : 0);
// Equality test: returns 0 when a == b and 1 otherwise, including when
// the operands are unordered (f3_cmp reports 2 for NaNs).
int __eqtf2(long double a, long double b)
{
    return f3_cmp(a, b) != 0;
}
// Inequality test: returns 1 when a != b or the operands are unordered
// (f3_cmp reports 2 for NaNs), and 0 when they are equal.
int __netf2(long double a, long double b)
{
    return f3_cmp(a, b) != 0;
}
// Less-than helper: the result is negative exactly when a < b.
// Unordered operands yield 2, which is not negative.
int __lttf2(long double a, long double b)
{
    int order = f3_cmp(a, b);

    return order;
}
// Less-or-equal helper: the result is <= 0 exactly when a <= b.
// Unordered operands yield 2, which is positive.
int __letf2(long double a, long double b)
{
    int order = f3_cmp(a, b);

    return order;
}
// Greater-than helper: the result is positive exactly when a > b.
// The operands are swapped and the sign flipped so that unordered
// operands yield -2, which is not positive.
int __gttf2(long double a, long double b)
{
    int swapped = f3_cmp(b, a);

    return -swapped;
}
// Greater-or-equal helper: the result is >= 0 exactly when a >= b.
// The operands are swapped and the sign flipped so that unordered
// operands yield -2, which is negative.
int __getf2(long double a, long double b)
{
    int swapped = f3_cmp(b, a);

    return -swapped;
}