2 * Test 128-bit floating-point arithmetic on arm64:
3 * build with two different compilers and compare the output.
5 * Copyright (c) 2015 Edmund Grimley Evans
7 * Copying and distribution of this file, with or without modification,
8 * are permitted in any medium without royalty provided the copyright
9 * notice and this notice are preserved. This file is offered as-is,
10 * without any warranty.
18 #define check(x) ((x) ? (void)0 : check_fail(#x, __FILE__, __LINE__))
20 void check_fail(const char *assertion
, const char *file
, unsigned int line
)
22 printf("%s:%d: Check (%s) failed.", file
, line
, assertion
);
27 unsigned long long x0
, x1
;
30 float copy_fi(uint32_t x
)
37 double copy_di(uint64_t x
)
44 long double copy_ldi(u128_t x
)
51 uint32_t copy_if(float f
)
58 uint64_t copy_id(double f
)
65 u128_t
copy_ild(long double f
)
72 long double make(int sgn
, int exp
, uint64_t high
, uint64_t low
)
75 (0x0000ffffffffffff & high
) |
76 (0x7fff000000000000 & (uint64_t)exp
<< 48) |
77 (0x8000000000000000 & (uint64_t)sgn
<< 63) };
81 void cmp(long double a
, long double b
)
83 u128_t ax
= copy_ild(a
);
84 u128_t bx
= copy_ild(b
);
92 check(eq
== 0 || eq
== 1);
93 check(lt
== 0 || lt
== 1);
94 check(gt
== 0 || gt
== 1);
95 check(ne
== !eq
&& le
== (lt
| eq
) && ge
== (gt
| eq
));
96 check(eq
+ lt
+ gt
< 2);
98 printf("cmp %016llx%016llx %016llx%016llx %d %d %d\n",
99 ax
.x1
, ax
.x0
, bx
.x1
, bx
.x0
, lt
, eq
, gt
);
106 for (i
= 0; i
< 2; i
++)
107 for (j
= 0; j
< 2; j
++)
108 cmp(make(i
, 0, 0, 0), make(j
, 0, 0, 0));
110 for (i
= 0; i
< 2; i
++) {
111 for (j
= 0; j
< 64; j
++) {
112 long double f1
= make(i
, 32767, (uint64_t)1 << j
, 0);
113 long double f2
= make(i
, 32767, 0, (uint64_t)1 << j
);
121 for (i
= 0; i
< 6; i
++)
122 for (j
= 0; j
< 6; j
++)
123 cmp(make(i
& 1, i
>> 1, 0, 0),
124 make(j
& 1, j
>> 1, 0, 0));
126 for (i
= 0; i
< 2; i
++) {
127 for (j
= 0; j
< 2; j
++) {
129 for (a
= 0; a
< 2; a
++) {
130 for (b
= 0; b
< 2; b
++) {
131 cmp(make(i
, j
, a
, b
), make(i
, j
, 0, 0));
132 cmp(make(i
, j
, 0, 0), make(i
, j
, a
, b
));
139 void xop(const char *name
, long double a
, long double b
, long double c
)
141 u128_t ax
= copy_ild(a
);
142 u128_t bx
= copy_ild(b
);
143 u128_t cx
= copy_ild(c
);
144 printf("%s %016llx%016llx %016llx%016llx %016llx%016llx\n",
145 name
, ax
.x1
, ax
.x0
, bx
.x1
, bx
.x0
, cx
.x1
, cx
.x0
);
148 void fadd(long double a
, long double b
)
150 xop("add", a
, b
, a
+ b
);
153 void fsub(long double a
, long double b
)
155 xop("sub", a
, b
, a
- b
);
158 void fmul(long double a
, long double b
)
160 xop("mul", a
, b
, a
* b
);
163 void fdiv(long double a
, long double b
)
165 xop("div", a
, b
, a
/ b
);
174 x
[n
++] = make(0, 32000, 0x95132b76effc, 0xd79035214b4f8d53);
175 x
[n
++] = make(1, 32001, 0xbe71d7a51587, 0x30601c6815d6c3ac);
176 x
[n
++] = make(0, 32767, 0, 1);
177 x
[n
++] = make(0, 32767, (uint64_t)1 << 46, 0);
178 x
[n
++] = make(1, 32767, (uint64_t)1 << 47, 0);
179 x
[n
++] = make(1, 32767, 0x7596c7099ad5, 0xe25fed2c58f73fc9);
180 x
[n
++] = make(0, 32767, 0x835d143360f9, 0x5e315efb35630666);
181 check(n
== sizeof(x
) / sizeof(*x
));
182 for (i
= 0; i
< n
; i
++) {
183 for (j
= 0; j
< n
; j
++) {
192 // Check infinities and zeroes:
196 x
[n
++] = make(1, 32000, 0x62acda85f700, 0x47b6c9f35edc4044);
197 x
[n
++] = make(0, 32001, 0x94b7abf55af7, 0x9f425fe354428e19);
198 x
[n
++] = make(0, 32767, 0, 0);
199 x
[n
++] = make(1, 32767, 0, 0);
200 x
[n
++] = make(0, 0, 0, 0);
201 x
[n
++] = make(1, 0, 0, 0);
202 check(n
== sizeof(x
) / sizeof(*x
));
203 for (i
= 0; i
< n
; i
++) {
204 for (j
= 0; j
< n
; j
++) {
216 // Check shifting and add/sub:
219 for (i
= -130; i
<= 130; i
++) {
220 int s1
= (uint32_t)i
% 3 < 1;
221 int s2
= (uint32_t)i
% 5 < 2;
222 fadd(make(s1
, 16384 , 0x502c065e4f71a65d, 0xd2f9bdb031f4f031),
223 make(s2
, 16384 + i
, 0xae267395a9bc1033, 0xb56b5800da1ba448));
227 // Check normalisation:
229 uint64_t a0
= 0xc6bab0a6afbef5ed;
230 uint64_t a1
= 0x4f84136c4a2e9b52;
231 int ee
[] = { 0, 1, 10000 };
233 for (e
= 0; e
< sizeof(ee
) / sizeof(*ee
); e
++) {
235 fsub(make(0, exp
, a1
, a0
), make(0, 0, 0, 0));
236 for (i
= 63; i
>= 0; i
--)
237 fsub(make(0, exp
, a1
| (uint64_t)1 << i
>> 1, a0
),
238 make(0, exp
, a1
>> i
<< i
, 0));
239 for (i
= 63; i
>=0; i
--)
240 fsub(make(0, exp
, a1
, a0
| (uint64_t)1 << i
>> 1),
241 make(0, exp
, a1
, a0
>> i
<< i
));
245 // Carry/overflow from rounding:
247 fadd(make(0, 114, -1, -1), make(0, 1, 0, 0));
248 fadd(make(0, 32766, -1, -1), make(0, 32653, 0, 0));
249 fsub(make(1, 32766, -1, -1), make(0, 32653, 0, 0));
258 long double max
= make(0, 32766, -1, -1);
259 long double min
= make(0, 0, 0, 1);
265 for (i
= 117; i
> 0; i
--)
266 fmul(make(0, 16268, 0x643dcea76edc, 0xe0877a598403627a),
267 make(i
& 1, i
, 0, 0));
269 fmul(make(0, 16383, -1, -3), make(0, 16383, 0, 1));
270 // Round to next exponent:
271 fmul(make(0, 16383, -1, -2), make(0, 16383, 0, 1));
272 // Round from subnormal to normal:
273 fmul(make(0, 1, -1, -1), make(0, 16382, 0, 0));
275 for (i
= 0; i
< 2; i
++)
276 for (j
= 0; j
< 112; j
++)
277 fmul(make(0, 16383, (uint64_t)1 << i
, 0),
279 j
< 64 ? 0 : (uint64_t)1 << (j
- 64),
280 j
< 64 ? (uint64_t)1 << j
: 0));
288 long double max
= make(0, 32766, -1, -1);
289 long double min
= make(0, 0, 0, 1);
296 for (i
= 0; i
< 64; i
++)
297 fdiv(make(0, 16383, -1, -1), make(0, 16383, -1, -(uint64_t)1 << i
));
298 for (i
= 0; i
< 48; i
++)
299 fdiv(make(0, 16383, -1, -1), make(0, 16383, -(uint64_t)1 << i
, 0));
302 void cvtlsw(int32_t a
)
305 u128_t x
= copy_ild(f
);
306 printf("cvtlsw %08lx %016llx%016llx\n", (long)(uint32_t)a
, x
.x1
, x
.x0
);
309 void cvtlsx(int64_t a
)
312 u128_t x
= copy_ild(f
);
313 printf("cvtlsx %016llx %016llx%016llx\n",
314 (long long)(uint64_t)a
, x
.x1
, x
.x0
);
317 void cvtluw(uint32_t a
)
320 u128_t x
= copy_ild(f
);
321 printf("cvtluw %08lx %016llx%016llx\n", (long)a
, x
.x1
, x
.x0
);
324 void cvtlux(uint64_t a
)
327 u128_t x
= copy_ild(f
);
328 printf("cvtlux %016llx %016llx%016llx\n", (long long)a
, x
.x1
, x
.x0
);
331 void cvtil(long double a
)
333 u128_t x
= copy_ild(a
);
338 printf("cvtswl %016llx%016llx %08lx\n",
339 x
.x1
, x
.x0
, (long)(uint32_t)b1
);
340 printf("cvtsxl %016llx%016llx %016llx\n",
341 x
.x1
, x
.x0
, (long long)(uint64_t)b2
);
342 printf("cvtuwl %016llx%016llx %08lx\n",
343 x
.x1
, x
.x0
, (long)b3
);
344 printf("cvtuxl %016llx%016llx %016llx\n",
345 x
.x1
, x
.x0
, (long long)b4
);
350 uint32_t ax
= copy_if(a
);
352 u128_t bx
= copy_ild(b
);
353 printf("cvtlf %08lx %016llx%016llx\n", (long)ax
, bx
.x1
, bx
.x0
);
358 uint64_t ax
= copy_id(a
);
360 u128_t bx
= copy_ild(b
);
361 printf("cvtld %016llx %016llx%016llx\n", (long long)ax
, bx
.x1
, bx
.x0
);
364 void cvtfl(long double a
)
366 u128_t ax
= copy_ild(a
);
368 uint32_t bx
= copy_if(b
);
369 printf("cvtfl %016llx%016llx %08lx\n", ax
.x1
, ax
.x0
, (long)bx
);
372 void cvtdl(long double a
)
374 u128_t ax
= copy_ild(a
);
376 uint64_t bx
= copy_id(b
);
377 printf("cvtdl %016llx%016llx %016llx\n", ax
.x1
, ax
.x0
, (long long)bx
);
385 uint32_t x
= 0xad040c5b;
387 for (i
= 0; i
< 31; i
++)
388 cvtlsw(x
>> (31 - i
));
389 for (i
= 0; i
< 31; i
++)
390 cvtlsw(-(x
>> (31 - i
)));
394 uint64_t x
= 0xb630a248cad9afd2;
396 for (i
= 0; i
< 63; i
++)
397 cvtlsx(x
>> (63 - i
));
398 for (i
= 0; i
< 63; i
++)
399 cvtlsx(-(x
>> (63 - i
)));
400 cvtlsx(0x8000000000000000);
403 uint32_t x
= 0xad040c5b;
405 for (i
= 0; i
< 32; i
++)
406 cvtluw(x
>> (31 - i
));
409 uint64_t x
= 0xb630a248cad9afd2;
411 for (i
= 0; i
< 64; i
++)
412 cvtlux(x
>> (63 - i
));
415 for (i
= 0; i
< 2; i
++) {
416 cvtil(make(i
, 32767, 0, 1));
417 cvtil(make(i
, 32767, (uint64_t)1 << 47, 0));
418 cvtil(make(i
, 32767, 123, 456));
419 cvtil(make(i
, 32767, 0, 0));
420 cvtil(make(i
, 16382, -1, -1));
421 cvtil(make(i
, 16383, -1, -1));
422 cvtil(make(i
, 16384, 0x7fffffffffff, -1));
423 cvtil(make(i
, 16384, 0x800000000000, 0));
424 for (j
= 0; j
< 68; j
++)
425 cvtil(make(i
, 16381 + j
, 0xd4822c0a10ec, 0x1fe2f8b2669f5c9d));
428 cvtlf(copy_fi(0x00000000));
429 cvtlf(copy_fi(0x456789ab));
430 cvtlf(copy_fi(0x7f800000));
431 cvtlf(copy_fi(0x7f923456));
432 cvtlf(copy_fi(0x7fdbcdef));
433 cvtlf(copy_fi(0x80000000));
434 cvtlf(copy_fi(0xabcdef12));
435 cvtlf(copy_fi(0xff800000));
436 cvtlf(copy_fi(0xff923456));
437 cvtlf(copy_fi(0xffdbcdef));
439 cvtld(copy_di(0x0000000000000000));
440 cvtld(copy_di(0x456789abcdef0123));
441 cvtld(copy_di(0x7ff0000000000000));
442 cvtld(copy_di(0x7ff123456789abcd));
443 cvtld(copy_di(0x7ffabcdef1234567));
444 cvtld(copy_di(0x8000000000000000));
445 cvtld(copy_di(0xcdef123456789abc));
446 cvtld(copy_di(0xfff0000000000000));
447 cvtld(copy_di(0xfff123456789abcd));
448 cvtld(copy_di(0xfffabcdef1234567));
450 for (i
= 0; i
< 2; i
++) { \
451 cvtfl(make(i
, 0, 0, 0));
452 cvtfl(make(i
, 16232, -1, -1));
453 cvtfl(make(i
, 16233, 0, 0));
454 cvtfl(make(i
, 16233, 0, 1));
455 cvtfl(make(i
, 16383, 0xab0ffd000000, 0));
456 cvtfl(make(i
, 16383, 0xab0ffd000001, 0));
457 cvtfl(make(i
, 16383, 0xab0ffeffffff, 0));
458 cvtfl(make(i
, 16383, 0xab0fff000000, 0));
459 cvtfl(make(i
, 16383, 0xab0fff000001, 0));
460 cvtfl(make(i
, 16510, 0xfffffeffffff, -1));
461 cvtfl(make(i
, 16510, 0xffffff000000, 0));
462 cvtfl(make(i
, 16511, 0, 0));
463 cvtfl(make(i
, 32767, 0, 0));
464 cvtfl(make(i
, 32767, 0, 1));
465 cvtfl(make(i
, 32767, 0x4cbe01ac5f40, 0x75cee3c6afbb00b5));
466 cvtfl(make(i
, 32767, 0x800000000000, 1));
467 cvtfl(make(i
, 32767, 0xa11caaaf6a52, 0x696033e871eab099));
470 for (i
= 0; i
< 2; i
++) {
471 cvtdl(make(i
, 0, 0, 0));
472 cvtdl(make(i
, 15307, -1, -1));
473 cvtdl(make(i
, 15308, 0, 0));
474 cvtdl(make(i
, 15308, 0, 1));
475 cvtdl(make(i
, 16383, 0xabc123abc0ff, 0xe800000000000000));
476 cvtdl(make(i
, 16383, 0xabc123abc0ff, 0xe800000000000001));
477 cvtdl(make(i
, 16383, 0xabc123abc0ff, 0xf7ffffffffffffff));
478 cvtdl(make(i
, 16383, 0xabc123abc0ff, 0xf800000000000000));
479 cvtdl(make(i
, 16383, 0xabc123abc0ff, 0xf800000000000001));
480 cvtdl(make(i
, 17406, 0xffffffffffff, 0xf7ffffffffffffff));
481 cvtdl(make(i
, 17406, 0xffffffffffff, 0xf800000000000000));
482 cvtdl(make(i
, 17407, 0, 0));
483 cvtdl(make(i
, 32767, 0, 0));
484 cvtdl(make(i
, 32767, 0, 1));
485 cvtdl(make(i
, 32767, 0x4cbe01ac5f40, 0x75cee3c6afbb00b5));
486 cvtdl(make(i
, 32767, 0x800000000000, 1));
487 cvtdl(make(i
, 32767, 0xa11caaaf6a52, 0x696033e871eab099));
506 printf("This test program is intended for a little-endian architecture\n"
507 "with an IEEE-standard 128-bit long double.\n");