Remove use of mpa2.h
[glibc.git] / sysdeps / powerpc / powerpc64 / power4 / fpu / mpa.c
blobb1784f27c35511939343eaf5041f23aad950bc7c
2 /*
3 * IBM Accurate Mathematical Library
4 * written by International Business Machines Corp.
5 * Copyright (C) 2001-2013 Free Software Foundation, Inc.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, see <http://www.gnu.org/licenses/>.
20 /************************************************************************/
21 /* MODULE_NAME: mpa.c */
22 /* */
23 /* FUNCTIONS: */
24 /* mcr */
25 /* acr */
26 /* cpy */
27 /* norm */
28 /* denorm */
29 /* mp_dbl */
30 /* dbl_mp */
31 /* add_magnitudes */
32 /* sub_magnitudes */
33 /* add */
34 /* sub */
35 /* mul */
36 /* inv */
37 /* dvd */
38 /* */
39 /* Arithmetic functions for multiple precision numbers. */
40 /* Relative errors are bounded */
41 /************************************************************************/
44 #include "endian.h"
45 #include "mpa.h"
46 #include <sys/param.h>
48 const mp_no mpone = {1, {1.0, 1.0}};
49 const mp_no mptwo = {1, {1.0, 2.0}};
51 /* Compare mantissa of two multiple precision numbers regardless of the sign
52 and exponent of the numbers. */
53 static int
54 mcr (const mp_no *x, const mp_no *y, int p)
56 long i;
57 long p2 = p;
58 for (i = 1; i <= p2; i++)
60 if (X[i] == Y[i])
61 continue;
62 else if (X[i] > Y[i])
63 return 1;
64 else
65 return -1;
67 return 0;
70 /* Compare the absolute values of two multiple precision numbers. */
71 int
72 __acr (const mp_no *x, const mp_no *y, int p)
74 long i;
76 if (X[0] == ZERO)
78 if (Y[0] == ZERO)
79 i = 0;
80 else
81 i = -1;
83 else if (Y[0] == ZERO)
84 i = 1;
85 else
87 if (EX > EY)
88 i = 1;
89 else if (EX < EY)
90 i = -1;
91 else
92 i = mcr (x, y, p);
95 return i;
98 /* Copy multiple precision number X into Y. They could be the same
99 number. */
100 void
101 __cpy (const mp_no *x, mp_no *y, int p)
103 long i;
105 EY = EX;
106 for (i = 0; i <= p; i++)
107 Y[i] = X[i];
109 return;
112 /* Convert a multiple precision number *X into a double precision
113 number *Y, normalized case (|x| >= 2**(-1022))). */
114 static void
115 norm (const mp_no *x, double *y, int p)
117 #define R RADIXI
118 long i;
119 double a, c, u, v, z[5];
120 if (p < 5)
122 if (p == 1)
123 c = X[1];
124 else if (p == 2)
125 c = X[1] + R * X[2];
126 else if (p == 3)
127 c = X[1] + R * (X[2] + R * X[3]);
128 else if (p == 4)
129 c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
131 else
133 for (a = ONE, z[1] = X[1]; z[1] < TWO23;)
135 a *= TWO;
136 z[1] *= TWO;
139 for (i = 2; i < 5; i++)
141 z[i] = X[i] * a;
142 u = (z[i] + CUTTER) - CUTTER;
143 if (u > z[i])
144 u -= RADIX;
145 z[i] -= u;
146 z[i - 1] += u * RADIXI;
149 u = (z[3] + TWO71) - TWO71;
150 if (u > z[3])
151 u -= TWO19;
152 v = z[3] - u;
154 if (v == TWO18)
156 if (z[4] == ZERO)
158 for (i = 5; i <= p; i++)
160 if (X[i] == ZERO)
161 continue;
162 else
164 z[3] += ONE;
165 break;
169 else
170 z[3] += ONE;
173 c = (z[1] + R * (z[2] + R * z[3])) / a;
176 c *= X[0];
178 for (i = 1; i < EX; i++)
179 c *= RADIX;
180 for (i = 1; i > EX; i--)
181 c *= RADIXI;
183 *y = c;
184 return;
185 #undef R
188 /* Convert a multiple precision number *X into a double precision
189 number *Y, Denormal case (|x| < 2**(-1022))). */
190 static void
191 denorm (const mp_no *x, double *y, int p)
193 long i, k;
194 long p2 = p;
195 double c, u, z[5];
197 #define R RADIXI
198 if (EX < -44 || (EX == -44 && X[1] < TWO5))
200 *y = ZERO;
201 return;
204 if (p2 == 1)
206 if (EX == -42)
208 z[1] = X[1] + TWO10;
209 z[2] = ZERO;
210 z[3] = ZERO;
211 k = 3;
213 else if (EX == -43)
215 z[1] = TWO10;
216 z[2] = X[1];
217 z[3] = ZERO;
218 k = 2;
220 else
222 z[1] = TWO10;
223 z[2] = ZERO;
224 z[3] = X[1];
225 k = 1;
228 else if (p2 == 2)
230 if (EX == -42)
232 z[1] = X[1] + TWO10;
233 z[2] = X[2];
234 z[3] = ZERO;
235 k = 3;
237 else if (EX == -43)
239 z[1] = TWO10;
240 z[2] = X[1];
241 z[3] = X[2];
242 k = 2;
244 else
246 z[1] = TWO10;
247 z[2] = ZERO;
248 z[3] = X[1];
249 k = 1;
252 else
254 if (EX == -42)
256 z[1] = X[1] + TWO10;
257 z[2] = X[2];
258 k = 3;
260 else if (EX == -43)
262 z[1] = TWO10;
263 z[2] = X[1];
264 k = 2;
266 else
268 z[1] = TWO10;
269 z[2] = ZERO;
270 k = 1;
272 z[3] = X[k];
275 u = (z[3] + TWO57) - TWO57;
276 if (u > z[3])
277 u -= TWO5;
279 if (u == z[3])
281 for (i = k + 1; i <= p2; i++)
283 if (X[i] == ZERO)
284 continue;
285 else
287 z[3] += ONE;
288 break;
293 c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
295 *y = c * TWOM1032;
296 return;
298 #undef R
301 /* Convert multiple precision number *X into double precision number *Y. The
302 result is correctly rounded to the nearest/even. */
303 void
304 __mp_dbl (const mp_no *x, double *y, int p)
306 if (X[0] == ZERO)
308 *y = ZERO;
309 return;
312 if (EX > -42)
313 norm (x, y, p);
314 else if (EX == -42 && X[1] >= TWO10)
315 norm (x, y, p);
316 else
317 denorm (x, y, p);
320 /* Get the multiple precision equivalent of X into *Y. If the precision is too
321 small, the result is truncated. */
322 void
323 __dbl_mp (double x, mp_no *y, int p)
325 long i, n;
326 long p2 = p;
327 double u;
329 /* Sign. */
330 if (x == ZERO)
332 Y[0] = ZERO;
333 return;
335 else if (x > ZERO)
336 Y[0] = ONE;
337 else
339 Y[0] = MONE;
340 x = -x;
343 /* Exponent. */
344 for (EY = ONE; x >= RADIX; EY += ONE)
345 x *= RADIXI;
346 for (; x < ONE; EY -= ONE)
347 x *= RADIX;
349 /* Digits. */
350 n = MIN (p2, 4);
351 for (i = 1; i <= n; i++)
353 u = (x + TWO52) - TWO52;
354 if (u > x)
355 u -= ONE;
356 Y[i] = u;
357 x -= u;
358 x *= RADIX;
360 for (; i <= p2; i++)
361 Y[i] = ZERO;
362 return;
365 /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The
366 sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
367 Y and Z. No guard digit is used. The result equals the exact sum,
368 truncated. */
369 static void
370 add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
372 long i, j, k;
373 long p2 = p;
375 EZ = EX;
377 i = p2;
378 j = p2 + EY - EX;
379 k = p2 + 1;
381 if (j < 1)
383 __cpy (x, z, p);
384 return;
386 else
387 Z[k] = ZERO;
389 for (; j > 0; i--, j--)
391 Z[k] += X[i] + Y[j];
392 if (Z[k] >= RADIX)
394 Z[k] -= RADIX;
395 Z[--k] = ONE;
397 else
398 Z[--k] = ZERO;
401 for (; i > 0; i--)
403 Z[k] += X[i];
404 if (Z[k] >= RADIX)
406 Z[k] -= RADIX;
407 Z[--k] = ONE;
409 else
410 Z[--k] = ZERO;
413 if (Z[1] == ZERO)
415 for (i = 1; i <= p2; i++)
416 Z[i] = Z[i + 1];
418 else
419 EZ += ONE;
422 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
423 The sign of the difference *Z is not changed. X and Y may overlap but not X
424 and Z or Y and Z. One guard digit is used. The error is less than one
425 ULP. */
426 static void
427 sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
429 long i, j, k;
430 long p2 = p;
432 EZ = EX;
434 if (EX == EY)
436 i = j = k = p2;
437 Z[k] = Z[k + 1] = ZERO;
439 else
441 j = EX - EY;
442 if (j > p2)
444 __cpy (x, z, p);
445 return;
447 else
449 i = p2;
450 j = p2 + 1 - j;
451 k = p2;
452 if (Y[j] > ZERO)
454 Z[k + 1] = RADIX - Y[j--];
455 Z[k] = MONE;
457 else
459 Z[k + 1] = ZERO;
460 Z[k] = ZERO;
461 j--;
466 for (; j > 0; i--, j--)
468 Z[k] += (X[i] - Y[j]);
469 if (Z[k] < ZERO)
471 Z[k] += RADIX;
472 Z[--k] = MONE;
474 else
475 Z[--k] = ZERO;
478 for (; i > 0; i--)
480 Z[k] += X[i];
481 if (Z[k] < ZERO)
483 Z[k] += RADIX;
484 Z[--k] = MONE;
486 else
487 Z[--k] = ZERO;
490 for (i = 1; Z[i] == ZERO; i++);
491 EZ = EZ - i + 1;
492 for (k = 1; i <= p2 + 1;)
493 Z[k++] = Z[i++];
494 for (; k <= p2;)
495 Z[k++] = ZERO;
497 return;
500 /* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X
501 and Z or Y and Z. One guard digit is used. The error is less than one
502 ULP. */
503 void
504 __add (const mp_no *x, const mp_no *y, mp_no *z, int p)
506 int n;
508 if (X[0] == ZERO)
510 __cpy (y, z, p);
511 return;
513 else if (Y[0] == ZERO)
515 __cpy (x, z, p);
516 return;
519 if (X[0] == Y[0])
521 if (__acr (x, y, p) > 0)
523 add_magnitudes (x, y, z, p);
524 Z[0] = X[0];
526 else
528 add_magnitudes (y, x, z, p);
529 Z[0] = Y[0];
532 else
534 if ((n = __acr (x, y, p)) == 1)
536 sub_magnitudes (x, y, z, p);
537 Z[0] = X[0];
539 else if (n == -1)
541 sub_magnitudes (y, x, z, p);
542 Z[0] = Y[0];
544 else
545 Z[0] = ZERO;
547 return;
550 /* Subtract *Y from *X and return the result in *Z. X and Y may overlap but
551 not X and Z or Y and Z. One guard digit is used. The error is less than
552 one ULP. */
553 void
554 __sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
556 int n;
558 if (X[0] == ZERO)
560 __cpy (y, z, p);
561 Z[0] = -Z[0];
562 return;
564 else if (Y[0] == ZERO)
566 __cpy (x, z, p);
567 return;
570 if (X[0] != Y[0])
572 if (__acr (x, y, p) > 0)
574 add_magnitudes (x, y, z, p);
575 Z[0] = X[0];
577 else
579 add_magnitudes (y, x, z, p);
580 Z[0] = -Y[0];
583 else
585 if ((n = __acr (x, y, p)) == 1)
587 sub_magnitudes (x, y, z, p);
588 Z[0] = X[0];
590 else if (n == -1)
592 sub_magnitudes (y, x, z, p);
593 Z[0] = -Y[0];
595 else
596 Z[0] = ZERO;
598 return;
601 /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
602 and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
603 digits. In case P > 3 the error is bounded by 1.001 ULP. */
604 void
605 __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
607 long i, i1, i2, j, k, k2;
608 long p2 = p;
609 double u, zk, zk2;
611 /* Is z=0? */
612 if (X[0] * Y[0] == ZERO)
614 Z[0] = ZERO;
615 return;
618 /* Multiply, add and carry */
619 k2 = (p2 < 3) ? p2 + p2 : p2 + 3;
620 zk = Z[k2] = ZERO;
621 for (k = k2; k > 1;)
623 if (k > p2)
625 i1 = k - p2;
626 i2 = p2 + 1;
628 else
630 i1 = 1;
631 i2 = k;
633 #if 1
634 /* Rearrange this inner loop to allow the fmadd instructions to be
635 independent and execute in parallel on processors that have
636 dual symmetrical FP pipelines. */
637 if (i1 < (i2 - 1))
639 /* Make sure we have at least 2 iterations. */
640 if (((i2 - i1) & 1L) == 1L)
642 /* Handle the odd iterations case. */
643 zk2 = x->d[i2 - 1] * y->d[i1];
645 else
646 zk2 = 0.0;
647 /* Do two multiply/adds per loop iteration, using independent
648 accumulators; zk and zk2. */
649 for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2)
651 zk += x->d[i] * y->d[j];
652 zk2 += x->d[i + 1] * y->d[j - 1];
654 zk += zk2; /* Final sum. */
656 else
658 /* Special case when iterations is 1. */
659 zk += x->d[i1] * y->d[i1];
661 #else
662 /* The original code. */
663 for (i = i1, j = i2 - 1; i < i2; i++, j--)
664 zk += X[i] * Y[j];
665 #endif
667 u = (zk + CUTTER) - CUTTER;
668 if (u > zk)
669 u -= RADIX;
670 Z[k] = zk - u;
671 zk = u * RADIXI;
672 --k;
674 Z[k] = zk;
676 /* Is there a carry beyond the most significant digit? */
677 if (Z[1] == ZERO)
679 for (i = 1; i <= p2; i++)
680 Z[i] = Z[i + 1];
681 EZ = EX + EY - 1;
683 else
684 EZ = EX + EY;
686 Z[0] = X[0] * Y[0];
687 return;
690 /* Invert *X and store in *Y. Relative error bound:
691 - For P = 2: 1.001 * R ^ (1 - P)
692 - For P = 3: 1.063 * R ^ (1 - P)
693 - For P > 3: 2.001 * R ^ (1 - P)
695 *X = 0 is not permissible. */
696 void
697 __inv (const mp_no *x, mp_no *y, int p)
699 long i;
700 double t;
701 mp_no z, w;
702 static const int np1[] =
703 { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
704 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
707 __cpy (x, &z, p);
708 z.e = 0;
709 __mp_dbl (&z, &t, p);
710 t = ONE / t;
711 __dbl_mp (t, y, p);
712 EY -= EX;
714 for (i = 0; i < np1[p]; i++)
716 __cpy (y, &w, p);
717 __mul (x, &w, y, p);
718 __sub (&mptwo, y, &z, p);
719 __mul (&w, &z, y, p);
721 return;
724 /* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z
725 or Y and Z. Relative error bound:
726 - For P = 2: 2.001 * R ^ (1 - P)
727 - For P = 3: 2.063 * R ^ (1 - P)
728 - For P > 3: 3.001 * R ^ (1 - P)
730 *X = 0 is not permissible. */
731 void
732 __dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
734 mp_no w;
736 if (X[0] == ZERO)
737 Z[0] = ZERO;
738 else
740 __inv (y, &w, p);
741 __mul (x, &w, z, p);
743 return;