3 * IBM Accurate Mathematical Library
4 * written by International Business Machines Corp.
5 * Copyright (C) 2001-2013 Free Software Foundation, Inc.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, see <http://www.gnu.org/licenses/>.
20 /************************************************************************/
21 /* MODULE_NAME: mpa.c */
39 /* Arithmetic functions for multiple precision numbers. */
40 /* Relative errors are bounded */
41 /************************************************************************/
46 #include <sys/param.h>
48 const mp_no mpone
= {1, {1.0, 1.0}};
49 const mp_no mptwo
= {1, {1.0, 2.0}};
51 /* Compare mantissa of two multiple precision numbers regardless of the sign
52 and exponent of the numbers. */
54 mcr (const mp_no
*x
, const mp_no
*y
, int p
)
58 for (i
= 1; i
<= p2
; i
++)
70 /* Compare the absolute values of two multiple precision numbers. */
72 __acr (const mp_no
*x
, const mp_no
*y
, int p
)
83 else if (Y
[0] == ZERO
)
98 /* Copy multiple precision number X into Y. They could be the same
101 __cpy (const mp_no
*x
, mp_no
*y
, int p
)
106 for (i
= 0; i
<= p
; i
++)
112 /* Convert a multiple precision number *X into a double precision
113 number *Y, normalized case (|x| >= 2**(-1022))). */
115 norm (const mp_no
*x
, double *y
, int p
)
119 double a
, c
, u
, v
, z
[5];
127 c
= X
[1] + R
* (X
[2] + R
* X
[3]);
129 c
= (X
[1] + R
* X
[2]) + R
* R
* (X
[3] + R
* X
[4]);
133 for (a
= ONE
, z
[1] = X
[1]; z
[1] < TWO23
;)
139 for (i
= 2; i
< 5; i
++)
142 u
= (z
[i
] + CUTTER
) - CUTTER
;
146 z
[i
- 1] += u
* RADIXI
;
149 u
= (z
[3] + TWO71
) - TWO71
;
158 for (i
= 5; i
<= p
; i
++)
173 c
= (z
[1] + R
* (z
[2] + R
* z
[3])) / a
;
178 for (i
= 1; i
< EX
; i
++)
180 for (i
= 1; i
> EX
; i
--)
188 /* Convert a multiple precision number *X into a double precision
189 number *Y, Denormal case (|x| < 2**(-1022))). */
191 denorm (const mp_no
*x
, double *y
, int p
)
198 if (EX
< -44 || (EX
== -44 && X
[1] < TWO5
))
275 u
= (z
[3] + TWO57
) - TWO57
;
281 for (i
= k
+ 1; i
<= p2
; i
++)
293 c
= X
[0] * ((z
[1] + R
* (z
[2] + R
* z
[3])) - TWO10
);
301 /* Convert multiple precision number *X into double precision number *Y. The
302 result is correctly rounded to the nearest/even. */
304 __mp_dbl (const mp_no
*x
, double *y
, int p
)
314 else if (EX
== -42 && X
[1] >= TWO10
)
320 /* Get the multiple precision equivalent of X into *Y. If the precision is too
321 small, the result is truncated. */
323 __dbl_mp (double x
, mp_no
*y
, int p
)
344 for (EY
= ONE
; x
>= RADIX
; EY
+= ONE
)
346 for (; x
< ONE
; EY
-= ONE
)
351 for (i
= 1; i
<= n
; i
++)
353 u
= (x
+ TWO52
) - TWO52
;
365 /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The
366 sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
367 Y and Z. No guard digit is used. The result equals the exact sum,
370 add_magnitudes (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
389 for (; j
> 0; i
--, j
--)
415 for (i
= 1; i
<= p2
; i
++)
422 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
423 The sign of the difference *Z is not changed. X and Y may overlap but not X
424 and Z or Y and Z. One guard digit is used. The error is less than one
427 sub_magnitudes (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
437 Z
[k
] = Z
[k
+ 1] = ZERO
;
454 Z
[k
+ 1] = RADIX
- Y
[j
--];
466 for (; j
> 0; i
--, j
--)
468 Z
[k
] += (X
[i
] - Y
[j
]);
490 for (i
= 1; Z
[i
] == ZERO
; i
++);
492 for (k
= 1; i
<= p2
+ 1;)
500 /* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X
501 and Z or Y and Z. One guard digit is used. The error is less than one
504 __add (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
513 else if (Y
[0] == ZERO
)
521 if (__acr (x
, y
, p
) > 0)
523 add_magnitudes (x
, y
, z
, p
);
528 add_magnitudes (y
, x
, z
, p
);
534 if ((n
= __acr (x
, y
, p
)) == 1)
536 sub_magnitudes (x
, y
, z
, p
);
541 sub_magnitudes (y
, x
, z
, p
);
550 /* Subtract *Y from *X and return the result in *Z. X and Y may overlap but
551 not X and Z or Y and Z. One guard digit is used. The error is less than
554 __sub (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
564 else if (Y
[0] == ZERO
)
572 if (__acr (x
, y
, p
) > 0)
574 add_magnitudes (x
, y
, z
, p
);
579 add_magnitudes (y
, x
, z
, p
);
585 if ((n
= __acr (x
, y
, p
)) == 1)
587 sub_magnitudes (x
, y
, z
, p
);
592 sub_magnitudes (y
, x
, z
, p
);
601 /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
602 and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
603 digits. In case P > 3 the error is bounded by 1.001 ULP. */
605 __mul (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
607 long i
, i1
, i2
, j
, k
, k2
;
612 if (X
[0] * Y
[0] == ZERO
)
618 /* Multiply, add and carry */
619 k2
= (p2
< 3) ? p2
+ p2
: p2
+ 3;
634 /* Rearrange this inner loop to allow the fmadd instructions to be
635 independent and execute in parallel on processors that have
636 dual symmetrical FP pipelines. */
639 /* Make sure we have at least 2 iterations. */
640 if (((i2
- i1
) & 1L) == 1L)
642 /* Handle the odd iterations case. */
643 zk2
= x
->d
[i2
- 1] * y
->d
[i1
];
647 /* Do two multiply/adds per loop iteration, using independent
648 accumulators; zk and zk2. */
649 for (i
= i1
, j
= i2
- 1; i
< i2
- 1; i
+= 2, j
-= 2)
651 zk
+= x
->d
[i
] * y
->d
[j
];
652 zk2
+= x
->d
[i
+ 1] * y
->d
[j
- 1];
654 zk
+= zk2
; /* Final sum. */
658 /* Special case when iterations is 1. */
659 zk
+= x
->d
[i1
] * y
->d
[i1
];
662 /* The original code. */
663 for (i
= i1
, j
= i2
- 1; i
< i2
; i
++, j
--)
667 u
= (zk
+ CUTTER
) - CUTTER
;
676 /* Is there a carry beyond the most significant digit? */
679 for (i
= 1; i
<= p2
; i
++)
690 /* Invert *X and store in *Y. Relative error bound:
691 - For P = 2: 1.001 * R ^ (1 - P)
692 - For P = 3: 1.063 * R ^ (1 - P)
693 - For P > 3: 2.001 * R ^ (1 - P)
695 *X = 0 is not permissible. */
697 __inv (const mp_no
*x
, mp_no
*y
, int p
)
702 static const int np1
[] =
703 { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
704 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
709 __mp_dbl (&z
, &t
, p
);
714 for (i
= 0; i
< np1
[p
]; i
++)
718 __sub (&mptwo
, y
, &z
, p
);
719 __mul (&w
, &z
, y
, p
);
724 /* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z
725 or Y and Z. Relative error bound:
726 - For P = 2: 2.001 * R ^ (1 - P)
727 - For P = 3: 2.063 * R ^ (1 - P)
728 - For P > 3: 3.001 * R ^ (1 - P)
730 *X = 0 is not permissible. */
732 __dvd (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)