2 * IBM Accurate Mathematical Library
3 * written by International Business Machines Corp.
4 * Copyright (C) 2001-2014 Free Software Foundation, Inc.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 /************************************************************************/
20 /* MODULE_NAME: mpa.c */
38 /* Arithmetic functions for multiple precision numbers. */
39 /* Relative errors are bounded */
40 /************************************************************************/
45 #include <sys/param.h>
53 const mp_no mpone
= { 1, { 1.0, 1.0 } };
54 const mp_no mptwo
= { 1, { 1.0, 2.0 } };
58 /* Compare mantissa of two multiple precision numbers regardless of the sign
59 and exponent of the numbers. */
61 mcr (const mp_no
*x
, const mp_no
*y
, int p
)
65 for (i
= 1; i
<= p2
; i
++)
77 /* Compare the absolute values of two multiple precision numbers. */
79 __acr (const mp_no
*x
, const mp_no
*y
, int p
)
107 /* Copy multiple precision number X into Y. They could be the same
110 __cpy (const mp_no
*x
, mp_no
*y
, int p
)
115 for (i
= 0; i
<= p
; i
++)
121 /* Convert a multiple precision number *X into a double precision
122 number *Y, normalized case (|x| >= 2**(-1022))). */
124 norm (const mp_no
*x
, double *y
, int p
)
129 mantissa_t a
, u
, v
, z
[5];
137 c
= X
[1] + R
* (X
[2] + R
* X
[3]);
139 c
= (X
[1] + R
* X
[2]) + R
* R
* (X
[3] + R
* X
[4]);
143 for (a
= 1, z
[1] = X
[1]; z
[1] < TWO23
; )
149 for (i
= 2; i
< 5; i
++)
151 mantissa_store_t d
, r
;
152 d
= X
[i
] * (mantissa_store_t
) a
;
158 u
= ALIGN_DOWN_TO (z
[3], TWO19
);
165 for (i
= 5; i
<= p
; i
++)
180 c
= (z
[1] + R
* (z
[2] + R
* z
[3])) / a
;
185 for (i
= 1; i
< EX
; i
++)
187 for (i
= 1; i
> EX
; i
--)
194 /* Convert a multiple precision number *X into a double precision
195 number *Y, Denormal case (|x| < 2**(-1022))). */
197 denorm (const mp_no
*x
, double *y
, int p
)
205 if (EX
< -44 || (EX
== -44 && X
[1] < TWO5
))
282 u
= ALIGN_DOWN_TO (z
[3], TWO5
);
286 for (i
= k
+ 1; i
<= p2
; i
++)
298 c
= X
[0] * ((z
[1] + R
* (z
[2] + R
* z
[3])) - TWO10
);
304 /* Convert multiple precision number *X into double precision number *Y. The
305 result is correctly rounded to the nearest/even. */
307 __mp_dbl (const mp_no
*x
, double *y
, int p
)
315 if (__glibc_likely (EX
> -42 || (EX
== -42 && X
[1] >= TWO10
)))
322 /* Get the multiple precision equivalent of X into *Y. If the precision is too
323 small, the result is truncated. */
326 __dbl_mp (double x
, mp_no
*y
, int p
)
346 for (EY
= 1; x
>= RADIX
; EY
+= 1)
348 for (; x
< 1; EY
-= 1)
353 for (i
= 1; i
<= n
; i
++)
355 INTEGER_OF (x
, Y
[i
]);
362 /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The
363 sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
364 Y and Z. No guard digit is used. The result equals the exact sum,
368 add_magnitudes (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
380 if (__glibc_unlikely (j
< 1))
388 for (; j
> 0; i
--, j
--)
420 for (i
= 1; i
<= p2
; i
++)
430 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
431 The sign of the difference *Z is not changed. X and Y may overlap but not X
432 and Z or Y and Z. One guard digit is used. The error is less than one
436 sub_magnitudes (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
447 /* Y is too small compared to X, copy X over to the result. */
448 if (__glibc_unlikely (j
< 1))
454 /* The relevant least significant digit in Y is non-zero, so we factor it in
455 to enhance accuracy. */
456 if (j
< p2
&& Y
[j
+ 1] > 0)
458 Z
[k
+ 1] = RADIX
- Y
[j
+ 1];
464 /* Subtract and borrow. */
465 for (; j
> 0; i
--, j
--)
480 /* We're done with digits from Y, so it's just digits in X. */
497 for (i
= 1; Z
[i
] == 0; i
++)
500 for (k
= 1; i
<= p2
+ 1; )
506 /* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X
507 and Z or Y and Z. One guard digit is used. The error is less than one
511 __add (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
528 if (__acr (x
, y
, p
) > 0)
530 add_magnitudes (x
, y
, z
, p
);
535 add_magnitudes (y
, x
, z
, p
);
541 if ((n
= __acr (x
, y
, p
)) == 1)
543 sub_magnitudes (x
, y
, z
, p
);
548 sub_magnitudes (y
, x
, z
, p
);
556 /* Subtract *Y from *X and return the result in *Z. X and Y may overlap but
557 not X and Z or Y and Z. One guard digit is used. The error is less than
561 __sub (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
579 if (__acr (x
, y
, p
) > 0)
581 add_magnitudes (x
, y
, z
, p
);
586 add_magnitudes (y
, x
, z
, p
);
592 if ((n
= __acr (x
, y
, p
)) == 1)
594 sub_magnitudes (x
, y
, z
, p
);
599 sub_magnitudes (y
, x
, z
, p
);
608 /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
609 and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
610 digits. In case P > 3 the error is bounded by 1.001 ULP. */
613 __mul (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
615 long i
, j
, k
, ip
, ip2
;
619 mantissa_store_t
*diag
;
622 if (__glibc_unlikely (X
[0] * Y
[0] == 0))
628 /* We need not iterate through all X's and Y's since it's pointless to
629 multiply zeroes. Here, both are zero... */
630 for (ip2
= p2
; ip2
> 0; ip2
--)
631 if (X
[ip2
] != 0 || Y
[ip2
] != 0)
634 a
= X
[ip2
] != 0 ? y
: x
;
636 /* ... and here, at least one of them is still zero. */
637 for (ip
= ip2
; ip
> 0; ip
--)
641 /* The product looks like this for p = 3 (as an example):
646 -----------------------------
651 So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
652 for P >= 3. We compute the above digits in two parts; the last P-1
653 digits and then the first P digits. The last P-1 digits are a sum of
654 products of the input digits from P to P-k where K is 0 for the least
655 significant digit and increases as we go towards the left. The product
656 term is of the form X[k]*X[P-k] as can be seen in the above example.
658 The first P digits are also a sum of products with the same product term,
659 except that the sum is from 1 to k. This is also evident from the above
662 Another thing that becomes evident is that only the most significant
663 ip+ip2 digits of the result are non-zero, where ip and ip2 are the
664 'internal precision' of the input numbers, i.e. digits after ip and ip2
667 k
= (__glibc_unlikely (p2
< 3)) ? p2
+ p2
: p2
+ 3;
669 while (k
> ip
+ ip2
+ 1)
674 /* Precompute sums of diagonal elements so that we can directly use them
675 later. See the next comment to know we why need them. */
676 diag
= alloca (k
* sizeof (mantissa_store_t
));
677 mantissa_store_t d
= 0;
678 for (i
= 1; i
<= ip
; i
++)
680 d
+= X
[i
] * (mantissa_store_t
) Y
[i
];
691 /* We want to add this only once, but since we subtract it in the sum
692 of products above, we add twice. */
693 zk
+= 2 * X
[lim
] * (mantissa_store_t
) Y
[lim
];
695 for (i
= k
- p2
, j
= p2
; i
< j
; i
++, j
--)
696 zk
+= (X
[i
] + X
[j
]) * (mantissa_store_t
) (Y
[i
] + Y
[j
]);
700 DIV_RADIX (zk
, Z
[k
]);
704 /* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
705 goes from 1 -> k - 1 and j goes the same range in reverse. To reduce the
706 number of multiplications, we halve the range and if k is an even number,
707 add the diagonal element X[k/2]Y[k/2]. Through the half range, we compute
708 X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].
710 This reduction tells us that we're summing two things, the first term
711 through the half range and the negative of the sum of the product of all
712 terms of X and Y in the full range. i.e.
714 SUM(X[i] * Y[i]) for k terms. This is precalculated above for each k in
715 a single loop so that it completes in O(n) time and can hence be directly
716 used in the loop below. */
722 /* We want to add this only once, but since we subtract it in the sum
723 of products above, we add twice. */
724 zk
+= 2 * X
[lim
] * (mantissa_store_t
) Y
[lim
];
726 for (i
= 1, j
= k
- 1; i
< j
; i
++, j
--)
727 zk
+= (X
[i
] + X
[j
]) * (mantissa_store_t
) (Y
[i
] + Y
[j
]);
731 DIV_RADIX (zk
, Z
[k
]);
736 /* Get the exponent sum into an intermediate variable. This is a subtle
737 optimization, where given enough registers, all operations on the exponent
738 happen in registers and the result is written out only once into EZ. */
741 /* Is there a carry beyond the most significant digit? */
742 if (__glibc_unlikely (Z
[1] == 0))
744 for (i
= 1; i
<= p2
; i
++)
755 /* Square *X and store result in *Y. X and Y may not overlap. For P in
756 [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the
757 error is bounded by 1.001 ULP. This is a faster special case of
761 __sqr (const mp_no
*x
, mp_no
*y
, int p
)
767 if (__glibc_unlikely (X
[0] == 0))
773 /* We need not iterate through all X's since it's pointless to
775 for (ip
= p
; ip
> 0; ip
--)
779 k
= (__glibc_unlikely (p
< 3)) ? p
+ p
: p
+ 3;
781 while (k
> 2 * ip
+ 1)
788 mantissa_store_t yk2
= 0;
792 yk
+= X
[lim
] * (mantissa_store_t
) X
[lim
];
794 /* In __mul, this loop (and the one within the next while loop) run
795 between a range to calculate the mantissa as follows:
797 Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
800 For X == Y, we can get away with summing halfway and doubling the
801 result. For cases where the range size is even, the mid-point needs
802 to be added separately (above). */
803 for (i
= k
- p
, j
= p
; i
< j
; i
++, j
--)
804 yk2
+= X
[i
] * (mantissa_store_t
) X
[j
];
808 DIV_RADIX (yk
, Y
[k
]);
814 mantissa_store_t yk2
= 0;
818 yk
+= X
[lim
] * (mantissa_store_t
) X
[lim
];
820 /* Likewise for this loop. */
821 for (i
= 1, j
= k
- 1; i
< j
; i
++, j
--)
822 yk2
+= X
[i
] * (mantissa_store_t
) X
[j
];
826 DIV_RADIX (yk
, Y
[k
]);
831 /* Squares are always positive. */
834 /* Get the exponent sum into an intermediate variable. This is a subtle
835 optimization, where given enough registers, all operations on the exponent
836 happen in registers and the result is written out only once into EZ. */
839 /* Is there a carry beyond the most significant digit? */
840 if (__glibc_unlikely (Y
[1] == 0))
842 for (i
= 1; i
<= p
; i
++)
851 /* Invert *X and store in *Y. Relative error bound:
852 - For P = 2: 1.001 * R ^ (1 - P)
853 - For P = 3: 1.063 * R ^ (1 - P)
854 - For P > 3: 2.001 * R ^ (1 - P)
856 *X = 0 is not permissible. */
859 __inv (const mp_no
*x
, mp_no
*y
, int p
)
864 static const int np1
[] =
865 { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
866 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
871 __mp_dbl (&z
, &t
, p
);
876 for (i
= 0; i
< np1
[p
]; i
++)
880 __sub (&mptwo
, y
, &z
, p
);
881 __mul (&w
, &z
, y
, p
);
885 /* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z
886 or Y and Z. Relative error bound:
887 - For P = 2: 2.001 * R ^ (1 - P)
888 - For P = 3: 2.063 * R ^ (1 - P)
889 - For P > 3: 3.001 * R ^ (1 - P)
891 *X = 0 is not permissible. */
894 __dvd (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)