2 * IBM Accurate Mathematical Library
3 * written by International Business Machines Corp.
4 * Copyright (C) 2001-2017 Free Software Foundation, Inc.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 /************************************************************************/
20 /* MODULE_NAME: mpa.c */
38 /* Arithmetic functions for multiple precision numbers. */
39 /* Relative errors are bounded */
40 /************************************************************************/
45 #include <sys/param.h>
53 const mp_no __mpone
= { 1, { 1.0, 1.0 } };
54 const mp_no __mptwo
= { 1, { 1.0, 2.0 } };
58 /* Compare mantissa of two multiple precision numbers regardless of the sign
59 and exponent of the numbers. */
61 mcr (const mp_no
*x
, const mp_no
*y
, int p
)
65 for (i
= 1; i
<= p2
; i
++)
77 /* Compare the absolute values of two multiple precision numbers. */
79 __acr (const mp_no
*x
, const mp_no
*y
, int p
)
107 /* Copy multiple precision number X into Y. They could be the same
110 __cpy (const mp_no
*x
, mp_no
*y
, int p
)
115 for (i
= 0; i
<= p
; i
++)
121 /* Convert a multiple precision number *X into a double precision
122 number *Y, normalized case (|x| >= 2**(-1022))). X has precision
123 P, which is positive. */
125 norm (const mp_no
*x
, double *y
, int p
)
130 mantissa_t a
, u
, v
, z
[5];
138 c
= X
[1] + R
* (X
[2] + R
* X
[3]);
140 c
= (X
[1] + R
* X
[2]) + R
* R
* (X
[3] + R
* X
[4]);
144 for (a
= 1, z
[1] = X
[1]; z
[1] < TWO23
; )
150 for (i
= 2; i
< 5; i
++)
152 mantissa_store_t d
, r
;
153 d
= X
[i
] * (mantissa_store_t
) a
;
159 u
= ALIGN_DOWN_TO (z
[3], TWO19
);
166 for (i
= 5; i
<= p
; i
++)
181 c
= (z
[1] + R
* (z
[2] + R
* z
[3])) / a
;
186 for (i
= 1; i
< EX
; i
++)
188 for (i
= 1; i
> EX
; i
--)
195 /* Convert a multiple precision number *X into a double precision
196 number *Y, Denormal case (|x| < 2**(-1022))). */
198 denorm (const mp_no
*x
, double *y
, int p
)
206 if (EX
< -44 || (EX
== -44 && X
[1] < TWO5
))
283 u
= ALIGN_DOWN_TO (z
[3], TWO5
);
287 for (i
= k
+ 1; i
<= p2
; i
++)
299 c
= X
[0] * ((z
[1] + R
* (z
[2] + R
* z
[3])) - TWO10
);
305 /* Convert multiple precision number *X into double precision number *Y. The
306 result is correctly rounded to the nearest/even. */
308 __mp_dbl (const mp_no
*x
, double *y
, int p
)
316 if (__glibc_likely (EX
> -42 || (EX
== -42 && X
[1] >= TWO10
)))
323 /* Get the multiple precision equivalent of X into *Y. If the precision is too
324 small, the result is truncated. */
327 __dbl_mp (double x
, mp_no
*y
, int p
)
347 for (EY
= 1; x
>= RADIX
; EY
+= 1)
349 for (; x
< 1; EY
-= 1)
354 for (i
= 1; i
<= n
; i
++)
356 INTEGER_OF (x
, Y
[i
]);
363 /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The
364 sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
365 Y and Z. No guard digit is used. The result equals the exact sum,
369 add_magnitudes (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
381 if (__glibc_unlikely (j
< 1))
389 for (; j
> 0; i
--, j
--)
421 for (i
= 1; i
<= p2
; i
++)
431 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
432 The sign of the difference *Z is not changed. X and Y may overlap but not X
433 and Z or Y and Z. One guard digit is used. The error is less than one
437 sub_magnitudes (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
448 /* Y is too small compared to X, copy X over to the result. */
449 if (__glibc_unlikely (j
< 1))
455 /* The relevant least significant digit in Y is non-zero, so we factor it in
456 to enhance accuracy. */
457 if (j
< p2
&& Y
[j
+ 1] > 0)
459 Z
[k
+ 1] = RADIX
- Y
[j
+ 1];
465 /* Subtract and borrow. */
466 for (; j
> 0; i
--, j
--)
481 /* We're done with digits from Y, so it's just digits in X. */
498 for (i
= 1; Z
[i
] == 0; i
++)
501 for (k
= 1; i
<= p2
+ 1; )
507 /* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X
508 and Z or Y and Z. One guard digit is used. The error is less than one
512 __add (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
529 if (__acr (x
, y
, p
) > 0)
531 add_magnitudes (x
, y
, z
, p
);
536 add_magnitudes (y
, x
, z
, p
);
542 if ((n
= __acr (x
, y
, p
)) == 1)
544 sub_magnitudes (x
, y
, z
, p
);
549 sub_magnitudes (y
, x
, z
, p
);
557 /* Subtract *Y from *X and return the result in *Z. X and Y may overlap but
558 not X and Z or Y and Z. One guard digit is used. The error is less than
562 __sub (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
580 if (__acr (x
, y
, p
) > 0)
582 add_magnitudes (x
, y
, z
, p
);
587 add_magnitudes (y
, x
, z
, p
);
593 if ((n
= __acr (x
, y
, p
)) == 1)
595 sub_magnitudes (x
, y
, z
, p
);
600 sub_magnitudes (y
, x
, z
, p
);
609 /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
610 and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
611 digits. In case P > 3 the error is bounded by 1.001 ULP. */
614 __mul (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
616 long i
, j
, k
, ip
, ip2
;
620 mantissa_store_t
*diag
;
623 if (__glibc_unlikely (X
[0] * Y
[0] == 0))
629 /* We need not iterate through all X's and Y's since it's pointless to
630 multiply zeroes. Here, both are zero... */
631 for (ip2
= p2
; ip2
> 0; ip2
--)
632 if (X
[ip2
] != 0 || Y
[ip2
] != 0)
635 a
= X
[ip2
] != 0 ? y
: x
;
637 /* ... and here, at least one of them is still zero. */
638 for (ip
= ip2
; ip
> 0; ip
--)
642 /* The product looks like this for p = 3 (as an example):
647 -----------------------------
652 So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
653 for P >= 3. We compute the above digits in two parts; the last P-1
654 digits and then the first P digits. The last P-1 digits are a sum of
655 products of the input digits from P to P-k where K is 0 for the least
656 significant digit and increases as we go towards the left. The product
657 term is of the form X[k]*X[P-k] as can be seen in the above example.
659 The first P digits are also a sum of products with the same product term,
660 except that the sum is from 1 to k. This is also evident from the above
663 Another thing that becomes evident is that only the most significant
664 ip+ip2 digits of the result are non-zero, where ip and ip2 are the
665 'internal precision' of the input numbers, i.e. digits after ip and ip2
668 k
= (__glibc_unlikely (p2
< 3)) ? p2
+ p2
: p2
+ 3;
670 while (k
> ip
+ ip2
+ 1)
675 /* Precompute sums of diagonal elements so that we can directly use them
676 later. See the next comment to know we why need them. */
677 diag
= alloca (k
* sizeof (mantissa_store_t
));
678 mantissa_store_t d
= 0;
679 for (i
= 1; i
<= ip
; i
++)
681 d
+= X
[i
] * (mantissa_store_t
) Y
[i
];
692 /* We want to add this only once, but since we subtract it in the sum
693 of products above, we add twice. */
694 zk
+= 2 * X
[lim
] * (mantissa_store_t
) Y
[lim
];
696 for (i
= k
- p2
, j
= p2
; i
< j
; i
++, j
--)
697 zk
+= (X
[i
] + X
[j
]) * (mantissa_store_t
) (Y
[i
] + Y
[j
]);
701 DIV_RADIX (zk
, Z
[k
]);
705 /* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
706 goes from 1 -> k - 1 and j goes the same range in reverse. To reduce the
707 number of multiplications, we halve the range and if k is an even number,
708 add the diagonal element X[k/2]Y[k/2]. Through the half range, we compute
709 X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].
711 This reduction tells us that we're summing two things, the first term
712 through the half range and the negative of the sum of the product of all
713 terms of X and Y in the full range. i.e.
715 SUM(X[i] * Y[i]) for k terms. This is precalculated above for each k in
716 a single loop so that it completes in O(n) time and can hence be directly
717 used in the loop below. */
723 /* We want to add this only once, but since we subtract it in the sum
724 of products above, we add twice. */
725 zk
+= 2 * X
[lim
] * (mantissa_store_t
) Y
[lim
];
727 for (i
= 1, j
= k
- 1; i
< j
; i
++, j
--)
728 zk
+= (X
[i
] + X
[j
]) * (mantissa_store_t
) (Y
[i
] + Y
[j
]);
732 DIV_RADIX (zk
, Z
[k
]);
737 /* Get the exponent sum into an intermediate variable. This is a subtle
738 optimization, where given enough registers, all operations on the exponent
739 happen in registers and the result is written out only once into EZ. */
742 /* Is there a carry beyond the most significant digit? */
743 if (__glibc_unlikely (Z
[1] == 0))
745 for (i
= 1; i
<= p2
; i
++)
756 /* Square *X and store result in *Y. X and Y may not overlap. For P in
757 [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the
758 error is bounded by 1.001 ULP. This is a faster special case of
762 __sqr (const mp_no
*x
, mp_no
*y
, int p
)
768 if (__glibc_unlikely (X
[0] == 0))
774 /* We need not iterate through all X's since it's pointless to
776 for (ip
= p
; ip
> 0; ip
--)
780 k
= (__glibc_unlikely (p
< 3)) ? p
+ p
: p
+ 3;
782 while (k
> 2 * ip
+ 1)
789 mantissa_store_t yk2
= 0;
793 yk
+= X
[lim
] * (mantissa_store_t
) X
[lim
];
795 /* In __mul, this loop (and the one within the next while loop) run
796 between a range to calculate the mantissa as follows:
798 Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
801 For X == Y, we can get away with summing halfway and doubling the
802 result. For cases where the range size is even, the mid-point needs
803 to be added separately (above). */
804 for (i
= k
- p
, j
= p
; i
< j
; i
++, j
--)
805 yk2
+= X
[i
] * (mantissa_store_t
) X
[j
];
809 DIV_RADIX (yk
, Y
[k
]);
815 mantissa_store_t yk2
= 0;
819 yk
+= X
[lim
] * (mantissa_store_t
) X
[lim
];
821 /* Likewise for this loop. */
822 for (i
= 1, j
= k
- 1; i
< j
; i
++, j
--)
823 yk2
+= X
[i
] * (mantissa_store_t
) X
[j
];
827 DIV_RADIX (yk
, Y
[k
]);
832 /* Squares are always positive. */
835 /* Get the exponent sum into an intermediate variable. This is a subtle
836 optimization, where given enough registers, all operations on the exponent
837 happen in registers and the result is written out only once into EZ. */
840 /* Is there a carry beyond the most significant digit? */
841 if (__glibc_unlikely (Y
[1] == 0))
843 for (i
= 1; i
<= p
; i
++)
852 /* Invert *X and store in *Y. Relative error bound:
853 - For P = 2: 1.001 * R ^ (1 - P)
854 - For P = 3: 1.063 * R ^ (1 - P)
855 - For P > 3: 2.001 * R ^ (1 - P)
857 *X = 0 is not permissible. */
860 __inv (const mp_no
*x
, mp_no
*y
, int p
)
865 static const int np1
[] =
866 { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
867 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
872 __mp_dbl (&z
, &t
, p
);
877 for (i
= 0; i
< np1
[p
]; i
++)
881 __sub (&__mptwo
, y
, &z
, p
);
882 __mul (&w
, &z
, y
, p
);
886 /* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z
887 or Y and Z. Relative error bound:
888 - For P = 2: 2.001 * R ^ (1 - P)
889 - For P = 3: 2.063 * R ^ (1 - P)
890 - For P > 3: 3.001 * R ^ (1 - P)
892 *X = 0 is not permissible. */
895 __dvd (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)