Update copyright dates with scripts/update-copyrights.
[glibc.git] / sysdeps / ieee754 / dbl-64 / mpa.c
blob7d6b0c5a23398b41c421cd677c3b41a1d9be416b
/*
 * IBM Accurate Mathematical Library
 * written by International Business Machines Corp.
 * Copyright (C) 2001-2015 Free Software Foundation, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
/************************************************************************/
/*  MODULE_NAME: mpa.c                                                  */
/*                                                                      */
/*  FUNCTIONS:                                                          */
/*               mcr                                                    */
/*               acr                                                    */
/*               cpy                                                    */
/*               norm                                                   */
/*               denorm                                                 */
/*               mp_dbl                                                 */
/*               dbl_mp                                                 */
/*               add_magnitudes                                         */
/*               sub_magnitudes                                         */
/*               add                                                    */
/*               sub                                                    */
/*               mul                                                    */
/*               sqr                                                    */
/*               inv                                                    */
/*               dvd                                                    */
/*                                                                      */
/*  Arithmetic functions for multiple precision numbers.                */
/*  Relative errors are bounded                                         */
/************************************************************************/
43 #include "endian.h"
44 #include "mpa.h"
45 #include <sys/param.h>
46 #include <alloca.h>
48 #ifndef SECTION
49 # define SECTION
50 #endif
52 #ifndef NO__CONST
53 const mp_no __mpone = { 1, { 1.0, 1.0 } };
54 const mp_no __mptwo = { 1, { 1.0, 2.0 } };
55 #endif
57 #ifndef NO___ACR
58 /* Compare mantissa of two multiple precision numbers regardless of the sign
59 and exponent of the numbers. */
60 static int
61 mcr (const mp_no *x, const mp_no *y, int p)
63 long i;
64 long p2 = p;
65 for (i = 1; i <= p2; i++)
67 if (X[i] == Y[i])
68 continue;
69 else if (X[i] > Y[i])
70 return 1;
71 else
72 return -1;
74 return 0;
77 /* Compare the absolute values of two multiple precision numbers. */
78 int
79 __acr (const mp_no *x, const mp_no *y, int p)
81 long i;
83 if (X[0] == 0)
85 if (Y[0] == 0)
86 i = 0;
87 else
88 i = -1;
90 else if (Y[0] == 0)
91 i = 1;
92 else
94 if (EX > EY)
95 i = 1;
96 else if (EX < EY)
97 i = -1;
98 else
99 i = mcr (x, y, p);
102 return i;
104 #endif
106 #ifndef NO___CPY
107 /* Copy multiple precision number X into Y. They could be the same
108 number. */
109 void
110 __cpy (const mp_no *x, mp_no *y, int p)
112 long i;
114 EY = EX;
115 for (i = 0; i <= p; i++)
116 Y[i] = X[i];
118 #endif
120 #ifndef NO___MP_DBL
121 /* Convert a multiple precision number *X into a double precision
122 number *Y, normalized case (|x| >= 2**(-1022))). */
123 static void
124 norm (const mp_no *x, double *y, int p)
126 # define R RADIXI
127 long i;
128 double c;
129 mantissa_t a, u, v, z[5];
130 if (p < 5)
132 if (p == 1)
133 c = X[1];
134 else if (p == 2)
135 c = X[1] + R * X[2];
136 else if (p == 3)
137 c = X[1] + R * (X[2] + R * X[3]);
138 else if (p == 4)
139 c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
141 else
143 for (a = 1, z[1] = X[1]; z[1] < TWO23; )
145 a *= 2;
146 z[1] *= 2;
149 for (i = 2; i < 5; i++)
151 mantissa_store_t d, r;
152 d = X[i] * (mantissa_store_t) a;
153 DIV_RADIX (d, r);
154 z[i] = r;
155 z[i - 1] += d;
158 u = ALIGN_DOWN_TO (z[3], TWO19);
159 v = z[3] - u;
161 if (v == TWO18)
163 if (z[4] == 0)
165 for (i = 5; i <= p; i++)
167 if (X[i] == 0)
168 continue;
169 else
171 z[3] += 1;
172 break;
176 else
177 z[3] += 1;
180 c = (z[1] + R * (z[2] + R * z[3])) / a;
183 c *= X[0];
185 for (i = 1; i < EX; i++)
186 c *= RADIX;
187 for (i = 1; i > EX; i--)
188 c *= RADIXI;
190 *y = c;
191 # undef R
194 /* Convert a multiple precision number *X into a double precision
195 number *Y, Denormal case (|x| < 2**(-1022))). */
196 static void
197 denorm (const mp_no *x, double *y, int p)
199 long i, k;
200 long p2 = p;
201 double c;
202 mantissa_t u, z[5];
204 # define R RADIXI
205 if (EX < -44 || (EX == -44 && X[1] < TWO5))
207 *y = 0;
208 return;
211 if (p2 == 1)
213 if (EX == -42)
215 z[1] = X[1] + TWO10;
216 z[2] = 0;
217 z[3] = 0;
218 k = 3;
220 else if (EX == -43)
222 z[1] = TWO10;
223 z[2] = X[1];
224 z[3] = 0;
225 k = 2;
227 else
229 z[1] = TWO10;
230 z[2] = 0;
231 z[3] = X[1];
232 k = 1;
235 else if (p2 == 2)
237 if (EX == -42)
239 z[1] = X[1] + TWO10;
240 z[2] = X[2];
241 z[3] = 0;
242 k = 3;
244 else if (EX == -43)
246 z[1] = TWO10;
247 z[2] = X[1];
248 z[3] = X[2];
249 k = 2;
251 else
253 z[1] = TWO10;
254 z[2] = 0;
255 z[3] = X[1];
256 k = 1;
259 else
261 if (EX == -42)
263 z[1] = X[1] + TWO10;
264 z[2] = X[2];
265 k = 3;
267 else if (EX == -43)
269 z[1] = TWO10;
270 z[2] = X[1];
271 k = 2;
273 else
275 z[1] = TWO10;
276 z[2] = 0;
277 k = 1;
279 z[3] = X[k];
282 u = ALIGN_DOWN_TO (z[3], TWO5);
284 if (u == z[3])
286 for (i = k + 1; i <= p2; i++)
288 if (X[i] == 0)
289 continue;
290 else
292 z[3] += 1;
293 break;
298 c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
300 *y = c * TWOM1032;
301 # undef R
304 /* Convert multiple precision number *X into double precision number *Y. The
305 result is correctly rounded to the nearest/even. */
306 void
307 __mp_dbl (const mp_no *x, double *y, int p)
309 if (X[0] == 0)
311 *y = 0;
312 return;
315 if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10)))
316 norm (x, y, p);
317 else
318 denorm (x, y, p);
320 #endif
322 /* Get the multiple precision equivalent of X into *Y. If the precision is too
323 small, the result is truncated. */
324 void
325 SECTION
326 __dbl_mp (double x, mp_no *y, int p)
328 long i, n;
329 long p2 = p;
331 /* Sign. */
332 if (x == 0)
334 Y[0] = 0;
335 return;
337 else if (x > 0)
338 Y[0] = 1;
339 else
341 Y[0] = -1;
342 x = -x;
345 /* Exponent. */
346 for (EY = 1; x >= RADIX; EY += 1)
347 x *= RADIXI;
348 for (; x < 1; EY -= 1)
349 x *= RADIX;
351 /* Digits. */
352 n = MIN (p2, 4);
353 for (i = 1; i <= n; i++)
355 INTEGER_OF (x, Y[i]);
356 x *= RADIX;
358 for (; i <= p2; i++)
359 Y[i] = 0;
362 /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The
363 sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
364 Y and Z. No guard digit is used. The result equals the exact sum,
365 truncated. */
366 static void
367 SECTION
368 add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
370 long i, j, k;
371 long p2 = p;
372 mantissa_t zk;
374 EZ = EX;
376 i = p2;
377 j = p2 + EY - EX;
378 k = p2 + 1;
380 if (__glibc_unlikely (j < 1))
382 __cpy (x, z, p);
383 return;
386 zk = 0;
388 for (; j > 0; i--, j--)
390 zk += X[i] + Y[j];
391 if (zk >= RADIX)
393 Z[k--] = zk - RADIX;
394 zk = 1;
396 else
398 Z[k--] = zk;
399 zk = 0;
403 for (; i > 0; i--)
405 zk += X[i];
406 if (zk >= RADIX)
408 Z[k--] = zk - RADIX;
409 zk = 1;
411 else
413 Z[k--] = zk;
414 zk = 0;
418 if (zk == 0)
420 for (i = 1; i <= p2; i++)
421 Z[i] = Z[i + 1];
423 else
425 Z[1] = zk;
426 EZ += 1;
430 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
431 The sign of the difference *Z is not changed. X and Y may overlap but not X
432 and Z or Y and Z. One guard digit is used. The error is less than one
433 ULP. */
434 static void
435 SECTION
436 sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
438 long i, j, k;
439 long p2 = p;
440 mantissa_t zk;
442 EZ = EX;
443 i = p2;
444 j = p2 + EY - EX;
445 k = p2;
447 /* Y is too small compared to X, copy X over to the result. */
448 if (__glibc_unlikely (j < 1))
450 __cpy (x, z, p);
451 return;
454 /* The relevant least significant digit in Y is non-zero, so we factor it in
455 to enhance accuracy. */
456 if (j < p2 && Y[j + 1] > 0)
458 Z[k + 1] = RADIX - Y[j + 1];
459 zk = -1;
461 else
462 zk = Z[k + 1] = 0;
464 /* Subtract and borrow. */
465 for (; j > 0; i--, j--)
467 zk += (X[i] - Y[j]);
468 if (zk < 0)
470 Z[k--] = zk + RADIX;
471 zk = -1;
473 else
475 Z[k--] = zk;
476 zk = 0;
480 /* We're done with digits from Y, so it's just digits in X. */
481 for (; i > 0; i--)
483 zk += X[i];
484 if (zk < 0)
486 Z[k--] = zk + RADIX;
487 zk = -1;
489 else
491 Z[k--] = zk;
492 zk = 0;
496 /* Normalize. */
497 for (i = 1; Z[i] == 0; i++)
499 EZ = EZ - i + 1;
500 for (k = 1; i <= p2 + 1; )
501 Z[k++] = Z[i++];
502 for (; k <= p2; )
503 Z[k++] = 0;
506 /* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X
507 and Z or Y and Z. One guard digit is used. The error is less than one
508 ULP. */
509 void
510 SECTION
511 __add (const mp_no *x, const mp_no *y, mp_no *z, int p)
513 int n;
515 if (X[0] == 0)
517 __cpy (y, z, p);
518 return;
520 else if (Y[0] == 0)
522 __cpy (x, z, p);
523 return;
526 if (X[0] == Y[0])
528 if (__acr (x, y, p) > 0)
530 add_magnitudes (x, y, z, p);
531 Z[0] = X[0];
533 else
535 add_magnitudes (y, x, z, p);
536 Z[0] = Y[0];
539 else
541 if ((n = __acr (x, y, p)) == 1)
543 sub_magnitudes (x, y, z, p);
544 Z[0] = X[0];
546 else if (n == -1)
548 sub_magnitudes (y, x, z, p);
549 Z[0] = Y[0];
551 else
552 Z[0] = 0;
556 /* Subtract *Y from *X and return the result in *Z. X and Y may overlap but
557 not X and Z or Y and Z. One guard digit is used. The error is less than
558 one ULP. */
559 void
560 SECTION
561 __sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
563 int n;
565 if (X[0] == 0)
567 __cpy (y, z, p);
568 Z[0] = -Z[0];
569 return;
571 else if (Y[0] == 0)
573 __cpy (x, z, p);
574 return;
577 if (X[0] != Y[0])
579 if (__acr (x, y, p) > 0)
581 add_magnitudes (x, y, z, p);
582 Z[0] = X[0];
584 else
586 add_magnitudes (y, x, z, p);
587 Z[0] = -Y[0];
590 else
592 if ((n = __acr (x, y, p)) == 1)
594 sub_magnitudes (x, y, z, p);
595 Z[0] = X[0];
597 else if (n == -1)
599 sub_magnitudes (y, x, z, p);
600 Z[0] = -Y[0];
602 else
603 Z[0] = 0;
607 #ifndef NO__MUL
608 /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
609 and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
610 digits. In case P > 3 the error is bounded by 1.001 ULP. */
611 void
612 SECTION
613 __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
615 long i, j, k, ip, ip2;
616 long p2 = p;
617 mantissa_store_t zk;
618 const mp_no *a;
619 mantissa_store_t *diag;
621 /* Is z=0? */
622 if (__glibc_unlikely (X[0] * Y[0] == 0))
624 Z[0] = 0;
625 return;
628 /* We need not iterate through all X's and Y's since it's pointless to
629 multiply zeroes. Here, both are zero... */
630 for (ip2 = p2; ip2 > 0; ip2--)
631 if (X[ip2] != 0 || Y[ip2] != 0)
632 break;
634 a = X[ip2] != 0 ? y : x;
636 /* ... and here, at least one of them is still zero. */
637 for (ip = ip2; ip > 0; ip--)
638 if (a->d[ip] != 0)
639 break;
641 /* The product looks like this for p = 3 (as an example):
644 a1 a2 a3
645 x b1 b2 b3
646 -----------------------------
647 a1*b3 a2*b3 a3*b3
648 a1*b2 a2*b2 a3*b2
649 a1*b1 a2*b1 a3*b1
651 So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
652 for P >= 3. We compute the above digits in two parts; the last P-1
653 digits and then the first P digits. The last P-1 digits are a sum of
654 products of the input digits from P to P-k where K is 0 for the least
655 significant digit and increases as we go towards the left. The product
656 term is of the form X[k]*X[P-k] as can be seen in the above example.
658 The first P digits are also a sum of products with the same product term,
659 except that the sum is from 1 to k. This is also evident from the above
660 example.
662 Another thing that becomes evident is that only the most significant
663 ip+ip2 digits of the result are non-zero, where ip and ip2 are the
664 'internal precision' of the input numbers, i.e. digits after ip and ip2
665 are all 0. */
667 k = (__glibc_unlikely (p2 < 3)) ? p2 + p2 : p2 + 3;
669 while (k > ip + ip2 + 1)
670 Z[k--] = 0;
672 zk = 0;
674 /* Precompute sums of diagonal elements so that we can directly use them
675 later. See the next comment to know we why need them. */
676 diag = alloca (k * sizeof (mantissa_store_t));
677 mantissa_store_t d = 0;
678 for (i = 1; i <= ip; i++)
680 d += X[i] * (mantissa_store_t) Y[i];
681 diag[i] = d;
683 while (i < k)
684 diag[i++] = d;
686 while (k > p2)
688 long lim = k / 2;
690 if (k % 2 == 0)
691 /* We want to add this only once, but since we subtract it in the sum
692 of products above, we add twice. */
693 zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
695 for (i = k - p2, j = p2; i < j; i++, j--)
696 zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
698 zk -= diag[k - 1];
700 DIV_RADIX (zk, Z[k]);
701 k--;
704 /* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
705 goes from 1 -> k - 1 and j goes the same range in reverse. To reduce the
706 number of multiplications, we halve the range and if k is an even number,
707 add the diagonal element X[k/2]Y[k/2]. Through the half range, we compute
708 X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].
710 This reduction tells us that we're summing two things, the first term
711 through the half range and the negative of the sum of the product of all
712 terms of X and Y in the full range. i.e.
714 SUM(X[i] * Y[i]) for k terms. This is precalculated above for each k in
715 a single loop so that it completes in O(n) time and can hence be directly
716 used in the loop below. */
717 while (k > 1)
719 long lim = k / 2;
721 if (k % 2 == 0)
722 /* We want to add this only once, but since we subtract it in the sum
723 of products above, we add twice. */
724 zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
726 for (i = 1, j = k - 1; i < j; i++, j--)
727 zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
729 zk -= diag[k - 1];
731 DIV_RADIX (zk, Z[k]);
732 k--;
734 Z[k] = zk;
736 /* Get the exponent sum into an intermediate variable. This is a subtle
737 optimization, where given enough registers, all operations on the exponent
738 happen in registers and the result is written out only once into EZ. */
739 int e = EX + EY;
741 /* Is there a carry beyond the most significant digit? */
742 if (__glibc_unlikely (Z[1] == 0))
744 for (i = 1; i <= p2; i++)
745 Z[i] = Z[i + 1];
746 e--;
749 EZ = e;
750 Z[0] = X[0] * Y[0];
752 #endif
754 #ifndef NO__SQR
755 /* Square *X and store result in *Y. X and Y may not overlap. For P in
756 [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the
757 error is bounded by 1.001 ULP. This is a faster special case of
758 multiplication. */
759 void
760 SECTION
761 __sqr (const mp_no *x, mp_no *y, int p)
763 long i, j, k, ip;
764 mantissa_store_t yk;
766 /* Is z=0? */
767 if (__glibc_unlikely (X[0] == 0))
769 Y[0] = 0;
770 return;
773 /* We need not iterate through all X's since it's pointless to
774 multiply zeroes. */
775 for (ip = p; ip > 0; ip--)
776 if (X[ip] != 0)
777 break;
779 k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
781 while (k > 2 * ip + 1)
782 Y[k--] = 0;
784 yk = 0;
786 while (k > p)
788 mantissa_store_t yk2 = 0;
789 long lim = k / 2;
791 if (k % 2 == 0)
792 yk += X[lim] * (mantissa_store_t) X[lim];
794 /* In __mul, this loop (and the one within the next while loop) run
795 between a range to calculate the mantissa as follows:
797 Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
798 + X[n] * Y[k]
800 For X == Y, we can get away with summing halfway and doubling the
801 result. For cases where the range size is even, the mid-point needs
802 to be added separately (above). */
803 for (i = k - p, j = p; i < j; i++, j--)
804 yk2 += X[i] * (mantissa_store_t) X[j];
806 yk += 2 * yk2;
808 DIV_RADIX (yk, Y[k]);
809 k--;
812 while (k > 1)
814 mantissa_store_t yk2 = 0;
815 long lim = k / 2;
817 if (k % 2 == 0)
818 yk += X[lim] * (mantissa_store_t) X[lim];
820 /* Likewise for this loop. */
821 for (i = 1, j = k - 1; i < j; i++, j--)
822 yk2 += X[i] * (mantissa_store_t) X[j];
824 yk += 2 * yk2;
826 DIV_RADIX (yk, Y[k]);
827 k--;
829 Y[k] = yk;
831 /* Squares are always positive. */
832 Y[0] = 1;
834 /* Get the exponent sum into an intermediate variable. This is a subtle
835 optimization, where given enough registers, all operations on the exponent
836 happen in registers and the result is written out only once into EZ. */
837 int e = EX * 2;
839 /* Is there a carry beyond the most significant digit? */
840 if (__glibc_unlikely (Y[1] == 0))
842 for (i = 1; i <= p; i++)
843 Y[i] = Y[i + 1];
844 e--;
847 EY = e;
849 #endif
851 /* Invert *X and store in *Y. Relative error bound:
852 - For P = 2: 1.001 * R ^ (1 - P)
853 - For P = 3: 1.063 * R ^ (1 - P)
854 - For P > 3: 2.001 * R ^ (1 - P)
856 *X = 0 is not permissible. */
857 static void
858 SECTION
859 __inv (const mp_no *x, mp_no *y, int p)
861 long i;
862 double t;
863 mp_no z, w;
864 static const int np1[] =
865 { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
866 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
869 __cpy (x, &z, p);
870 z.e = 0;
871 __mp_dbl (&z, &t, p);
872 t = 1 / t;
873 __dbl_mp (t, y, p);
874 EY -= EX;
876 for (i = 0; i < np1[p]; i++)
878 __cpy (y, &w, p);
879 __mul (x, &w, y, p);
880 __sub (&__mptwo, y, &z, p);
881 __mul (&w, &z, y, p);
885 /* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z
886 or Y and Z. Relative error bound:
887 - For P = 2: 2.001 * R ^ (1 - P)
888 - For P = 3: 2.063 * R ^ (1 - P)
889 - For P > 3: 3.001 * R ^ (1 - P)
891 *X = 0 is not permissible. */
892 void
893 SECTION
894 __dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
896 mp_no w;
898 if (X[0] == 0)
899 Z[0] = 0;
900 else
902 __inv (y, &w, p);
903 __mul (x, &w, z, p);