2.9
[glibc/nacl-glibc.git] / sysdeps / powerpc / powerpc32 / power4 / fpu / mpa.c
blob4a232e27bfb1720508036c58d7586ac58766eb2c
2 /*
3 * IBM Accurate Mathematical Library
4 * written by International Business Machines Corp.
5 * Copyright (C) 2001, 2006 Free Software Foundation
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 /************************************************************************/
22 /* MODULE_NAME: mpa.c */
23 /* */
24 /* FUNCTIONS: */
25 /* mcr */
26 /* acr */
27 /* cr */
28 /* cpy */
29 /* cpymn */
30 /* norm */
31 /* denorm */
32 /* mp_dbl */
33 /* dbl_mp */
34 /* add_magnitudes */
35 /* sub_magnitudes */
36 /* add */
37 /* sub */
38 /* mul */
39 /* inv */
40 /* dvd */
41 /* */
42 /* Arithmetic functions for multiple precision numbers. */
43 /* Relative errors are bounded */
44 /************************************************************************/
47 #include "endian.h"
48 #include "mpa.h"
49 #include "mpa2.h"
50 #include <sys/param.h> /* For MIN() */
51 /* mcr() compares the sizes of the mantissas of two multiple precision */
52 /* numbers. Mantissas are compared regardless of the signs of the */
53 /* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also */
54 /* disregarded. */
55 static int mcr(const mp_no *x, const mp_no *y, int p) {
56 long i;
57 long p2 = p;
58 for (i=1; i<=p2; i++) {
59 if (X[i] == Y[i]) continue;
60 else if (X[i] > Y[i]) return 1;
61 else return -1; }
62 return 0;
67 /* acr() compares the absolute values of two multiple precision numbers */
68 int __acr(const mp_no *x, const mp_no *y, int p) {
69 long i;
71 if (X[0] == ZERO) {
72 if (Y[0] == ZERO) i= 0;
73 else i=-1;
75 else if (Y[0] == ZERO) i= 1;
76 else {
77 if (EX > EY) i= 1;
78 else if (EX < EY) i=-1;
79 else i= mcr(x,y,p);
82 return i;
86 /* cr90 compares the values of two multiple precision numbers */
87 int __cr(const mp_no *x, const mp_no *y, int p) {
88 int i;
90 if (X[0] > Y[0]) i= 1;
91 else if (X[0] < Y[0]) i=-1;
92 else if (X[0] < ZERO ) i= __acr(y,x,p);
93 else i= __acr(x,y,p);
95 return i;
99 /* Copy a multiple precision number. Set *y=*x. x=y is permissible. */
100 void __cpy(const mp_no *x, mp_no *y, int p) {
101 long i;
103 EY = EX;
104 for (i=0; i <= p; i++) Y[i] = X[i];
106 return;
110 /* Copy a multiple precision number x of precision m into a */
111 /* multiple precision number y of precision n. In case n>m, */
112 /* the digits of y beyond the m'th are set to zero. In case */
113 /* n<m, the digits of x beyond the n'th are ignored. */
114 /* x=y is permissible. */
116 void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
118 long i,k;
119 long n2 = n;
120 long m2 = m;
122 EY = EX; k=MIN(m2,n2);
123 for (i=0; i <= k; i++) Y[i] = X[i];
124 for ( ; i <= n2; i++) Y[i] = ZERO;
126 return;
129 /* Convert a multiple precision number *x into a double precision */
130 /* number *y, normalized case (|x| >= 2**(-1022))) */
131 static void norm(const mp_no *x, double *y, int p)
133 #define R radixi.d
134 long i;
135 #if 0
136 int k;
137 #endif
138 double a,c,u,v,z[5];
139 if (p<5) {
140 if (p==1) c = X[1];
141 else if (p==2) c = X[1] + R* X[2];
142 else if (p==3) c = X[1] + R*(X[2] + R* X[3]);
143 else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]);
145 else {
146 for (a=ONE, z[1]=X[1]; z[1] < TWO23; )
147 {a *= TWO; z[1] *= TWO; }
149 for (i=2; i<5; i++) {
150 z[i] = X[i]*a;
151 u = (z[i] + CUTTER)-CUTTER;
152 if (u > z[i]) u -= RADIX;
153 z[i] -= u;
154 z[i-1] += u*RADIXI;
157 u = (z[3] + TWO71) - TWO71;
158 if (u > z[3]) u -= TWO19;
159 v = z[3]-u;
161 if (v == TWO18) {
162 if (z[4] == ZERO) {
163 for (i=5; i <= p; i++) {
164 if (X[i] == ZERO) continue;
165 else {z[3] += ONE; break; }
168 else z[3] += ONE;
171 c = (z[1] + R *(z[2] + R * z[3]))/a;
174 c *= X[0];
176 for (i=1; i<EX; i++) c *= RADIX;
177 for (i=1; i>EX; i--) c *= RADIXI;
179 *y = c;
180 return;
181 #undef R
184 /* Convert a multiple precision number *x into a double precision */
185 /* number *y, denormalized case (|x| < 2**(-1022))) */
186 static void denorm(const mp_no *x, double *y, int p)
188 long i,k;
189 long p2 = p;
190 double c,u,z[5];
191 #if 0
192 double a,v;
193 #endif
195 #define R radixi.d
196 if (EX<-44 || (EX==-44 && X[1]<TWO5))
197 { *y=ZERO; return; }
199 if (p2==1) {
200 if (EX==-42) {z[1]=X[1]+TWO10; z[2]=ZERO; z[3]=ZERO; k=3;}
201 else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=ZERO; k=2;}
202 else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
204 else if (p2==2) {
205 if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; z[3]=ZERO; k=3;}
206 else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=X[2]; k=2;}
207 else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
209 else {
210 if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; k=3;}
211 else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; k=2;}
212 else {z[1]= TWO10; z[2]=ZERO; k=1;}
213 z[3] = X[k];
216 u = (z[3] + TWO57) - TWO57;
217 if (u > z[3]) u -= TWO5;
219 if (u==z[3]) {
220 for (i=k+1; i <= p2; i++) {
221 if (X[i] == ZERO) continue;
222 else {z[3] += ONE; break; }
226 c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10);
228 *y = c*TWOM1032;
229 return;
231 #undef R
234 /* Convert a multiple precision number *x into a double precision number *y. */
235 /* The result is correctly rounded to the nearest/even. *x is left unchanged */
237 void __mp_dbl(const mp_no *x, double *y, int p) {
238 #if 0
239 int i,k;
240 double a,c,u,v,z[5];
241 #endif
243 if (X[0] == ZERO) {*y = ZERO; return; }
245 if (EX> -42) norm(x,y,p);
246 else if (EX==-42 && X[1]>=TWO10) norm(x,y,p);
247 else denorm(x,y,p);
251 /* dbl_mp() converts a double precision number x into a multiple precision */
252 /* number *y. If the precision p is too small the result is truncated. x is */
253 /* left unchanged. */
255 void __dbl_mp(double x, mp_no *y, int p) {
257 long i,n;
258 long p2 = p;
259 double u;
261 /* Sign */
262 if (x == ZERO) {Y[0] = ZERO; return; }
263 else if (x > ZERO) Y[0] = ONE;
264 else {Y[0] = MONE; x=-x; }
266 /* Exponent */
267 for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI;
268 for ( ; x < ONE; EY -= ONE) x *= RADIX;
270 /* Digits */
271 n=MIN(p2,4);
272 for (i=1; i<=n; i++) {
273 u = (x + TWO52) - TWO52;
274 if (u>x) u -= ONE;
275 Y[i] = u; x -= u; x *= RADIX; }
276 for ( ; i<=p2; i++) Y[i] = ZERO;
277 return;
281 /* add_magnitudes() adds the magnitudes of *x & *y assuming that */
282 /* abs(*x) >= abs(*y) > 0. */
283 /* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */
284 /* No guard digit is used. The result equals the exact sum, truncated. */
285 /* *x & *y are left unchanged. */
287 static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
289 long i,j,k;
290 long p2 = p;
292 EZ = EX;
294 i=p2; j=p2+ EY - EX; k=p2+1;
296 if (j<1)
297 {__cpy(x,z,p); return; }
298 else Z[k] = ZERO;
300 for (; j>0; i--,j--) {
301 Z[k] += X[i] + Y[j];
302 if (Z[k] >= RADIX) {
303 Z[k] -= RADIX;
304 Z[--k] = ONE; }
305 else
306 Z[--k] = ZERO;
309 for (; i>0; i--) {
310 Z[k] += X[i];
311 if (Z[k] >= RADIX) {
312 Z[k] -= RADIX;
313 Z[--k] = ONE; }
314 else
315 Z[--k] = ZERO;
318 if (Z[1] == ZERO) {
319 for (i=1; i<=p2; i++) Z[i] = Z[i+1]; }
320 else EZ += ONE;
324 /* sub_magnitudes() subtracts the magnitudes of *x & *y assuming that */
325 /* abs(*x) > abs(*y) > 0. */
326 /* The sign of the difference *z is undefined. x&y may overlap but not x&z */
327 /* or y&z. One guard digit is used. The error is less than one ulp. */
328 /* *x & *y are left unchanged. */
330 static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
332 long i,j,k;
333 long p2 = p;
335 EZ = EX;
337 if (EX == EY) {
338 i=j=k=p2;
339 Z[k] = Z[k+1] = ZERO; }
340 else {
341 j= EX - EY;
342 if (j > p2) {__cpy(x,z,p); return; }
343 else {
344 i=p2; j=p2+1-j; k=p2;
345 if (Y[j] > ZERO) {
346 Z[k+1] = RADIX - Y[j--];
347 Z[k] = MONE; }
348 else {
349 Z[k+1] = ZERO;
350 Z[k] = ZERO; j--;}
354 for (; j>0; i--,j--) {
355 Z[k] += (X[i] - Y[j]);
356 if (Z[k] < ZERO) {
357 Z[k] += RADIX;
358 Z[--k] = MONE; }
359 else
360 Z[--k] = ZERO;
363 for (; i>0; i--) {
364 Z[k] += X[i];
365 if (Z[k] < ZERO) {
366 Z[k] += RADIX;
367 Z[--k] = MONE; }
368 else
369 Z[--k] = ZERO;
372 for (i=1; Z[i] == ZERO; i++) ;
373 EZ = EZ - i + 1;
374 for (k=1; i <= p2+1; )
375 Z[k++] = Z[i++];
376 for (; k <= p2; )
377 Z[k++] = ZERO;
379 return;
383 /* Add two multiple precision numbers. Set *z = *x + *y. x&y may overlap */
384 /* but not x&z or y&z. One guard digit is used. The error is less than */
385 /* one ulp. *x & *y are left unchanged. */
387 void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) {
389 int n;
391 if (X[0] == ZERO) {__cpy(y,z,p); return; }
392 else if (Y[0] == ZERO) {__cpy(x,z,p); return; }
394 if (X[0] == Y[0]) {
395 if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; }
396 else {add_magnitudes(y,x,z,p); Z[0] = Y[0]; }
398 else {
399 if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; }
400 else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; }
401 else Z[0] = ZERO;
403 return;
407 /* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */
408 /* overlap but not x&z or y&z. One guard digit is used. The error is */
409 /* less than one ulp. *x & *y are left unchanged. */
411 void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
413 int n;
415 if (X[0] == ZERO) {__cpy(y,z,p); Z[0] = -Z[0]; return; }
416 else if (Y[0] == ZERO) {__cpy(x,z,p); return; }
418 if (X[0] != Y[0]) {
419 if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; }
420 else {add_magnitudes(y,x,z,p); Z[0] = -Y[0]; }
422 else {
423 if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; }
424 else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; }
425 else Z[0] = ZERO;
427 return;
431 /* Multiply two multiple precision numbers. *z is set to *x * *y. x&y */
432 /* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is */
433 /* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp. */
434 /* *x & *y are left unchanged. */
436 void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
438 long i, i1, i2, j, k, k2;
439 long p2 = p;
440 double u, zk, zk2;
442 /* Is z=0? */
443 if (X[0]*Y[0]==ZERO)
444 { Z[0]=ZERO; return; }
446 /* Multiply, add and carry */
447 k2 = (p2<3) ? p2+p2 : p2+3;
448 zk = Z[k2]=ZERO;
449 for (k=k2; k>1; ) {
450 if (k > p2) {i1=k-p2; i2=p2+1; }
451 else {i1=1; i2=k; }
452 #if 1
453 /* rearange this inner loop to allow the fmadd instructions to be
454 independent and execute in parallel on processors that have
455 dual symetrical FP pipelines. */
456 if (i1 < (i2-1))
458 /* make sure we have at least 2 iterations */
459 if (((i2 - i1) & 1L) == 1L)
461 /* Handle the odd iterations case. */
462 zk2 = x->d[i2-1]*y->d[i1];
464 else
465 zk2 = zero.d;
466 /* Do two multiply/adds per loop iteration, using independent
467 accumulators; zk and zk2. */
468 for (i=i1,j=i2-1; i<i2-1; i+=2,j-=2)
470 zk += x->d[i]*y->d[j];
471 zk2 += x->d[i+1]*y->d[j-1];
473 zk += zk2; /* final sum. */
475 else
477 /* Special case when iterations is 1. */
478 zk += x->d[i1]*y->d[i1];
480 #else
481 /* The orginal code. */
482 for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j];
483 #endif
485 u = (zk + CUTTER)-CUTTER;
486 if (u > zk) u -= RADIX;
487 Z[k] = zk - u;
488 zk = u*RADIXI;
489 --k;
491 Z[k] = zk;
493 /* Is there a carry beyond the most significant digit? */
494 if (Z[1] == ZERO) {
495 for (i=1; i<=p2; i++) Z[i]=Z[i+1];
496 EZ = EX + EY - 1; }
497 else
498 EZ = EX + EY;
500 Z[0] = X[0] * Y[0];
501 return;
505 /* Invert a multiple precision number. Set *y = 1 / *x. */
506 /* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3, */
507 /* 2.001*r**(1-p) for p>3. */
508 /* *x=0 is not permissible. *x is left unchanged. */
510 void __inv(const mp_no *x, mp_no *y, int p) {
511 long i;
512 #if 0
513 int l;
514 #endif
515 double t;
516 mp_no z,w;
517 static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
518 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
519 const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
520 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
521 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
522 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
524 __cpy(x,&z,p); z.e=0; __mp_dbl(&z,&t,p);
525 t=ONE/t; __dbl_mp(t,y,p); EY -= EX;
527 for (i=0; i<np1[p]; i++) {
528 __cpy(y,&w,p);
529 __mul(x,&w,y,p);
530 __sub(&mptwo,y,&z,p);
531 __mul(&w,&z,y,p);
533 return;
537 /* Divide one multiple precision number by another.Set *z = *x / *y. *x & *y */
538 /* are left unchanged. x&y may overlap but not x&z or y&z. */
539 /* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3 */
540 /* and 3.001*r**(1-p) for p>3. *y=0 is not permissible. */
542 void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) {
544 mp_no w;
546 if (X[0] == ZERO) Z[0] = ZERO;
547 else {__inv(y,&w,p); __mul(x,&w,z,p);}
548 return;