Merge commit '7d815089a43a963b49eaddf97e514194ec29805b'
[unleashed.git] / usr / src / lib / libmvec / common / __vsincos.c
blob4db5f5a004a01c6ab563ef72b78be487d58b9a70
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
36 #else
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
39 #endif
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
48 * vsincos.c
50 * Vector sine and cosine function. Just slight modifications to vcos.c.
53 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
55 static const double
56 half[2] = { 0.5, -0.5 },
57 one = 1.0,
58 invpio2 = 0.636619772367581343075535, /* 53 bits of pi/2 */
59 pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */
60 pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
61 pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
62 pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
63 pp1 = -1.666666666605760465276263943134982554676e-0001,
64 pp2 = 8.333261209690963126718376566146180944442e-0003,
65 qq1 = -4.999999999977710986407023955908711557870e-0001,
66 qq2 = 4.166654863857219350645055881018842089580e-0002,
67 poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
68 -4.999999999999931701464060878888294524481e-0001 },
69 poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
70 4.166666666394861917535640593963708222319e-0002 },
71 poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
72 -1.388888552656142867832756687736851681462e-0003 },
73 poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
74 2.478519423681460796618128289454530524759e-0005 };
76 /* Don't __ the following; acomp will handle it */
77 extern double fabs(double);
78 extern void __vlibm_vsincos_big(int, double *, int, double *, int, double *, int, int);
81 * y[i*stridey] := sin( x[i*stridex] ), for i = 0..n.
82 * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n.
84 * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06.
85 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
87 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
89 void
90 __vsincos(int n, double * restrict x, int stridex,
91 double * restrict y, int stridey,
92 double * restrict c, int stridec)
94 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
95 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
96 double x0, x1, x2,
97 *py0, *py1, *py2,
98 *pc0, *pc1, *pc2,
99 *xsave, *ysave, *csave;
100 unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2;
101 int i, biguns, nsave, sxsave, sysave, scsave;
102 volatile int v __unused;
103 nsave = n;
104 xsave = x;
105 sxsave = stridex;
106 ysave = y;
107 sysave = stridey;
108 csave = c;
109 scsave = stridec;
110 biguns = 0;
112 do /* MAIN LOOP */
115 /* Gotos here so _break_ exits MAIN LOOP. */
116 LOOP0: /* Find first arg in right range. */
117 xsb0 = HI(x); /* get most significant word */
118 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
119 if (hx0 > 0x3fe921fb) {
120 /* Too big: arg reduction needed, so leave for second part */
121 biguns = 1;
122 x += stridex;
123 y += stridey;
124 c += stridec;
125 i = 0;
126 if (--n <= 0)
127 break;
128 goto LOOP0;
130 if (hx0 < 0x3e400000) {
131 /* Too small. cos x ~ 1, sin x ~ x. */
132 v = *x;
133 *c = 1.0;
134 *y = *x;
135 x += stridex;
136 y += stridey;
137 c += stridec;
138 i = 0;
139 if (--n <= 0)
140 break;
141 goto LOOP0;
143 x0 = *x;
144 py0 = y;
145 pc0 = c;
146 x += stridex;
147 y += stridey;
148 c += stridec;
149 i = 1;
150 if (--n <= 0)
151 break;
153 LOOP1: /* Get second arg, same as above. */
154 xsb1 = HI(x);
155 hx1 = xsb1 & ~0x80000000;
156 if (hx1 > 0x3fe921fb)
158 biguns = 1;
159 x += stridex;
160 y += stridey;
161 c += stridec;
162 i = 1;
163 if (--n <= 0)
164 break;
165 goto LOOP1;
167 if (hx1 < 0x3e400000)
169 v = *x;
170 *c = 1.0;
171 *y = *x;
172 x += stridex;
173 y += stridey;
174 c += stridec;
175 i = 1;
176 if (--n <= 0)
177 break;
178 goto LOOP1;
180 x1 = *x;
181 py1 = y;
182 pc1 = c;
183 x += stridex;
184 y += stridey;
185 c += stridec;
186 i = 2;
187 if (--n <= 0)
188 break;
190 LOOP2: /* Get third arg, same as above. */
191 xsb2 = HI(x);
192 hx2 = xsb2 & ~0x80000000;
193 if (hx2 > 0x3fe921fb)
195 biguns = 1;
196 x += stridex;
197 y += stridey;
198 c += stridec;
199 i = 2;
200 if (--n <= 0)
201 break;
202 goto LOOP2;
204 if (hx2 < 0x3e400000)
206 v = *x;
207 *c = 1.0;
208 *y = *x;
209 x += stridex;
210 y += stridey;
211 c += stridec;
212 i = 2;
213 if (--n <= 0)
214 break;
215 goto LOOP2;
217 x2 = *x;
218 py2 = y;
219 pc2 = c;
222 * 0x3fc40000 = 5/32 ~ 0.15625
223 * Get msb after subtraction. Will be 1 only if
224 * hx0 - 5/32 is negative.
226 i = (hx2 - 0x3fc40000) >> 31;
227 i |= ((hx1 - 0x3fc40000) >> 30) & 2;
228 i |= ((hx0 - 0x3fc40000) >> 29) & 4;
229 switch (i)
231 double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
232 double w0, w1, w2;
233 double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
234 double z0, z1, z2;
235 unsigned j0, j1, j2;
237 case 0: /* All are > 5/32 */
238 j0 = (xsb0 + 0x4000) & 0xffff8000;
239 j1 = (xsb1 + 0x4000) & 0xffff8000;
240 j2 = (xsb2 + 0x4000) & 0xffff8000;
242 HI(&t0) = j0;
243 HI(&t1) = j1;
244 HI(&t2) = j2;
245 LO(&t0) = 0;
246 LO(&t1) = 0;
247 LO(&t2) = 0;
249 x0 -= t0;
250 x1 -= t1;
251 x2 -= t2;
253 z0 = x0 * x0;
254 z1 = x1 * x1;
255 z2 = x2 * x2;
257 t0 = z0 * (qq1 + z0 * qq2);
258 t1 = z1 * (qq1 + z1 * qq2);
259 t2 = z2 * (qq1 + z2 * qq2);
261 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
262 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
263 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
265 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
266 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
267 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
269 xsb0 = (xsb0 >> 30) & 2;
270 xsb1 = (xsb1 >> 30) & 2;
271 xsb2 = (xsb2 >> 30) & 2;
273 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
274 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
275 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
277 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
278 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
279 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
280 /* cos_lo(t) */
281 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
282 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
283 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2);
285 *pc0 = a2_0 + t2_0;
286 *pc1 = a2_1 + t2_1;
287 *pc2 = a2_2 + t2_2;
289 t1_0 = a2_0*w0 + a1_0*t0;
290 t1_1 = a2_1*w1 + a1_1*t1;
291 t1_2 = a2_2*w2 + a1_2*t2;
293 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
294 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
295 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
297 *py0 = a1_0 + t1_0;
298 *py1 = a1_1 + t1_1;
299 *py2 = a1_2 + t1_2;
301 break;
303 case 1:
304 j0 = (xsb0 + 0x4000) & 0xffff8000;
305 j1 = (xsb1 + 0x4000) & 0xffff8000;
306 HI(&t0) = j0;
307 HI(&t1) = j1;
308 LO(&t0) = 0;
309 LO(&t1) = 0;
310 x0 -= t0;
311 x1 -= t1;
312 z0 = x0 * x0;
313 z1 = x1 * x1;
314 z2 = x2 * x2;
315 t0 = z0 * (qq1 + z0 * qq2);
316 t1 = z1 * (qq1 + z1 * qq2);
317 t2 = z2 * (poly3[1] + z2 * poly4[1]);
318 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
319 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
320 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
321 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
322 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
323 xsb0 = (xsb0 >> 30) & 2;
324 xsb1 = (xsb1 >> 30) & 2;
326 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
327 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
329 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
330 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
331 /* cos_lo(t) */
332 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
333 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
335 *pc0 = a2_0 + t2_0;
336 *pc1 = a2_1 + t2_1;
337 *pc2 = one + t2;
339 t1_0 = a2_0*w0 + a1_0*t0;
340 t1_1 = a2_1*w1 + a1_1*t1;
341 t2 = z2 * (poly3[0] + z2 * poly4[0]);
343 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
344 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
345 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
347 *py0 = a1_0 + t1_0;
348 *py1 = a1_1 + t1_1;
349 t2 = x2 + x2 * t2;
350 *py2 = t2;
352 break;
354 case 2:
355 j0 = (xsb0 + 0x4000) & 0xffff8000;
356 j2 = (xsb2 + 0x4000) & 0xffff8000;
357 HI(&t0) = j0;
358 HI(&t2) = j2;
359 LO(&t0) = 0;
360 LO(&t2) = 0;
361 x0 -= t0;
362 x2 -= t2;
363 z0 = x0 * x0;
364 z1 = x1 * x1;
365 z2 = x2 * x2;
366 t0 = z0 * (qq1 + z0 * qq2);
367 t1 = z1 * (poly3[1] + z1 * poly4[1]);
368 t2 = z2 * (qq1 + z2 * qq2);
369 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
370 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
371 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
372 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
373 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
374 xsb0 = (xsb0 >> 30) & 2;
375 xsb2 = (xsb2 >> 30) & 2;
377 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
378 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
380 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
381 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
382 /* cos_lo(t) */
383 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
384 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2);
386 *pc0 = a2_0 + t2_0;
387 *pc1 = one + t1;
388 *pc2 = a2_2 + t2_2;
390 t1_0 = a2_0*w0 + a1_0*t0;
391 t1 = z1 * (poly3[0] + z1 * poly4[0]);
392 t1_2 = a2_2*w2 + a1_2*t2;
394 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
395 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
396 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
398 *py0 = a1_0 + t1_0;
399 t1 = x1 + x1 * t1;
400 *py1 = t1;
401 *py2 = a1_2 + t1_2;
403 break;
405 case 3:
406 j0 = (xsb0 + 0x4000) & 0xffff8000;
407 HI(&t0) = j0;
408 LO(&t0) = 0;
409 x0 -= t0;
410 z0 = x0 * x0;
411 z1 = x1 * x1;
412 z2 = x2 * x2;
413 t0 = z0 * (qq1 + z0 * qq2);
414 t1 = z1 * (poly3[1] + z1 * poly4[1]);
415 t2 = z2 * (poly3[1] + z2 * poly4[1]);
416 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
417 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
418 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
419 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
420 xsb0 = (xsb0 >> 30) & 2;
421 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
423 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
425 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
427 *pc0 = a2_0 + t2_0;
428 *pc1 = one + t1;
429 *pc2 = one + t2;
431 t1_0 = a2_0*w0 + a1_0*t0;
432 t1 = z1 * (poly3[0] + z1 * poly4[0]);
433 t2 = z2 * (poly3[0] + z2 * poly4[0]);
435 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
436 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
437 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
439 *py0 = a1_0 + t1_0;
440 t1 = x1 + x1 * t1;
441 *py1 = t1;
442 t2 = x2 + x2 * t2;
443 *py2 = t2;
445 break;
447 case 4:
448 j1 = (xsb1 + 0x4000) & 0xffff8000;
449 j2 = (xsb2 + 0x4000) & 0xffff8000;
450 HI(&t1) = j1;
451 HI(&t2) = j2;
452 LO(&t1) = 0;
453 LO(&t2) = 0;
454 x1 -= t1;
455 x2 -= t2;
456 z0 = x0 * x0;
457 z1 = x1 * x1;
458 z2 = x2 * x2;
459 t0 = z0 * (poly3[1] + z0 * poly4[1]);
460 t1 = z1 * (qq1 + z1 * qq2);
461 t2 = z2 * (qq1 + z2 * qq2);
462 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
463 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
464 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
465 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
466 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
467 xsb1 = (xsb1 >> 30) & 2;
468 xsb2 = (xsb2 >> 30) & 2;
470 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
471 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
473 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
474 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
475 /* cos_lo(t) */
476 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
477 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2);
479 *pc0 = one + t0;
480 *pc1 = a2_1 + t2_1;
481 *pc2 = a2_2 + t2_2;
483 t0 = z0 * (poly3[0] + z0 * poly4[0]);
484 t1_1 = a2_1*w1 + a1_1*t1;
485 t1_2 = a2_2*w2 + a1_2*t2;
487 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
488 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
489 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
491 t0 = x0 + x0 * t0;
492 *py0 = t0;
493 *py1 = a1_1 + t1_1;
494 *py2 = a1_2 + t1_2;
496 break;
498 case 5:
499 j1 = (xsb1 + 0x4000) & 0xffff8000;
500 HI(&t1) = j1;
501 LO(&t1) = 0;
502 x1 -= t1;
503 z0 = x0 * x0;
504 z1 = x1 * x1;
505 z2 = x2 * x2;
506 t0 = z0 * (poly3[1] + z0 * poly4[1]);
507 t1 = z1 * (qq1 + z1 * qq2);
508 t2 = z2 * (poly3[1] + z2 * poly4[1]);
509 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
510 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
511 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
512 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
513 xsb1 = (xsb1 >> 30) & 2;
515 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
517 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
519 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
521 *pc0 = one + t0;
522 *pc1 = a2_1 + t2_1;
523 *pc2 = one + t2;
525 t0 = z0 * (poly3[0] + z0 * poly4[0]);
526 t1_1 = a2_1*w1 + a1_1*t1;
527 t2 = z2 * (poly3[0] + z2 * poly4[0]);
529 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
530 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
531 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
533 t0 = x0 + x0 * t0;
534 *py0 = t0;
535 *py1 = a1_1 + t1_1;
536 t2 = x2 + x2 * t2;
537 *py2 = t2;
539 break;
541 case 6:
542 j2 = (xsb2 + 0x4000) & 0xffff8000;
543 HI(&t2) = j2;
544 LO(&t2) = 0;
545 x2 -= t2;
546 z0 = x0 * x0;
547 z1 = x1 * x1;
548 z2 = x2 * x2;
549 t0 = z0 * (poly3[1] + z0 * poly4[1]);
550 t1 = z1 * (poly3[1] + z1 * poly4[1]);
551 t2 = z2 * (qq1 + z2 * qq2);
552 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
553 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
554 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
555 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
556 xsb2 = (xsb2 >> 30) & 2;
557 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
559 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
561 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2);
563 *pc0 = one + t0;
564 *pc1 = one + t1;
565 *pc2 = a2_2 + t2_2;
567 t0 = z0 * (poly3[0] + z0 * poly4[0]);
568 t1 = z1 * (poly3[0] + z1 * poly4[0]);
569 t1_2 = a2_2*w2 + a1_2*t2;
571 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
572 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
573 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
575 t0 = x0 + x0 * t0;
576 *py0 = t0;
577 t1 = x1 + x1 * t1;
578 *py1 = t1;
579 *py2 = a1_2 + t1_2;
581 break;
583 case 7: /* All are < 5/32 */
584 z0 = x0 * x0;
585 z1 = x1 * x1;
586 z2 = x2 * x2;
587 t0 = z0 * (poly3[1] + z0 * poly4[1]);
588 t1 = z1 * (poly3[1] + z1 * poly4[1]);
589 t2 = z2 * (poly3[1] + z2 * poly4[1]);
590 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
591 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
592 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
593 *pc0 = one + t0;
594 *pc1 = one + t1;
595 *pc2 = one + t2;
596 t0 = z0 * (poly3[0] + z0 * poly4[0]);
597 t1 = z1 * (poly3[0] + z1 * poly4[0]);
598 t2 = z2 * (poly3[0] + z2 * poly4[0]);
599 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
600 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
601 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
602 t0 = x0 + x0 * t0;
603 t1 = x1 + x1 * t1;
604 t2 = x2 + x2 * t2;
605 *py0 = t0;
606 *py1 = t1;
607 *py2 = t2;
608 break;
611 x += stridex;
612 y += stridey;
613 c += stridec;
614 i = 0;
615 } while (--n > 0); /* END MAIN LOOP */
618 * CLEAN UP last 0, 1, or 2 elts.
620 if (i > 0) /* Clean up elts at tail. i < 3. */
622 double a1_0, a1_1, a2_0, a2_1;
623 double w0, w1;
624 double t0, t1, t1_0, t1_1, t2_0, t2_1;
625 double z0, z1;
626 unsigned j0, j1;
628 if (i > 1)
630 if (hx1 < 0x3fc40000)
632 z1 = x1 * x1;
633 t1 = z1 * (poly3[1] + z1 * poly4[1]);
634 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
635 t1 = one + t1;
636 *pc1 = t1;
637 t1 = z1 * (poly3[0] + z1 * poly4[0]);
638 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
639 t1 = x1 + x1 * t1;
640 *py1 = t1;
642 else
644 j1 = (xsb1 + 0x4000) & 0xffff8000;
645 HI(&t1) = j1;
646 LO(&t1) = 0;
647 x1 -= t1;
648 z1 = x1 * x1;
649 t1 = z1 * (qq1 + z1 * qq2);
650 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
651 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
652 xsb1 = (xsb1 >> 30) & 2;
653 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
654 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
655 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
656 *pc1 = a2_1 + t2_1;
657 t1_1 = a2_1*w1 + a1_1*t1;
658 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
659 *py1 = a1_1 + t1_1;
662 if (hx0 < 0x3fc40000)
664 z0 = x0 * x0;
665 t0 = z0 * (poly3[1] + z0 * poly4[1]);
666 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
667 t0 = one + t0;
668 *pc0 = t0;
669 t0 = z0 * (poly3[0] + z0 * poly4[0]);
670 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
671 t0 = x0 + x0 * t0;
672 *py0 = t0;
674 else
676 j0 = (xsb0 + 0x4000) & 0xffff8000;
677 HI(&t0) = j0;
678 LO(&t0) = 0;
679 x0 -= t0;
680 z0 = x0 * x0;
681 t0 = z0 * (qq1 + z0 * qq2);
682 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
683 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
684 xsb0 = (xsb0 >> 30) & 2;
685 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
686 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
687 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
688 *pc0 = a2_0 + t2_0;
689 t1_0 = a2_0*w0 + a1_0*t0;
690 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
691 *py0 = a1_0 + t1_0;
693 } /* END CLEAN UP */
695 if (!biguns)
696 return;
699 * Take care of BIGUNS.
701 n = nsave;
702 x = xsave;
703 stridex = sxsave;
704 y = ysave;
705 stridey = sysave;
706 c = csave;
707 stridec = scsave;
708 biguns = 0;
710 x0_or_one[1] = 1.0;
711 x1_or_one[1] = 1.0;
712 x2_or_one[1] = 1.0;
713 x0_or_one[3] = -1.0;
714 x1_or_one[3] = -1.0;
715 x2_or_one[3] = -1.0;
716 y0_or_zero[1] = 0.0;
717 y1_or_zero[1] = 0.0;
718 y2_or_zero[1] = 0.0;
719 y0_or_zero[3] = 0.0;
720 y1_or_zero[3] = 0.0;
721 y2_or_zero[3] = 0.0;
725 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
726 unsigned hx;
727 int n0, n1, n2;
730 * Find 3 more to work on: Not already done, not too big.
732 loop0:
733 hx = HI(x);
734 xsb0 = hx >> 31;
735 hx &= ~0x80000000;
736 if (hx <= 0x3fe921fb) /* Done above. */
738 x += stridex;
739 y += stridey;
740 c += stridec;
741 i = 0;
742 if (--n <= 0)
743 break;
744 goto loop0;
746 if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */
748 if (hx >= 0x7ff00000) /* Inf or NaN */
750 x0 = *x;
751 *y = x0 - x0;
752 *c = x0 - x0;
754 else {
755 biguns = 1;
757 x += stridex;
758 y += stridey;
759 c += stridec;
760 i = 0;
761 if (--n <= 0)
762 break;
763 goto loop0;
765 x0 = *x;
766 py0 = y;
767 pc0 = c;
768 x += stridex;
769 y += stridey;
770 c += stridec;
771 i = 1;
772 if (--n <= 0)
773 break;
775 loop1:
776 hx = HI(x);
777 xsb1 = hx >> 31;
778 hx &= ~0x80000000;
779 if (hx <= 0x3fe921fb)
781 x += stridex;
782 y += stridey;
783 c += stridec;
784 i = 1;
785 if (--n <= 0)
786 break;
787 goto loop1;
789 if (hx > 0x413921fb)
791 if (hx >= 0x7ff00000)
793 x1 = *x;
794 *y = x1 - x1;
795 *c = x1 - x1;
797 else {
798 biguns = 1;
800 x += stridex;
801 y += stridey;
802 c += stridec;
803 i = 1;
804 if (--n <= 0)
805 break;
806 goto loop1;
808 x1 = *x;
809 py1 = y;
810 pc1 = c;
811 x += stridex;
812 y += stridey;
813 c += stridec;
814 i = 2;
815 if (--n <= 0)
816 break;
818 loop2:
819 hx = HI(x);
820 xsb2 = hx >> 31;
821 hx &= ~0x80000000;
822 if (hx <= 0x3fe921fb)
824 x += stridex;
825 y += stridey;
826 c += stridec;
827 i = 2;
828 if (--n <= 0)
829 break;
830 goto loop2;
832 if (hx > 0x413921fb)
834 if (hx >= 0x7ff00000)
836 x2 = *x;
837 *y = x2 - x2;
838 *c = x2 - x2;
840 else {
841 biguns = 1;
843 x += stridex;
844 y += stridey;
845 c += stridec;
846 i = 2;
847 if (--n <= 0)
848 break;
849 goto loop2;
851 x2 = *x;
852 py2 = y;
853 pc2 = c;
855 n0 = (int) (x0 * invpio2 + half[xsb0]);
856 n1 = (int) (x1 * invpio2 + half[xsb1]);
857 n2 = (int) (x2 * invpio2 + half[xsb2]);
858 fn0 = (double) n0;
859 fn1 = (double) n1;
860 fn2 = (double) n2;
861 n0 &= 3;
862 n1 &= 3;
863 n2 &= 3;
864 a0 = x0 - fn0 * pio2_1;
865 a1 = x1 - fn1 * pio2_1;
866 a2 = x2 - fn2 * pio2_1;
867 w0 = fn0 * pio2_2;
868 w1 = fn1 * pio2_2;
869 w2 = fn2 * pio2_2;
870 x0 = a0 - w0;
871 x1 = a1 - w1;
872 x2 = a2 - w2;
873 y0 = (a0 - x0) - w0;
874 y1 = (a1 - x1) - w1;
875 y2 = (a2 - x2) - w2;
876 a0 = x0;
877 a1 = x1;
878 a2 = x2;
879 w0 = fn0 * pio2_3 - y0;
880 w1 = fn1 * pio2_3 - y1;
881 w2 = fn2 * pio2_3 - y2;
882 x0 = a0 - w0;
883 x1 = a1 - w1;
884 x2 = a2 - w2;
885 y0 = (a0 - x0) - w0;
886 y1 = (a1 - x1) - w1;
887 y2 = (a2 - x2) - w2;
888 a0 = x0;
889 a1 = x1;
890 a2 = x2;
891 w0 = fn0 * pio2_3t - y0;
892 w1 = fn1 * pio2_3t - y1;
893 w2 = fn2 * pio2_3t - y2;
894 x0 = a0 - w0;
895 x1 = a1 - w1;
896 x2 = a2 - w2;
897 y0 = (a0 - x0) - w0;
898 y1 = (a1 - x1) - w1;
899 y2 = (a2 - x2) - w2;
900 xsb2 = HI(&x2);
901 i = ((xsb2 & ~0x80000000) - 0x3fc40000) >> 31;
902 xsb1 = HI(&x1);
903 i |= (((xsb1 & ~0x80000000) - 0x3fc40000) >> 30) & 2;
904 xsb0 = HI(&x0);
905 i |= (((xsb0 & ~0x80000000) - 0x3fc40000) >> 29) & 4;
906 switch (i)
908 double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
909 double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
910 double z0, z1, z2;
911 unsigned j0, j1, j2;
913 case 0:
914 j0 = (xsb0 + 0x4000) & 0xffff8000;
915 j1 = (xsb1 + 0x4000) & 0xffff8000;
916 j2 = (xsb2 + 0x4000) & 0xffff8000;
917 HI(&t0) = j0;
918 HI(&t1) = j1;
919 HI(&t2) = j2;
920 LO(&t0) = 0;
921 LO(&t1) = 0;
922 LO(&t2) = 0;
923 x0 = (x0 - t0) + y0;
924 x1 = (x1 - t1) + y1;
925 x2 = (x2 - t2) + y2;
926 z0 = x0 * x0;
927 z1 = x1 * x1;
928 z2 = x2 * x2;
929 t0 = z0 * (qq1 + z0 * qq2);
930 t1 = z1 * (qq1 + z1 * qq2);
931 t2 = z2 * (qq1 + z2 * qq2);
932 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
933 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
934 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
935 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
936 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
937 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
938 xsb0 = (xsb0 >> 30) & 2;
939 xsb1 = (xsb1 >> 30) & 2;
940 xsb2 = (xsb2 >> 30) & 2;
941 n0 ^= (xsb0 & ~(n0 << 1));
942 n1 ^= (xsb1 & ~(n1 << 1));
943 n2 ^= (xsb2 & ~(n2 << 1));
944 xsb0 |= 1;
945 xsb1 |= 1;
946 xsb2 |= 1;
948 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
949 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
950 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
952 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
953 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
954 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
956 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
957 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
958 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2);
960 w0 *= a2_0;
961 w1 *= a2_1;
962 w2 *= a2_2;
964 *pc0 = a2_0 + t2_0;
965 *pc1 = a2_1 + t2_1;
966 *pc2 = a2_2 + t2_2;
968 t1_0 = w0 + a1_0*t0;
969 t1_1 = w1 + a1_1*t1;
970 t1_2 = w2 + a1_2*t2;
972 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
973 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
974 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
976 *py0 = a1_0 + t1_0;
977 *py1 = a1_1 + t1_1;
978 *py2 = a1_2 + t1_2;
980 break;
982 case 1:
983 j0 = (xsb0 + 0x4000) & 0xffff8000;
984 j1 = (xsb1 + 0x4000) & 0xffff8000;
985 j2 = n2 & 1;
986 HI(&t0) = j0;
987 HI(&t1) = j1;
988 LO(&t0) = 0;
989 LO(&t1) = 0;
990 x2_or_one[0] = x2;
991 x2_or_one[2] = -x2;
992 x0 = (x0 - t0) + y0;
993 x1 = (x1 - t1) + y1;
994 y2_or_zero[0] = y2;
995 y2_or_zero[2] = -y2;
996 z0 = x0 * x0;
997 z1 = x1 * x1;
998 z2 = x2 * x2;
999 t0 = z0 * (qq1 + z0 * qq2);
1000 t1 = z1 * (qq1 + z1 * qq2);
1001 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1002 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1003 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1004 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1005 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1006 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1007 xsb0 = (xsb0 >> 30) & 2;
1008 xsb1 = (xsb1 >> 30) & 2;
1009 n0 ^= (xsb0 & ~(n0 << 1));
1010 n1 ^= (xsb1 & ~(n1 << 1));
1011 xsb0 |= 1;
1012 xsb1 |= 1;
1013 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1014 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1016 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1017 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1019 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
1020 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
1021 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1023 *pc0 = a2_0 + t2_0;
1024 *pc1 = a2_1 + t2_1;
1025 *py2 = t2;
1027 n2 = (n2 + 1) & 3;
1028 j2 = (j2 + 1) & 1;
1029 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1031 t1_0 = a2_0*w0 + a1_0*t0;
1032 t1_1 = a2_1*w1 + a1_1*t1;
1033 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1035 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1036 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1037 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1039 *py0 = a1_0 + t1_0;
1040 *py1 = a1_1 + t1_1;
1041 *pc2 = t2;
1043 break;
1045 case 2:
1046 j0 = (xsb0 + 0x4000) & 0xffff8000;
1047 j1 = n1 & 1;
1048 j2 = (xsb2 + 0x4000) & 0xffff8000;
1049 HI(&t0) = j0;
1050 HI(&t2) = j2;
1051 LO(&t0) = 0;
1052 LO(&t2) = 0;
1053 x1_or_one[0] = x1;
1054 x1_or_one[2] = -x1;
1055 x0 = (x0 - t0) + y0;
1056 y1_or_zero[0] = y1;
1057 y1_or_zero[2] = -y1;
1058 x2 = (x2 - t2) + y2;
1059 z0 = x0 * x0;
1060 z1 = x1 * x1;
1061 z2 = x2 * x2;
1062 t0 = z0 * (qq1 + z0 * qq2);
1063 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1064 t2 = z2 * (qq1 + z2 * qq2);
1065 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1066 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1067 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
1068 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1069 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1070 xsb0 = (xsb0 >> 30) & 2;
1071 xsb2 = (xsb2 >> 30) & 2;
1072 n0 ^= (xsb0 & ~(n0 << 1));
1073 n2 ^= (xsb2 & ~(n2 << 1));
1074 xsb0 |= 1;
1075 xsb2 |= 1;
1077 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1078 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1080 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1081 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1083 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
1084 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1085 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2);
1087 *pc0 = a2_0 + t2_0;
1088 *py1 = t1;
1089 *pc2 = a2_2 + t2_2;
1091 n1 = (n1 + 1) & 3;
1092 j1 = (j1 + 1) & 1;
1093 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1095 t1_0 = a2_0*w0 + a1_0*t0;
1096 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1097 t1_2 = a2_2*w2 + a1_2*t2;
1099 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1100 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1101 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1103 *py0 = a1_0 + t1_0;
1104 *pc1 = t1;
1105 *py2 = a1_2 + t1_2;
1107 break;
1109 case 3:
1110 j0 = (xsb0 + 0x4000) & 0xffff8000;
1111 j1 = n1 & 1;
1112 j2 = n2 & 1;
1113 HI(&t0) = j0;
1114 LO(&t0) = 0;
1115 x1_or_one[0] = x1;
1116 x1_or_one[2] = -x1;
1117 x2_or_one[0] = x2;
1118 x2_or_one[2] = -x2;
1119 x0 = (x0 - t0) + y0;
1120 y1_or_zero[0] = y1;
1121 y1_or_zero[2] = -y1;
1122 y2_or_zero[0] = y2;
1123 y2_or_zero[2] = -y2;
1124 z0 = x0 * x0;
1125 z1 = x1 * x1;
1126 z2 = x2 * x2;
1127 t0 = z0 * (qq1 + z0 * qq2);
1128 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1129 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1130 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1131 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1132 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1133 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1134 xsb0 = (xsb0 >> 30) & 2;
1135 n0 ^= (xsb0 & ~(n0 << 1));
1136 xsb0 |= 1;
1138 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1139 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1141 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
1142 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1143 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1145 *pc0 = a2_0 + t2_0;
1146 *py1 = t1;
1147 *py2 = t2;
1149 n1 = (n1 + 1) & 3;
1150 n2 = (n2 + 1) & 3;
1151 j1 = (j1 + 1) & 1;
1152 j2 = (j2 + 1) & 1;
1154 t1_0 = a2_0*w0 + a1_0*t0;
1155 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1156 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1158 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1159 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1160 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1162 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1163 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1165 *py0 = a1_0 + t1_0;
1166 *pc1 = t1;
1167 *pc2 = t2;
1169 break;
1171 case 4:
1172 j0 = n0 & 1;
1173 j1 = (xsb1 + 0x4000) & 0xffff8000;
1174 j2 = (xsb2 + 0x4000) & 0xffff8000;
1175 HI(&t1) = j1;
1176 HI(&t2) = j2;
1177 LO(&t1) = 0;
1178 LO(&t2) = 0;
1179 x0_or_one[0] = x0;
1180 x0_or_one[2] = -x0;
1181 y0_or_zero[0] = y0;
1182 y0_or_zero[2] = -y0;
1183 x1 = (x1 - t1) + y1;
1184 x2 = (x2 - t2) + y2;
1185 z0 = x0 * x0;
1186 z1 = x1 * x1;
1187 z2 = x2 * x2;
1188 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1189 t1 = z1 * (qq1 + z1 * qq2);
1190 t2 = z2 * (qq1 + z2 * qq2);
1191 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1192 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1193 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
1194 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1195 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1196 xsb1 = (xsb1 >> 30) & 2;
1197 xsb2 = (xsb2 >> 30) & 2;
1198 n1 ^= (xsb1 & ~(n1 << 1));
1199 n2 ^= (xsb2 & ~(n2 << 1));
1200 xsb1 |= 1;
1201 xsb2 |= 1;
1203 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1204 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1206 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1207 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1209 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1210 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
1211 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2);
1213 *py0 = t0;
1214 *pc1 = a2_1 + t2_1;
1215 *pc2 = a2_2 + t2_2;
1217 n0 = (n0 + 1) & 3;
1218 j0 = (j0 + 1) & 1;
1219 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1221 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1222 t1_1 = a2_1*w1 + a1_1*t1;
1223 t1_2 = a2_2*w2 + a1_2*t2;
1225 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1226 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1227 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1229 *py1 = a1_1 + t1_1;
1230 *py2 = a1_2 + t1_2;
1231 *pc0 = t0;
1233 break;
1235 case 5:
1236 j0 = n0 & 1;
1237 j1 = (xsb1 + 0x4000) & 0xffff8000;
1238 j2 = n2 & 1;
1239 HI(&t1) = j1;
1240 LO(&t1) = 0;
1241 x0_or_one[0] = x0;
1242 x0_or_one[2] = -x0;
1243 x2_or_one[0] = x2;
1244 x2_or_one[2] = -x2;
1245 y0_or_zero[0] = y0;
1246 y0_or_zero[2] = -y0;
1247 x1 = (x1 - t1) + y1;
1248 y2_or_zero[0] = y2;
1249 y2_or_zero[2] = -y2;
1250 z0 = x0 * x0;
1251 z1 = x1 * x1;
1252 z2 = x2 * x2;
1253 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1254 t1 = z1 * (qq1 + z1 * qq2);
1255 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1256 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1257 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1258 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1259 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1260 xsb1 = (xsb1 >> 30) & 2;
1261 n1 ^= (xsb1 & ~(n1 << 1));
1262 xsb1 |= 1;
1264 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1265 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1267 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1268 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
1269 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1271 *py0 = t0;
1272 *pc1 = a2_1 + t2_1;
1273 *py2 = t2;
1275 n0 = (n0 + 1) & 3;
1276 n2 = (n2 + 1) & 3;
1277 j0 = (j0 + 1) & 1;
1278 j2 = (j2 + 1) & 1;
1280 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1281 t1_1 = a2_1*w1 + a1_1*t1;
1282 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1284 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1285 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1286 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1288 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1289 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1291 *pc0 = t0;
1292 *py1 = a1_1 + t1_1;
1293 *pc2 = t2;
1295 break;
1297 case 6:
1298 j0 = n0 & 1;
1299 j1 = n1 & 1;
1300 j2 = (xsb2 + 0x4000) & 0xffff8000;
1301 HI(&t2) = j2;
1302 LO(&t2) = 0;
1303 x0_or_one[0] = x0;
1304 x0_or_one[2] = -x0;
1305 x1_or_one[0] = x1;
1306 x1_or_one[2] = -x1;
1307 y0_or_zero[0] = y0;
1308 y0_or_zero[2] = -y0;
1309 y1_or_zero[0] = y1;
1310 y1_or_zero[2] = -y1;
1311 x2 = (x2 - t2) + y2;
1312 z0 = x0 * x0;
1313 z1 = x1 * x1;
1314 z2 = x2 * x2;
1315 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1316 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1317 t2 = z2 * (qq1 + z2 * qq2);
1318 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1319 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1320 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
1321 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1322 xsb2 = (xsb2 >> 30) & 2;
1323 n2 ^= (xsb2 & ~(n2 << 1));
1324 xsb2 |= 1;
1326 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1327 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1329 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1330 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1331 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2);
1333 *py0 = t0;
1334 *py1 = t1;
1335 *pc2 = a2_2 + t2_2;
1337 n0 = (n0 + 1) & 3;
1338 n1 = (n1 + 1) & 3;
1339 j0 = (j0 + 1) & 1;
1340 j1 = (j1 + 1) & 1;
1342 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1343 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1344 t1_2 = a2_2*w2 + a1_2*t2;
1346 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1347 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1348 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1350 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1351 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1353 *pc0 = t0;
1354 *pc1 = t1;
1355 *py2 = a1_2 + t1_2;
1357 break;
1359 case 7:
1360 j0 = n0 & 1;
1361 j1 = n1 & 1;
1362 j2 = n2 & 1;
1363 x0_or_one[0] = x0;
1364 x0_or_one[2] = -x0;
1365 x1_or_one[0] = x1;
1366 x1_or_one[2] = -x1;
1367 x2_or_one[0] = x2;
1368 x2_or_one[2] = -x2;
1369 y0_or_zero[0] = y0;
1370 y0_or_zero[2] = -y0;
1371 y1_or_zero[0] = y1;
1372 y1_or_zero[2] = -y1;
1373 y2_or_zero[0] = y2;
1374 y2_or_zero[2] = -y2;
1375 z0 = x0 * x0;
1376 z1 = x1 * x1;
1377 z2 = x2 * x2;
1378 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1379 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1380 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1381 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1382 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1383 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1384 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1385 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1386 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1387 *py0 = t0;
1388 *py1 = t1;
1389 *py2 = t2;
1391 n0 = (n0 + 1) & 3;
1392 n1 = (n1 + 1) & 3;
1393 n2 = (n2 + 1) & 3;
1394 j0 = (j0 + 1) & 1;
1395 j1 = (j1 + 1) & 1;
1396 j2 = (j2 + 1) & 1;
1397 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1398 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1399 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1400 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1401 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1402 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1403 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1404 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1405 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1406 *pc0 = t0;
1407 *pc1 = t1;
1408 *pc2 = t2;
1409 break;
1412 x += stridex;
1413 y += stridey;
1414 c += stridec;
1415 i = 0;
1416 } while (--n > 0);
1418 if (i > 0)
1420 double a1_0, a1_1, a2_0, a2_1;
1421 double t0, t1, t1_0, t1_1, t2_0, t2_1;
1422 double fn0, fn1, a0, a1, w0, w1, y0, y1;
1423 double z0, z1;
1424 unsigned j0, j1;
1425 int n0, n1;
1427 if (i > 1)
1429 n1 = (int) (x1 * invpio2 + half[xsb1]);
1430 fn1 = (double) n1;
1431 n1 &= 3;
1432 a1 = x1 - fn1 * pio2_1;
1433 w1 = fn1 * pio2_2;
1434 x1 = a1 - w1;
1435 y1 = (a1 - x1) - w1;
1436 a1 = x1;
1437 w1 = fn1 * pio2_3 - y1;
1438 x1 = a1 - w1;
1439 y1 = (a1 - x1) - w1;
1440 a1 = x1;
1441 w1 = fn1 * pio2_3t - y1;
1442 x1 = a1 - w1;
1443 y1 = (a1 - x1) - w1;
1444 xsb1 = HI(&x1);
1445 if ((xsb1 & ~0x80000000) < 0x3fc40000)
1447 j1 = n1 & 1;
1448 x1_or_one[0] = x1;
1449 x1_or_one[2] = -x1;
1450 y1_or_zero[0] = y1;
1451 y1_or_zero[2] = -y1;
1452 z1 = x1 * x1;
1453 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1454 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1455 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1456 *py1 = t1;
1457 n1 = (n1 + 1) & 3;
1458 j1 = (j1 + 1) & 1;
1459 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1460 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1461 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1462 *pc1 = t1;
1464 else
1466 j1 = (xsb1 + 0x4000) & 0xffff8000;
1467 HI(&t1) = j1;
1468 LO(&t1) = 0;
1469 x1 = (x1 - t1) + y1;
1470 z1 = x1 * x1;
1471 t1 = z1 * (qq1 + z1 * qq2);
1472 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1473 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1474 xsb1 = (xsb1 >> 30) & 2;
1475 n1 ^= (xsb1 & ~(n1 << 1));
1476 xsb1 |= 1;
1477 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1478 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1479 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
1480 *pc1 = a2_1 + t2_1;
1481 t1_1 = a2_1*w1 + a1_1*t1;
1482 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1483 *py1 = a1_1 + t1_1;
1486 n0 = (int) (x0 * invpio2 + half[xsb0]);
1487 fn0 = (double) n0;
1488 n0 &= 3;
1489 a0 = x0 - fn0 * pio2_1;
1490 w0 = fn0 * pio2_2;
1491 x0 = a0 - w0;
1492 y0 = (a0 - x0) - w0;
1493 a0 = x0;
1494 w0 = fn0 * pio2_3 - y0;
1495 x0 = a0 - w0;
1496 y0 = (a0 - x0) - w0;
1497 a0 = x0;
1498 w0 = fn0 * pio2_3t - y0;
1499 x0 = a0 - w0;
1500 y0 = (a0 - x0) - w0;
1501 xsb0 = HI(&x0);
1502 if ((xsb0 & ~0x80000000) < 0x3fc40000)
1504 j0 = n0 & 1;
1505 x0_or_one[0] = x0;
1506 x0_or_one[2] = -x0;
1507 y0_or_zero[0] = y0;
1508 y0_or_zero[2] = -y0;
1509 z0 = x0 * x0;
1510 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1511 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1512 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1513 *py0 = t0;
1514 n0 = (n0 + 1) & 3;
1515 j0 = (j0 + 1) & 1;
1516 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1517 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1518 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1519 *pc0 = t0;
1521 else
1523 j0 = (xsb0 + 0x4000) & 0xffff8000;
1524 HI(&t0) = j0;
1525 LO(&t0) = 0;
1526 x0 = (x0 - t0) + y0;
1527 z0 = x0 * x0;
1528 t0 = z0 * (qq1 + z0 * qq2);
1529 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1530 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1531 xsb0 = (xsb0 >> 30) & 2;
1532 n0 ^= (xsb0 & ~(n0 << 1));
1533 xsb0 |= 1;
1534 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1535 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1536 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
1537 *pc0 = a2_0 + t2_0;
1538 t1_0 = a2_0*w0 + a1_0*t0;
1539 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1540 *py0 = a1_0 + t1_0;
1544 if (biguns) {
1545 __vlibm_vsincos_big(nsave, xsave, sxsave, ysave, sysave, csave, scsave, 0x413921fb);