8572 ccompile.h: rename __GNU_UNUSED to __unused
[unleashed.git] / usr / src / lib / libmvec / common / __vsin.c
blob04e3c0a4a64f724c70703abcd3426b8268418174
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
36 #else
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
39 #endif
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
47 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
49 static const double
50 half[2] = { 0.5, -0.5 },
51 one = 1.0,
52 invpio2 = 0.636619772367581343075535,
53 pio2_1 = 1.570796326734125614166,
54 pio2_2 = 6.077100506303965976596e-11,
55 pio2_3 = 2.022266248711166455796e-21,
56 pio2_3t = 8.478427660368899643959e-32,
57 pp1 = -1.666666666605760465276263943134982554676e-0001,
58 pp2 = 8.333261209690963126718376566146180944442e-0003,
59 qq1 = -4.999999999977710986407023955908711557870e-0001,
60 qq2 = 4.166654863857219350645055881018842089580e-0002,
61 poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
62 -4.999999999999931701464060878888294524481e-0001 },
63 poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
64 4.166666666394861917535640593963708222319e-0002 },
65 poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
66 -1.388888552656142867832756687736851681462e-0003 },
67 poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
68 2.478519423681460796618128289454530524759e-0005 };
70 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
72 /* Don't __ the following; acomp will handle it */
73 extern double fabs(double);
74 extern void __vlibm_vsin_big(int, double *, int, double *, int, int);
76 void
77 __vsin(int n, double * restrict x, int stridex, double * restrict y,
78 int stridey)
80 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
81 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
82 double x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
83 unsigned hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
84 int i, biguns, nsave, sxsave, sysave;
85 volatile int v __unused;
86 nsave = n;
87 xsave = x;
88 sxsave = stridex;
89 ysave = y;
90 sysave = stridey;
91 biguns = 0;
95 LOOP0:
96 xsb0 = HI(x);
97 hx0 = xsb0 & ~0x80000000;
98 if (hx0 > 0x3fe921fb)
100 biguns = 1;
101 goto MEDIUM;
103 if (hx0 < 0x3e400000)
105 v = *x;
106 *y = *x;
107 x += stridex;
108 y += stridey;
109 i = 0;
110 if (--n <= 0)
111 break;
112 goto LOOP0;
114 x0 = *x;
115 py0 = y;
116 x += stridex;
117 y += stridey;
118 i = 1;
119 if (--n <= 0)
120 break;
122 LOOP1:
123 xsb1 = HI(x);
124 hx1 = xsb1 & ~0x80000000;
125 if (hx1 > 0x3fe921fb)
127 biguns = 2;
128 goto MEDIUM;
130 if (hx1 < 0x3e400000)
132 v = *x;
133 *y = *x;
134 x += stridex;
135 y += stridey;
136 i = 1;
137 if (--n <= 0)
138 break;
139 goto LOOP1;
141 x1 = *x;
142 py1 = y;
143 x += stridex;
144 y += stridey;
145 i = 2;
146 if (--n <= 0)
147 break;
149 LOOP2:
150 xsb2 = HI(x);
151 hx2 = xsb2 & ~0x80000000;
152 if (hx2 > 0x3fe921fb)
154 biguns = 3;
155 goto MEDIUM;
157 if (hx2 < 0x3e400000)
159 v = *x;
160 *y = *x;
161 x += stridex;
162 y += stridey;
163 i = 2;
164 if (--n <= 0)
165 break;
166 goto LOOP2;
168 x2 = *x;
169 py2 = y;
171 i = (hx0 - 0x3fc90000) >> 31;
172 i |= ((hx1 - 0x3fc90000) >> 30) & 2;
173 i |= ((hx2 - 0x3fc90000) >> 29) & 4;
174 switch (i)
176 double a0, a1, a2, w0, w1, w2;
177 double t0, t1, t2, z0, z1, z2;
178 unsigned j0, j1, j2;
180 case 0:
181 j0 = (xsb0 + 0x4000) & 0xffff8000;
182 j1 = (xsb1 + 0x4000) & 0xffff8000;
183 j2 = (xsb2 + 0x4000) & 0xffff8000;
184 HI(&t0) = j0;
185 HI(&t1) = j1;
186 HI(&t2) = j2;
187 LO(&t0) = 0;
188 LO(&t1) = 0;
189 LO(&t2) = 0;
190 x0 -= t0;
191 x1 -= t1;
192 x2 -= t2;
193 z0 = x0 * x0;
194 z1 = x1 * x1;
195 z2 = x2 * x2;
196 t0 = z0 * (qq1 + z0 * qq2);
197 t1 = z1 * (qq1 + z1 * qq2);
198 t2 = z2 * (qq1 + z2 * qq2);
199 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
200 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
201 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
202 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
203 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
204 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
205 xsb0 = (xsb0 >> 30) & 2;
206 xsb1 = (xsb1 >> 30) & 2;
207 xsb2 = (xsb2 >> 30) & 2;
208 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
209 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
210 a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
211 t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
212 t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
213 t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
214 *py0 = a0 + t0;
215 *py1 = a1 + t1;
216 *py2 = a2 + t2;
217 break;
219 case 1:
220 j1 = (xsb1 + 0x4000) & 0xffff8000;
221 j2 = (xsb2 + 0x4000) & 0xffff8000;
222 HI(&t1) = j1;
223 HI(&t2) = j2;
224 LO(&t1) = 0;
225 LO(&t2) = 0;
226 x1 -= t1;
227 x2 -= t2;
228 z0 = x0 * x0;
229 z1 = x1 * x1;
230 z2 = x2 * x2;
231 t0 = z0 * (poly3[0] + z0 * poly4[0]);
232 t1 = z1 * (qq1 + z1 * qq2);
233 t2 = z2 * (qq1 + z2 * qq2);
234 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
235 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
236 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
237 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
238 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
239 xsb1 = (xsb1 >> 30) & 2;
240 xsb2 = (xsb2 >> 30) & 2;
241 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
242 a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
243 t0 = x0 + x0 * t0;
244 t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
245 t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
246 *py0 = t0;
247 *py1 = a1 + t1;
248 *py2 = a2 + t2;
249 break;
251 case 2:
252 j0 = (xsb0 + 0x4000) & 0xffff8000;
253 j2 = (xsb2 + 0x4000) & 0xffff8000;
254 HI(&t0) = j0;
255 HI(&t2) = j2;
256 LO(&t0) = 0;
257 LO(&t2) = 0;
258 x0 -= t0;
259 x2 -= t2;
260 z0 = x0 * x0;
261 z1 = x1 * x1;
262 z2 = x2 * x2;
263 t0 = z0 * (qq1 + z0 * qq2);
264 t1 = z1 * (poly3[0] + z1 * poly4[0]);
265 t2 = z2 * (qq1 + z2 * qq2);
266 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
267 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
268 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
269 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
270 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
271 xsb0 = (xsb0 >> 30) & 2;
272 xsb2 = (xsb2 >> 30) & 2;
273 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
274 a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
275 t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
276 t1 = x1 + x1 * t1;
277 t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
278 *py0 = a0 + t0;
279 *py1 = t1;
280 *py2 = a2 + t2;
281 break;
283 case 3:
284 j2 = (xsb2 + 0x4000) & 0xffff8000;
285 HI(&t2) = j2;
286 LO(&t2) = 0;
287 x2 -= t2;
288 z0 = x0 * x0;
289 z1 = x1 * x1;
290 z2 = x2 * x2;
291 t0 = z0 * (poly3[0] + z0 * poly4[0]);
292 t1 = z1 * (poly3[0] + z1 * poly4[0]);
293 t2 = z2 * (qq1 + z2 * qq2);
294 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
295 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
296 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
297 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
298 xsb2 = (xsb2 >> 30) & 2;
299 a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
300 t0 = x0 + x0 * t0;
301 t1 = x1 + x1 * t1;
302 t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
303 *py0 = t0;
304 *py1 = t1;
305 *py2 = a2 + t2;
306 break;
308 case 4:
309 j0 = (xsb0 + 0x4000) & 0xffff8000;
310 j1 = (xsb1 + 0x4000) & 0xffff8000;
311 HI(&t0) = j0;
312 HI(&t1) = j1;
313 LO(&t0) = 0;
314 LO(&t1) = 0;
315 x0 -= t0;
316 x1 -= t1;
317 z0 = x0 * x0;
318 z1 = x1 * x1;
319 z2 = x2 * x2;
320 t0 = z0 * (qq1 + z0 * qq2);
321 t1 = z1 * (qq1 + z1 * qq2);
322 t2 = z2 * (poly3[0] + z2 * poly4[0]);
323 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
324 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
325 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
326 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
327 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
328 xsb0 = (xsb0 >> 30) & 2;
329 xsb1 = (xsb1 >> 30) & 2;
330 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
331 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
332 t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
333 t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
334 t2 = x2 + x2 * t2;
335 *py0 = a0 + t0;
336 *py1 = a1 + t1;
337 *py2 = t2;
338 break;
340 case 5:
341 j1 = (xsb1 + 0x4000) & 0xffff8000;
342 HI(&t1) = j1;
343 LO(&t1) = 0;
344 x1 -= t1;
345 z0 = x0 * x0;
346 z1 = x1 * x1;
347 z2 = x2 * x2;
348 t0 = z0 * (poly3[0] + z0 * poly4[0]);
349 t1 = z1 * (qq1 + z1 * qq2);
350 t2 = z2 * (poly3[0] + z2 * poly4[0]);
351 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
352 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
353 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
354 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
355 xsb1 = (xsb1 >> 30) & 2;
356 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
357 t0 = x0 + x0 * t0;
358 t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
359 t2 = x2 + x2 * t2;
360 *py0 = t0;
361 *py1 = a1 + t1;
362 *py2 = t2;
363 break;
365 case 6:
366 j0 = (xsb0 + 0x4000) & 0xffff8000;
367 HI(&t0) = j0;
368 LO(&t0) = 0;
369 x0 -= t0;
370 z0 = x0 * x0;
371 z1 = x1 * x1;
372 z2 = x2 * x2;
373 t0 = z0 * (qq1 + z0 * qq2);
374 t1 = z1 * (poly3[0] + z1 * poly4[0]);
375 t2 = z2 * (poly3[0] + z2 * poly4[0]);
376 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
377 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
378 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
379 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
380 xsb0 = (xsb0 >> 30) & 2;
381 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
382 t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
383 t1 = x1 + x1 * t1;
384 t2 = x2 + x2 * t2;
385 *py0 = a0 + t0;
386 *py1 = t1;
387 *py2 = t2;
388 break;
390 case 7:
391 z0 = x0 * x0;
392 z1 = x1 * x1;
393 z2 = x2 * x2;
394 t0 = z0 * (poly3[0] + z0 * poly4[0]);
395 t1 = z1 * (poly3[0] + z1 * poly4[0]);
396 t2 = z2 * (poly3[0] + z2 * poly4[0]);
397 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
398 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
399 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
400 t0 = x0 + x0 * t0;
401 t1 = x1 + x1 * t1;
402 t2 = x2 + x2 * t2;
403 *py0 = t0;
404 *py1 = t1;
405 *py2 = t2;
406 break;
409 x += stridex;
410 y += stridey;
411 i = 0;
412 } while (--n > 0);
414 if (i > 0)
416 double a0, a1, w0, w1;
417 double t0, t1, z0, z1;
418 unsigned j0, j1;
420 if (i > 1)
422 if (hx1 < 0x3fc90000)
424 z1 = x1 * x1;
425 t1 = z1 * (poly3[0] + z1 * poly4[0]);
426 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
427 t1 = x1 + x1 * t1;
428 *py1 = t1;
430 else
432 j1 = (xsb1 + 0x4000) & 0xffff8000;
433 HI(&t1) = j1;
434 LO(&t1) = 0;
435 x1 -= t1;
436 z1 = x1 * x1;
437 t1 = z1 * (qq1 + z1 * qq2);
438 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
439 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
440 xsb1 = (xsb1 >> 30) & 2;
441 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
442 t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
443 *py1 = a1 + t1;
446 if (hx0 < 0x3fc90000)
448 z0 = x0 * x0;
449 t0 = z0 * (poly3[0] + z0 * poly4[0]);
450 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
451 t0 = x0 + x0 * t0;
452 *py0 = t0;
454 else
456 j0 = (xsb0 + 0x4000) & 0xffff8000;
457 HI(&t0) = j0;
458 LO(&t0) = 0;
459 x0 -= t0;
460 z0 = x0 * x0;
461 t0 = z0 * (qq1 + z0 * qq2);
462 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
463 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
464 xsb0 = (xsb0 >> 30) & 2;
465 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
466 t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
467 *py0 = a0 + t0;
471 return;
474 * MEDIUM RANGE PROCESSING
475 * Jump here at first sign of medium range argument. We are a bit
476 * confused due to the jump.. fix up several variables and jump into
477 * the nth loop, same as was being processed above.
480 MEDIUM:
482 x0_or_one[1] = 1.0;
483 x1_or_one[1] = 1.0;
484 x2_or_one[1] = 1.0;
485 x0_or_one[3] = -1.0;
486 x1_or_one[3] = -1.0;
487 x2_or_one[3] = -1.0;
488 y0_or_zero[1] = 0.0;
489 y1_or_zero[1] = 0.0;
490 y2_or_zero[1] = 0.0;
491 y0_or_zero[3] = 0.0;
492 y1_or_zero[3] = 0.0;
493 y2_or_zero[3] = 0.0;
495 if (biguns == 3)
497 biguns = 0;
498 xsb0 = xsb0 >> 31;
499 xsb1 = xsb1 >> 31;
500 goto loop2;
502 else if (biguns == 2)
504 xsb0 = xsb0 >> 31;
505 biguns = 0;
506 goto loop1;
508 biguns = 0;
512 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
513 unsigned hx;
514 int n0, n1, n2;
516 loop0:
517 hx = HI(x);
518 xsb0 = hx >> 31;
519 hx &= ~0x80000000;
520 if (hx < 0x3e400000)
522 v = *x;
523 *y = *x;
524 x += stridex;
525 y += stridey;
526 i = 0;
527 if (--n <= 0)
528 break;
529 goto loop0;
531 if (hx > 0x413921fb)
533 if (hx >= 0x7ff00000)
535 x0 = *x;
536 *y = x0 - x0;
538 else
539 biguns = 1;
540 x += stridex;
541 y += stridey;
542 i = 0;
543 if (--n <= 0)
544 break;
545 goto loop0;
547 x0 = *x;
548 py0 = y;
549 x += stridex;
550 y += stridey;
551 i = 1;
552 if (--n <= 0)
553 break;
555 loop1:
556 hx = HI(x);
557 xsb1 = hx >> 31;
558 hx &= ~0x80000000;
559 if (hx < 0x3e400000)
561 v = *x;
562 *y = *x;
563 x += stridex;
564 y += stridey;
565 i = 1;
566 if (--n <= 0)
567 break;
568 goto loop1;
570 if (hx > 0x413921fb)
572 if (hx >= 0x7ff00000)
574 x1 = *x;
575 *y = x1 - x1;
577 else
578 biguns = 1;
579 x += stridex;
580 y += stridey;
581 i = 1;
582 if (--n <= 0)
583 break;
584 goto loop1;
586 x1 = *x;
587 py1 = y;
588 x += stridex;
589 y += stridey;
590 i = 2;
591 if (--n <= 0)
592 break;
594 loop2:
595 hx = HI(x);
596 xsb2 = hx >> 31;
597 hx &= ~0x80000000;
598 if (hx < 0x3e400000)
600 v = *x;
601 *y = *x;
602 x += stridex;
603 y += stridey;
604 i = 2;
605 if (--n <= 0)
606 break;
607 goto loop2;
609 if (hx > 0x413921fb)
611 if (hx >= 0x7ff00000)
613 x2 = *x;
614 *y = x2 - x2;
616 else
617 biguns = 1;
618 x += stridex;
619 y += stridey;
620 i = 2;
621 if (--n <= 0)
622 break;
623 goto loop2;
625 x2 = *x;
626 py2 = y;
628 n0 = (int) (x0 * invpio2 + half[xsb0]);
629 n1 = (int) (x1 * invpio2 + half[xsb1]);
630 n2 = (int) (x2 * invpio2 + half[xsb2]);
631 fn0 = (double) n0;
632 fn1 = (double) n1;
633 fn2 = (double) n2;
634 n0 &= 3;
635 n1 &= 3;
636 n2 &= 3;
637 a0 = x0 - fn0 * pio2_1;
638 a1 = x1 - fn1 * pio2_1;
639 a2 = x2 - fn2 * pio2_1;
640 w0 = fn0 * pio2_2;
641 w1 = fn1 * pio2_2;
642 w2 = fn2 * pio2_2;
643 x0 = a0 - w0;
644 x1 = a1 - w1;
645 x2 = a2 - w2;
646 y0 = (a0 - x0) - w0;
647 y1 = (a1 - x1) - w1;
648 y2 = (a2 - x2) - w2;
649 a0 = x0;
650 a1 = x1;
651 a2 = x2;
652 w0 = fn0 * pio2_3 - y0;
653 w1 = fn1 * pio2_3 - y1;
654 w2 = fn2 * pio2_3 - y2;
655 x0 = a0 - w0;
656 x1 = a1 - w1;
657 x2 = a2 - w2;
658 y0 = (a0 - x0) - w0;
659 y1 = (a1 - x1) - w1;
660 y2 = (a2 - x2) - w2;
661 a0 = x0;
662 a1 = x1;
663 a2 = x2;
664 w0 = fn0 * pio2_3t - y0;
665 w1 = fn1 * pio2_3t - y1;
666 w2 = fn2 * pio2_3t - y2;
667 x0 = a0 - w0;
668 x1 = a1 - w1;
669 x2 = a2 - w2;
670 y0 = (a0 - x0) - w0;
671 y1 = (a1 - x1) - w1;
672 y2 = (a2 - x2) - w2;
673 xsb0 = HI(&x0);
674 i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
675 xsb1 = HI(&x1);
676 i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
677 xsb2 = HI(&x2);
678 i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
679 switch (i)
681 double t0, t1, t2, z0, z1, z2;
682 unsigned j0, j1, j2;
684 case 0:
685 j0 = (xsb0 + 0x4000) & 0xffff8000;
686 j1 = (xsb1 + 0x4000) & 0xffff8000;
687 j2 = (xsb2 + 0x4000) & 0xffff8000;
688 HI(&t0) = j0;
689 HI(&t1) = j1;
690 HI(&t2) = j2;
691 LO(&t0) = 0;
692 LO(&t1) = 0;
693 LO(&t2) = 0;
694 x0 = (x0 - t0) + y0;
695 x1 = (x1 - t1) + y1;
696 x2 = (x2 - t2) + y2;
697 z0 = x0 * x0;
698 z1 = x1 * x1;
699 z2 = x2 * x2;
700 t0 = z0 * (qq1 + z0 * qq2);
701 t1 = z1 * (qq1 + z1 * qq2);
702 t2 = z2 * (qq1 + z2 * qq2);
703 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
704 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
705 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
706 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
707 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
708 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
709 xsb0 = (xsb0 >> 30) & 2;
710 xsb1 = (xsb1 >> 30) & 2;
711 xsb2 = (xsb2 >> 30) & 2;
712 n0 ^= (xsb0 & ~(n0 << 1));
713 n1 ^= (xsb1 & ~(n1 << 1));
714 n2 ^= (xsb2 & ~(n2 << 1));
715 xsb0 |= 1;
716 xsb1 |= 1;
717 xsb2 |= 1;
718 a0 = __vlibm_TBL_sincos_hi[j0+n0];
719 a1 = __vlibm_TBL_sincos_hi[j1+n1];
720 a2 = __vlibm_TBL_sincos_hi[j2+n2];
721 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
722 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
723 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
724 *py0 = ( a0 + t0 );
725 *py1 = ( a1 + t1 );
726 *py2 = ( a2 + t2 );
727 break;
729 case 1:
730 j0 = n0 & 1;
731 j1 = (xsb1 + 0x4000) & 0xffff8000;
732 j2 = (xsb2 + 0x4000) & 0xffff8000;
733 HI(&t1) = j1;
734 HI(&t2) = j2;
735 LO(&t1) = 0;
736 LO(&t2) = 0;
737 x0_or_one[0] = x0;
738 x0_or_one[2] = -x0;
739 y0_or_zero[0] = y0;
740 y0_or_zero[2] = -y0;
741 x1 = (x1 - t1) + y1;
742 x2 = (x2 - t2) + y2;
743 z0 = x0 * x0;
744 z1 = x1 * x1;
745 z2 = x2 * x2;
746 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
747 t1 = z1 * (qq1 + z1 * qq2);
748 t2 = z2 * (qq1 + z2 * qq2);
749 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
750 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
751 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
752 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
753 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
754 xsb1 = (xsb1 >> 30) & 2;
755 xsb2 = (xsb2 >> 30) & 2;
756 n1 ^= (xsb1 & ~(n1 << 1));
757 n2 ^= (xsb2 & ~(n2 << 1));
758 xsb1 |= 1;
759 xsb2 |= 1;
760 a1 = __vlibm_TBL_sincos_hi[j1+n1];
761 a2 = __vlibm_TBL_sincos_hi[j2+n2];
762 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
763 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
764 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
765 *py0 = t0;
766 *py1 = ( a1 + t1 );
767 *py2 = ( a2 + t2 );
768 break;
770 case 2:
771 j0 = (xsb0 + 0x4000) & 0xffff8000;
772 j1 = n1 & 1;
773 j2 = (xsb2 + 0x4000) & 0xffff8000;
774 HI(&t0) = j0;
775 HI(&t2) = j2;
776 LO(&t0) = 0;
777 LO(&t2) = 0;
778 x1_or_one[0] = x1;
779 x1_or_one[2] = -x1;
780 x0 = (x0 - t0) + y0;
781 y1_or_zero[0] = y1;
782 y1_or_zero[2] = -y1;
783 x2 = (x2 - t2) + y2;
784 z0 = x0 * x0;
785 z1 = x1 * x1;
786 z2 = x2 * x2;
787 t0 = z0 * (qq1 + z0 * qq2);
788 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
789 t2 = z2 * (qq1 + z2 * qq2);
790 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
791 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
792 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
793 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
794 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
795 xsb0 = (xsb0 >> 30) & 2;
796 xsb2 = (xsb2 >> 30) & 2;
797 n0 ^= (xsb0 & ~(n0 << 1));
798 n2 ^= (xsb2 & ~(n2 << 1));
799 xsb0 |= 1;
800 xsb2 |= 1;
801 a0 = __vlibm_TBL_sincos_hi[j0+n0];
802 a2 = __vlibm_TBL_sincos_hi[j2+n2];
803 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
804 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
805 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
806 *py0 = ( a0 + t0 );
807 *py1 = t1;
808 *py2 = ( a2 + t2 );
809 break;
811 case 3:
812 j0 = n0 & 1;
813 j1 = n1 & 1;
814 j2 = (xsb2 + 0x4000) & 0xffff8000;
815 HI(&t2) = j2;
816 LO(&t2) = 0;
817 x0_or_one[0] = x0;
818 x0_or_one[2] = -x0;
819 x1_or_one[0] = x1;
820 x1_or_one[2] = -x1;
821 y0_or_zero[0] = y0;
822 y0_or_zero[2] = -y0;
823 y1_or_zero[0] = y1;
824 y1_or_zero[2] = -y1;
825 x2 = (x2 - t2) + y2;
826 z0 = x0 * x0;
827 z1 = x1 * x1;
828 z2 = x2 * x2;
829 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
830 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
831 t2 = z2 * (qq1 + z2 * qq2);
832 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
833 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
834 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
835 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
836 xsb2 = (xsb2 >> 30) & 2;
837 n2 ^= (xsb2 & ~(n2 << 1));
838 xsb2 |= 1;
839 a2 = __vlibm_TBL_sincos_hi[j2+n2];
840 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
841 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
842 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
843 *py0 = t0;
844 *py1 = t1;
845 *py2 = ( a2 + t2 );
846 break;
848 case 4:
849 j0 = (xsb0 + 0x4000) & 0xffff8000;
850 j1 = (xsb1 + 0x4000) & 0xffff8000;
851 j2 = n2 & 1;
852 HI(&t0) = j0;
853 HI(&t1) = j1;
854 LO(&t0) = 0;
855 LO(&t1) = 0;
856 x2_or_one[0] = x2;
857 x2_or_one[2] = -x2;
858 x0 = (x0 - t0) + y0;
859 x1 = (x1 - t1) + y1;
860 y2_or_zero[0] = y2;
861 y2_or_zero[2] = -y2;
862 z0 = x0 * x0;
863 z1 = x1 * x1;
864 z2 = x2 * x2;
865 t0 = z0 * (qq1 + z0 * qq2);
866 t1 = z1 * (qq1 + z1 * qq2);
867 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
868 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
869 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
870 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
871 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
872 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
873 xsb0 = (xsb0 >> 30) & 2;
874 xsb1 = (xsb1 >> 30) & 2;
875 n0 ^= (xsb0 & ~(n0 << 1));
876 n1 ^= (xsb1 & ~(n1 << 1));
877 xsb0 |= 1;
878 xsb1 |= 1;
879 a0 = __vlibm_TBL_sincos_hi[j0+n0];
880 a1 = __vlibm_TBL_sincos_hi[j1+n1];
881 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
882 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
883 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
884 *py0 = ( a0 + t0 );
885 *py1 = ( a1 + t1 );
886 *py2 = t2;
887 break;
889 case 5:
890 j0 = n0 & 1;
891 j1 = (xsb1 + 0x4000) & 0xffff8000;
892 j2 = n2 & 1;
893 HI(&t1) = j1;
894 LO(&t1) = 0;
895 x0_or_one[0] = x0;
896 x0_or_one[2] = -x0;
897 x2_or_one[0] = x2;
898 x2_or_one[2] = -x2;
899 y0_or_zero[0] = y0;
900 y0_or_zero[2] = -y0;
901 x1 = (x1 - t1) + y1;
902 y2_or_zero[0] = y2;
903 y2_or_zero[2] = -y2;
904 z0 = x0 * x0;
905 z1 = x1 * x1;
906 z2 = x2 * x2;
907 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
908 t1 = z1 * (qq1 + z1 * qq2);
909 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
910 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
911 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
912 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
913 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
914 xsb1 = (xsb1 >> 30) & 2;
915 n1 ^= (xsb1 & ~(n1 << 1));
916 xsb1 |= 1;
917 a1 = __vlibm_TBL_sincos_hi[j1+n1];
918 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
919 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
920 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
921 *py0 = t0;
922 *py1 = ( a1 + t1 );
923 *py2 = t2;
924 break;
926 case 6:
927 j0 = (xsb0 + 0x4000) & 0xffff8000;
928 j1 = n1 & 1;
929 j2 = n2 & 1;
930 HI(&t0) = j0;
931 LO(&t0) = 0;
932 x1_or_one[0] = x1;
933 x1_or_one[2] = -x1;
934 x2_or_one[0] = x2;
935 x2_or_one[2] = -x2;
936 x0 = (x0 - t0) + y0;
937 y1_or_zero[0] = y1;
938 y1_or_zero[2] = -y1;
939 y2_or_zero[0] = y2;
940 y2_or_zero[2] = -y2;
941 z0 = x0 * x0;
942 z1 = x1 * x1;
943 z2 = x2 * x2;
944 t0 = z0 * (qq1 + z0 * qq2);
945 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
946 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
947 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
948 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
949 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
950 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
951 xsb0 = (xsb0 >> 30) & 2;
952 n0 ^= (xsb0 & ~(n0 << 1));
953 xsb0 |= 1;
954 a0 = __vlibm_TBL_sincos_hi[j0+n0];
955 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
956 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
957 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
958 *py0 = ( a0 + t0 );
959 *py1 = t1;
960 *py2 = t2;
961 break;
963 case 7:
964 j0 = n0 & 1;
965 j1 = n1 & 1;
966 j2 = n2 & 1;
967 x0_or_one[0] = x0;
968 x0_or_one[2] = -x0;
969 x1_or_one[0] = x1;
970 x1_or_one[2] = -x1;
971 x2_or_one[0] = x2;
972 x2_or_one[2] = -x2;
973 y0_or_zero[0] = y0;
974 y0_or_zero[2] = -y0;
975 y1_or_zero[0] = y1;
976 y1_or_zero[2] = -y1;
977 y2_or_zero[0] = y2;
978 y2_or_zero[2] = -y2;
979 z0 = x0 * x0;
980 z1 = x1 * x1;
981 z2 = x2 * x2;
982 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
983 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
984 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
985 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
986 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
987 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
988 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
989 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
990 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
991 *py0 = t0;
992 *py1 = t1;
993 *py2 = t2;
994 break;
997 x += stridex;
998 y += stridey;
999 i = 0;
1000 } while (--n > 0);
1002 if (i > 0)
1004 double fn0, fn1, a0, a1, w0, w1, y0, y1;
1005 double t0, t1, z0, z1;
1006 unsigned j0, j1;
1007 int n0, n1;
1009 if (i > 1)
1011 n1 = (int) (x1 * invpio2 + half[xsb1]);
1012 fn1 = (double) n1;
1013 n1 &= 3;
1014 a1 = x1 - fn1 * pio2_1;
1015 w1 = fn1 * pio2_2;
1016 x1 = a1 - w1;
1017 y1 = (a1 - x1) - w1;
1018 a1 = x1;
1019 w1 = fn1 * pio2_3 - y1;
1020 x1 = a1 - w1;
1021 y1 = (a1 - x1) - w1;
1022 a1 = x1;
1023 w1 = fn1 * pio2_3t - y1;
1024 x1 = a1 - w1;
1025 y1 = (a1 - x1) - w1;
1026 xsb1 = HI(&x1);
1027 if ((xsb1 & ~0x80000000) < thresh[n1&1])
1029 j1 = n1 & 1;
1030 x1_or_one[0] = x1;
1031 x1_or_one[2] = -x1;
1032 y1_or_zero[0] = y1;
1033 y1_or_zero[2] = -y1;
1034 z1 = x1 * x1;
1035 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1036 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1037 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1038 *py1 = t1;
1040 else
1042 j1 = (xsb1 + 0x4000) & 0xffff8000;
1043 HI(&t1) = j1;
1044 LO(&t1) = 0;
1045 x1 = (x1 - t1) + y1;
1046 z1 = x1 * x1;
1047 t1 = z1 * (qq1 + z1 * qq2);
1048 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1049 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1050 xsb1 = (xsb1 >> 30) & 2;
1051 n1 ^= (xsb1 & ~(n1 << 1));
1052 xsb1 |= 1;
1053 a1 = __vlibm_TBL_sincos_hi[j1+n1];
1054 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
1055 *py1 = ( a1 + t1 );
1058 n0 = (int) (x0 * invpio2 + half[xsb0]);
1059 fn0 = (double) n0;
1060 n0 &= 3;
1061 a0 = x0 - fn0 * pio2_1;
1062 w0 = fn0 * pio2_2;
1063 x0 = a0 - w0;
1064 y0 = (a0 - x0) - w0;
1065 a0 = x0;
1066 w0 = fn0 * pio2_3 - y0;
1067 x0 = a0 - w0;
1068 y0 = (a0 - x0) - w0;
1069 a0 = x0;
1070 w0 = fn0 * pio2_3t - y0;
1071 x0 = a0 - w0;
1072 y0 = (a0 - x0) - w0;
1073 xsb0 = HI(&x0);
1074 if ((xsb0 & ~0x80000000) < thresh[n0&1])
1076 j0 = n0 & 1;
1077 x0_or_one[0] = x0;
1078 x0_or_one[2] = -x0;
1079 y0_or_zero[0] = y0;
1080 y0_or_zero[2] = -y0;
1081 z0 = x0 * x0;
1082 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1083 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1084 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1085 *py0 = t0;
1087 else
1089 j0 = (xsb0 + 0x4000) & 0xffff8000;
1090 HI(&t0) = j0;
1091 LO(&t0) = 0;
1092 x0 = (x0 - t0) + y0;
1093 z0 = x0 * x0;
1094 t0 = z0 * (qq1 + z0 * qq2);
1095 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1096 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1097 xsb0 = (xsb0 >> 30) & 2;
1098 n0 ^= (xsb0 & ~(n0 << 1));
1099 xsb0 |= 1;
1100 a0 = __vlibm_TBL_sincos_hi[j0+n0];
1101 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
1102 *py0 = ( a0 + t0 );
1106 if (biguns)
1107 __vlibm_vsin_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);