4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
30 #include <sys/isa_defs.h>
31 #include "libm_inlines.h"
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
42 #define restrict _Restrict
47 /* float rhypotf(float x, float y)
51 * for x or y = Inf => 0;
52 * for x or y = NaN => QNaN;
53 * for x and y = 0 => +Inf + divide-by-zero;
54 * 2. Computes d = x * x + y * y;
55 * 3. Computes reciprocal square root from:
59 * n = ((exponent + 1) & ~1).
61 * rsqrtf(d) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m))
62 * 4. Computes 1/sqrt(m) from:
63 * 1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm))
66 * m0 = 0.5 * (1 + k/64) for m = [0.5, 0.5+127/256), k = [0, 63];
67 * m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127];
69 * 1/sqrt(m0), 1/m0 are looked up in a table,
70 * 1/sqrt(1 + (1/m0)*dm) is computed using approximation:
71 * 1/sqrt(1 + z) = ((a3 * z + a2) * z + a1) * z + a0
72 * where z = [-1/64, 1/64].
75 * The maximum relative error for the approximating
76 * polynomial is 2**(-27.87).
77 * Maximum error observed: less than 0.535 ulp after 3.000.000.000
81 #pragma align 32 (__vlibm_TBL_rhypotf)
83 static const double __vlibm_TBL_rhypotf
[] = {
86 TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
87 TBL[2*i+1] = (double)(0.5/sqrtl(2) / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));
88 TBL[128+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
89 TBL[128+2*i+1] = (double)(0.25 / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));
91 1.0000000000000000000e+00, 3.5355339059327378637e-01,
92 9.8461538461538467004e-01, 3.5082320772281166965e-01,
93 9.6969696969696972388e-01, 3.4815531191139570399e-01,
94 9.5522388059701490715e-01, 3.4554737023254405992e-01,
95 9.4117647058823528106e-01, 3.4299717028501769400e-01,
96 9.2753623188405798228e-01, 3.4050261230349943009e-01,
97 9.1428571428571425717e-01, 3.3806170189140660742e-01,
98 9.0140845070422537244e-01, 3.3567254331867563133e-01,
99 8.8888888888888883955e-01, 3.3333333333333331483e-01,
100 8.7671232876712323900e-01, 3.3104235544094717802e-01,
101 8.6486486486486491287e-01, 3.2879797461071458287e-01,
102 8.5333333333333338810e-01, 3.2659863237109043599e-01,
103 8.4210526315789469010e-01, 3.2444284226152508843e-01,
104 8.3116883116883122362e-01, 3.2232918561015211356e-01,
105 8.2051282051282048435e-01, 3.2025630761017426229e-01,
106 8.1012658227848100001e-01, 3.1822291367029204023e-01,
107 8.0000000000000004441e-01, 3.1622776601683794118e-01,
108 7.9012345679012341293e-01, 3.1426968052735443360e-01,
109 7.8048780487804880757e-01, 3.1234752377721214378e-01,
110 7.7108433734939763049e-01, 3.1046021028253312224e-01,
111 7.6190476190476186247e-01, 3.0860669992418382490e-01,
112 7.5294117647058822484e-01, 3.0678599553894819740e-01,
113 7.4418604651162789665e-01, 3.0499714066520933198e-01,
114 7.3563218390804596680e-01, 3.0323921743156134756e-01,
115 7.2727272727272729291e-01, 3.0151134457776362918e-01,
116 7.1910112359550559802e-01, 2.9981267559834456904e-01,
117 7.1111111111111113825e-01, 2.9814239699997197031e-01,
118 7.0329670329670335160e-01, 2.9649972666444046610e-01,
119 6.9565217391304345895e-01, 2.9488391230979427160e-01,
120 6.8817204301075274309e-01, 2.9329423004270660513e-01,
121 6.8085106382978721751e-01, 2.9172998299578911663e-01,
122 6.7368421052631577428e-01, 2.9019050004400465115e-01,
123 6.6666666666666662966e-01, 2.8867513459481286553e-01,
124 6.5979381443298967813e-01, 2.8718326344709527165e-01,
125 6.5306122448979586625e-01, 2.8571428571428569843e-01,
126 6.4646464646464651960e-01, 2.8426762180748055275e-01,
127 6.4000000000000001332e-01, 2.8284271247461900689e-01,
128 6.3366336633663367106e-01, 2.8143901789211672737e-01,
129 6.2745098039215685404e-01, 2.8005601680560193723e-01,
130 6.2135922330097081989e-01, 2.7869320571664707442e-01,
131 6.1538461538461541878e-01, 2.7735009811261457369e-01,
132 6.0952380952380957879e-01, 2.7602622373694168934e-01,
133 6.0377358490566035432e-01, 2.7472112789737807015e-01,
134 5.9813084112149528249e-01, 2.7343437080986532361e-01,
135 5.9259259259259255970e-01, 2.7216552697590867815e-01,
136 5.8715596330275232617e-01, 2.7091418459143856712e-01,
137 5.8181818181818178992e-01, 2.6967994498529684888e-01,
138 5.7657657657657657158e-01, 2.6846242208560971987e-01,
139 5.7142857142857139685e-01, 2.6726124191242439654e-01,
140 5.6637168141592919568e-01, 2.6607604209509572168e-01,
141 5.6140350877192979340e-01, 2.6490647141300877054e-01,
142 5.5652173913043478937e-01, 2.6375218935831479250e-01,
143 5.5172413793103447510e-01, 2.6261286571944508772e-01,
144 5.4700854700854706358e-01, 2.6148818018424535570e-01,
145 5.4237288135593220151e-01, 2.6037782196164771520e-01,
146 5.3781512605042014474e-01, 2.5928148942086576278e-01,
147 5.3333333333333332593e-01, 2.5819888974716115326e-01,
148 5.2892561983471075848e-01, 2.5712973861329002645e-01,
149 5.2459016393442625681e-01, 2.5607375986579195004e-01,
150 5.2032520325203257539e-01, 2.5503068522533534068e-01,
151 5.1612903225806450180e-01, 2.5400025400038100942e-01,
152 5.1200000000000001066e-01, 2.5298221281347033074e-01,
153 5.0793650793650790831e-01, 2.5197631533948483540e-01,
154 5.0393700787401574104e-01, 2.5098232205526344041e-01,
155 1.0000000000000000000e+00, 2.5000000000000000000e-01,
156 9.8461538461538467004e-01, 2.4806946917841690703e-01,
157 9.6969696969696972388e-01, 2.4618298195866547551e-01,
158 9.5522388059701490715e-01, 2.4433888871261044695e-01,
159 9.4117647058823528106e-01, 2.4253562503633296910e-01,
160 9.2753623188405798228e-01, 2.4077170617153839660e-01,
161 9.1428571428571425717e-01, 2.3904572186687872426e-01,
162 9.0140845070422537244e-01, 2.3735633163877067897e-01,
163 8.8888888888888883955e-01, 2.3570226039551583908e-01,
164 8.7671232876712323900e-01, 2.3408229439226113655e-01,
165 8.6486486486486491287e-01, 2.3249527748763856860e-01,
166 8.5333333333333338810e-01, 2.3094010767585029797e-01,
167 8.4210526315789469010e-01, 2.2941573387056177213e-01,
168 8.3116883116883122362e-01, 2.2792115291927589338e-01,
169 8.2051282051282048435e-01, 2.2645540682891915352e-01,
170 8.1012658227848100001e-01, 2.2501758018520479077e-01,
171 8.0000000000000004441e-01, 2.2360679774997896385e-01,
172 7.9012345679012341293e-01, 2.2222222222222220989e-01,
173 7.8048780487804880757e-01, 2.2086305214969309541e-01,
174 7.7108433734939763049e-01, 2.1952851997938069295e-01,
175 7.6190476190476186247e-01, 2.1821789023599238999e-01,
176 7.5294117647058822484e-01, 2.1693045781865616384e-01,
177 7.4418604651162789665e-01, 2.1566554640687682354e-01,
178 7.3563218390804596680e-01, 2.1442250696755896233e-01,
179 7.2727272727272729291e-01, 2.1320071635561044232e-01,
180 7.1910112359550559802e-01, 2.1199957600127200541e-01,
181 7.1111111111111113825e-01, 2.1081851067789195153e-01,
182 7.0329670329670335160e-01, 2.0965696734438366011e-01,
183 6.9565217391304345895e-01, 2.0851441405707477061e-01,
184 6.8817204301075274309e-01, 2.0739033894608505104e-01,
185 6.8085106382978721751e-01, 2.0628424925175867233e-01,
186 6.7368421052631577428e-01, 2.0519567041703082322e-01,
187 6.6666666666666662966e-01, 2.0412414523193150862e-01,
188 6.5979381443298967813e-01, 2.0306923302672380549e-01,
189 6.5306122448979586625e-01, 2.0203050891044216364e-01,
190 6.4646464646464651960e-01, 2.0100756305184241945e-01,
191 6.4000000000000001332e-01, 2.0000000000000001110e-01,
192 6.3366336633663367106e-01, 1.9900743804199783060e-01,
193 6.2745098039215685404e-01, 1.9802950859533485772e-01,
194 6.2135922330097081989e-01, 1.9706585563285863860e-01,
195 6.1538461538461541878e-01, 1.9611613513818404453e-01,
196 6.0952380952380957879e-01, 1.9518001458970662965e-01,
197 6.0377358490566035432e-01, 1.9425717247145282696e-01,
198 5.9813084112149528249e-01, 1.9334729780913270658e-01,
199 5.9259259259259255970e-01, 1.9245008972987526219e-01,
200 5.8715596330275232617e-01, 1.9156525704423027490e-01,
201 5.8181818181818178992e-01, 1.9069251784911847580e-01,
202 5.7657657657657657158e-01, 1.8983159915049979682e-01,
203 5.7142857142857139685e-01, 1.8898223650461362655e-01,
204 5.6637168141592919568e-01, 1.8814417367671945613e-01,
205 5.6140350877192979340e-01, 1.8731716231633879777e-01,
206 5.5652173913043478937e-01, 1.8650096164806276300e-01,
207 5.5172413793103447510e-01, 1.8569533817705186074e-01,
208 5.4700854700854706358e-01, 1.8490006540840969729e-01,
209 5.4237288135593220151e-01, 1.8411492357966466327e-01,
210 5.3781512605042014474e-01, 1.8333969940564226464e-01,
211 5.3333333333333332593e-01, 1.8257418583505535814e-01,
212 5.2892561983471075848e-01, 1.8181818181818182323e-01,
213 5.2459016393442625681e-01, 1.8107149208503706128e-01,
214 5.2032520325203257539e-01, 1.8033392693348646030e-01,
215 5.1612903225806450180e-01, 1.7960530202677491007e-01,
216 5.1200000000000001066e-01, 1.7888543819998317663e-01,
217 5.0793650793650790831e-01, 1.7817416127494958844e-01,
218 5.0393700787401574104e-01, 1.7747130188322274291e-01,
221 extern float fabsf(float);
224 A0
= 9.99999997962321453275e-01,
225 A1
=-4.99999998166077580600e-01,
226 A2
= 3.75066768969515586277e-01,
227 A3
=-3.12560092408808548438e-01;
230 __vrhypotf_n(int n
, float * restrict px
, int stridex
, float * restrict py
,
231 int stridey
, float * restrict pz
, int stridez
);
233 #pragma no_inline(__vrhypotf_n)
235 #define RETURN(ret) \
241 spx = px; spy = py; spz = pz; \
251 __vrhypotf(int n
, float * restrict px
, int stridex
, float * restrict py
,
252 int stridey
, float * restrict pz
, int stridez
)
254 float *spx
, *spy
, *spz
;
273 if (ax0
>= 0x7f800000 || ay0
>= 0x7f800000) /* X or Y = NaN or Inf */
275 x0
= *(px
- stridex
);
277 res
= fabsf(x0
) + fabsf(y0
);
278 if (ax0
== 0x7f800000) res
= 0.0f
;
279 else if (ay0
== 0x7f800000) res
= 0.0f
;
286 if (ay0
== 0) /* Y = 0 */
288 int tx
= *(int*)(px
- stridex
) & 0x7fffffff;
289 if (tx
== 0) /* X = 0 */
299 __vrhypotf_n(n_n
, spx
, stridex
, spy
, stridey
, spz
, stridez
);
311 if (ax0
>= 0x7f800000 || ay0
>= 0x7f800000) /* X or Y = NaN or Inf */
313 res
= fabsf(x0
) + fabsf(y0
);
314 if (ax0
== 0x7f800000) res
= 0.0f
;
315 else if (ay0
== 0x7f800000) res
= 0.0f
;
318 else if (ax0
== 0 && ay0
== 0) /* X and Y = 0 */
324 double xx0
, res0
, hyp0
, h_hi0
= 0, dbase0
= 0;
325 int ibase0
, si0
, hyp0h
;
327 hyp0
= x0
* (double)x0
+ y0
* (double)y0
;
331 HI(&dbase0
) = (0x60000000 - ((ibase0
& 0x7fe00000) >> 1));
333 hyp0h
= (ibase0
& 0x000fffff) | 0x3ff00000;
335 HI(&h_hi0
) = hyp0h
& 0x7fffc000;
338 si0
= ibase0
& 0x7f0;
339 xx0
= ((double*)((char*)__vlibm_TBL_rhypotf
+ si0
))[0];
341 xx0
= (hyp0
- h_hi0
) * xx0
;
342 res0
= ((double*)((char*)__vlibm_TBL_rhypotf
+ si0
))[1];
343 res0
*= (((A3
* xx0
+ A2
) * xx0
+ A1
) * xx0
+ A0
);
351 __vrhypotf_n(int n
, float * restrict px
, int stridex
, float * restrict py
,
352 int stridey
, float * restrict pz
, int stridez
)
354 double xx0
, res0
, hyp0
, h_hi0
= 0, dbase0
= 0;
355 double xx1
, res1
, hyp1
, h_hi1
= 0, dbase1
= 0;
356 double xx2
, res2
, hyp2
, h_hi2
= 0, dbase2
= 0;
360 int ibase0
, si0
, hyp0h
;
361 int ibase1
, si1
, hyp1h
;
362 int ibase2
, si2
, hyp2h
;
364 for (; n
> 2 ; n
-= 3)
380 hyp0
= x0
* (double)x0
+ y0
* (double)y0
;
381 hyp1
= x1
* (double)x1
+ y1
* (double)y1
;
382 hyp2
= x2
* (double)x2
+ y2
* (double)y2
;
388 HI(&dbase0
) = (0x60000000 - ((ibase0
& 0x7fe00000) >> 1));
389 HI(&dbase1
) = (0x60000000 - ((ibase1
& 0x7fe00000) >> 1));
390 HI(&dbase2
) = (0x60000000 - ((ibase2
& 0x7fe00000) >> 1));
392 hyp0h
= (ibase0
& 0x000fffff) | 0x3ff00000;
393 hyp1h
= (ibase1
& 0x000fffff) | 0x3ff00000;
394 hyp2h
= (ibase2
& 0x000fffff) | 0x3ff00000;
398 HI(&h_hi0
) = hyp0h
& 0x7fffc000;
399 HI(&h_hi1
) = hyp1h
& 0x7fffc000;
400 HI(&h_hi2
) = hyp2h
& 0x7fffc000;
405 si0
= ibase0
& 0x7f0;
406 si1
= ibase1
& 0x7f0;
407 si2
= ibase2
& 0x7f0;
408 xx0
= ((double*)((char*)__vlibm_TBL_rhypotf
+ si0
))[0];
409 xx1
= ((double*)((char*)__vlibm_TBL_rhypotf
+ si1
))[0];
410 xx2
= ((double*)((char*)__vlibm_TBL_rhypotf
+ si2
))[0];
412 xx0
= (hyp0
- h_hi0
) * xx0
;
413 xx1
= (hyp1
- h_hi1
) * xx1
;
414 xx2
= (hyp2
- h_hi2
) * xx2
;
415 res0
= ((double*)((char*)__vlibm_TBL_rhypotf
+ si0
))[1];
416 res1
= ((double*)((char*)__vlibm_TBL_rhypotf
+ si1
))[1];
417 res2
= ((double*)((char*)__vlibm_TBL_rhypotf
+ si2
))[1];
418 res0
*= (((A3
* xx0
+ A2
) * xx0
+ A1
) * xx0
+ A0
);
419 res1
*= (((A3
* xx1
+ A2
) * xx1
+ A1
) * xx1
+ A0
);
420 res2
*= (((A3
* xx2
+ A2
) * xx2
+ A1
) * xx2
+ A0
);
440 hyp0
= x0
* (double)x0
+ y0
* (double)y0
;
444 HI(&dbase0
) = (0x60000000 - ((ibase0
& 0x7fe00000) >> 1));
446 hyp0h
= (ibase0
& 0x000fffff) | 0x3ff00000;
448 HI(&h_hi0
) = hyp0h
& 0x7fffc000;
451 si0
= ibase0
& 0x7f0;
452 xx0
= ((double*)((char*)__vlibm_TBL_rhypotf
+ si0
))[0];
454 xx0
= (hyp0
- h_hi0
) * xx0
;
455 res0
= ((double*)((char*)__vlibm_TBL_rhypotf
+ si0
))[1];
456 res0
*= (((A3
* xx0
+ A2
) * xx0
+ A1
) * xx0
+ A0
);