/* Copyright (C) 2007-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef _SQRT_MACROS_H_
#define _SQRT_MACROS_H_

#define FENCE __fence
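
// FENCE is expected to act as an optimization barrier: each FENCE'd
// expression below must be rounded exactly as written, in source order.
// The compensated-summation and residual computations rely on this;
// if the compiler were free to reassociate or fuse these operations,
// the exact error terms they recover would be lost.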

#if DOUBLE_EXTENDED_ON

extern BINARY80 SQRT80 (BINARY80);
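
// When 80-bit extended precision is available, the square root of a
// 128-bit coefficient can be taken directly: a BID128 coefficient is
// below 10^34 < 2^113, so its integer square root is below 2^57 and
// fits comfortably in BINARY80's 64-bit significand.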

__BID_INLINE__ UINT64
short_sqrt128 (UINT128 A10) {
  BINARY80 lx, ly, l64;
  int_float f64;

  // 2^64
  f64.i = 0x5f800000;
  l64 = (BINARY80) f64.d;
  lx = (BINARY80) A10.w[1] * l64 + (BINARY80) A10.w[0];
  ly = SQRT80 (lx);
  return (UINT64) ly;
}
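
// long_sqrt128 computes the two 64-bit words of the square root of a
// 256-bit value.  C256 is summed into a single BINARY80 value, high
// word first; the FENCEd additions keep every intermediate rounding in
// source order, so the part of C256 that does not fit in lx can be
// recovered exactly (into lCl below) and folded into the correction.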

__BID_INLINE__ void
long_sqrt128 (UINT128 * pCS, UINT256 C256) {
  UINT256 C4;
  UINT128 CS;
  UINT64 X;
  SINT64 SE;
  BINARY80 l64, lm64, l128, lxL, lx, ly, lS, lSH, lSL, lE, l3, l2,
    l1, l0, lp, lCl;
  int_float fx, f64, fm64;
  int *ple = (int *) &lx;

  // 2^64
  f64.i = 0x5f800000;
  l64 = (BINARY80) f64.d;

  l128 = l64 * l64;
  lx = l3 = (BINARY80) C256.w[3] * l64 * l128;
  l2 = (BINARY80) C256.w[2] * l128;
  lx = FENCE (lx + l2);
  l1 = (BINARY80) C256.w[1] * l64;
  lx = FENCE (lx + l1);
  l0 = (BINARY80) C256.w[0];
  lx = FENCE (lx + l0);
  // sqrt(C256)
  lS = SQRT80 (lx);

  // get coefficient
  // 2^(-64)
  fm64.i = 0x1f800000;
  lm64 = (BINARY80) fm64.d;
  CS.w[1] = (UINT64) (lS * lm64);
  CS.w[0] = (UINT64) (lS - (BINARY80) CS.w[1] * l64);
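
  // The two conversions above split the (up to 128-bit) root into
  // 64-bit words: multiplying by 2^(-64) and truncating yields the high
  // word, and subtracting the rescaled high word leaves the low word.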

  ///////////////////////////////////////
  //   CAUTION!
  //   little endian code only
  //   add solution for big endian
  //////////////////////////////////////
  lSH = lS;
  *((UINT64 *) & lSH) &= 0xffffffff00000000ull;
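
  // This is a Dekker-style split of lS into lSH + lSL: clearing the
  // low 32 bits of the BINARY80 significand (stored in the first 8
  // bytes on little-endian targets, hence the CAUTION above) leaves
  // lSH with at most 32 significant bits, so lSH*lSH, lSH*lSL and
  // lSL*lSL below are all exact in extended precision.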

  // correction for C256 rounding
  lCl = FENCE (l3 - lx);
  lCl = FENCE (lCl + l2);
  lCl = FENCE (lCl + l1);
  lCl = FENCE (lCl + l0);

  lSL = lS - lSH;

  //////////////////////////////////////////
  //      Watch for compiler re-ordering
  /////////////////////////////////////////
  // C256-S^2
  lxL = FENCE (lx - lSH * lSH);
  lp = lSH * lSL;
  lp += lp;
  lxL = FENCE (lxL - lp);
  lSL *= lSL;
  lxL = FENCE (lxL - lSL);
  lCl += lxL;

  // correction term
  lE = lCl / (lS + lS);
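
  // This is one Newton (Heron) refinement step: for S close to
  // sqrt(C), sqrt(C) ~= S + (C - S^2) / (2*S).  lCl carries the full
  // residual C256 - S^2, including the part of C256 that never fit in
  // lx, so lE is the signed correction to apply to the truncated
  // coefficient CS.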

  // get low part of coefficient
  X = CS.w[0];
  if (lCl >= 0) {
    SE = (SINT64) (lE);
    CS.w[0] += SE;
    if (CS.w[0] < X)
      CS.w[1]++;
  } else {
    SE = (SINT64) (-lE);
    CS.w[0] -= SE;
    if (CS.w[0] > X)
      CS.w[1]--;
  }

  pCS->w[0] = CS.w[0];
  pCS->w[1] = CS.w[1];
}

#else

extern double sqrt (double);
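
// Without extended precision, double gives only 53 bits, so the root
// is refined in integer arithmetic instead.  Starting from
// rs ~= 1/sqrt(A) computed in double, the code evaluates the Taylor
// expansion
//   sqrt(A) = A*rs * (1 + eps + 3/2*eps^2 + ...),
// where eps = (1 - A*rs^2)/2.  short_sqrt128 keeps only the
// first-order term; long_sqrt128 below also needs 3/2*eps^2.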

__BID_INLINE__ UINT64
short_sqrt128 (UINT128 A10) {
  UINT256 ARS, ARS0, AE0, AE, S;
  UINT64 MY, ES, CY;
  double lx, l64;
  int_double f64, ly;
  int ey, k;

  // 2^64
  f64.i = 0x43f0000000000000ull;
  l64 = f64.d;
  lx = (double) A10.w[1] * l64 + (double) A10.w[0];
  ly.d = 1.0 / sqrt (lx);

  MY = (ly.i & 0x000fffffffffffffull) | 0x0010000000000000ull;
  ey = 0x3ff - (ly.i >> 52);
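
  // ly = 1/sqrt(lx), decomposed from its IEEE double bit pattern:
  // MY is the 53-bit significand with the implicit bit restored, and
  // ey the negated unbiased exponent, so that rs = MY * 2^(-52-ey).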

  // A10*RS^2
  __mul_64x128_to_192 (ARS0, MY, A10);
  __mul_64x192_to_256 (ARS, MY, ARS0);

  // shr by 2*ey+40, to get a 64-bit value
  k = (ey << 1) + 104 - 64;
  if (k >= 128) {
    if (k > 128)
      ES = (ARS.w[2] >> (k - 128)) | (ARS.w[3] << (192 - k));
    else
      ES = ARS.w[2];
  } else {
    if (k >= 64) {
      ARS.w[0] = ARS.w[1];
      ARS.w[1] = ARS.w[2];
      k -= 64;
    }
    if (k) {
      __shr_128 (ARS, ARS, k);
    }
    ES = ARS.w[0];
  }

  ES = ((SINT64) ES) >> 1;
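
  // ARS = A10*MY^2 ~= 2^(2*ey+104), so after the shift the low 64
  // bits, read as a signed value, hold (A10*rs^2 - 1) scaled by 2^64;
  // the arithmetic shift halves that, leaving ES = -eps * 2^64.  Its
  // sign tells whether rs under- or over-estimates the root.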

  if (((SINT64) ES) < 0) {
    ES = -ES;

    // A*RS*eps (scaled by 2^64)
    __mul_64x192_to_256 (AE0, ES, ARS0);

    AE.w[0] = AE0.w[1];
    AE.w[1] = AE0.w[2];
    AE.w[2] = AE0.w[3];

    __add_carry_out (S.w[0], CY, ARS0.w[0], AE.w[0]);
    __add_carry_in_out (S.w[1], CY, ARS0.w[1], AE.w[1], CY);
    S.w[2] = ARS0.w[2] + AE.w[2] + CY;
  } else {
    // A*RS*eps (scaled by 2^64)
    __mul_64x192_to_256 (AE0, ES, ARS0);

    AE.w[0] = AE0.w[1];
    AE.w[1] = AE0.w[2];
    AE.w[2] = AE0.w[3];

    __sub_borrow_out (S.w[0], CY, ARS0.w[0], AE.w[0]);
    __sub_borrow_in_out (S.w[1], CY, ARS0.w[1], AE.w[1], CY);
    S.w[2] = ARS0.w[2] - AE.w[2] - CY;
  }
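
  // S = A10*MY*(1 +/- eps) ~= sqrt(A10) * 2^(ey+52).  Shifting right
  // by ey+51 leaves the result with one extra guard bit, which the
  // final increment-and-halve uses to round to nearest.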

  k = ey + 51;

  if (k >= 64) {
    if (k >= 128) {
      S.w[0] = S.w[2];
      S.w[1] = 0;
      k -= 128;
    } else {
      S.w[0] = S.w[1];
      S.w[1] = S.w[2];
    }
    k -= 64;
  }
  if (k) {
    __shr_128 (S, S, k);
  }

  return (UINT64) ((S.w[0] + 1) >> 1);
}
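
// 256-bit variant of the same scheme.  The error term is kept to 128
// bits and the second-order term 3/2*eps^2 is included, pushing the
// accuracy far enough for a 34-digit (~113-bit) coefficient.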

__BID_INLINE__ void
long_sqrt128 (UINT128 * pCS, UINT256 C256) {
  UINT512 ARS0, ARS;
  UINT256 ARS00, AE, AE2, S;
  UINT128 ES, ES2, ARS1;
  UINT64 ES32, CY, MY;
  double l64, l128, lx, l2, l1, l0;
  int_double f64, ly;
  int ey, k, k2;

  // 2^64
  f64.i = 0x43f0000000000000ull;
  l64 = f64.d;

  l128 = l64 * l64;
  lx = (double) C256.w[3] * l64 * l128;
  l2 = (double) C256.w[2] * l128;
  lx = FENCE (lx + l2);
  l1 = (double) C256.w[1] * l64;
  lx = FENCE (lx + l1);
  l0 = (double) C256.w[0];
  lx = FENCE (lx + l0);
  // sqrt(C256)
  ly.d = 1.0 / sqrt (lx);

  MY = (ly.i & 0x000fffffffffffffull) | 0x0010000000000000ull;
  ey = 0x3ff - (ly.i >> 52);

  // A10*RS^2, scaled by 2^(2*ey+104)
  __mul_64x256_to_320 (ARS0, MY, C256);
  __mul_64x320_to_384 (ARS, MY, ARS0);

  // shr by k=(2*ey+104)-128
  // expect k is in the range (192, 256) if result in [10^33, 10^34)
  // apply an additional signed shift by 1 at the same time (to get eps=eps0/2)
  k = (ey << 1) + 104 - 128 - 192;
  k2 = 64 - k;
  ES.w[0] = (ARS.w[3] >> (k + 1)) | (ARS.w[4] << (k2 - 1));
  ES.w[1] = (ARS.w[4] >> k) | (ARS.w[5] << k2);
  ES.w[1] = ((SINT64) ES.w[1]) >> 1;
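
  // As in short_sqrt128, but with 128 bits of eps: read as a signed
  // 128-bit value, ES now holds -eps scaled by 2^128, where
  // eps = (1 - C256*rs^2)/2.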

  // A*RS >> 192 (for error term computation)
  ARS1.w[0] = ARS0.w[3];
  ARS1.w[1] = ARS0.w[4];

  // A*RS>>64
  ARS00.w[0] = ARS0.w[1];
  ARS00.w[1] = ARS0.w[2];
  ARS00.w[2] = ARS0.w[3];
  ARS00.w[3] = ARS0.w[4];

  if (((SINT64) ES.w[1]) < 0) {
    // negate the 128-bit ES (two's complement)
    ES.w[0] = -ES.w[0];
    ES.w[1] = -ES.w[1];
    if (ES.w[0])
      ES.w[1]--;

    // A*RS*eps
    __mul_128x128_to_256 (AE, ES, ARS1);

    __add_carry_out (S.w[0], CY, ARS00.w[0], AE.w[0]);
    __add_carry_in_out (S.w[1], CY, ARS00.w[1], AE.w[1], CY);
    __add_carry_in_out (S.w[2], CY, ARS00.w[2], AE.w[2], CY);
    S.w[3] = ARS00.w[3] + AE.w[3] + CY;
  } else {
    // A*RS*eps
    __mul_128x128_to_256 (AE, ES, ARS1);

    __sub_borrow_out (S.w[0], CY, ARS00.w[0], AE.w[0]);
    __sub_borrow_in_out (S.w[1], CY, ARS00.w[1], AE.w[1], CY);
    __sub_borrow_in_out (S.w[2], CY, ARS00.w[2], AE.w[2], CY);
    S.w[3] = ARS00.w[3] - AE.w[3] - CY;
  }

  // 3/2*eps^2, scaled by 2^128
  ES32 = ES.w[1] + (ES.w[1] >> 1);
  __mul_64x64_to_128 (ES2, ES32, ES.w[1]);
  // A*RS*3/2*eps^2
  __mul_128x128_to_256 (AE2, ES2, ARS1);
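
  // Second-order Taylor term: ES32 = 3/2*|eps| (high word only), so
  // ES2 = 3/2*eps^2.  Since eps^2 >= 0 this term is always added,
  // regardless of the sign handled above.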

  // result, scaled by 2^(ey+52-64)
  __add_carry_out (S.w[0], CY, S.w[0], AE2.w[0]);
  __add_carry_in_out (S.w[1], CY, S.w[1], AE2.w[1], CY);
  __add_carry_in_out (S.w[2], CY, S.w[2], AE2.w[2], CY);
  S.w[3] = S.w[3] + AE2.w[3] + CY;

  // k in (0, 64)
  k = ey + 51 - 128;
  k2 = 64 - k;
  S.w[0] = (S.w[1] >> k) | (S.w[2] << k2);
  S.w[1] = (S.w[2] >> k) | (S.w[3] << k2);
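
  // As in short_sqrt128, S keeps one extra guard bit; the increment
  // below and the final right shift by one round the 128-bit
  // coefficient to nearest.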

  // round to nearest
  S.w[0]++;
  if (!S.w[0])
    S.w[1]++;

  pCS->w[0] = (S.w[1] << 63) | (S.w[0] >> 1);
  pCS->w[1] = S.w[1] >> 1;
}

#endif

#endif