[Powerpc] Tune/optimize powerpc{32,64}/power7/memchr.S.
[glibc.git] / stdlib / gmp-impl.h
blob7e94624859dd701795aa9e5ab3ff66e9fb2874ac
/* Include file for internal GNU MP types and definitions.

Copyright (C) 1991-2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, see
<http://www.gnu.org/licenses/>.  */
/* When using gcc, make sure to use its builtin alloca.  HAVE_ALLOCA records
   that alloca is usable, for the benefit of the conditionals further down
   in this file.  */
#if defined (__GNUC__) && ! defined (alloca)
# define alloca __builtin_alloca
# define HAVE_ALLOCA
#endif
/* When using cc, do whatever necessary to allow use of alloca.  For many
   machines, this means including alloca.h.  IBM's compilers need a #pragma
   in "each module that needs to use alloca".

   Nothing here is evaluated when alloca is already a macro.  Each branch
   that succeeds in providing alloca also defines HAVE_ALLOCA.  */
#if ! defined (alloca)

/* We need lots of variants for MIPS, to cover all versions and perversions
   of OSes for MIPS.  */
# if defined (__mips) || defined (MIPSEL) || defined (MIPSEB) \
     || defined (_MIPSEL) || defined (_MIPSEB) || defined (__sgi) \
     || defined (__alpha) || defined (__sparc) || defined (sparc) \
     || defined (__ksr__)
#  include <alloca.h>
#  define HAVE_ALLOCA
# endif

# if defined (_IBMR2)
#  pragma alloca
#  define HAVE_ALLOCA
# endif

# if defined (__DECC)
#  define alloca(x) __ALLOCA(x)
#  define HAVE_ALLOCA
# endif

#endif /* ! defined (alloca) */
50 #if ! defined (HAVE_ALLOCA) || USE_STACK_ALLOC
51 #include "stack-alloc.h"
52 #else
53 #define TMP_DECL(m)
54 #define TMP_ALLOC(x) alloca(x)
55 #define TMP_MARK(m)
56 #define TMP_FREE(m)
57 #endif
/* Fallback null pointer constant, used only when no header included so far
   has defined NULL.  */
#if ! defined (NULL)
# define NULL ((void *) 0)
#endif
/* Make the `inline' keyword vanish for compilers that may not know it.
   GCC, C++ compilers and any compiler announcing C99 or later understand
   the keyword natively, so it is left untouched for those; redefining a
   keyword there would also interfere with system headers that use it.  */
#if ! defined (__GNUC__) && ! defined (__cplusplus) \
    && ! (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
# define inline /* Empty */
#endif
/* Absolute value of X.  X is evaluated twice, so it must be free of side
   effects.  Every use of the argument is parenthesised so that compound
   expressions such as ABS (a - b) negate the whole expression.  */
#define ABS(x) ((x) >= 0 ? (x) : -(x))
/* Smaller and larger of two values.  Both are defined only when the name
   is not already taken.  Each argument may be evaluated twice, so
   arguments must be free of side effects.  */
#if ! defined (MIN)
# define MIN(l,o) ((l) < (o) ? (l) : (o))
#endif
#if ! defined (MAX)
# define MAX(h,i) ((h) > (i) ? (h) : (i))
#endif
/* Field access macros.  X is a pointer to an object carrying the named
   member; each macro expands to an lvalue for that member, except ABSIZ,
   which yields the absolute value of the size field (and, through ABS,
   evaluates X more than once).  */
#define SIZ(x)   ((x)->_mp_size)
#define ABSIZ(x) ABS (SIZ (x))
#define PTR(x)   ((x)->_mp_d)
#define EXP(x)   ((x)->_mp_exp)
#define PREC(x)  ((x)->_mp_prec)
#define ALLOC(x) ((x)->_mp_alloc)
83 #include "gmp-mparam.h"
84 /* #include "longlong.h" */
/* Declarations of the C library allocator, of the three replaceable
   allocation hooks, and of their default implementations.  The reallocate
   and free hooks take size arguments in addition to the pointer.  For
   compilers that are neither ISO C nor C++, the same names are declared
   without parameter lists, and `const' and `signed' are defined away.  */
#if defined (__STDC__) || defined (__cplusplus)

void *malloc (size_t);
void *realloc (void *, size_t);
void free (void *);

extern void * (*_mp_allocate_func) (size_t);
extern void * (*_mp_reallocate_func) (void *, size_t, size_t);
extern void (*_mp_free_func) (void *, size_t);

void *_mp_default_allocate (size_t);
void *_mp_default_reallocate (void *, size_t, size_t);
void _mp_default_free (void *, size_t);

#else

#define const			/* Empty */
#define signed			/* Empty */

void *malloc ();
void *realloc ();
void free ();

extern void * (*_mp_allocate_func) ();
extern void * (*_mp_reallocate_func) ();
extern void (*_mp_free_func) ();

void *_mp_default_allocate ();
void *_mp_default_reallocate ();
void _mp_default_free ();

#endif
/* Copy NLIMBS *limbs* from SRC to DST.

   MPN_COPY_INCR walks from index 0 upwards, MPN_COPY_DECR from index
   NLIMBS - 1 downwards; MPN_COPY is the ascending variant.  The direction
   matters when the two areas overlap.  A count of zero or less copies
   nothing.  The index variable has type mp_size_t, which must be in scope
   at the point of use.  */
#define MPN_COPY_INCR(DST, SRC, NLIMBS) \
  do {									\
    mp_size_t __i;							\
    for (__i = 0; __i < (NLIMBS); __i++)				\
      (DST)[__i] = (SRC)[__i];						\
  } while (0)
#define MPN_COPY_DECR(DST, SRC, NLIMBS) \
  do {									\
    mp_size_t __i;							\
    for (__i = (NLIMBS) - 1; __i >= 0; __i--)				\
      (DST)[__i] = (SRC)[__i];						\
  } while (0)
#define MPN_COPY MPN_COPY_INCR
/* Zero NLIMBS *limbs* AT DST.  A count of zero or less stores nothing.
   The index variable has type mp_size_t, which must be in scope at the
   point of use.  */
#define MPN_ZERO(DST, NLIMBS) \
  do {									\
    mp_size_t __i;							\
    for (__i = 0; __i < (NLIMBS); __i++)				\
      (DST)[__i] = 0;							\
  } while (0)
/* Strip high zero limbs: decrement NLIMBS until it is zero or the limb at
   DST[NLIMBS - 1] is non-zero.  NLIMBS must be a modifiable lvalue; it is
   updated in place.  The loop body is braced so that the decrement belongs
   to the loop -- without the braces the loop never terminates when the top
   limb is zero.  */
#define MPN_NORMALIZE(DST, NLIMBS) \
  do {									\
    while ((NLIMBS) > 0)						\
      {									\
	if ((DST)[(NLIMBS) - 1] != 0)					\
	  break;							\
	(NLIMBS)--;							\
      }									\
  } while (0)
/* Like MPN_NORMALIZE, but without the NLIMBS > 0 test: decrement NLIMBS
   until DST[NLIMBS - 1] is non-zero.  The caller must guarantee that some
   limb below the starting NLIMBS is non-zero, otherwise the scan runs off
   the start of the array.  NLIMBS must be a modifiable lvalue.  */
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
  do {									\
    while (1)								\
      {									\
	if ((DST)[(NLIMBS) - 1] != 0)					\
	  break;							\
	(NLIMBS)--;							\
      }									\
  } while (0)
/* Initialize the MP_INT X with space for NLIMBS limbs.
   X should be a temporary variable, and it will be automatically
   cleared out when the running function returns.
   We use __x here to make it possible to accept both mpz_ptr and mpz_t
   arguments.

   Only the _mp_alloc and _mp_d members are set; the limb storage comes
   from TMP_ALLOC.  NLIMBS is evaluated twice.  */
#define MPZ_TMP_INIT(X, NLIMBS) \
  do {									\
    mpz_ptr __x = (X);							\
    __x->_mp_alloc = (NLIMBS);						\
    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB);	\
  } while (0)
/* Multiply the SIZE-limb operands at UP and VP into PRODP, dispatching on
   SIZE: operands shorter than KARATSUBA_THRESHOLD go to
   impn_mul_n_basecase, all others to impn_mul_n, which additionally
   receives the scratch area TSPACE.

   The expansion deliberately ends without a semicolon so that the macro
   behaves as a single statement, e.g. as the body of an `if' that is
   followed by `else'.  */
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
  do {									\
    if ((size) < KARATSUBA_THRESHOLD)					\
      impn_mul_n_basecase (prodp, up, vp, size);			\
    else								\
      impn_mul_n (prodp, up, vp, size, tspace);				\
  } while (0)
/* Square the SIZE-limb operand at UP into PRODP, dispatching on SIZE:
   operands shorter than KARATSUBA_THRESHOLD go to impn_sqr_n_basecase, all
   others to impn_sqr_n, which additionally receives the scratch area
   TSPACE.

   The expansion deliberately ends without a semicolon so that the macro
   behaves as a single statement, e.g. as the body of an `if' that is
   followed by `else'.  */
#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
  do {									\
    if ((size) < KARATSUBA_THRESHOLD)					\
      impn_sqr_n_basecase (prodp, up, size);				\
    else								\
      impn_sqr_n (prodp, up, size, tspace);				\
  } while (0)
186 /* Structure for conversion between internal binary format and
187 strings in base 2..36. */
188 struct bases
190 /* Number of digits in the conversion base that always fits in an mp_limb_t.
191 For example, for base 10 on a machine where a mp_limb_t has 32 bits this
192 is 9, since 10**9 is the largest number that fits into a mp_limb_t. */
193 int chars_per_limb;
195 /* log(2)/log(conversion_base) */
196 float chars_per_bit_exactly;
198 /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
199 factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
200 i.e. the number of bits used to represent each digit in the base. */
201 mp_limb_t big_base;
203 /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
204 fixed-point number. Instead of dividing by big_base an application can
205 choose to multiply by big_base_inverted. */
206 mp_limb_t big_base_inverted;
209 extern const struct bases __mp_bases[];
210 extern mp_size_t __gmp_default_fp_limb_precision;
/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
   limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
   If this would yield overflow, DI should be the largest possible number
   (i.e., only ones).  For correct operation, the most significant bit of D
   has to be set.  Put the quotient in Q and the remainder in R.

   A quotient estimate is formed from the high product of NH and DI and may
   be too small.  It is corrected by up to two subtractions of D while the
   high limb of the running remainder is non-zero, followed by one more
   when the low limb alone is still >= D.  Each correction is braced so the
   quotient is bumped only together with its matching subtraction.

   Requires umul_ppmm and sub_ddmmss (longlong.h) and the type mp_limb_t.
   Q and R must be lvalues.  */
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
  do {									\
    mp_limb_t _ql __attribute__ ((unused));				\
    mp_limb_t _q, _r;							\
    mp_limb_t _xh, _xl;							\
    umul_ppmm (_q, _ql, (nh), (di));					\
    _q += (nh);		/* DI is 2**BITS_PER_MP_LIMB too small */	\
    umul_ppmm (_xh, _xl, _q, (d));					\
    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);				\
    if (_xh != 0)							\
      {									\
	sub_ddmmss (_xh, _r, _xh, _r, 0, (d));				\
	_q += 1;							\
	if (_xh != 0)							\
	  {								\
	    sub_ddmmss (_xh, _r, _xh, _r, 0, (d));			\
	    _q += 1;							\
	  }								\
      }									\
    if (_r >= (d))							\
      {									\
	_r -= (d);							\
	_q += 1;							\
      }									\
    (r) = _r;								\
    (q) = _q;								\
  } while (0)
/* Like udiv_qrnnd_preinv, but for any value D.  DNORM is D shifted left
   so that its most significant bit is set.  LGUP is ceil(log2(D)).

   The low limb is shifted right by LGUP in two steps (by 1, then by
   LGUP - 1); presumably this keeps each shift count below the limb width
   when LGUP equals BITS_PER_MP_LIMB.  The sequence is branch-free.

   Temporaries carry a leading underscore so that caller variables with
   names such as n1 or n2 can be passed as arguments without being
   captured.  Requires umul_ppmm and add_ssaaaa (longlong.h), the types
   mp_limb_t and mp_limb_signed_t, and BITS_PER_MP_LIMB.  Q and R must be
   lvalues; the other arguments are evaluated more than once.  */
#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
  do {									\
    mp_limb_t _n2, _n10, _n1, _nadj, _q1;				\
    mp_limb_t _xh, _xl;							\
    _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup)))				\
	  + ((nl) >> 1 >> ((lgup) - 1));				\
    _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup));				\
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));		\
    _nadj = _n10 + (_n1 & (dnorm));					\
    umul_ppmm (_xh, _xl, (di), _n2 - _n1);				\
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);				\
    _q1 = ~(_n2 + _xh);							\
    umul_ppmm (_xh, _xl, _q1, (d));					\
    add_ssaaaa (_xh, _xl, _xh, _xl, (nh), (nl));			\
    _xh -= (d);								\
    (r) = _xl + ((d) & _xh);						\
    (q) = _xh - _q1;							\
  } while (0)
/* Exactly like udiv_qrnnd_preinv, but branch-free.  It is not clear which
   version to use.

   Temporaries carry a leading underscore so that caller variables with
   names such as n1 or n2 can be passed as arguments without being
   captured.  Requires umul_ppmm and add_ssaaaa (longlong.h), the types
   mp_limb_t and mp_limb_signed_t, and BITS_PER_MP_LIMB.  Q and R must be
   lvalues; the other arguments are evaluated more than once.  */
#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \
  do {									\
    mp_limb_t _n2, _n10, _n1, _nadj, _q1;				\
    mp_limb_t _xh, _xl;							\
    _n2 = (nh);								\
    _n10 = (nl);							\
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));		\
    _nadj = _n10 + (_n1 & (d));						\
    umul_ppmm (_xh, _xl, (di), _n2 - _n1);				\
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);				\
    _q1 = ~(_n2 + _xh);							\
    umul_ppmm (_xh, _xl, _q1, (d));					\
    add_ssaaaa (_xh, _xl, _xh, _xl, (nh), (nl));			\
    _xh -= (d);								\
    (r) = _xl + ((d) & _xh);						\
    (q) = _xh - _q1;							\
  } while (0)
283 #if defined (__GNUC__)
284 /* Define stuff for longlong.h. */
285 typedef unsigned int UQItype __attribute__ ((mode (QI)));
286 typedef int SItype __attribute__ ((mode (SI)));
287 typedef unsigned int USItype __attribute__ ((mode (SI)));
288 typedef int DItype __attribute__ ((mode (DI)));
289 typedef unsigned int UDItype __attribute__ ((mode (DI)));
290 #else
291 typedef unsigned char UQItype;
292 typedef long SItype;
293 typedef unsigned long USItype;
294 #endif
296 typedef mp_limb_t UWtype;
297 typedef unsigned int UHWtype;
298 #define W_TYPE_SIZE BITS_PER_MP_LIMB
/* Internal mpn calls.  Each internal routine name is passed through __MPN,
   which is defined elsewhere and supplies the actual symbol name.  */
#define impn_mul_n_basecase	__MPN(impn_mul_n_basecase)
#define impn_mul_n		__MPN(impn_mul_n)
#define impn_sqr_n_basecase	__MPN(impn_sqr_n_basecase)
#define impn_sqr_n		__MPN(impn_sqr_n)
/* _PROTO wraps a parenthesised parameter list.  ISO C and C++ compilers
   see the list unchanged; other compilers see an empty `()' instead.  An
   existing definition of _PROTO is respected.  */
#if ! defined (_PROTO)
# if defined (__STDC__) || defined (__cplusplus)
#  define _PROTO(x) x
# else
#  define _PROTO(x) ()
# endif
#endif
314 /* Prototypes for internal mpn calls. */
315 extern void impn_mul_n_basecase _PROTO ((mp_ptr prodp, mp_srcptr up,
316 mp_srcptr vp, mp_size_t size));
317 extern void impn_mul_n _PROTO ((mp_ptr prodp, mp_srcptr up, mp_srcptr vp,
318 mp_size_t size, mp_ptr tspace));
319 extern void impn_sqr_n_basecase _PROTO ((mp_ptr prodp, mp_srcptr up,
320 mp_size_t size));
321 extern void impn_sqr_n _PROTO ((mp_ptr prodp, mp_srcptr up, mp_size_t size,
322 mp_ptr tspace));
/* Defaults for the layout selectors used by union ieee_double_extract.
   Unless something included earlier has already set them, big-endian
   layout is assumed and mixed-endian layout is off.  */
#if ! defined (IEEE_DOUBLE_BIG_ENDIAN)
# define IEEE_DOUBLE_BIG_ENDIAN 1
#endif

#if ! defined (IEEE_DOUBLE_MIXED_ENDIAN)
# define IEEE_DOUBLE_MIXED_ENDIAN 0
#endif
/* Overlay of a double with bit-fields for its sign (1 bit), exponent
   (11 bits) and mantissa (20 high bits in manh, 32 low bits in manl).
   Exactly one of three field orders is compiled, chosen by
   IEEE_DOUBLE_MIXED_ENDIAN and IEEE_DOUBLE_BIG_ENDIAN; when neither is
   non-zero the third layout (manl first, sig last) is used.  */
#if IEEE_DOUBLE_MIXED_ENDIAN
union ieee_double_extract
{
  struct
    {
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
      unsigned int manl:32;
    } s;
  double d;
};
#elif IEEE_DOUBLE_BIG_ENDIAN
union ieee_double_extract
{
  struct
    {
      unsigned int sig:1;
      unsigned int exp:11;
      unsigned int manh:20;
      unsigned int manl:32;
    } s;
  double d;
};
#else
union ieee_double_extract
{
  struct
    {
      unsigned int manl:32;
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
    } s;
  double d;
};
#endif