32bit memcmp/strcmp/strncmp optimized for SSSE3/SSS4.2
[glibc.git] / stdlib / gmp-impl.h
blob85e419d36f99829e60503eb9219a80e57370aa5a
1 /* Include file for internal GNU MP types and definitions.
3 Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
5 This file is part of the GNU MP Library.
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or (at your
10 option) any later version.
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20 MA 02111-1307, USA. */
22 /* When using gcc, make sure to use its builtin alloca. */
23 #if ! defined (alloca) && defined (__GNUC__)
24 #define alloca __builtin_alloca
25 #define HAVE_ALLOCA
26 #endif
28 /* When using cc, do whatever necessary to allow use of alloca. For many
29 machines, this means including alloca.h. IBM's compilers need a #pragma
30 in "each module that needs to use alloca". */
31 #if ! defined (alloca)
32 /* We need lots of variants for MIPS, to cover all versions and perversions
33 of OSes for MIPS. */
34 #if defined (__mips) || defined (MIPSEL) || defined (MIPSEB) \
35 || defined (_MIPSEL) || defined (_MIPSEB) || defined (__sgi) \
36 || defined (__alpha) || defined (__sparc) || defined (sparc) \
37 || defined (__ksr__)
38 #include <alloca.h>
39 #define HAVE_ALLOCA
40 #endif
41 #if defined (_IBMR2)
42 #pragma alloca
43 #define HAVE_ALLOCA
44 #endif
45 #if defined (__DECC)
46 #define alloca(x) __ALLOCA(x)
47 #define HAVE_ALLOCA
48 #endif
49 #endif
51 #if ! defined (HAVE_ALLOCA) || USE_STACK_ALLOC
52 #include "stack-alloc.h"
53 #else
54 #define TMP_DECL(m)
55 #define TMP_ALLOC(x) alloca(x)
56 #define TMP_MARK(m)
57 #define TMP_FREE(m)
58 #endif
60 #ifndef NULL
61 #define NULL ((void *) 0)
62 #endif
64 #if ! defined (__GNUC__)
65 #define inline /* Empty */
66 #endif
68 #define ABS(x) (x >= 0 ? x : -x)
69 #ifndef MIN
70 #define MIN(l,o) ((l) < (o) ? (l) : (o))
71 #endif
72 #ifndef MAX
73 #define MAX(h,i) ((h) > (i) ? (h) : (i))
74 #endif
76 /* Field access macros. */
77 #define SIZ(x) ((x)->_mp_size)
78 #define ABSIZ(x) ABS (SIZ (x))
79 #define PTR(x) ((x)->_mp_d)
80 #define EXP(x) ((x)->_mp_exp)
81 #define PREC(x) ((x)->_mp_prec)
82 #define ALLOC(x) ((x)->_mp_alloc)
84 #include "gmp-mparam.h"
85 /* #include "longlong.h" */
87 #if defined (__STDC__) || defined (__cplusplus)
88 void *malloc (size_t);
89 void *realloc (void *, size_t);
90 void free (void *);
92 extern void * (*_mp_allocate_func) (size_t);
93 extern void * (*_mp_reallocate_func) (void *, size_t, size_t);
94 extern void (*_mp_free_func) (void *, size_t);
96 void *_mp_default_allocate (size_t);
97 void *_mp_default_reallocate (void *, size_t, size_t);
98 void _mp_default_free (void *, size_t);
100 #else
102 #define const /* Empty */
103 #define signed /* Empty */
105 void *malloc ();
106 void *realloc ();
107 void free ();
109 extern void * (*_mp_allocate_func) ();
110 extern void * (*_mp_reallocate_func) ();
111 extern void (*_mp_free_func) ();
113 void *_mp_default_allocate ();
114 void *_mp_default_reallocate ();
115 void _mp_default_free ();
116 #endif
118 /* Copy NLIMBS *limbs* from SRC to DST. */
119 #define MPN_COPY_INCR(DST, SRC, NLIMBS) \
120 do { \
121 mp_size_t __i; \
122 for (__i = 0; __i < (NLIMBS); __i++) \
123 (DST)[__i] = (SRC)[__i]; \
124 } while (0)
125 #define MPN_COPY_DECR(DST, SRC, NLIMBS) \
126 do { \
127 mp_size_t __i; \
128 for (__i = (NLIMBS) - 1; __i >= 0; __i--) \
129 (DST)[__i] = (SRC)[__i]; \
130 } while (0)
131 #define MPN_COPY MPN_COPY_INCR
133 /* Zero NLIMBS *limbs* AT DST. */
134 #define MPN_ZERO(DST, NLIMBS) \
135 do { \
136 mp_size_t __i; \
137 for (__i = 0; __i < (NLIMBS); __i++) \
138 (DST)[__i] = 0; \
139 } while (0)
141 #define MPN_NORMALIZE(DST, NLIMBS) \
142 do { \
143 while (NLIMBS > 0) \
145 if ((DST)[(NLIMBS) - 1] != 0) \
146 break; \
147 NLIMBS--; \
149 } while (0)
150 #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
151 do { \
152 while (1) \
154 if ((DST)[(NLIMBS) - 1] != 0) \
155 break; \
156 NLIMBS--; \
158 } while (0)
160 /* Initialize the MP_INT X with space for NLIMBS limbs.
161 X should be a temporary variable, and it will be automatically
162 cleared out when the running function returns.
163 We use __x here to make it possible to accept both mpz_ptr and mpz_t
164 arguments. */
165 #define MPZ_TMP_INIT(X, NLIMBS) \
166 do { \
167 mpz_ptr __x = (X); \
168 __x->_mp_alloc = (NLIMBS); \
169 __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB); \
170 } while (0)
172 #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
173 do { \
174 if ((size) < KARATSUBA_THRESHOLD) \
175 impn_mul_n_basecase (prodp, up, vp, size); \
176 else \
177 impn_mul_n (prodp, up, vp, size, tspace); \
178 } while (0);
179 #define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
180 do { \
181 if ((size) < KARATSUBA_THRESHOLD) \
182 impn_sqr_n_basecase (prodp, up, size); \
183 else \
184 impn_sqr_n (prodp, up, size, tspace); \
185 } while (0);
187 /* Structure for conversion between internal binary format and
188 strings in base 2..36. */
189 struct bases
191 /* Number of digits in the conversion base that always fits in an mp_limb_t.
192 For example, for base 10 on a machine where a mp_limb_t has 32 bits this
193 is 9, since 10**9 is the largest number that fits into a mp_limb_t. */
194 int chars_per_limb;
196 /* log(2)/log(conversion_base) */
197 float chars_per_bit_exactly;
199 /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
200 factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
201 i.e. the number of bits used to represent each digit in the base. */
202 mp_limb_t big_base;
204 /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
205 fixed-point number. Instead of dividing by big_base an application can
206 choose to multiply by big_base_inverted. */
207 mp_limb_t big_base_inverted;
210 extern const struct bases __mp_bases[];
211 extern mp_size_t __gmp_default_fp_limb_precision;
213 /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
214 limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
215 If this would yield overflow, DI should be the largest possible number
216 (i.e., only ones). For correct operation, the most significant bit of D
217 has to be set. Put the quotient in Q and the remainder in R. */
218 #define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
219 do { \
220 mp_limb_t _q, _ql, _r; \
221 mp_limb_t _xh, _xl; \
222 umul_ppmm (_q, _ql, (nh), (di)); \
223 _q += (nh); /* DI is 2**BITS_PER_MP_LIMB too small */\
224 umul_ppmm (_xh, _xl, _q, (d)); \
225 sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
226 if (_xh != 0) \
228 sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
229 _q += 1; \
230 if (_xh != 0) \
232 sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
233 _q += 1; \
236 if (_r >= (d)) \
238 _r -= (d); \
239 _q += 1; \
241 (r) = _r; \
242 (q) = _q; \
243 } while (0)
244 /* Like udiv_qrnnd_preinv, but for for any value D. DNORM is D shifted left
245 so that its most significant bit is set. LGUP is ceil(log2(D)). */
246 #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
247 do { \
248 mp_limb_t n2, n10, n1, nadj, q1; \
249 mp_limb_t _xh, _xl; \
250 n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> (l - 1));\
251 n10 = (nl) << (BITS_PER_MP_LIMB - (lgup)); \
252 n1 = ((mp_limb_signed_t) n10 >> (BITS_PER_MP_LIMB - 1)); \
253 nadj = n10 + (n1 & (dnorm)); \
254 umul_ppmm (_xh, _xl, di, n2 - n1); \
255 add_ssaaaa (_xh, _xl, _xh, _xl, 0, nadj); \
256 q1 = ~(n2 + _xh); \
257 umul_ppmm (_xh, _xl, q1, d); \
258 add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
259 _xh -= (d); \
260 (r) = _xl + ((d) & _xh); \
261 (q) = _xh - q1; \
262 } while (0)
263 /* Exactly like udiv_qrnnd_preinv, but branch-free. It is not clear which
264 version to use. */
265 #define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \
266 do { \
267 mp_limb_t n2, n10, n1, nadj, q1; \
268 mp_limb_t _xh, _xl; \
269 n2 = (nh); \
270 n10 = (nl); \
271 n1 = ((mp_limb_signed_t) n10 >> (BITS_PER_MP_LIMB - 1)); \
272 nadj = n10 + (n1 & (d)); \
273 umul_ppmm (_xh, _xl, di, n2 - n1); \
274 add_ssaaaa (_xh, _xl, _xh, _xl, 0, nadj); \
275 q1 = ~(n2 + _xh); \
276 umul_ppmm (_xh, _xl, q1, d); \
277 add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
278 _xh -= (d); \
279 (r) = _xl + ((d) & _xh); \
280 (q) = _xh - q1; \
281 } while (0)
283 #if defined (__GNUC__)
284 /* Define stuff for longlong.h. */
285 typedef unsigned int UQItype __attribute__ ((mode (QI)));
286 typedef int SItype __attribute__ ((mode (SI)));
287 typedef unsigned int USItype __attribute__ ((mode (SI)));
288 typedef int DItype __attribute__ ((mode (DI)));
289 typedef unsigned int UDItype __attribute__ ((mode (DI)));
290 #else
291 typedef unsigned char UQItype;
292 typedef long SItype;
293 typedef unsigned long USItype;
294 #endif
296 typedef mp_limb_t UWtype;
297 typedef unsigned int UHWtype;
298 #define W_TYPE_SIZE BITS_PER_MP_LIMB
300 /* Internal mpn calls */
301 #define impn_mul_n_basecase __MPN(impn_mul_n_basecase)
302 #define impn_mul_n __MPN(impn_mul_n)
303 #define impn_sqr_n_basecase __MPN(impn_sqr_n_basecase)
304 #define impn_sqr_n __MPN(impn_sqr_n)
306 #ifndef _PROTO
307 #if defined (__STDC__) || defined (__cplusplus)
308 #define _PROTO(x) x
309 #else
310 #define _PROTO(x) ()
311 #endif
312 #endif
314 /* Prototypes for internal mpn calls. */
315 extern void impn_mul_n_basecase _PROTO ((mp_ptr prodp, mp_srcptr up,
316 mp_srcptr vp, mp_size_t size));
317 extern void impn_mul_n _PROTO ((mp_ptr prodp, mp_srcptr up, mp_srcptr vp,
318 mp_size_t size, mp_ptr tspace));
319 extern void impn_sqr_n_basecase _PROTO ((mp_ptr prodp, mp_srcptr up,
320 mp_size_t size));
321 extern void impn_sqr_n _PROTO ((mp_ptr prodp, mp_srcptr up, mp_size_t size,
322 mp_ptr tspace));
326 #ifndef IEEE_DOUBLE_BIG_ENDIAN
327 #define IEEE_DOUBLE_BIG_ENDIAN 1
328 #endif
330 #ifndef IEEE_DOUBLE_MIXED_ENDIAN
331 #define IEEE_DOUBLE_MIXED_ENDIAN 0
332 #endif
334 #if IEEE_DOUBLE_MIXED_ENDIAN
335 union ieee_double_extract
337 struct
339 unsigned int manh:20;
340 unsigned int exp:11;
341 unsigned int sig:1;
342 unsigned int manl:32;
343 } s;
344 double d;
346 #else
347 #if IEEE_DOUBLE_BIG_ENDIAN
348 union ieee_double_extract
350 struct
352 unsigned int sig:1;
353 unsigned int exp:11;
354 unsigned int manh:20;
355 unsigned int manl:32;
356 } s;
357 double d;
359 #else
360 union ieee_double_extract
362 struct
364 unsigned int manl:32;
365 unsigned int manh:20;
366 unsigned int exp:11;
367 unsigned int sig:1;
368 } s;
369 double d;
371 #endif
372 #endif