beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / sparc64 / sparc64.h
blob09fc16d46a4417c4733815d6b4e7edbd7e7d8dac
/* UltraSPARC 64 support macros.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2003 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
/* LOW32(x) is the low 32 bits of x, HIGH32(x) is x shifted right by 32.
   Both are meant for 64-bit unsigned limbs: HIGH32 shifts by 32, which is
   undefined in C for an operand whose promoted type is only 32 bits wide.
   Each macro evaluates its argument exactly once.  */
#define LOW32(x)   ((x) & 0xFFFFFFFF)
#define HIGH32(x)  ((x) >> 32)
40 /* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
41 Plain src[i] would be incorrect in big endian, HALF_ENDIAN_ADJ has the
42 effect of swapping the two halves in this case. */
43 #if HAVE_LIMB_BIG_ENDIAN
44 #define HALF_ENDIAN_ADJ(i) (1 - (((i) & 1) << 1)) /* +1 even, -1 odd */
45 #endif
46 #if HAVE_LIMB_LITTLE_ENDIAN
47 #define HALF_ENDIAN_ADJ(i) 0 /* no adjust */
48 #endif
49 #ifndef HALF_ENDIAN_ADJ
50 Error, error, unknown limb endianness;
51 #endif
/* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
   of that product is equal to l.  dh and dl are the 32-bit halves of d.

   |-----high----||----low-----|
   +------+------+
   |             |                 ph = qh * dh
   +------+------+
          +------+------+
          |             |          pm1 = ql * dh
          +------+------+
          +------+------+
          |             |          pm2 = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that the low 64 bits is equal to l means that LOW(pm1) + LOW(pm2)
   + HIGH(pl) == HIGH(l) (mod 2^32).  The only thing we need from those
   product parts is whether they produce a carry into the high.

   pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only
   time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) >
   HIGH(l).  pl is never actually calculated.

   Every argument is parenthesized where it is used, so expressions such as
   "d >> 32" may be passed directly.  q and dh are evaluated twice outside
   the assertions, so arguments should be free of side effects.  d is only
   evaluated by the assertions.  */

#define umul_ppmm_lowequal(h, q, d, dh, dl, l)                          \
  do {                                                                  \
    mp_limb_t __ql, __qh, __ph, __pm1, __pm2, __pm_l;                   \
    ASSERT ((dh) == HIGH32 (d));                                        \
    ASSERT ((dl) == LOW32 (d));                                         \
    ASSERT ((q) * (d) == (l));                                          \
                                                                        \
    __ql = LOW32 (q);                                                   \
    __qh = HIGH32 (q);                                                  \
                                                                        \
    __pm1 = __ql * (dh);                                                \
    __pm2 = __qh * (dl);                                                \
    __ph  = __qh * (dh);                                                \
                                                                        \
    /* at most 2^33-2, so HIGH32 (__pm_l) is the first carry */         \
    __pm_l = LOW32 (__pm1) + LOW32 (__pm2);                             \
                                                                        \
    /* (__pm_l << 32) > l  <=>  LOW(pm_l) > HIGH(l): second carry */    \
    (h) = __ph + HIGH32 (__pm1) + HIGH32 (__pm2)                        \
      + HIGH32 (__pm_l) + ((__pm_l << 32) > (l));                       \
                                                                        \
    ASSERT_HIGH_PRODUCT ((h), (q), (d));                                \
  } while (0)
/* Set h to the high of q*d, assuming the low limb of that product is equal
   to l, and that d fits in 32-bits.

   |-----high----||----low-----|
          +------+------+
          |             |          pm = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only
   time there's a carry from that sum is when LOW(pm) > HIGH(l).  There's no
   need to calculate pl to determine this.

   Every argument is parenthesized where it is used, so an expression such
   as "d0 + 1" may be passed as d.  Outside the assertions q, d and l are
   each evaluated once.  */

#define umul_ppmm_half_lowequal(h, q, d, l)                             \
  do {                                                                  \
    mp_limb_t __pm;                                                     \
    ASSERT ((q) * (d) == (l));                                          \
    ASSERT (HIGH32 (d) == 0);                                           \
                                                                        \
    __pm = HIGH32 (q) * (d);                                            \
    /* (__pm << 32) > l  <=>  LOW(pm) > HIGH(l), i.e. a carry */        \
    (h) = HIGH32 (__pm) + ((__pm << 32) > (l));                         \
    ASSERT_HIGH_PRODUCT ((h), (q), (d));                                \
  } while (0)
/* Check that h is the high limb of x*y.

   With WANT_ASSERT set, the full product is recomputed with umul_ppmm and
   its high limb compared against h through ASSERT.  Otherwise the macro
   expands to an empty statement and none of its arguments is evaluated.  */
#if WANT_ASSERT
#define ASSERT_HIGH_PRODUCT(h, x, y)                                    \
  do {                                                                  \
    mp_limb_t want_h, dummy;                                            \
    umul_ppmm (want_h, dummy, (x), (y));                                \
    ASSERT ((h) == want_h);                                             \
  } while (0)
#else
#define ASSERT_HIGH_PRODUCT(h, x, y)                                    \
  do { } while (0)
#endif
/* Multiply u and v, where v < 2^32 (more precisely, v fits in the low half
   of a UWtype).  The double-word product is stored as w1 (high word) and
   w0 (low word).

   Only the low half of v is used: anything in its high half is silently
   dropped by __ll_lowpart, so the caller must guarantee the range.  u and
   v are each evaluated exactly once.  */
#define umul_ppmm_s(w1, w0, u, v)                                       \
  do {                                                                  \
    UWtype __x0, __x2;                                                  \
    UWtype __ul, __vl, __uh;                                            \
    UWtype __u = (u), __v = (v);                                        \
                                                                        \
    __ul = __ll_lowpart (__u);                                          \
    __uh = __ll_highpart (__u);                                         \
    __vl = __ll_lowpart (__v);                                          \
                                                                        \
    __x0 = (UWtype) __ul * __vl;        /* low  half of u times v */    \
    __x2 = (UWtype) __uh * __vl;        /* high half of u times v */    \
                                                                        \
    (w1) = (__x2 + (__x0 >> (W_TYPE_SIZE / 2))) >> (W_TYPE_SIZE / 2);   \
    (w0) = (__x2 << (W_TYPE_SIZE / 2)) + __x0;                          \
  } while (0)
/* Count the leading zeros on a limb, but assuming it fits in 32 bits.
   The count returned will be in the range 32 to 63.
   This is the 32-bit generic C count_leading_zeros from longlong.h.

   x must be non-zero and at most 0xFFFFFFFF.  It is evaluated exactly once;
   the assertions check the captured copy rather than re-expanding the
   argument.  __a selects the byte holding the top set bit (as a shift of
   1, 9, 17 or 25) and __clz_tab finishes the count within that byte.  */
#define count_leading_zeros_32(count, x)                                \
  do {                                                                  \
    mp_limb_t __xr = (x);                                               \
    unsigned __a;                                                       \
    ASSERT (__xr != 0);                                                 \
    ASSERT (__xr <= CNST_LIMB(0xFFFFFFFF));                             \
    __a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1) \
      : (__xr < ((UWtype) 1 << 24) ? 16 + 1 : 24 + 1);                  \
                                                                        \
    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];           \
  } while (0)
/* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), knowing that d fits
   32 bits and is normalized (high bit set).  Here b = 2^32.

   d is evaluated exactly once and held in an mp_limb_t.  Negating it in
   64 bits and shifting left by 32 gives b*(b-d) modulo 2^64.  The final
   (unsigned) cast keeps the low 32 bits of the quotient, assuming unsigned
   is 32 bits wide as the rest of this file does.  */
#define invert_half_limb(inv, d)                                        \
  do {                                                                  \
    mp_limb_t _ihl_d = (d);                                             \
    mp_limb_t _ihl_n;                                                   \
    ASSERT (_ihl_d <= 0xFFFFFFFF);                                      \
    ASSERT (_ihl_d & 0x80000000);                                       \
    _ihl_n = ((-_ihl_d) << 32) - 1;         /* b*(b-d) - 1 */           \
    (inv) = (mp_limb_t) (unsigned) (_ihl_n / _ihl_d);                   \
  } while (0)
/* Divide nh:nl by d, setting q to the quotient and r to the remainder.
   q, r, nh and nl are 32-bits each, d_limb is 32-bits but in an mp_limb_t,
   dinv_limb is similarly a 32-bit inverse but in an mp_limb_t.

   Requirements (checked by the assertions): d_limb has bit 31 set and no
   higher bits, dinv_limb fits in 32 bits, and nh < d_limb so the quotient
   fits in 32 bits.  dinv_limb is expected to be the value produced by
   invert_half_limb for d_limb.

   d_limb and dinv_limb are copied once into mp_limb_t locals, so they may
   be arbitrary expressions and the products below are always carried out
   in 64 bits.  nh and nl are read before q and r are written, so r may
   also be passed as nh.  The 32-bit temporaries are plain "unsigned",
   which is assumed to be 32 bits wide.  */

#define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb)         \
  do {                                                                  \
    unsigned _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q;                 \
    mp_limb_t _n, _x;                                                   \
    mp_limb_t _d = (d_limb), _dinv = (dinv_limb);                       \
    ASSERT (_d <= 0xFFFFFFFF);                                          \
    ASSERT (_dinv <= 0xFFFFFFFF);                                       \
    ASSERT (_d & 0x80000000);                                           \
    ASSERT ((nh) < _d);                                                 \
    _n10 = (nl);                                                        \
    _n2 = (nh);                                                         \
    _n1 = -(_n10 >> 31);              /* 0 or ~0, from top bit of nl */ \
    _nadj = _n10 + (_n1 & _d);                                          \
    _x = _dinv * (_n2 - _n1) + _nadj;                                   \
    _q11n = ~(_n2 + HIGH32 (_x));             /* -q1-1 */               \
    _n = ((mp_limb_t) _n2 << 32) + _n10;                                \
    _x = _n + _d * _q11n;                     /* n-q1*d-d */            \
    _xh = HIGH32 (_x) - _d;                   /* high(n-q1*d-d) */      \
    ASSERT (_xh == 0 || _xh == ~0u);                                    \
    _r = _x + (_d & _xh);                     /* addback */             \
    _q = _xh - _q11n;                         /* q1+1-addback */        \
    ASSERT (_r < _d);                                                   \
    ASSERT (_d * _q + _r == _n);                                        \
    (r) = _r;                                                           \
    (q) = _q;                                                           \
  } while (0)