3 Contributed to the GNU project by Niels Möller
5 THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY
6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
10 Copyright 2013 Free Software Foundation, Inc.
12 This file is part of the GNU MP Library.
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
17 * the GNU Lesser General Public License as published by the Free
18 Software Foundation; either version 3 of the License, or (at your
19 option) any later version.
23 * the GNU General Public License as published by the Free Software
24 Foundation; either version 2 of the License, or (at your option) any
27 or both in parallel, as here.
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library. If not,
36 see https://www.gnu.org/licenses/. */
/* NOTE(review): the extraction has dropped lines around this #error; it is
   presumably guarded by a GMP_NAIL_BITS check in the pristine source --
   confirm before editing.  Nails (spare high bits per limb) are not
   supported by this file. */
43 #error Nail bits not supported
/* Select the division algorithm below.  Method 2 is the default; the value
   can be overridden at build time (e.g. for tuning/testing). */
46 #ifndef DIV_QR_1N_METHOD
47 #define DIV_QR_1N_METHOD 2
50 /* FIXME: Duplicated in mod_1_1.c. Move to gmp-impl.h */
52 #if defined (__GNUC__) && ! defined (NO_ASM)
/* add_mssaaaa(m, s1, s0, a1, a0, b1, b0): double-limb add
   (s1,s0) = (a1,a0) + (b1,b0), with m set to the NEGATED carry out
   (0 or all-ones mask) -- see the portable C fallback at the end of this
   section for the reference semantics.  x86 32-bit inline-asm variant.
   NOTE(review): the asm body is incomplete here (the adc/sbb continuation
   lines were lost in extraction); do not edit without the pristine file. */
54 #if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
55 #define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
56 __asm__ ( "add %6, %k2\n\t" \
59 : "=r" (m), "=r" (s1), "=&r" (s0) \
60 : "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
61 "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
/* x86-64 variant of add_mssaaaa (same contract as the C fallback below:
   (s1,s0) = (a1,a0)+(b1,b0), m = -carry).  NOTE(review): the adc/sbb
   continuation lines of the asm template are missing from this extraction;
   recover them before touching the constraints. */
64 #if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
65 #define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
66 __asm__ ( "add %6, %q2\n\t" \
69 : "=r" (m), "=r" (s1), "=&r" (s0) \
70 : "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
71 "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
/* 32-bit SPARC variant: addcc/addxcc perform the carrying double-limb add,
   subx %g0,%g0 then materializes -carry into m (matching the C fallback's
   contract).  NOTE(review): the trailing clobber/#endif lines were dropped
   by the extraction. */
74 #if defined (__sparc__) && W_TYPE_SIZE == 32
75 #define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
76 __asm__ ( "addcc %r5, %6, %2\n\t" \
77 "addxcc %r3, %4, %1\n\t" \
78 "subx %%g0, %%g0, %0" \
79 : "=r" (m), "=r" (sh), "=&r" (sl) \
80 : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
/* 64-bit SPARC variants of add_mssaaaa.  The first works around the lack of
   a 64-bit add-with-carry chain by re-deriving the carry from the low
   32-bit halves; movcs then writes -1 into m on carry.  NOTE(review): the
   #if/#else guard separating the two variants, and the clobber/#endif
   lines, are missing from this extraction -- confirm the conditional
   structure against the pristine source. */
84 #if defined (__sparc__) && W_TYPE_SIZE == 64
85 #define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
86 __asm__ ( "addcc %r5, %6, %2\n\t" \
87 "addccc %r7, %8, %%g0\n\t" \
88 "addccc %r3, %4, %1\n\t" \
90 "movcs %%xcc, -1, %0" \
91 : "=r" (m), "=r" (sh), "=&r" (sl) \
92 : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl), \
93 "rJ" ((al) >> 32), "rI" ((bl) >> 32) \
/* Second sparc64 variant: presumably for CPUs with the addxccc (carry-chain)
   instruction, avoiding the low-half recomputation above -- TODO confirm
   which predicate guards it in the original. */
97 #define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
98 __asm__ ( "addcc %r5, %6, %2\n\t" \
99 "addxccc %r3, %4, %1\n\t" \
101 "movcs %%xcc, -1, %0" \
102 : "=r" (m), "=r" (sh), "=&r" (sl) \
103 : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
/* PowerPC variant: addc/adde do the carrying add, subfe m,m,m yields
   m = -carry.  Excluded when limbs are `long long` (see comment): in
   32-bit mode the carry flag would reflect only the low 32 bits. */
108 #if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
109 /* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
110 processor running in 32-bit mode, since the carry flag then gets the 32-bit
112 #define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
113 __asm__ ( "add%I6c %2, %5, %6\n\t" \
114 "adde %1, %3, %4\n\t" \
115 "subfe %0, %0, %0\n\t" \
117 : "=r" (m), "=r" (s1), "=&r" (s0) \
118 : "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
/* s390x variant: algr (add logical) starts the carrying 128-bit add.
   NOTE(review): the alcgr / carry-materialization lines of the template
   are missing from this extraction -- recover before editing. */
121 #if defined (__s390x__) && W_TYPE_SIZE == 64
122 #define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
123 __asm__ ( "algr %2, %6\n\t" \
128 : "=r" (m), "=r" (s1), "=&r" (s0) \
129 : "1" ((UDItype)(a1)), "r" ((UDItype)(b1)), \
130 "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
/* ARM (non-Thumb) variant: adds/adcs perform the carrying add; the final
   sbc that writes -carry into m appears to have been lost in extraction
   (line 137/138 of the original) -- confirm against the pristine file. */
133 #if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
134 #define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
135 __asm__ ( "adds %2, %5, %6\n\t" \
136 "adcs %1, %3, %4\n\t" \
139 : "=r" (m), "=r" (sh), "=&r" (sl) \
140 : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
142 #endif /* defined (__GNUC__) */
/* Portable C fallback defining the reference semantics of add_mssaaaa:
   (s1,s0) = (a1,a0) + (b1,b0) as a double-limb unsigned add, and
   m = -(total carry out), i.e. 0 on no overflow, all-ones on overflow.
   Carries are detected idiomatically via unsigned wraparound (__s0 < a0).
   NOTE(review): the do{...}while(0) wrapper and the (s0)=/(s1)= store
   lines are missing from this extraction; only add comments, do not
   reconstruct. */
145 #define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
147 UWtype __s0, __s1, __c0, __c1; \
148 __s0 = (a0) + (b0); \
149 __s1 = (a1) + (b1); \
150 __c0 = __s0 < (a0); \
151 __c1 = __s1 < (a1); \
153 __s1 = __s1 + __c0; \
155 (m) = - (__c1 + (__s1 < __c0)); \
159 #if DIV_QR_1N_METHOD == 1
161 /* Divides (uh B^n + {up, n}) by d, storing the quotient at {qp, n}.
162 Requires that uh < d. */
/* NOTE(review): METHOD 1 implementation.  The return type, local
   declarations, loop structure and return statement are missing from this
   extraction; only prototype fragments and a few statements survive.
   Recover the full definition from the canonical source before editing. */
164 mpn_div_qr_1n_pi1 (mp_ptr qp
, mp_srcptr up
, mp_size_t n
, mp_limb_t uh
,
165 mp_limb_t d
, mp_limb_t dinv
)
/* Divisor must be normalized: high bit of d set (required by the
   pre-inverted 2/1 division below). */
169 ASSERT (d
& GMP_NUMB_HIGHBIT
);
/* Quotient area may coincide with the input (in-place), but must not
   otherwise overlap it. */
170 ASSERT (MPN_SAME_OR_SEPARATE_P (qp
, up
, n
))
;
/* One 2/1 division step: divide (uh,ul) by d using precomputed inverse
   dinv, producing quotient limb q and new remainder in uh. */
177 udiv_qrnnd_preinv (q
, uh
, uh
, ul
, d
, dinv
);
185 #elif DIV_QR_1N_METHOD == 2
/* METHOD 2: same contract as METHOD 1 (divide uh*B^n + {up,n} by the
   normalized divisor d, quotient to {qp,n}), restructured to shorten the
   recurrency chain per iteration.  NOTE(review): the return type, local
   declarations (u0/u2/q0/q1/q2/p0/p1/t/cy/B2/j), braces, if/return
   statements and several lines of the big quotient-update comment are
   missing from this extraction; only statement fragments survive.  Do not
   reconstruct from memory -- recover the pristine file before editing. */
188 mpn_div_qr_1n_pi1 (mp_ptr qp
, mp_srcptr up
, mp_size_t n
, mp_limb_t u1
,
189 mp_limb_t d
, mp_limb_t dinv
)
/* Divisor must be normalized (high bit set). */
198 ASSERT (d
& GMP_LIMB_HIGHBIT
);
/* Single 2/1 division; presumably the n == 1 base case (the guarding
   if/return are missing here) -- TODO confirm. */
204 udiv_qrnnd_preinv (qp
[0], u1
, u1
, up
[0], d
, dinv
);
208 /* FIXME: Could be precomputed */
/* (q1,q0) = dinv * u1: initial quotient approximation from the inverse. */
211 umul_ppmm (q1
, q0
, dinv
, u1
);
/* (p1,p0) = B2 * u1.  B2 is presumably B^2 mod d (see FIXME above about
   precomputing it) -- TODO confirm against the missing declarations. */
212 umul_ppmm (p1
, p0
, B2
, u1
);
215 u0
= up
[n
-1]; /* Early read, to allow qp == up. */
/* Fold next input limb into the partial remainder; u2 receives the
   negated-carry mask from add_mssaaaa (see fallback macro above). */
218 add_mssaaaa (u2
, u1
, u0
, u0
, up
[n
-2], p1
, p0
);
220 /* FIXME: Keep q1 in a variable between iterations, to reduce number
221 of memory accesses. */
/* Main loop over the remaining limbs, high index down to 0. */
222 for (j
= n
-2; j
-- > 0; )
226 /* Additions for the q update:
232 * | 1 | v | (conditional on u2)
234 * | 1 | (conditional on u0 + u2 B2 carry)
/* (p1,t) = u1 * dinv: next quotient-limb approximation. */
241 umul_ppmm (p1
, t
, u1
, dinv
);
/* Accumulate the conditional (on u2) and approximation terms into
   (q2,q1); the three add_ssaaaa calls realize the table above. */
242 add_ssaaaa (q2
, q1
, -u2
, u2
& dinv
, CNST_LIMB(0), u1
);
243 add_ssaaaa (q2
, q1
, q2
, q1
, CNST_LIMB(0), p1
);
244 add_ssaaaa (q2
, q1
, q2
, q1
, CNST_LIMB(0), q0
);
/* (p1,p0) = u1 * B2 for the remainder update. */
247 umul_ppmm (p1
, p0
, u1
, B2
);
/* Conditionally (mask u2) add B2 into u0, capturing the carry in cy. */
248 ADDC_LIMB (cy
, u0
, u0
, u2
& B2
);
252 add_ssaaaa (q2
, q1
, q2
, q1
, CNST_LIMB(0), cy
);
/* Propagate the quotient carry q2 into the already-stored high limbs. */
254 MPN_INCR_U (qp
+j
+2, n
-j
-2, q2
);
/* Fold input limb up[j] and (p1,p0) into the remainder; new mask in u2. */
256 add_mssaaaa (u2
, u1
, u0
, u0
, up
[j
], p1
, p0
);
/* Final 2/1 division of the last remainder pair (u1,u0) by d. */
266 udiv_qrnnd_preinv (t
, u0
, u1
, u0
, d
, dinv
);
267 add_ssaaaa (q1
, q0
, q1
, q0
, CNST_LIMB(0), t
);
/* Propagate the final quotient carry into qp[1..n-1]. */
269 MPN_INCR_U (qp
+1, n
-1, q1
);
/* Any other DIV_QR_1N_METHOD value is a configuration error. */
276 #error Unknown DIV_QR_1N_METHOD