1 dnl AMD64 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by
2 dnl 1-limb divisor, returning quotient only.
4 dnl Copyright 2001, 2002, 2004-2006, 2009, 2011, 2012 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any later version.
22 dnl or both in parallel, as here.
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
C Shared m4 configuration for the assembly sources.
33 include(`../config.m4')
C Register aliases that, per their trailing comments, matter only to the
C mpn_pi1_bdiv_q_1 entry point. Each quoted body is kept on one line so the
C expansion is the bare register name with no embedded newline.
51 define(`di', `%r8') C just mpn_pi1_bdiv_q_1
52 define(`ncnt', `%r9') C just mpn_pi1_bdiv_q_1
C mpn_bdiv_q_1 -- entry point that derives the inverse itself.
C
C The visible body builds, in r8, an inverse of the value held in rbx: an
C 8-bit seed is read from binvert_limb_table and refined by three steps of
C inv = 2*inv - inv*inv*d, done at 32-bit, 32-bit and 64-bit operand size
C (the existing comments give the resulting precision as 16, 32 and 64 bits).
C r11 receives a copy of d, and rcx is the shift count (ncnt).
C
C NOTE(review): the embedded source line numbers skip values, and several
C things this code needs are not present in the listing:
C   - nothing loads rax with the divisor before it is used;
C   - nothing sets CF before the jnc (the xor just above it clears CF, so
C     with only these lines the branch is always taken);
C   - the "d/2" comment has no matching shift;
C   - the odd path has no jump after the inverse is complete, so it falls
C     through into L(evn);
C   - L(evn) has no shift or jump back, and there is no EPILOGUE.
C Presumably lines were dropped in extraction -- confirm against the upstream
C file before assembling.
60 PROLOGUE(mpn_bdiv_q_1)
65 xor R32(%rcx), R32(%rcx) C ncnt count
69 jnc L(evn) C skip bsfq unless divisor is even
C Odd divisor: keep d in rbx, use rax as the table index.
71 L(odd): mov %rax, %rbx
73 and $127, R32(%rax) C d/2, 7 bits
75 LEA( binvert_limb_table, %rdx)
77 movzbl (%rdx,%rax), R32(%rax) C inv 8 bits
79 mov %rbx, %r11 C d without twos
C First refinement step; result lands in edx.
81 lea (%rax,%rax), R32(%rdx) C 2*inv
82 imul R32(%rax), R32(%rax) C inv*inv
83 imul R32(%rbx), R32(%rax) C inv*inv*d
84 sub R32(%rax), R32(%rdx) C inv = 2*inv - inv*inv*d, 16 bits
C Second refinement step; result lands back in eax.
86 lea (%rdx,%rdx), R32(%rax) C 2*inv
87 imul R32(%rdx), R32(%rdx) C inv*inv
88 imul R32(%rbx), R32(%rdx) C inv*inv*d
89 sub R32(%rdx), R32(%rax) C inv = 2*inv - inv*inv*d, 32 bits
C Third refinement step at full width; the finished inverse is left in r8.
91 lea (%rax,%rax), %r8 C 2*inv
92 imul %rax, %rax C inv*inv
93 imul %rbx, %rax C inv*inv*d
94 sub %rax, %r8 C inv = 2*inv - inv*inv*d, 64 bits
C Even divisor: rcx = index of the lowest set bit of rax.
98 L(evn): bsf %rax, %rcx
C mpn_pi1_bdiv_q_1 -- entry point for callers that already have the inverse
C (see the trailing comments on the di and ncnt aliases).
C
C Register roles, as far as the lines below show:
C   r8   factor applied to each limb by the imul at L(ent)
C   r11  factor for the widening mul whose high half (rdx) is the carry limb
C   cl   shift count for the shrd and shr instructions
C   rbx  carry bit, 0 or 1
C   r10  limb index, scaled by 8 against up and rp
C
C NOTE(review): the embedded source line numbers skip values, and this
C listing has no instruction that loads r10, r11 or rcx in this entry point,
C none that updates rbx or r10 after they are first set, and no branch, ret
C or EPILOGUE. The up and rp aliases are also not defined in the visible
C text. Presumably lines were dropped in extraction -- confirm against the
C upstream file before assembling.
103 PROLOGUE(mpn_pi1_bdiv_q_1)
C On IFDOS builds r8 and r9 are loaded from the stack (presumably the
C stack-passed arguments under that ABI -- confirm). Each invocation stays on
C one line, with the parenthesis directly after the macro name, so that m4
C sees the argument.
105 IFDOS(` mov 56(%rsp), %r8 ')
106 IFDOS(` mov 64(%rsp), %r9 ')
C First limb: fetch up[0] and up[1], advance both pointers by r10 limbs.
113 L(com): mov (up), %rax C up[0]
118 mov 8(up), %rdx C up[1]
119 lea (up,%r10,8), up C up end
120 lea (rp,%r10,8), rp C rp end
123 shrd R8(%rcx), %rdx, %rax C rax = low limb of (rdx:rax) >> cl
125 xor R32(%rbx), R32(%rbx) C start with no carry bit
131 C rbx carry bit, 0 or 1
134 C r10 counter, limbs, negative
C Per-limb step: multiply by r11, fetch the next shifted limb, subtract the
C carry bit and carry limb, then multiply by r8 and store the result.
136 mul %r11 C carry limb in rdx
137 mov (up,%r10,8), %rax
138 mov 8(up,%r10,8), %r9
139 shrd R8(%rcx), %r9, %rax C rax = low limb of (r9:rax) >> cl
141 sub %rbx, %rax C apply carry bit
143 sub %rdx, %rax C apply carry limb
145 L(ent): imul %r8, %rax C low 64 bits of rax * r8
146 mov %rax, (rp,%r10,8) C store result limb
C Final limb: same carry handling, applied to the limb at (up).
150 mul %r11 C carry limb in rdx
151 mov (up), %rax C up high limb
153 sub %rbx, %rax C apply carry bit
154 sub %rdx, %rax C apply carry limb
C Single shift of rax by cl; no high limb is shifted in here.
161 L(one): shr R8(%rcx), %rax