beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / bdiv_q_1.asm
blob02eacbe6a8b8325cea99c41ef37f268e97b47691
1 dnl AMD64 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by
2 dnl 1-limb divisor, returning quotient only.
4 dnl Copyright 2001, 2002, 2004-2006, 2009, 2011, 2012 Free Software
5 dnl Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
8 dnl
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
11 dnl
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
15 dnl
16 dnl or
17 dnl
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
20 dnl later version.
21 dnl
22 dnl or both in parallel, as here.
23 dnl
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl for more details.
28 dnl
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
36 C cycles/limb
37 C AMD K8,K9 10
38 C AMD K10 10
39 C Intel P4 33
40 C Intel core2 13.25
41 C Intel corei 14
42 C Intel atom 42
43 C VIA nano ?
C INPUT PARAMETERS
C Symbolic names for the argument registers.  The first four are used by
C both entry points, the last two only by mpn_pi1_bdiv_q_1.
define(`rp', `%rdi')		C quotient limbs are stored through this pointer
define(`up', `%rsi')		C dividend limbs are loaded through this pointer
define(`n', `%rdx')		C limb count
define(`d', `%rcx')		C divisor limb
define(`di', `%r8')		C just mpn_pi1_bdiv_q_1, multiplier applied to each limb
define(`ncnt', `%r9')		C just mpn_pi1_bdiv_q_1, right shift count

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
C mpn_bdiv_q_1 (rp, up, n, d)
C
C Entry point that receives only the divisor.  It removes the low zero bits
C of the divisor (the count becomes the shift amount), builds the inverse of
C the remaining odd value modulo 2^64 from an 8-bit table entry followed by
C three Newton steps, and then continues at L(com) inside mpn_pi1_bdiv_q_1.
C
C State handed over at L(com):
C   rdi, rsi	rp and up, untouched here
C   r10		limb count
C   r11		divisor with the low zero bits removed
C   r8		64-bit inverse of r11
C   rcx		number of zero bits removed, 0 for an odd divisor
C   rbx		scratch, the caller value is on the stack
PROLOGUE(mpn_bdiv_q_1)
	FUNC_ENTRY(4)
	push	%rbx

	mov	%rdx, %r10		C save the limb count, rdx is reused below
	mov	%rcx, %rax		C working copy of the divisor
	xor	R32(%rcx), R32(%rcx)	C shift count starts at zero

	bt	$0, R32(%rax)
	jnc	L(evn)			C low bit clear, strip the zero bits first

L(odd):	mov	%rax, %rbx		C odd divisor, operand of the Newton steps
	mov	%rax, %r11		C odd divisor, multiplier for the main loop
	LEA(	binvert_limb_table, %rdx)
	shr	R32(%rax)
	and	$127, R32(%rax)		C table index is bits 1 to 7 of the divisor
	movzbl	(%rdx,%rax), R32(%rax)	C inverse correct to 8 bits

	C Each step computes inv = 2*inv - inv*inv*d and doubles the number
	C of correct low bits.  The first two steps only need 32-bit registers.
	lea	(%rax,%rax), R32(%rdx)	C 2*inv
	imul	R32(%rax), R32(%rax)	C inv^2
	imul	R32(%rbx), R32(%rax)	C inv^2 * d
	sub	R32(%rax), R32(%rdx)	C 16 bits, result now in rdx

	lea	(%rdx,%rdx), R32(%rax)	C 2*inv
	imul	R32(%rdx), R32(%rdx)	C inv^2
	imul	R32(%rbx), R32(%rdx)	C inv^2 * d
	sub	R32(%rdx), R32(%rax)	C 32 bits, result back in rax

	lea	(%rax,%rax), %r8	C 2*inv
	imul	%rax, %rax		C inv^2
	imul	%rbx, %rax		C inv^2 * d
	sub	%rax, %r8		C 64 bits, final inverse in r8

	jmp	L(com)

	C Even divisor: count the low zero bits into rcx, shift them out and
	C rejoin the odd path.
L(evn):	bsf	%rax, %rcx
	shr	R8(%rcx), %rax
	jmp	L(odd)
EPILOGUE()
C mpn_pi1_bdiv_q_1 (rp, up, n, d, di, ncnt)
C
C Hensel division of the limbs at up by a single limb, storing one quotient
C limb per input limb at rp.  The operand is shifted right by ncnt bits
C across limb boundaries as it is read.  Each quotient limb is
C   q = (shifted limb - carry bit - high limb of previous q*d) * di
C taken modulo 2^64.
C NOTE(review): d is presumably required to be odd with di its inverse
C modulo 2^64, which is what mpn_bdiv_q_1 prepares before jumping to L(com).
C Confirm against the C prototype and the callers.
C
C L(com) is the entry shared with mpn_bdiv_q_1.  Expected there:
C   rdi rp, rsi up, r10 limb count, r11 d, r8 di, rcx ncnt,
C   and the caller rbx already pushed.
PROLOGUE(mpn_pi1_bdiv_q_1)
	FUNC_ENTRY(4)
	C DOS64 only: di and ncnt are fetched from the stack.  The offsets
	C presumably allow for what FUNC_ENTRY pushes, that macro is defined
	C outside this file.
IFDOS(`	mov	56(%rsp), %r8	')
IFDOS(`	mov	64(%rsp), %r9	')
	push	%rbx

	mov	%rcx, %r11		C d
	mov	%rdx, %r10		C n
	mov	%r9, %rcx		C ncnt, must be in cl for the shifts

L(com):	mov	(up), %rax		C up[0]

	dec	%r10
	jz	L(one)			C single limb, no cross-limb shift needed

	mov	8(up), %rdx		C up[1]
	lea	(up,%r10,8), up		C up end, address of the last limb
	lea	(rp,%r10,8), rp		C rp end, address of the last limb
	neg	%r10			C -(n-1), counts up towards zero

	shrd	R8(%rcx), %rdx, %rax	C first limb with bits from up[1] shifted in

	xor	R32(%rbx), R32(%rbx)	C no carry bit yet
	jmp	L(ent)

	ALIGN(8)
L(top):
	C rax	q, the quotient limb just stored
	C rbx	carry bit, 0 or 1
	C rcx	ncnt
	C rdx	scratch, receives the carry limb
	C r8	di
	C r9	scratch, next source limb
	C r10	counter, limbs, negative
	C r11	d

	mul	%r11			C carry limb in rdx
	mov	(up,%r10,8), %rax
	mov	8(up,%r10,8), %r9
	shrd	R8(%rcx), %r9, %rax
	sub	%rbx, %rax		C apply carry bit
	setc	R8(%rbx)
	sub	%rdx, %rax		C apply carry limb
	adc	$0, %rbx		C fold in the borrow from the second subtract
L(ent):	imul	%r8, %rax		C q = adjusted limb * di
	mov	%rax, (rp,%r10,8)
	inc	%r10
	jnz	L(top)

	C Last limb: nothing above it to shift in, and no carry is kept.
	mul	%r11			C carry limb in rdx
	mov	(up), %rax		C up high limb
	shr	R8(%rcx), %rax
	sub	%rbx, %rax		C apply carry bit
	sub	%rdx, %rax		C apply carry limb
	imul	%r8, %rax
	mov	%rax, (rp)
	pop	%rbx
	FUNC_EXIT()
	ret				C must return here, L(one) follows directly

L(one):	shr	R8(%rcx), %rax
	imul	%r8, %rax
	mov	%rax, (rp)
	pop	%rbx
	FUNC_EXIT()
	ret

EPILOGUE()