beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / divrem_2.asm
blob296c9b673c332b92e86a5eb3cc871b348271c736
1 dnl x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
3 dnl Copyright 2007, 2008, 2010, 2014 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
34 C cycles/limb best
35 C AMD K8,K9 18
36 C AMD K10 18
37 C AMD bull
38 C AMD pile
39 C AMD bobcat
40 C AMD jaguar
41 C Intel P4 68
42 C Intel core 34
43 C Intel NHM 30.25
44 C Intel SBR 21.3
45 C Intel IBR 21.4
46 C Intel HWL 20.6
47 C Intel BWL
48 C Intel atom 73
49 C VIA nano 33
52 C INPUT PARAMETERS
define(`qp',		`%rdi')
define(`fn',		`%rsi')
define(`up_param',	`%rdx')
define(`un_param',	`%rcx')
define(`dp',		`%r8')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C mpn_divrem_2 (qp, fn, up, un, dp)
C
C Divide the un-limb number at up by the 2-limb divisor d1:d0 read from
C dp[1]:dp[0].  The file title states that the divisor is normalized.
C
C   * un + fn - 2 quotient limbs are stored at qp[0 .. un+fn-3].  For the
C     lowest fn of them a zero limb is brought in instead of a limb from up.
C   * The 2-limb remainder is written back to up[0] (low) and up[1] (high).
C   * The return value in rax is the most significant quotient limb, 0 or 1.
C
C Callee-saved registers rbx, rbp, r12-r15 are pushed on entry and popped
C before returning.  mpn_invert_limb is called once, with d1 as its argument.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_divrem_2)
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8	')
	push	%r15
	push	%r14
	push	%r13
	push	%r12
	lea	-24(%rdx,%rcx,8), %r12	C r12 = &up[un-3]
	mov	%rsi, %r13		C r13 = fn
	push	%rbp
	mov	%rdi, %rbp		C rbp = qp
	push	%rbx
	mov	8(%r8), %r11		C d1
	mov	16(%r12), %rbx		C n1 = up[un-1]
	mov	(%r8), %r8		C d0
	mov	8(%r12), %r10		C n0 = up[un-2]

C Most significant quotient limb: 1 if n1:n0 >= d1:d0, in which case the
C divisor is subtracted once from the two top limbs, else 0.
	xor	R32(%r15), R32(%r15)
	cmp	%rbx, %r11
	ja	L(2)			C d1 > n1, nothing to subtract
	setb	%dl			C dl = (d1 < n1)
	cmp	%r10, %r8
	setbe	%al			C al = (d0 <= n0)
	orb	%al, %dl		C "orb" form to placate Sun tools
	je	L(2)			C d1 = n1 and d0 > n0, nothing to subtract
	inc	R32(%r15)
	sub	%r8, %r10
	sbb	%r11, %rbx
L(2):
	lea	-3(%rcx,%r13), %r14	C un + fn - 3
	test	%r14, %r14
	js	L(end)			C no further quotient limbs to produce

C r8, r10 and r11 are caller-saved, so keep them on the stack across the call.
	push	%r8
	push	%r10
	push	%r11
IFSTD(`	mov	%r11, %rdi	')
IFDOS(`	mov	%r11, %rcx	')
	ASSERT(nz, `test $15, %rsp')
	CALL(	mpn_invert_limb)
	pop	%r11
	pop	%r10
	pop	%r8

C Adjust the value returned by mpn_invert_limb using d1 and d0.  The result
C stays in rdi, which the register table below calls dinv.
	mov	%r11, %rdx
	mov	%rax, %rdi
	imul	%rax, %rdx
	mov	%rdx, %r9
	mul	%r8
	xor	R32(%rcx), R32(%rcx)
	add	%r8, %r9
	adc	$-1, %rcx
	add	%rdx, %r9
	adc	$0, %rcx
	js	2f			C already in range, skip the correction loop
1:	dec	%rdi
	sub	%r11, %r9
	sbb	$0, %rcx
	jns	1b
2:

	lea	(%rbp,%r14,8), %rbp	C rbp = &qp[un+fn-3], quotient is stored downwards
	mov	%r11, %rsi
	neg	%rsi			C -d1

C rax rbx rcx rdx rsi rdi  rbp r8 r9 r10 r11 r12 r13 r14 r15
C     n2  un      -d1 dinv qp  d0 q0     d1  up  fn      msl

	ALIGN(16)
L(top):	mov	%rdi, %rax		C di		ncp
	mul	%rbx			C		0, 17
	mov	%r10, %rcx		C
	add	%rax, %rcx		C		4
	adc	%rbx, %rdx		C		5
	mov	%rdx, %r9		C q		6
	imul	%rsi, %rdx		C		6
	mov	%r8, %rax		C		ncp
	lea	(%rdx, %r10), %rbx	C n1 -= ...	10
	xor	R32(%r10), R32(%r10)	C next low limb defaults to zero
	mul	%r9			C		7
	cmp	%r14, %r13		C
	jg	L(19)			C fn > index: keep the zero limb
	mov	(%r12), %r10		C otherwise fetch the next limb from up
	sub	$8, %r12		C
L(19):	sub	%r8, %r10		C		ncp
	sbb	%r11, %rbx		C		11
	sub	%rax, %r10		C		11
	sbb	%rdx, %rbx		C		12
	xor	R32(%rax), R32(%rax)	C
	xor	R32(%rdx), R32(%rdx)	C
	cmp	%rcx, %rbx		C		13
	cmovnc	%r8, %rax		C		14
	cmovnc	%r11, %rdx		C		14
	adc	$0, %r9			C adjust q	14

	add	%rax, %r10		C		15
	adc	%rdx, %rbx		C		16
	cmp	%r11, %rbx		C
	jae	L(fix)			C high remainder limb >= d1: check further
L(bck):	mov	%r9, (%rbp)		C store quotient limb
	sub	$8, %rbp		C
	dec	%r14
	jns	L(top)

L(end):	mov	%r10, 8(%r12)		C remainder, low limb
	mov	%rbx, 16(%r12)		C remainder, high limb
	pop	%rbx
	pop	%rbp
	pop	%r12
	pop	%r13
	pop	%r14
	mov	%r15, %rax		C return most significant quotient limb
	pop	%r15
	FUNC_EXIT()
	ret				C without this, control would run on into L(fix)

C Out-of-line path, taken when the high remainder limb is >= d1 after the
C main adjustment.  If the remainder is still >= d1:d0, bump q and subtract
C the divisor once more.
L(fix):	seta	%dl			C dl = (n1 > d1)
	cmp	%r8, %r10
	setae	%al			C al = (n0 >= d0)
	orb	%dl, %al		C "orb" form to placate Sun tools
	je	L(bck)			C n1 = d1 and n0 < d0, no correction needed
	inc	%r9
	sub	%r8, %r10
	sbb	%r11, %rbx
	jmp	L(bck)
EPILOGUE()