beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / divrem_2.asm
blob4c38ad0acb92eeee7297f7ededc0286b23a25860
1 dnl x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
3 dnl Copyright 2007, 2008 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
34 C norm frac
35 C 486
36 C P5
37 C P6-13 29.2
38 C P6-15 *26
39 C K6
40 C K7 22
41 C K8 *19
42 C P4-f1
43 C P4-f2 *65
44 C P4-f3
45 C P4-f4 *72
47 C A star means numbers not updated for the latest version of the code.
50 C TODO
51 C * Perhaps keep ecx or esi in stack slot, freeing up a reg for q0.
52 C * The loop has not been carefully tuned. We should at the very least do
53 C some local insn swapping.
54 C * The code outside the main loop is what gcc generated. Clean up!
55 C * Clean up stack slot usage.
57 C INPUT PARAMETERS
58 C qp
59 C fn
60 C up_param
61 C un_param
62 C dp
65 C eax ebx ecx edx esi edi ebp
66 C cnt qp
ASM_START()
	TEXT
	ALIGN(16)

C mpn_divrem_2 (qp, fn, up, un, dp)
C
C Divide the un-limb number at up, extended at the low end by fn zero limbs,
C by the 2-limb divisor dp[1]:dp[0].
C
C   qp  receives fn+un-2 quotient limbs, written from index fn+un-3 down to 0
C   fn  count of low quotient limbs developed with zero limbs shifted in
C   up  dividend; the 2-limb remainder is stored back into up[0] and up[1]
C   un  dividend size in limbs (up[un-1] and up[un-2] are read
C       unconditionally, so un >= 2 is assumed -- TODO confirm with callers)
C   dp  divisor, dp[0] low limb, dp[1] high limb (normalized, per the file
C       description)
C
C Return value (eax): 1 if the two top dividend limbs were >= the divisor, in
C which case the divisor is subtracted from them once before the main loop;
C otherwise 0.
C
C All arguments are read from the stack; ebx, esi, edi and ebp are saved and
C restored.
C
C Stack frame after the prologue, offsets from esp:
C    0  q0   low limb of the quotient estimate (scratch during inverse setup)
C    4  -d1
C   12  address of up[un-3], the first dividend limb fetched by the loop
C   16  di   reciprocal used by the loop
C   20  d0   dp[0]
C   24  d1   dp[1]
C   32  msl  return value
C   36  saved ebx, 40 saved esi, 44 saved edi, 48 saved ebp
C   52  return address
C   56  qp, 60 fn, 64 up, 68 un, 72 dp

PROLOGUE(mpn_divrem_2)
	push	%ebp
	push	%edi
	push	%esi
	push	%ebx
	sub	$36, %esp
	mov	68(%esp), %ecx		C un
	mov	72(%esp), %esi		C dp
	movl	$0, 32(%esp)		C msl = 0
	lea	0(,%ecx,4), %edi
	add	64(%esp), %edi		C up
	mov	(%esi), %ebx		C d0
	mov	4(%esi), %eax		C d1
	mov	%ebx, 20(%esp)
	sub	$12, %edi		C edi = address of up[un-3]
	mov	%eax, 24(%esp)
	mov	%edi, 12(%esp)
	mov	8(%edi), %ebx		C n2 = up[un-1]
	mov	4(%edi), %ebp		C n1 = up[un-2]

	C If n2:n1 >= d1:d0, subtract the divisor once and set msl (at L(35)).
	cmp	%eax, %ebx
	jb	L(8)			C n2 < d1, nothing to subtract
	seta	%dl			C n2 > d1
	cmp	20(%esp), %ebp
	setae	%al			C n1 >= d0
	orb	%dl, %al		C "orb" form to placate Sun tools
	jne	L(35)

L(8):
	mov	60(%esp), %esi		C fn
	lea	-3(%esi,%ecx), %edi	C index of the top quotient limb
	test	%edi, %edi
	js	L(9)			C no quotient limbs to develop

	C Compute the reciprocal di.  First the quotient of (~d1):0xffffffff
	C divided by d1, then a correction that takes d0 into account.
	C NOTE(review): this looks like the usual 3/2 inverse setup; confirm
	C against the C implementation.
	mov	24(%esp), %edx
	mov	$-1, %esi
	mov	%esi, %eax
	mov	%esi, %ecx
	not	%edx
	divl	24(%esp)
	mov	%eax, %esi		C candidate di
	imul	24(%esp), %eax		C low limb of di * d1
	mov	%eax, (%esp)
	mov	%esi, %eax
	mull	20(%esp)		C edx:eax = di * d0
	mov	(%esp), %eax
	add	20(%esp), %eax
	adc	$0, %ecx
	add	%eax, %edx
	adc	$0, %ecx
	mov	%ecx, %eax		C mov leaves the flags from adc intact
	js	L(32)			C ecx still negative, di needs no fixup
L(36):	dec	%esi			C di--
	sub	24(%esp), %edx
	sbb	$0, %eax
	jns	L(36)

L(32):
	mov	%esi, 16(%esp)		C di
	mov	%edi, %ecx		C un
	mov	12(%esp), %esi		C up
	mov	24(%esp), %eax
	neg	%eax
	mov	%eax, 4(%esp)		C -d1
	ALIGN(16)

C Main loop, one quotient limb per iteration.  Registers:
C   ebx  n2, high limb of the running remainder
C   ebp  n1, low limb of the running remainder
C   ecx  index of the quotient limb being produced, counts down to 0
C   esi  address of the next dividend limb to bring in
C   edi  q, the quotient limb
C   eax, edx  scratch

L(loop):
	mov	16(%esp), %eax		C di
	mul	%ebx			C edx:eax = di * n2
	add	%ebp, %eax
	mov	%eax, (%esp)		C q0
	adc	%ebx, %edx		C q:q0 = di * n2 + n2:n1
	mov	%edx, %edi		C q
	imul	4(%esp), %edx		C -d1 * q
	mov	20(%esp), %eax
	lea	(%edx, %ebp), %ebx	C n1 -= ...
	mul	%edi			C edx:eax = d0 * q
	xor	%ebp, %ebp		C next low limb is 0 in the fraction part
	cmp	60(%esp), %ecx
	jl	L(19)			C index below fn, no dividend limb left
	mov	(%esi), %ebp		C bring in the next dividend limb
	sub	$4, %esi
L(19):	sub	20(%esp), %ebp		C remainder -= d1:d0
	sbb	24(%esp), %ebx
	sub	%eax, %ebp		C remainder -= d0 * q
	sbb	%edx, %ebx
	mov	20(%esp), %eax		C d0
	inc	%edi			C q++
	xor	%edx, %edx
	cmp	(%esp), %ebx		C carry set when ebx < q0
	adc	$-1, %edx		C mask
	add	%edx, %edi		C q--
	and	%edx, %eax		C d0 or 0
	and	24(%esp), %edx		C d1 or 0
	add	%eax, %ebp		C add the divisor back when mask is -1
	adc	%edx, %ebx
	cmp	24(%esp), %ebx
	jae	L(fix)			C remainder may still be >= divisor
L(bck):	mov	56(%esp), %edx		C qp
	mov	%edi, (%edx, %ecx, 4)	C qp[ecx] = q
	dec	%ecx
	jns	L(loop)

C Store the 2-limb remainder, return msl, restore the saved registers.
L(9):	mov	64(%esp), %esi		C up
	mov	%ebp, (%esi)
	mov	%ebx, 4(%esi)
	mov	32(%esp), %eax		C msl
	add	$36, %esp
	pop	%ebx
	pop	%esi
	pop	%edi
	pop	%ebp
	ret				C without this, control runs on into L(fix)

C Reached with the flags of the compare of ebx against d1 still live.  If the
C remainder is >= the divisor, bump q and subtract the divisor once more.
L(fix):	seta	%dl			C ebx > d1
	cmp	20(%esp), %ebp
	setae	%al			C ebp >= d0
	orb	%dl, %al		C "orb" form to placate Sun tools
	je	L(bck)			C remainder < divisor, q is final
	inc	%edi
	sub	20(%esp), %ebp
	sbb	24(%esp), %ebx
	jmp	L(bck)

C Top two dividend limbs were >= the divisor: subtract it and record msl = 1.
L(35):	sub	20(%esp), %ebp
	sbb	24(%esp), %ebx
	movl	$1, 32(%esp)
	jmp	L(8)
EPILOGUE()