beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / k7 / mul_1.asm
blob755cd2ed50ca4a216fd6bf18cf6eb0e0d42f074f
1 dnl AMD K7 mpn_mul_1.
3 dnl Copyright 1999-2002, 2005, 2008 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
34 C cycles/limb
35 C P5
36 C P6 model 0-8,10-12
37 C P6 model 9 (Banias)
38 C P6 model 13 (Dothan)
39 C P4 model 0 (Willamette)
40 C P4 model 1 (?)
41 C P4 model 2 (Northwood)
42 C P4 model 3 (Prescott)
43 C P4 model 4 (Nocona)
44 C AMD K6
45 C AMD K7 3.25
46 C AMD K8
48 C TODO
49 C * Improve feed-in and wind-down code. We beat the old code for all n != 1,
50 C but we might be able to do even better.
51 C * The feed-in code for mul_1c is crude.
53 ASM_START()
54 TEXT
55 ALIGN(16)
C mpn_mul_1c: feed-in code for the multiply-by-one-limb loop, with an extra
C carry-in limb that is added to the first low product.
C
C Stack slots, relative to %esp after the 16-byte save area is reserved:
C   20(%esp) -> %edi  destination pointer (products are stored through it)
C   24(%esp) -> %esi  source pointer (limbs are loaded through it)
C   28(%esp) -> %ebp  limb count n; the slot is overwritten with n/4 and
C                     used as the loop counter
C   32(%esp) -> %ecx  multiplier limb (operand of every mul)
C   36(%esp) -> %ebx  carry-in limb
C
C This routine has no loop or exit of its own.  It dispatches on n mod 4,
C forms the first product, and jumps to L(0)..L(3), L(cj2) or L(rt), all of
C which are defined in the body of mpn_mul_1 below.
56 PROLOGUE(mpn_mul_1c)
57 add $-16, %esp            C reserve a 16-byte register save area
58 mov %ebp, (%esp)          C save %ebp, %ebx, %esi, %edi; L(rt) reloads them
59 mov %ebx, 4(%esp)
60 mov %esi, 8(%esp)
61 mov %edi, 12(%esp)
63 mov 20(%esp), %edi        C %edi = destination pointer
64 mov 24(%esp), %esi        C %esi = source pointer
65 mov 28(%esp), %ebp        C %ebp = n
66 mov 32(%esp), %ecx        C %ecx = multiplier
67 mov %ebp, %ebx
68 shr $2, %ebp              C %ebp = n / 4
69 mov %ebp, 28(%esp)        C the loop counter lives in the stack slot
70 mov (%esi), %eax          C preload the first source limb for the first mul
71 and $3, %ebx              C %ebx = n mod 4
72 jz L(c0)                  C n mod 4 == 0
73 cmp $2, %ebx
74 mov 36(%esp), %ebx        C %ebx = carry-in; mov leaves the cmp flags intact
75 jz L(c2)                  C n mod 4 == 2
76 jg L(c3)                  C n mod 4 == 3; fall through when it is 1
C n mod 4 == 1.  Destination is biased by -4 so that 4(%edi) is the first
C result limb, matching the store at loop entry L(1).
78 L(c1): lea -4(%edi), %edi
79 mul %ecx                  C %edx:%eax = first limb * multiplier
80 test %ebp, %ebp           C n / 4 == 0 means n == 1
81 jnz 1f
82 add %ebx, %eax            C n == 1: low product + carry-in
83 mov %eax, 4(%edi)         C store the only result limb
84 mov %edx, %eax
85 adc %ebp, %eax            C %ebp is 0 here, so %eax = high product + carry flag
86 jmp L(rt)                 C shared register restore in mpn_mul_1
87 1: add %eax, %ebx         C n > 1: %ebx = low product + carry-in, stored at L(1)
88 mov $0, %ebp              C mov rather than xor: the carry flag must survive
89 adc %edx, %ebp            C %ebp = high product + carry flag
90 mov 4(%esi), %eax         C second source limb
91 jmp L(1)
C n mod 4 == 2.  Source is biased by +4.
93 L(c2): lea 4(%esi), %esi
94 mul %ecx                  C %edx:%eax = first limb * multiplier
95 test %ebp, %ebp           C tested by jnz below; the mov in between is flag-neutral
96 mov %ebx, %ebp            C %ebp = carry-in
97 jnz 2f
98 add %eax, %ebp            C n == 2: %ebp = first result limb
99 mov $0, %ebx
100 adc %edx, %ebx           C %ebx = high product + carry flag
101 mov (%esi), %eax         C second source limb
102 jmp L(cj2)               C go straight to the wind-down code
103 2: add %eax, %ebp        C n > 2: same set-up, then enter the loop at L(2)
104 mov $0, %ebx
105 adc %edx, %ebx
106 mov (%esi), %eax
107 jmp L(2)
C n mod 4 == 3.  Source biased by +8, destination by -12, so that the
C 12(%edi) store at L(3) writes the first result limb.
109 L(c3): lea 8(%esi), %esi
110 lea -12(%edi), %edi
111 mul %ecx
112 add %eax, %ebx           C %ebx = low product + carry-in
113 mov $0, %ebp
114 adc %edx, %ebp           C %ebp = high product + carry flag
115 mov -4(%esi), %eax       C second source limb
116 incl 28(%esp)            C entry at L(3) reaches the decl at once; compensate
117 jmp L(3)
C n mod 4 == 0.  Source biased by -4, destination by -8, so that the
C 8(%edi) store at L(0) writes the first result limb.
119 L(c0): mov 36(%esp), %ebx   C carry-in (not yet loaded on this path)
120 lea -4(%esi), %esi
121 lea -8(%edi), %edi
122 mul %ecx
123 mov %ebx, %ebp
124 add %eax, %ebp           C %ebp = low product + carry-in
125 mov $0, %ebx
126 adc %edx, %ebx           C %ebx = high product + carry flag
127 mov 8(%esi), %eax        C second source limb
128 jmp L(0)
130 EPILOGUE()
131 ALIGN(16)
C mpn_mul_1: multiply a vector of limbs by a single limb.
C
C Stack slots, relative to %esp after the 16-byte save area is reserved:
C   20(%esp) -> %edi  destination pointer
C   24(%esp) -> %esi  source pointer
C   28(%esp) -> %ebp  limb count n; slot is overwritten with n/4 (loop counter)
C   32(%esp) -> %ecx  multiplier limb
C Result: one product limb is stored per source limb, and the final high
C limb is returned in %eax.
C
C The feed-in code dispatches on n mod 4 and biases %esi/%edi so that the
C fixed displacements used at the chosen loop entry (L(0)..L(3)) address
C the first limbs.  %ebx and %ebp alternate as the limb waiting to be stored.
C mpn_mul_1c also jumps into the loop, wind-down and exit code defined here.
132 PROLOGUE(mpn_mul_1)
133 add $-16, %esp           C reserve a 16-byte register save area
134 mov %ebp, (%esp)         C save %ebp, %ebx, %esi, %edi
135 mov %ebx, 4(%esp)
136 mov %esi, 8(%esp)
137 mov %edi, 12(%esp)
139 mov 20(%esp), %edi       C %edi = destination pointer
140 mov 24(%esp), %esi       C %esi = source pointer
141 mov 28(%esp), %ebp       C %ebp = n
142 mov 32(%esp), %ecx       C %ecx = multiplier
143 mov %ebp, %ebx
144 shr $2, %ebp             C %ebp = n / 4
145 mov %ebp, 28(%esp)       C the loop counter lives in the stack slot
146 mov (%esi), %eax         C preload the first source limb for the first mul
147 and $3, %ebx             C %ebx = n mod 4
148 jz L(b0)                 C n mod 4 == 0
149 cmp $2, %ebx
150 jz L(b2)                 C n mod 4 == 2
151 jg L(b3)                 C n mod 4 == 3; fall through when it is 1
C n mod 4 == 1.  Destination biased by -4 so 4(%edi) is the first result limb.
153 L(b1): lea -4(%edi), %edi
154 mul %ecx                 C %edx:%eax = first limb * multiplier
155 test %ebp, %ebp          C n / 4 == 0 means n == 1
156 jnz L(gt1)
157 mov %eax, 4(%edi)        C n == 1: store the only result limb
158 mov %edx, %eax           C return the high product
159 jmp L(rt)
160 L(gt1): mov %eax, %ebx   C low product, stored at L(1)
161 mov %edx, %ebp           C high product, accumulated into at L(1)
162 mov 4(%esi), %eax        C second source limb
163 jmp L(1)
C n mod 4 == 2.  Source biased by +4.
165 L(b2): lea 4(%esi), %esi
166 mul %ecx
167 test %ebp, %ebp          C tested by jnz below; the movs in between are flag-neutral
168 mov %eax, %ebp           C low product, first result limb
169 mov %edx, %ebx           C high product
170 mov (%esi), %eax         C second source limb
171 jnz L(2)                 C n > 2: enter the loop
172 jmp L(cj2)               C n == 2: straight to the wind-down code
C n mod 4 == 3.  Source biased by +8, destination by -12.
174 L(b3): lea 8(%esi), %esi
175 lea -12(%edi), %edi
176 mul %ecx
177 mov %eax, %ebx           C low product, stored by 12(%edi) at L(3)
178 mov %edx, %ebp           C high product
179 mov -4(%esi), %eax       C second source limb
180 incl 28(%esp)            C entry at L(3) reaches the decl at once; compensate
181 jmp L(3)
C n mod 4 == 0.  Source biased by -4, destination by -8.
183 L(b0): lea -4(%esi), %esi
184 lea -8(%edi), %edi
185 mul %ecx
186 mov %eax, %ebp           C low product, stored by 8(%edi) at L(0)
187 mov %edx, %ebx           C high product
188 mov 8(%esi), %eax        C second source limb
189 jmp L(0)
C Main loop, four limbs per iteration.  Each step multiplies the preloaded
C limb, adds the low half into the pending high half, stores the finished
C limb and starts a new high half with mov $0 / adc.  The zeroing uses mov
C rather than xor because the carry flag from the preceding add must reach
C the adc.
191 ALIGN(16)
192 L(top): mov $0, %ebx
193 adc %edx, %ebx           C consumes the carry of the add before the decl
194 L(2): mul %ecx
195 add %eax, %ebx
196 mov %ebp, 0(%edi)
197 mov 4(%esi), %eax
198 mov $0, %ebp
199 adc %edx, %ebp
200 L(1): mul %ecx
201 add %eax, %ebp
202 mov 8(%esi), %eax
203 mov %ebx, 4(%edi)
204 mov $0, %ebx
205 adc %edx, %ebx
206 L(0): mov %ebp, 8(%edi)
207 mul %ecx
208 add %eax, %ebx
209 mov 12(%esi), %eax
210 lea 16(%esi), %esi       C advance source by four limbs; lea keeps the flags
211 mov $0, %ebp
212 adc %edx, %ebp
213 L(3): mov %ebx, 12(%edi)
214 mul %ecx
215 lea 16(%edi), %edi       C advance destination by four limbs
216 add %eax, %ebp
217 decl 28(%esp)            C dec leaves the carry flag alone; sets ZF for jnz
218 mov 0(%esi), %eax        C preload for the next iteration or the wind-down
219 jnz L(top)
C Wind-down: finish the last two limbs and form the return value.
221 L(end): mov $0, %ebx
222 adc %edx, %ebx
223 L(cj2): mul %ecx         C last product
224 add %eax, %ebx
225 mov %ebp, (%edi)         C second-to-last result limb
226 L(cj1): mov %ebx, 4(%edi)   C last result limb
227 adc $0, %edx             C fold the final carry into the high product
228 mov %edx, %eax           C return value
C Shared exit, also the target of jmp L(rt) from mpn_mul_1c.
230 L(rt): mov (%esp), %ebp  C reload the registers saved on entry
231 mov 4(%esp), %ebx
232 mov 8(%esp), %esi
233 mov 12(%esp), %edi
234 add $16, %esp            C release the save area
235 ret                      C return to the caller; without this, control runs off the end
236 EPILOGUE()
237 ASM_END()