1 dnl Alpha mpn_bdiv_dbm1c.
3 dnl Copyright 2008 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any later version.
20 dnl or both in parallel, as here.
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`..
/config.m4
')
39 C * Try less unrolling, 2-way should give the same performance.
40 C * Optimize feed-in and wind-down code, for speed, and perhaps further for code size.
42 C * This runs optimally given the algorithm; r8 is on a 3-operation recurrence
43 C path. We have not tried very hard to find a better algorithm. Perhaps
44 C it would be a good task for the GNU superoptimizer.
55 PROLOGUE(mpn_bdiv_dbm1c)
72 mulq r24, r19, r5 C U1
73 umulh r24, r19, r21 C U1
75 umulh r2, r19, r22 C U1
77 umulh r3, r19, r23 C U1
81 L(gt3): ldq r0, 24(r17)
82 mulq r24, r19, r5 C U1
83 umulh r24, r19, r21 C U1
86 umulh r2, r19, r22 C U1
89 umulh r3, r19, r23 C U1
102 mulq r24, r19, r6 C U1
103 umulh r24, r19, r22 C U1
104 mulq r3, r19, r7 C U1
105 umulh r3, r19, r23 C U1
109 L(gt2): ldq r0, 16(r17)
111 mulq r24, r19, r6 C U1
112 umulh r24, r19, r22 C U1
114 mulq r3, r19, r7 C U1
115 umulh r3, r19, r23 C U1
119 mulq r0, r19, r4 C U1
120 umulh r0, r19, r20 C U1
124 mulq r1, r19, r5 C U1
127 L(gt6): ldq r0, 0(r17)
128 mulq r1, r19, r5 C U1
132 L(b1): bgt r18, L(gt1)
134 mulq r24, r19, r7 C U1
135 umulh r24, r19, r23 C U1
139 L(gt1): ldq r0, 8(r17)
142 mulq r24, r19, r7 C U1
143 umulh r24, r19, r23 C U1
147 mulq r0, r19, r4 C U1
148 umulh r0, r19, r20 C U1
152 mulq r1, r19, r5 C U1
153 umulh r1, r19, r21 C U1
154 mulq r2, r19, r6 C U1
157 L(gt5): ldq r0, 0(r17)
158 mulq r1, r19, r5 C U1
159 umulh r1, r19, r21 C U1
161 mulq r2, r19, r6 C U1
165 L(b0): ldq r1, 8(r17)
170 mulq r24, r19, r4 C U1
171 umulh r24, r19, r20 C U1
174 mulq r1, r19, r5 C U1
175 umulh r1, r19, r21 C U1
176 mulq r2, r19, r6 C U1
177 umulh r2, r19, r22 C U1
178 mulq r3, r19, r7 C U1
181 L(gt4): ldq r0, 0(r17)
182 mulq r1, r19, r5 C U1
183 umulh r1, r19, r21 C U1
185 mulq r2, r19, r6 C U1
186 umulh r2, r19, r22 C U1
188 mulq r3, r19, r7 C U1
191 C *** MAIN LOOP START ***
193 L(top): mulq r0, r19, r4 C U1
195 L(L3): umulh r0, r19, r20 C U1
202 mulq r1, r19, r5 C U1
204 L(L2): umulh r1, r19, r21 C U1
211 mulq r2, r19, r6 C U1
213 L(L1): umulh r2, r19, r22 C U1
220 mulq r3, r19, r7 C U1
222 L(L0): umulh r3, r19, r23 C U1
233 C *** MAIN LOOP END ***
235 mulq r0, r19, r4 C U1
237 L(cj7): umulh r0, r19, r20 C U1
242 mulq r1, r19, r5 C U1
244 L(cj6): umulh r1, r19, r21 C U1
249 mulq r2, r19, r6 C U1
251 L(cj5): umulh r2, r19, r22 C U1
256 mulq r3, r19, r7 C U1
258 L(cj4): umulh r3, r19, r23 C U1
264 L(cj3): cmpult r8, r5, r28
269 L(cj2): cmpult r8, r6, r28
274 L(cj1): cmpult r8, r7, r28