dnl  Alpha mpn_divexact_by3c -- mpn division by 3, expecting no remainder.
3 dnl Copyright
2004, 2005, 2009 Free Software Foundation
, Inc.
5 dnl
This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software
; you can redistribute it and/or modify
8 dnl it under the terms of
either:
10 dnl
* the GNU Lesser General
Public License as published by the Free
11 dnl Software Foundation
; either version 3 of the License, or (at your
12 dnl option
) any later version.
16 dnl
* the GNU General
Public License as published by the Free Software
17 dnl Foundation
; either version 2 of the License, or (at your option) any
20 dnl
or both
in parallel
, as here.
22 dnl The GNU MP Library is distributed
in the hope that it will be useful
, but
23 dnl WITHOUT ANY WARRANTY
; without even the implied warranty of MERCHANTABILITY
24 dnl
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License
27 dnl You should have received copies of the GNU General
Public License
and the
28 dnl GNU Lesser General
Public License along with the GNU MP Library. If
not,
29 dnl see
https://www.gnu.
org/licenses
/.
dnl  Pull in the shared m4 configuration.  The file name has to be a single
dnl  unbroken quoted string; a line break inside the quotes becomes part of
dnl  the name and the include fails.
include(`../config.m4')
36 C EV6: 6.3 Note that mpn_bdiv_dbm1c is faster
39 C * Remove the unops, they benefit just ev6, which no longer uses this file.
40 C * Try prefetch for destination, using lds.
41 C * Improve feed-in code, by moving initial mulq earlier; make initial load
42 C to u0/u0 to save some copying.
43 C * Combine u0 and u2, u1 and u3.
C Three 64-bit constants used by the divide-by-3 arithmetic:
C   0xAAAAAAAAAAAAAAAB = multiplicative inverse of 3 modulo 2^64
C                        (3 * 0xAAAAAAAAAAAAAAAB = 2^65 + 1)
C   0x5555555555555555 = (2^64 - 1) / 3
C   0xAAAAAAAAAAAAAAAA = 2 * (2^64 - 1) / 3
C NOTE(review): the ldq instructions at offsets 0, 8 and 16 from r0 appear to
C read these three quadwords in this order, so the order should not be
C changed -- confirm that r0 points at this table.
54 .quad 0xAAAAAAAAAAAAAAAB
55 .quad 0x5555555555555555
56 .quad 0xAAAAAAAAAAAAAAAA
C Register aliases.  Every alias must be written with its closing quote
C directly after the register name: a line break inside the quotes becomes
C part of the expansion and splits each instruction that uses the alias.

C Registers holding the three magic constants, named after the constant
C that is loaded into each.
define(`xAAAAAAAAAAAAAAAB', `r20')
define(`x5555555555555555', `r21')
define(`xAAAAAAAAAAAAAAAA', `r22')

C u0-u3: operand limbs, each multiplied by the inverse-of-3 constant.
define(`u0', `r0')    define(`u1', `r1')
define(`u2', `r2')    define(`u3', `r3')

C l0: correction term added to a product to form x, the value stored to rp.
define(`l0', `r25')   define(`x',  `r8')

C q0, q1: alternating destinations of the mulq instructions.
define(`q0', `r4')    define(`q1', `r5')

C p6, p7: cmpult results of x against the 0x5555... and 0xAAAA... constants.
define(`p6', `r6')    define(`p7', `r7')

C t0, t1: temporaries that get masked with the 0x5555... constant.
define(`t0', `r23')   define(`t1', `r24')

C cymask: masked with the 0x5555... constant to produce the next l0.
C NOTE(review): r28 also receives the first source limb at function entry.
define(`cymask', `r28')
71 PROLOGUE(mpn_divexact_by3c,gp)
73 ldq r28, 0(up) C load first limb early
75 C Put magic constants in registers
77 ldq xAAAAAAAAAAAAAAAB, 0(r0)
78 ldq x5555555555555555, 8(r0)
79 ldq xAAAAAAAAAAAAAAAA, 16(r0)
81 C Compute initial l0 value
85 and p6, x5555555555555555, l0
86 cmovne p7, xAAAAAAAAAAAAAAAA, l0
88 C Feed-in depending on (n mod 4)
100 mulq r28, xAAAAAAAAAAAAAAAB, q0
107 mulq r28, xAAAAAAAAAAAAAAAB, q1
111 $Lb01: lda rp, -8(rp)
112 mulq r28, xAAAAAAAAAAAAAAAB, q0
120 mulq r28, xAAAAAAAAAAAAAAAB, q1
127 cmpult u3, cy, cy C L0
128 mulq u0, xAAAAAAAAAAAAAAAB, q0 C U1
135 cmpult x5555555555555555, x, p6 C U0
137 cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
143 and cymask, x5555555555555555, l0 C U1
145 and t0, x5555555555555555, t0
147 and t1, x5555555555555555, t1
158 cmpult u0, cy, cy C L0
159 mulq u1, xAAAAAAAAAAAAAAAB, q1 C U1
166 cmpult x5555555555555555, x, p6 C U0
168 cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
174 and cymask, x5555555555555555, l0 C U1
176 and t0, x5555555555555555, t0
178 and t1, x5555555555555555, t1
189 cmpult u1, cy, cy C L0
190 mulq u2, xAAAAAAAAAAAAAAAB, q0 C U1
197 cmpult x5555555555555555, x, p6 C U0
199 cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
205 and cymask, x5555555555555555, l0 C U1
207 and t0, x5555555555555555, t0
209 and t1, x5555555555555555, t1
220 cmpult u2, cy, cy C L0
221 mulq u3, xAAAAAAAAAAAAAAAB, q1 C U1
228 cmpult x5555555555555555, x, p6 C U0
230 cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
231 lda n, -4(n) C L1 bookkeeping
236 and cymask, x5555555555555555, l0 C U1
238 and t0, x5555555555555555, t0
240 and t1, x5555555555555555, t1
250 ldl r31, 256(up) C prefetch
254 C *** MAIN LOOP END ***
257 cmpult u3, cy, cy C L0
258 mulq u0, xAAAAAAAAAAAAAAAB, q0 C U1
265 cmpult x5555555555555555, x, p6 C U0
267 cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
273 and cymask, x5555555555555555, l0 C U1
275 and t0, x5555555555555555, t0
277 and t1, x5555555555555555, t1
287 cmpult u0, cy, cy C L0
289 cmpult x5555555555555555, x, p6 C U0
290 cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
299 C This is useful for playing with various schedules.
300 C Expand as: one(0)one(1)one(2)one(3)
303 cmpult `$
'eval(($1+3)%4), cy, cy C L0
304 mulq `$'$1, xAAAAAAAAAAAAAAAB
, `$
'eval(4+$1%2) C U1
305 ldq `$'eval
(($1+1)%4), eval
($1*8+16)(up
) C L1
306 addq `$
'eval(4+($1+1)%2), l0, x C U0
311 cmpult x5555555555555555, x, p6 C U0
313 cmpult xAAAAAAAAAAAAAAAA, x, p7 C U1
319 and cymask, x5555555555555555, l0 C U1
321 and t0, x5555555555555555, t0
323 and t1, x5555555555555555, t1
330 stq x, eval($1*8)(rp) C L1