beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / alpha / mod_34lsub1.asm
blob1b03b637d8d5b035a96203cd9e50d8c3ca24f878
1 dnl Alpha mpn_mod_34lsub1.
3 dnl Copyright 2002 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C EV4: 4 (?)
35 C EV5: 2.67
36 C EV6: 1.67
39 dnl INPUT PARAMETERS
40 dnl up r16
41 dnl n r17
dnl Register roles used by the routine below:
dnl   l0 l1 l2   the three limbs most recently loaded from up
dnl   a0 a1 a2   running sums; limb i is added to a0, a1 or a2 by i mod 3
dnl   c0 c1 c2   counts of the carries that came out of a0, a1 and a2
43 define(`l0',`r18')
44 define(`l1',`r19')
45 define(`l2',`r20')
46 define(`a0',`r21')
47 define(`a1',`r22')
48 define(`a2',`r23')
49 define(`c0',`r24')
50 define(`c1',`r5')
51 define(`c2',`r6')
53 ASM_START()
C mpn_mod_34lsub1 -- fold the n limbs at up into a single 64-bit value.
C
C In:   r16 = up, address of the first 64-bit limb
C       r17 = n, number of limbs; n = 0 is handled and yields 0
C Out:  r0  = sum of the folded pieces listed at $L_0.  Nothing reduces
C             that sum afterwards, so it can be wider than 48 bits.
C Uses: r0-r6 and r18-r25 as scratch, r16 and r17 are advanced; no memory
C       is written.
C
C Limb i is added into accumulator a0, a1 or a2 according to i mod 3, and
C each carry out of such an add is counted in the matching c0, c1 or c2.
C Throughout, r17 holds the number of limbs not yet loaded minus 3.
54 PROLOGUE(mpn_mod_34lsub1)
C Clear the three carry counters.
55 bis r31, r31, c0
56 bis r31, r31, c1
57 bis r31, r31, c2
C r17 = n - 3.  With fewer than 3 limbs the accumulators are zeroed and
C the tail code at $L_012 picks up the 0, 1 or 2 limbs.
59 lda r17, -3(r17)
60 bge r17, $L_3_or_more
61 bis r31, r31, a0
62 bis r31, r31, a1
63 bis r31, r31, a2
64 br r31, $L_012
C At least 3 limbs: the first three are loaded straight into the
C accumulators, so no add and no carry is involved yet.
66 $L_3_or_more:
67 ldq a0, 0(r16)
68 ldq a1, 8(r16)
69 ldq a2, 16(r16)
70 lda r16, 24(r16)
C r17 becomes n - 6; negative means at most 2 limbs are left.
71 lda r17, -3(r17)
72 blt r17, $L_012
C At least 6 limbs: fetch the next three into l0-l2 and issue the a0 add.
C Its carry test and the a1 and a2 adds are completed either at the top
C of the loop or at $L_end.
74 $L_6_or_more:
75 ldq l0, 0(r16)
76 ldq l1, 8(r16)
77 ldq l2, 16(r16)
78 addq l0, a0, a0
80 lda r16, 24(r16)
81 lda r17, -3(r17)
82 blt r17, $L_end
84 ALIGN(16)
85 C Main loop
C Three limbs per pass.  Each pass completes the adds for the limbs that
C were fetched earlier while fetching the next three.  Every cmpult reads
C the old l register before the ldq that overwrites it, so the order of
C these instructions must be kept.  After an addq, cmpult of the sum
C against the addend gives 1 exactly when the add wrapped.
86 $L_9_or_more:
87 $Loop: cmpult a0, l0, r0
88 ldq l0, 0(r16)
89 addq r0, c0, c0
90 addq l1, a1, a1
91 cmpult a1, l1, r0
92 ldq l1, 8(r16)
93 addq r0, c1, c1
94 addq l2, a2, a2
95 cmpult a2, l2, r0
96 ldq l2, 16(r16)
97 addq r0, c2, c2
98 addq l0, a0, a0
99 lda r16, 24(r16)
100 lda r17, -3(r17)
101 bge r17, $Loop
C Drain: the a0 add for l0 has been issued already, so only its carry is
C outstanding; the a1 and a2 adds for l1 and l2 are done in full here.
103 $L_end: cmpult a0, l0, r0
104 addq r0, c0, c0
105 addq l1, a1, a1
106 cmpult a1, l1, r0
107 addq r0, c1, c1
108 addq l2, a2, a2
109 cmpult a2, l2, r0
110 addq r0, c2, c2
112 C Handle the last (n mod 3) limbs
C r17 + 2 is the number of remaining limbs minus 1: negative means none,
C zero means one limb, positive means two limbs.
113 $L_012: lda r17, 2(r17)
114 blt r17, $L_0
115 ldq l0, 0(r16)
116 addq l0, a0, a0
117 cmpult a0, l0, r0
118 addq r0, c0, c0
119 beq r17, $L_0
120 ldq l1, 8(r16)
121 addq l1, a1, a1
122 cmpult a1, l1, r0
123 addq r0, c1, c1
125 C Align and sum our 3 main accumulators and 3 carry accumulators
C The folding treats 2^48 as 1, so a0 and c2 sit at bit 0, a1 and c0 at
C bit 16, a2 and c1 at bit 32.  Each value is split so that the part that
C would land at or above bit 48 is moved back down to bit 0:
C   a0      becomes  (a0 & 0xffffffffffff) + (a0 >> 48)
C   a1, c0  become   ((x & 0xffffffff) << 16) + (x >> 32)
C   a2, c1  become   ((x & 0xffff) << 32) + (x >> 16)
C   c2      becomes  c2 & 0xffffffffffff, its top 16 bits are dropped
C All pieces are added into r0.  From here on a1, a2 and c0 no longer
C hold sums or counts; they are reused for the shifted-down parts.
C When HAVE_LIMB_LITTLE_ENDIAN is not defined, the mask-and-shift values
C are built with zapnot followed by sll, going through r25, instead of
C with insll or inswl.
126 $L_0: srl a0, 48, r2
127 srl a1, 32, r4
128 ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
129 ` insll a1, 2, r1', C (a1 & 0xffffffff) << 16
130 ` zapnot a1, 15, r25
131 sll r25, 16, r1')
132 zapnot a0, 63, r0 C a0 & 0xffffffffffff
133 srl a2, 16, a1
134 ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
135 ` inswl a2, 4, r3', C (a2 & 0xffff) << 32
136 ` zapnot a2, 3, r25
137 sll r25, 32, r3')
138 addq r1, r4, r1
139 addq r0, r2, r0
140 srl c0, 32, a2
141 ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
142 ` insll c0, 2, r4', C (c0 & 0xffffffff) << 16
143 ` zapnot c0, 15, r25
144 sll r25, 16, r4')
145 addq r0, r1, r0
146 addq r3, a1, r3
147 addq r0, r3, r0
148 srl c1, 16, c0
149 ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
150 ` inswl c1, 4, r2', C (c1 & 0xffff) << 32
151 ` zapnot c1, 3, r25
152 sll r25, 32, r2')
153 addq r4, a2, r4
154 C srl c2, 48, r3 C This will be 0 in practise
155 zapnot c2, 63, r1 C r1 = c2 & 0xffffffffffff
156 addq r0, r4, r0
157 addq r2, c0, r2
158 addq r0, r2, r0
159 C addq r1, r3, r1
160 addq r0, r1, r0
C Return to the address in r26 with the folded sum in r0.
162 ret r31, (r26), 1
163 EPILOGUE(mpn_mod_34lsub1)
164 ASM_END()