beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / alpha / ev6 / nails / aors_n.asm
blobf6586773f573596412003b3d767071604f747e3b
1 dnl Alpha ev6 nails mpn_add_n and mpn_sub_n.
3 dnl Copyright 2002, 2006 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
32 dnl Runs at 2.5 cycles/limb. It would be possible to reach 2.0 cycles/limb
33 dnl with 8-way unrolling.
35 include(`../config.m4')
37 dnl INPUT PARAMETERS
38 define(`rp',`r16') dnl result limbs are stored through this pointer
39 define(`up',`r17') dnl first source operand is loaded through this pointer
40 define(`vp',`r18') dnl second source operand is loaded through this pointer
41 define(`n',`r19') dnl limb count, kept biased by -4 inside the function
dnl Raw (unmasked) results of OP for the four limbs of the current group.
dnl rl0 is r0, which is also where the return value is placed at L(ret).
43 define(`rl0',`r0')
44 define(`rl1',`r1')
45 define(`rl2',`r2')
46 define(`rl3',`r3')
dnl The four limbs of the current group loaded from up.
48 define(`ul0',`r4')
49 define(`ul1',`r5')
50 define(`ul2',`r6')
51 define(`ul3',`r7')
dnl The four limbs of the current group loaded from vp. r25 (vl3) doubles as
dnl the n mod 4 counter of the single-limb loop, which finishes before vl3
dnl is first loaded.
53 define(`vl0',`r22')
54 define(`vl1',`r23')
55 define(`vl2',`r24')
56 define(`vl3',`r25')
58 define(`numb_mask',`r21') dnl all ones shifted right by NAIL_BITS, applied to every stored limb
60 define(`NAIL_BITS',`GMP_NAIL_BITS')
dnl CYSH is the right-shift count that extracts the carry or borrow from a
dnl raw result limb. This default is replaced by the ifdef blocks that follow
dnl when an OPERATION_ symbol is defined.
61 define(`CYSH',`GMP_NUMB_BITS')
63 dnl This declaration is munged by configure
dnl NOTE(review): presumably states that this file supports nail sizes of 1
dnl to 63 bits; the macro is defined outside this file, confirm there.
64 NAILS_SUPPORT(1-63)
dnl Operation selection. With OPERATION_add_n defined the function is named
dnl mpn_add_n, OP is addq, and the carry is the raw result shifted right by
dnl GMP_NUMB_BITS.
66 ifdef(`OPERATION_add_n', `
67 define(`OP', addq)
68 define(`CYSH',`GMP_NUMB_BITS')
69 define(`func', mpn_add_n)')
dnl With OPERATION_sub_n defined the function is named mpn_sub_n, OP is subq,
dnl and the borrow is the raw result shifted right by 63, that is the top bit
dnl of the 64-bit difference.
70 ifdef(`OPERATION_sub_n', `
71 define(`OP', subq)
72 define(`CYSH',63)
73 define(`func', mpn_sub_n)')
dnl NOTE(review): presumably announces to the build machinery that this one
dnl source file provides both functions; the macro is defined outside this
dnl file, confirm there.
75 MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
77 ASM_START()
C func (mpn_add_n or mpn_sub_n, chosen by the OPERATION_ symbol):
C applies OP limb by limb to the n limbs at up and the n limbs at vp,
C propagating a carry/borrow between limbs, and stores each result limb,
C masked with numb_mask, at rp. The low bit of the final carry/borrow is
C left in r0 on return.
C
C Registers used beyond the named aliases:
C   r20       carry/borrow passed from one limb to the next
C   r25       n mod 4 counter for L(lp0), later reused as vl3
C   r27, r28  masked result limbs waiting to be stored
C
C Layout:
C   L(lp0)    handles n mod 4 limbs, one per pass
C   L(ge4)    loads the first full group of four limb pairs
C   L(ge8)    first pipelined pass (nothing pending from an earlier group)
C   L(top)    steady-state pass: finishes and stores limbs 2,3 of the
C             previous group, computes the current group, stores its limbs
C             0,1, and loads the next group
C   L(end)    wind-down pass, no more loads
C   L(cj0)    shared tail, also entered directly when only one group exists
C
C n is biased by -4 on entry and decremented by 4 per group, so its sign
C after each decrement tells whether another full group is still to come.
C
C NOTE(review): n = 0 is not special-cased; it would reach L(ge4) and
C process four limbs. Presumably callers guarantee n >= 1, confirm.
78 PROLOGUE(func)
79 lda numb_mask, -1(r31) C numb_mask = all ones (r31 reads as zero)
80 srl numb_mask, NAIL_BITS, numb_mask C clear the top NAIL_BITS bits
81 bis r31, r31, r20 C r20 = 0, no carry/borrow into the first limb
83 and n, 3, r25 C r25 = n mod 4, limbs to handle singly
84 lda n, -4(n) C bias n by -4
85 beq r25, L(ge4) C n is a multiple of 4, skip the single-limb loop
C Single-limb loop, r25 passes.
87 L(lp0): ldq ul0, 0(up)
88 lda up, 8(up)
89 ldq vl0, 0(vp)
90 lda vp, 8(vp)
91 lda rp, 8(rp) C rp is advanced early, hence the -8 offset in the store
92 lda r25, -1(r25)
93 OP ul0, vl0, rl0 C main operation on the two source limbs
94 OP rl0, r20, rl0 C apply the incoming carry/borrow
95 and rl0, numb_mask, r28 C strip the nail bits before storing
96 stq r28, -8(rp)
97 srl rl0, CYSH, r20 C carry/borrow out, taken from the unmasked result
98 bne r25, L(lp0)
100 blt n, L(ret) C fewer than 4 limbs in total, all done
C Load the first full group of four limb pairs.
102 L(ge4): ldq ul0, 0(up)
103 ldq vl0, 0(vp)
104 ldq ul1, 8(up)
105 ldq vl1, 8(vp)
106 ldq ul2, 16(up)
107 ldq vl2, 16(vp)
108 ldq ul3, 24(up)
109 ldq vl3, 24(vp)
110 lda up, 32(up)
111 lda vp, 32(vp)
112 lda n, -4(n)
113 bge n, L(ge8) C another group follows, enter the pipelined code
C Only this one group remains: start limbs 0 and 1 here, leaving limb 0
C masked in r27 and limb 1 raw in rl1, exactly the state L(cj0) expects.
C rp has not been advanced on this path, so the tail stores at 0..24(rp)
C land on this group.
115 OP ul0, vl0, rl0 C main-add 0
116 OP rl0, r20, rl0 C cy-add 0
117 OP ul1, vl1, rl1 C main-add 1
118 srl rl0, CYSH, r20 C gen cy 0
119 OP rl1, r20, rl1 C cy-add 1
120 and rl0,numb_mask, r27
121 br r31, L(cj0)
C First pipelined pass. Same work as one L(top) pass except that there is
C no previous group whose limbs 2 and 3 need finishing and storing. Each
C source register is reloaded with the next group right after its last use.
123 L(ge8): OP ul0, vl0, rl0 C main-add 0
124 ldq ul0, 0(up)
125 ldq vl0, 0(vp)
126 OP rl0, r20, rl0 C cy-add 0
127 OP ul1, vl1, rl1 C main-add 1
128 srl rl0, CYSH, r20 C gen cy 0
129 ldq ul1, 8(up)
130 ldq vl1, 8(vp)
131 OP rl1, r20, rl1 C cy-add 1
132 and rl0,numb_mask, r27
133 OP ul2, vl2, rl2 C main-add 2
134 srl rl1, CYSH, r20 C gen cy 1
135 ldq ul2, 16(up)
136 ldq vl2, 16(vp)
137 OP rl2, r20, rl2 C cy-add 2
138 and rl1,numb_mask, r28
139 stq r27, 0(rp) C store limb 0 of this group
140 OP ul3, vl3, rl3 C main-add 3
141 srl rl2, CYSH, r20 C gen cy 2
142 ldq ul3, 24(up)
143 ldq vl3, 24(vp)
144 OP rl3, r20, rl3 C cy-add 3
145 and rl2,numb_mask, r27 C limb 2 stays pending in r27, limb 3 raw in rl3
146 stq r28, 8(rp) C store limb 1 of this group
147 lda rp, 32(rp) C pending limbs 2,3 are now at -16(rp) and -8(rp)
148 lda up, 32(up)
149 lda vp, 32(vp)
150 lda n, -4(n)
151 blt n, L(end) C the group just loaded is the last one
C Steady-state loop. The bis r31, r31, r31 instructions write the zero
C register and so have no effect. NOTE(review): presumably they are padding
C that keeps the instructions in the intended groups of four for ev6 issue,
C confirm against the ev6 scheduling rules.
153 ALIGN(32)
154 L(top): OP ul0, vl0, rl0 C main-add 0
155 srl rl3, CYSH, r20 C gen cy 3
156 ldq ul0, 0(up)
157 ldq vl0, 0(vp)
159 OP rl0, r20, rl0 C cy-add 0
160 and rl3,numb_mask, r28
161 stq r27, -16(rp) C store limb 2 of the previous group
162 bis r31, r31, r31
164 OP ul1, vl1, rl1 C main-add 1
165 srl rl0, CYSH, r20 C gen cy 0
166 ldq ul1, 8(up)
167 ldq vl1, 8(vp)
169 OP rl1, r20, rl1 C cy-add 1
170 and rl0,numb_mask, r27
171 stq r28, -8(rp) C store limb 3 of the previous group
172 bis r31, r31, r31
174 OP ul2, vl2, rl2 C main-add 2
175 srl rl1, CYSH, r20 C gen cy 1
176 ldq ul2, 16(up)
177 ldq vl2, 16(vp)
179 OP rl2, r20, rl2 C cy-add 2
180 and rl1,numb_mask, r28
181 stq r27, 0(rp) C store limb 0 of the current group
182 bis r31, r31, r31
184 OP ul3, vl3, rl3 C main-add 3
185 srl rl2, CYSH, r20 C gen cy 2
186 ldq ul3, 24(up)
187 ldq vl3, 24(vp)
189 OP rl3, r20, rl3 C cy-add 3
190 and rl2,numb_mask, r27
191 stq r28, 8(rp) C store limb 1 of the current group
192 bis r31, r31, r31
194 bis r31, r31, r31
195 lda n, -4(n)
196 lda up, 32(up)
197 lda vp, 32(vp)
199 bis r31, r31, r31
200 bis r31, r31, r31
201 lda rp, 32(rp)
202 bge n, L(top) C another group remains beyond the one just loaded
C Wind-down: finish the previous group, then process the last loaded group
C without issuing any further loads.
204 L(end): OP ul0, vl0, rl0 C main-add 0
205 srl rl3, CYSH, r20 C gen cy 3
206 OP rl0, r20, rl0 C cy-add 0
207 and rl3,numb_mask, r28
208 stq r27, -16(rp) C store limb 2 of the previous group
209 OP ul1, vl1, rl1 C main-add 1
210 srl rl0, CYSH, r20 C gen cy 0
211 OP rl1, r20, rl1 C cy-add 1
212 and rl0,numb_mask, r27
213 stq r28, -8(rp) C store limb 3 of the previous group
C Shared tail. On entry r27 holds masked limb 0 and rl1 holds raw limb 1 of
C the final group; all four limbs are stored at 0..24(rp).
214 L(cj0): OP ul2, vl2, rl2 C main-add 2
215 srl rl1, CYSH, r20 C gen cy 1
216 OP rl2, r20, rl2 C cy-add 2
217 and rl1,numb_mask, r28
218 stq r27, 0(rp)
219 OP ul3, vl3, rl3 C main-add 3
220 srl rl2, CYSH, r20 C gen cy 2
221 OP rl3, r20, rl3 C cy-add 3
222 and rl2,numb_mask, r27
223 stq r28, 8(rp)
225 srl rl3, CYSH, r20 C gen cy 3
226 and rl3,numb_mask, r28
227 stq r27, 16(rp)
228 stq r28, 24(rp)
230 L(ret): and r20, 1, r0 C r0 = low bit of the final carry/borrow
231 ret r31, (r26), 1 C return to the address held in r26
232 EPILOGUE()
233 ASM_END()