beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / pa64 / aorslsh1_n.asm
blob2a55ddea30ffda195791f8de9e5abcd148765020
1 dnl PA64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
3 dnl Copyright 2003 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C 8000,8200: 2
35 C 8500,8600,8700: 1.75
37 C TODO
38 C * Write special feed-in code for each (n mod 8). (See the ia64 code.)
39 C * Try to make this run at closer to 1.5 c/l.
40 C * Set up register aliases (define(`u0',`%r19')).
41 C * Explicitly align loop.
43 dnl INPUT PARAMETERS
dnl m4 aliases for the four argument registers used by the code below:
dnl   rp  pointer the result limbs are stored through
dnl   up  pointer to the limbs that are added to / subtracted from
dnl   vp  pointer to the limbs that are shifted left by one bit first
dnl   n   limb count; decremented in steps of 8 by the unrolled code
44 define(`rp',`%r26')
45 define(`up',`%r25')
46 define(`vp',`%r24')
47 define(`n',`%r23')
dnl Operation selection.  Exactly one of OPERATION_addlsh1_n and
dnl OPERATION_sublsh1_n is expected to be defined when this file is processed.
dnl   ADCSBC  the 64-bit add-with-carry or subtract-with-borrow instruction
dnl   INITC   loads the initial saved carry state: 0 for the add variant,
dnl           1 for the subtract variant (the code restores the carry bit from
dnl           this value with an addi -1, so 1 yields a set carry bit)
dnl   func    name of the entry point that is emitted
49 ifdef(`OPERATION_addlsh1_n',`
50 define(ADCSBC, `add,dc')
51 define(INITC, `ldi 0,')
52 define(func, mpn_addlsh1_n)
54 ifdef(`OPERATION_sublsh1_n',`
55 define(ADCSBC, `sub,db')
56 define(INITC, `ldi 1,')
57 define(func, mpn_sublsh1_n)
dnl NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably it
dnl announces the entry points this one source can provide -- confirm.
60 MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
dnl ABI selection.
dnl   LEVEL    the .level directive emitted before the function
dnl   RETREG   register that receives the carry-out/borrow-out value computed
dnl            just before each return
dnl   CLRRET1  extra instruction emitted after RETREG is written: nothing for
dnl            the 2.0w ABI, a clear of %r28 for the 2.0n ABI
62 ifdef(`HAVE_ABI_2_0w',`
63 define(LEVEL, `.level 2.0w')
64 define(RETREG, `%r28')
65 define(CLRRET1, `dnl')
67 ifdef(`HAVE_ABI_2_0n',`
68 define(LEVEL, `.level 2.0')
69 define(RETREG, `%r29')
70 define(CLRRET1, `ldi 0, %r28')
C func(rp, up, vp, n):  rp[] = up[] +- (vp[] << 1), limb by limb, and leave
C the carry-out (add) or borrow-out (sub) in RETREG before returning.
C
C Structure:
C   1. loop0    handles the first (n mod 8) limbs one at a time.
C   2. unrolled/loop/end handle the remaining limbs in groups of 8, with the
C      loads and shifts for the next group overlapped with the adds/subs of
C      the current one.
C
C Register roles:
C   %r1        carry state saved as 0/1 across instructions that need the
C              carry bit for something else; restored with addi -1,%r1,%r0
C   %r28       most recent unshifted v limb; its bit 63 is shifted into the
C              bottom of the next limb
C   %r22       loop0 iteration count, later reused for a u limb
C   %r19-%r22  u limbs
C   %r31, %r4-%r9, %r3   the eight shifted v limbs of the current group
C   %r29       result limb on its way to memory
C   %r3-%r9    saved in the 0x100-byte area opened on the stack at entry and
C              reloaded before returning
C
C Instructions directly after a branch sit in its delay slot and are executed
C whether or not the branch is taken.
C
C NOTE(review): n = 0 gives n mod 8 = 0 and therefore enters the unrolled code,
C which always processes one full group of 8 limbs; presumably callers
C guarantee n >= 1 -- confirm.
73 LEVEL
74 PROLOGUE(func)
C Store %r3 at the current %r30 and then advance %r30 by 0x100.
75 std,ma %r3, 0x100(%r30) C save reg
77 INITC %r1 C init saved cy
79 C Primitive code for the first (n mod 8) limbs:
80 extrd,u n, 63, 3, %r22 C count for loop0
81 comib,= 0, %r22, L(unrolled) C skip loop0?
C Delay slot: no previous v limb yet, so a zero bit is shifted in first.
82 copy %r0, %r28
83 LDEF(loop0)
C One limb per iteration: fetch v and u limbs and step both pointers.
84 ldd 0(vp), %r21
85 ldo 8(vp), vp
86 ldd 0(up), %r19
87 ldo 8(up), up
C %r31 = (v limb << 1) | (previous v limb >> 63)
88 shrpd %r21, %r28, 63, %r31
89 addi -1, %r1, %r0 C restore cy
90 ADCSBC %r19, %r31, %r29
91 std %r29, 0(rp)
92 add,dc %r0, %r0, %r1 C save cy
C Remember this v limb so its top bit feeds the next limb.
93 copy %r21, %r28
94 addib,> -1, %r22, L(loop0)
C Delay slot: step the result pointer.
95 ldo 8(rp), rp
C n -= 8; if at least one full group of 8 limbs remains, go process it.
97 addib,>= -8, n, L(unrolled)
98 addi -1, %r1, %r0 C restore cy
C Fewer than 8 limbs in total: finish here.
C %r28 = top bit of the last v limb, i.e. the bit shifted out of the operand.
100 shrpd %r0, %r28, 63, %r28
C Combine that bit with the pending carry/borrow.  For the subtract variant
C the result is 0 - bit - borrow, so it is negated to give a positive count.
101 ADCSBC %r0, %r28, RETREG
102 ifdef(`OPERATION_sublsh1_n',
103 ` sub %r0, RETREG, RETREG')
104 CLRRET1
106 bve (%r2)
C Delay slot: step %r30 back by 0x100 and reload %r3 from there.
107 ldd,mb -0x100(%r30), %r3
110 LDEF(unrolled)
C Save %r4-%r9 in the stack area opened at entry while loading the first
C group of eight v limbs into %r4-%r9, %r3 and %r28.  Each shrpd forms
C (this limb << 1) | (previous limb >> 63); after the sequence the shifted
C limbs are in %r31, %r4, %r5, %r6, %r7, %r8, %r9, %r3 in that order and
C %r28 holds the unshifted eighth limb for the next group.
111 std %r4, -0xf8(%r30) C save reg
112 ldd 0(vp), %r4
113 std %r5, -0xf0(%r30) C save reg
114 ldd 8(vp), %r5
115 std %r6, -0xe8(%r30) C save reg
116 ldd 16(vp), %r6
117 std %r7, -0xe0(%r30) C save reg
119 ldd 24(vp), %r7
120 shrpd %r4, %r28, 63, %r31
121 std %r8, -0xd8(%r30) C save reg
122 ldd 32(vp), %r8
123 shrpd %r5, %r4, 63, %r4
124 std %r9, -0xd0(%r30) C save reg
125 ldd 40(vp), %r9
126 shrpd %r6, %r5, 63, %r5
127 ldd 48(vp), %r3
128 shrpd %r7, %r6, 63, %r6
129 ldd 56(vp), %r28
130 shrpd %r8, %r7, 63, %r7
C First four u limbs of the group; the other four are loaded as these
C registers are consumed.
131 ldd 0(up), %r19
132 shrpd %r9, %r8, 63, %r8
133 ldd 8(up), %r20
134 shrpd %r3, %r9, 63, %r9
135 ldd 16(up), %r21
136 shrpd %r28, %r3, 63, %r3
137 ldd 24(up), %r22
139 nop C alignment FIXME
C n -= 8; if no further group follows this one, skip the loop and run the
C final-group code at end.
140 addib,<= -8, n, L(end)
141 addi -1, %r1, %r0 C restore cy
142 LDEF(loop)
C Add/subtract the current group of 8 limbs.  The carry bit must stay live
C across these eight ADCSBC instructions.  As each %r19-%r22 value is used it
C is reloaded with u limbs 4-7 of this group, and %r4-%r6 are reloaded with
C the first v limbs of the next group once their shifted values are consumed.
143 ADCSBC %r19, %r31, %r29
144 ldd 32(up), %r19
145 std %r29, 0(rp)
146 ADCSBC %r20, %r4, %r29
147 ldd 40(up), %r20
148 std %r29, 8(rp)
149 ADCSBC %r21, %r5, %r29
150 ldd 48(up), %r21
151 std %r29, 16(rp)
152 ADCSBC %r22, %r6, %r29
153 ldd 56(up), %r22
154 std %r29, 24(rp)
155 ADCSBC %r19, %r7, %r29
156 ldd 64(vp), %r4
157 std %r29, 32(rp)
158 ADCSBC %r20, %r8, %r29
159 ldd 72(vp), %r5
160 std %r29, 40(rp)
161 ADCSBC %r21, %r9, %r29
162 ldd 80(vp), %r6
163 std %r29, 48(rp)
164 ADCSBC %r22, %r3, %r29
165 std %r29, 56(rp)
167 add,dc %r0, %r0, %r1 C save cy
C Load the rest of the next group and form its shifted limbs.  The first
C shrpd still uses the old %r28 (last v limb of the group just finished);
C %r28 is only overwritten by the load from 120(vp) afterwards.
169 ldd 88(vp), %r7
170 shrpd %r4, %r28, 63, %r31
171 ldd 96(vp), %r8
172 shrpd %r5, %r4, 63, %r4
173 ldd 104(vp), %r9
174 shrpd %r6, %r5, 63, %r5
175 ldd 112(vp), %r3
176 shrpd %r7, %r6, 63, %r6
177 ldd 120(vp), %r28
178 shrpd %r8, %r7, 63, %r7
179 ldd 64(up), %r19
180 shrpd %r9, %r8, 63, %r8
181 ldd 72(up), %r20
182 shrpd %r3, %r9, 63, %r9
183 ldd 80(up), %r21
184 shrpd %r28, %r3, 63, %r3
185 ldd 88(up), %r22
C Step all three pointers past the group just processed.
187 ldo 64(vp), vp
188 ldo 64(rp), rp
189 ldo 64(up), up
C n -= 8; repeat while yet another group follows the one now loaded.
190 addib,> -8, n, L(loop)
191 addi -1, %r1, %r0 C restore cy
192 LDEF(end)
C Final group: same eight add/subtract steps as in the loop, but with no
C further v limbs to fetch.  The saved registers are reloaded from the stack
C as soon as their shifted limb has been consumed.
193 ADCSBC %r19, %r31, %r29
194 ldd 32(up), %r19
195 std %r29, 0(rp)
196 ADCSBC %r20, %r4, %r29
197 ldd 40(up), %r20
198 std %r29, 8(rp)
199 ADCSBC %r21, %r5, %r29
200 ldd 48(up), %r21
201 std %r29, 16(rp)
202 ADCSBC %r22, %r6, %r29
203 ldd 56(up), %r22
204 std %r29, 24(rp)
205 ADCSBC %r19, %r7, %r29
206 ldd -0xf8(%r30), %r4 C restore reg
207 std %r29, 32(rp)
208 ADCSBC %r20, %r8, %r29
209 ldd -0xf0(%r30), %r5 C restore reg
210 std %r29, 40(rp)
211 ADCSBC %r21, %r9, %r29
212 ldd -0xe8(%r30), %r6 C restore reg
213 std %r29, 48(rp)
214 ADCSBC %r22, %r3, %r29
215 ldd -0xe0(%r30), %r7 C restore reg
216 std %r29, 56(rp)
C Carry-out: top bit of the last v limb combined with the carry/borrow still
C live from the last ADCSBC (the shrpd and ldd in between leave it alone).
C The subtract variant negates the result, as in the short-operand exit.
218 shrpd %r0, %r28, 63, %r28
219 ldd -0xd8(%r30), %r8 C restore reg
220 ADCSBC %r0, %r28, RETREG
221 ifdef(`OPERATION_sublsh1_n',
222 ` sub %r0, RETREG, RETREG')
223 CLRRET1
225 ldd -0xd0(%r30), %r9 C restore reg
226 bve (%r2)
C Delay slot: step %r30 back by 0x100 and reload %r3 from there.
227 ldd,mb -0x100(%r30), %r3 C restore reg
228 EPILOGUE()