beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / ia64 / logops_n.asm
blobe4a2f61cce5158e03e565e215eb053ad17662f26
1 dnl IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
2 dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
4 dnl Contributed to the GNU project by Torbjorn Granlund.
6 dnl Copyright 2003-2005 Free Software Foundation, Inc.
8 dnl This file is part of the GNU MP Library.
9 dnl
10 dnl The GNU MP Library is free software; you can redistribute it and/or modify
11 dnl it under the terms of either:
12 dnl
13 dnl * the GNU Lesser General Public License as published by the Free
14 dnl Software Foundation; either version 3 of the License, or (at your
15 dnl option) any later version.
16 dnl
17 dnl or
18 dnl
19 dnl * the GNU General Public License as published by the Free Software
20 dnl Foundation; either version 2 of the License, or (at your option) any
21 dnl later version.
22 dnl
23 dnl or both in parallel, as here.
24 dnl
25 dnl The GNU MP Library is distributed in the hope that it will be useful, but
26 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
28 dnl for more details.
29 dnl
30 dnl You should have received copies of the GNU General Public License and the
31 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
32 dnl see https://www.gnu.org/licenses/.
34 include(`../config.m4')
36 C cycles/limb
37 C Itanium: 2
38 C Itanium 2: 1
40 C TODO
41 C * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
42 C wind-down code).
44 C INPUT PARAMETERS
C Register aliases for the four arguments.  In the code below, 8-byte limbs
C are stored through rp and loaded through up and vp, each pointer being
C post-incremented by 8, and n is consumed as the limb count.
46 define(`rp', `r32')
47 define(`up', `r33')
48 define(`vp', `r34')
49 define(`n', `r35')
C Operation selection.  Each OPERATION_xxx symbol, when defined, sets up
C three macros used by the function body:
C   func      the name passed to PROLOGUE
C   logop     the two-input instruction applied to a limb pair, where the
C             second argument is the limb from up and the third the limb
C             from vp
C   notormov  the follow-up step on the logop result: either a plain mov,
C             or a subtraction from -1, which is the bitwise complement
C             because -1 - x equals NOT x
C Presumably the build defines exactly one of these symbols per object;
C TODO confirm against the build machinery.

C and_n: u AND v, copied unchanged.
50 ifdef(`OPERATION_and_n',
51 ` define(`func',`mpn_and_n')
52 define(`logop', `and $1 = $2, $3')
53 define(`notormov', `mov $1 = $2')')
C andn_n: andcm yields u AND NOT v, copied unchanged.
54 ifdef(`OPERATION_andn_n',
55 ` define(`func',`mpn_andn_n')
56 define(`logop', `andcm $1 = $2, $3')
57 define(`notormov', `mov $1 = $2')')
C nand_n: u AND v, then complemented.
58 ifdef(`OPERATION_nand_n',
59 ` define(`func',`mpn_nand_n')
60 define(`logop', `and $1 = $2, $3')
61 define(`notormov', `sub $1 = -1, $2')')
C ior_n: u OR v, copied unchanged.
62 ifdef(`OPERATION_ior_n',
63 ` define(`func',`mpn_ior_n')
64 define(`logop', `or $1 = $2, $3')
65 define(`notormov', `mov $1 = $2')')
C iorn_n: the andcm operands are swapped on purpose, giving v AND NOT u;
C complementing that yields u OR NOT v.
66 ifdef(`OPERATION_iorn_n',
67 ` define(`func',`mpn_iorn_n')
68 define(`logop', `andcm $1 = $3, $2')
69 define(`notormov', `sub $1 = -1, $2')')
C nior_n: u OR v, then complemented.
70 ifdef(`OPERATION_nior_n',
71 ` define(`func',`mpn_nior_n')
72 define(`logop', `or $1 = $2, $3')
73 define(`notormov', `sub $1 = -1, $2')')
C xor_n: u XOR v, copied unchanged.
74 ifdef(`OPERATION_xor_n',
75 ` define(`func',`mpn_xor_n')
76 define(`logop', `xor $1 = $2, $3')
77 define(`notormov', `mov $1 = $2')')
C xnor_n: u XOR v, then complemented.
78 ifdef(`OPERATION_xnor_n',
79 ` define(`func',`mpn_xnor_n')
80 define(`logop', `xor $1 = $2, $3')
81 define(`notormov', `sub $1 = -1, $2')')
C The list below names the same eight entry points that the OPERATION_xxx
C blocks can assign to func.  MULFUNC_PROLOGUE itself is defined outside
C this file; presumably it tells the build which functions this one source
C can provide -- TODO confirm in the m4 support files.
83 MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
C func(rp, up, vp, n)
C
C For each limb position: load one 8-byte limb from up and one from vp,
C combine them with logop, pass the result through notormov, and store it
C to rp.  All three pointers are post-incremented by 8 per limb.
C
C Register roles as used below:
C   r10, r11    first limb pair, loaded before the dispatch
C   r16-r19     limbs loaded from up
C   r20-r23     limbs loaded from vp
C   r14, r15    alternating logop results
C   r8, r9      alternating notormov results, which are the values stored
C   r2          saved copy of ar.lc, restored just before returning
C   p15         set when n > 4
C
C Structure: dispatch on n mod 4 to one of four feed-in paths, a four-way
C unrolled main loop counted by ar.lc, and a wind-down chain .Lcj6 to .Lcj1
C in which entering at .LcjK performs exactly K stores and no loads.
C
C NOTE(review): the embedded listing numbers skip in several places, for
C example from 96 to 99, the bundle-opening braces have no visible closing
C brace, and the HAVE_ABI_32 ifdef has no visible closing quote.  This copy
C appears to have lost its punctuation-only lines; confirm against the
C upstream file before trying to assemble it.
85 ASM_START()
86 PROLOGUE(func)
87 .prologue
88 .save ar.lc, r2
89 .body
C 32-bit ABI only: widen the three pointers with addp4 and zero-extend the
C count with zxt4 before they are used.
90 ifdef(`HAVE_ABI_32',
91 ` addp4 rp = 0, rp C M I
92 addp4 up = 0, up C M I
93 addp4 vp = 0, vp C M I
94 nop.m 0
95 nop.m 0
96 zxt4 n = n C I
C Load the first limb pair and save the caller value of ar.lc in r2.
99 {.mmi
100 ld8 r10 = [up], 8 C M
101 ld8 r11 = [vp], 8 C M
102 mov.i r2 = ar.lc C I0
C r14 = n mod 4 selects the feed-in path, p15 records n > 4, and n is
C replaced by n / 4.
104 {.mmi
105 and r14 = 3, n C M I
106 cmp.lt p15, p14 = 4, n C M I
107 shr.u n = n, 2 C I0
110 {.mmi
111 cmp.eq p6, p0 = 1, r14 C M I
112 cmp.eq p7, p0 = 2, r14 C M I
113 cmp.eq p8, p0 = 3, r14 C M I
C Remainders 1, 2 and 3 branch away; remainder 0 falls through to .Lb00.
115 {.bbb
116 (p6) br.dptk .Lb01 C B
117 (p7) br.dptk .Lb10 C B
118 (p8) br.dptk .Lb11 C B
C n mod 4 == 0: load three more pairs and reduce the quotient by 2.  When
C p15 is clear the four loaded pairs are finished through .Lcj4.
121 .Lb00: ld8 r17 = [up], 8 C M
122 ld8 r21 = [vp], 8 C M
123 add n = -2, n C M I
125 ld8 r18 = [up], 8 C M
126 ld8 r22 = [vp], 8 C M
128 ld8 r19 = [up], 8 C M
129 ld8 r23 = [vp], 8 C M
130 (p15) br.cond.dpnt .grt4 C B
132 logop( r14, r10, r11) C M I
134 logop( r15, r17, r21) C M I
135 notormov( r8, r14) C M I
136 br .Lcj4 C B
C n mod 4 == 0 and n > 4: keep loading ahead, set the loop counter to
C n / 4 - 2, and join the main loop at .LL00.
138 .grt4: logop( r14, r10, r11) C M I
139 ld8 r16 = [up], 8 C M
140 ld8 r20 = [vp], 8 C M
142 logop( r15, r17, r21) C M I
143 ld8 r17 = [up], 8 C M
144 mov.i ar.lc = n C I0
145 notormov( r8, r14) C M I
146 ld8 r21 = [vp], 8 C M
147 br .LL00 C B
C n mod 4 == 1: reduce the quotient by 1.  When p15 is clear only the
C already loaded pair is processed and stored through .Lcj1.
149 .Lb01: add n = -1, n C M I
150 logop( r15, r10, r11) C M I
151 (p15) br.cond.dpnt .grt1 C B
154 notormov( r9, r15) C M I
155 br .Lcj1 C B
C n mod 4 == 1 and n > 4: load four more pairs and set the loop counter to
C n / 4 - 1.  br.cloop goes to .grt5 while the counter is nonzero;
C otherwise the five loaded pairs are finished through .Lcj5.
157 .grt1: ld8 r16 = [up], 8 C M
158 ld8 r20 = [vp], 8 C M
160 ld8 r17 = [up], 8 C M
161 ld8 r21 = [vp], 8 C M
162 mov.i ar.lc = n C I0
164 ld8 r18 = [up], 8 C M
165 ld8 r22 = [vp], 8 C M
167 ld8 r19 = [up], 8 C M
168 ld8 r23 = [vp], 8 C M
169 br.cloop.dptk .grt5 C B
172 logop( r14, r16, r20) C M I
173 notormov( r9, r15) C M I
174 br .Lcj5 C B
C More than five limbs: reload r16 and r20 with the next pair and join the
C main loop at .LL01.
176 .grt5: logop( r14, r16, r20) C M I
177 ld8 r16 = [up], 8 C M
178 notormov( r9, r15) C M I
179 ld8 r20 = [vp], 8 C M
180 br .LL01 C B
C n mod 4 == 2: load one more pair.  When p15 is clear the two loaded
C pairs are finished through .Lcj2.
182 .Lb10: ld8 r19 = [up], 8 C M
183 ld8 r23 = [vp], 8 C M
184 (p15) br.cond.dpnt .grt2 C B
186 logop( r14, r10, r11) C M I
188 logop( r15, r19, r23) C M I
189 notormov( r8, r14) C M I
190 br .Lcj2 C B
C n mod 4 == 2 and n > 4: load ahead and set the loop counter to n / 4 - 1.
C br.cloop enters the loop at its top while the counter is nonzero;
C otherwise the remaining six stores are done through .Lcj6.
192 .grt2: ld8 r16 = [up], 8 C M
193 ld8 r20 = [vp], 8 C M
194 add n = -1, n C M I
196 ld8 r17 = [up], 8 C M
197 ld8 r21 = [vp], 8 C M
198 logop( r14, r10, r11) C M I
200 ld8 r18 = [up], 8 C M
201 ld8 r22 = [vp], 8 C M
202 mov.i ar.lc = n C I0
204 logop( r15, r19, r23) C M I
205 ld8 r19 = [up], 8 C M
206 notormov( r8, r14) C M I
207 ld8 r23 = [vp], 8 C M
208 br.cloop.dptk .Loop C B
209 br .Lcj6 C B
C n mod 4 == 3: load two more pairs and reduce the quotient by 1.  When
C p15 is clear the three loaded pairs are finished through .Lcj3.
211 .Lb11: ld8 r18 = [up], 8 C M
212 ld8 r22 = [vp], 8 C M
213 add n = -1, n C M I
215 ld8 r19 = [up], 8 C M
216 ld8 r23 = [vp], 8 C M
217 logop( r15, r10, r11) C M I
218 (p15) br.cond.dpnt .grt3 C B
221 logop( r14, r18, r22) C M I
222 notormov( r9, r15) C M I
223 br .Lcj3 C B
C n mod 4 == 3 and n > 4: load ahead, set the loop counter to n / 4 - 1,
C and join the main loop at .LL11.
225 .grt3: ld8 r16 = [up], 8 C M
226 ld8 r20 = [vp], 8 C M
228 ld8 r17 = [up], 8 C M
229 ld8 r21 = [vp], 8 C M
230 mov.i ar.lc = n C I0
232 logop( r14, r18, r22) C M I
233 ld8 r18 = [up], 8 C M
234 notormov( r9, r15) C M I
235 ld8 r22 = [vp], 8 C M
236 br .LL11 C B
C Four-way unrolled loop.  Each of the four sections stores one finished
C limb, applies logop to a pair loaded earlier, runs notormov on the
C previous logop result, and reloads the two registers it just consumed.
C .LL01, .LL00 and .LL11 are mid-loop entry points for the feed-in paths.
238 C *** MAIN LOOP START ***
239 ALIGN(32)
240 .Loop: st8 [rp] = r8, 8 C M
241 logop( r14, r16, r20) C M I
242 notormov( r9, r15) C M I
243 ld8 r16 = [up], 8 C M
244 ld8 r20 = [vp], 8 C M
245 nop.b 0
247 .LL01: st8 [rp] = r9, 8 C M
248 logop( r15, r17, r21) C M I
249 notormov( r8, r14) C M I
250 ld8 r17 = [up], 8 C M
251 ld8 r21 = [vp], 8 C M
252 nop.b 0
254 .LL00: st8 [rp] = r8, 8 C M
255 logop( r14, r18, r22) C M I
256 notormov( r9, r15) C M I
257 ld8 r18 = [up], 8 C M
258 ld8 r22 = [vp], 8 C M
259 nop.b 0
261 .LL11: st8 [rp] = r9, 8 C M
262 logop( r15, r19, r23) C M I
263 notormov( r8, r14) C M I
264 ld8 r19 = [up], 8 C M
265 ld8 r23 = [vp], 8 C M
266 br.cloop.dptk .Loop ;; C B
267 C *** MAIN LOOP END ***
C Wind-down: same store, logop and notormov pattern as the loop but with
C no loads.  Entering at .LcjK performs the last K stores.
269 .Lcj6: st8 [rp] = r8, 8 C M
270 logop( r14, r16, r20) C M I
271 notormov( r9, r15) C M I
273 .Lcj5: st8 [rp] = r9, 8 C M
274 logop( r15, r17, r21) C M I
275 notormov( r8, r14) C M I
277 .Lcj4: st8 [rp] = r8, 8 C M
278 logop( r14, r18, r22) C M I
279 notormov( r9, r15) C M I
281 .Lcj3: st8 [rp] = r9, 8 C M
282 logop( r15, r19, r23) C M I
283 notormov( r8, r14) C M I
285 .Lcj2: st8 [rp] = r8, 8 C M
286 notormov( r9, r15) C M I
C Final store, then put back the ar.lc value saved in r2 and return via b0.
288 .Lcj1: st8 [rp] = r9, 8 C M
289 mov.i ar.lc = r2 C I0
290 br.ret.sptk.many b0 C B
291 EPILOGUE()
292 ASM_END()