dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2003-2005 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
dnl  Pull in ../config.m4.  The macros used below but not defined in this file
dnl  (for example MULFUNC_PROLOGUE) presumably come from there -- confirm.
include(`../config.m4')
C TODO
C  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
C    wind-down code only).
C Selected when OPERATION_and_n is defined: the function is named mpn_and_n,
C logop(dst, a, b) emits dst = a & b, and notormov(dst, src) emits a plain
C register copy.
ifdef(`OPERATION_and_n',
`	define(`func',`mpn_and_n')
	define(`logop',		`and $1 = $2, $3')
	define(`notormov',	`mov $1 = $2')')
C Selected when OPERATION_andn_n is defined: the function is named mpn_andn_n,
C logop(dst, a, b) emits dst = a & ~b (andcm complements its second source),
C and notormov(dst, src) emits a plain register copy.
ifdef(`OPERATION_andn_n',
`	define(`func',`mpn_andn_n')
	define(`logop',		`andcm $1 = $2, $3')
	define(`notormov',	`mov $1 = $2')')
C Selected when OPERATION_nand_n is defined: the function is named mpn_nand_n,
C logop(dst, a, b) emits dst = a & b, and notormov(dst, src) emits
C dst = -1 - src, which is the bitwise complement of src.
ifdef(`OPERATION_nand_n',
`	define(`func',`mpn_nand_n')
	define(`logop',		`and $1 = $2, $3')
	define(`notormov',	`sub $1 = -1, $2')')
C Selected when OPERATION_ior_n is defined: the function is named mpn_ior_n,
C logop(dst, a, b) emits dst = a | b, and notormov(dst, src) emits a plain
C register copy.
ifdef(`OPERATION_ior_n',
`	define(`func',`mpn_ior_n')
	define(`logop',		`or $1 = $2, $3')
	define(`notormov',	`mov $1 = $2')')
C Selected when OPERATION_iorn_n is defined: the function is named mpn_iorn_n.
C logop(dst, a, b) passes its sources to andcm in swapped order, giving
C dst = b & ~a; notormov(dst, src) then emits dst = -1 - src (bitwise
C complement).  Applied in sequence the two yield a | ~b.
ifdef(`OPERATION_iorn_n',
`	define(`func',`mpn_iorn_n')
	define(`logop',		`andcm $1 = $3, $2')
	define(`notormov',	`sub $1 = -1, $2')')
C Selected when OPERATION_nior_n is defined: the function is named mpn_nior_n,
C logop(dst, a, b) emits dst = a | b, and notormov(dst, src) emits
C dst = -1 - src, which is the bitwise complement of src.
ifdef(`OPERATION_nior_n',
`	define(`func',`mpn_nior_n')
	define(`logop',		`or $1 = $2, $3')
	define(`notormov',	`sub $1 = -1, $2')')
C Selected when OPERATION_xor_n is defined: the function is named mpn_xor_n,
C logop(dst, a, b) emits dst = a ^ b, and notormov(dst, src) emits a plain
C register copy.
ifdef(`OPERATION_xor_n',
`	define(`func',`mpn_xor_n')
	define(`logop',		`xor $1 = $2, $3')
	define(`notormov',	`mov $1 = $2')')
C Selected when OPERATION_xnor_n is defined: the function is named mpn_xnor_n,
C logop(dst, a, b) emits dst = a ^ b, and notormov(dst, src) emits
C dst = -1 - src, which is the bitwise complement of src.
ifdef(`OPERATION_xnor_n',
`	define(`func',`mpn_xnor_n')
	define(`logop',		`xor $1 = $2, $3')
	define(`notormov',	`sub $1 = -1, $2')')
C The eight names listed here match the eight func definitions selected by the
C OPERATION_* conditionals above.  MULFUNC_PROLOGUE itself is defined outside
C this file; presumably it announces which entry points this source can be
C built as -- confirm against its definition.
MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
C Run each of the three pointer arguments through addp4 with a zero addend.
C NOTE(review): in the damaged text the first of these lines begins with an
C m4 opening quote, so the three instructions look like the body of a quoted
C macro argument (presumably a conditional on the 32-bit ABI) whose opening
C call, closing quote and any further instructions are not visible in this
C excerpt.  Only the stray numbers and broken lines are repaired here; the
C dangling quote is kept as found -- TODO restore the wrapper from the
C pristine file.
`	addp4		rp = 0, rp		C			M I
	addp4		up = 0, up		C			M I
	addp4		vp = 0, vp		C			M I
C Function entry.  Fetch the first limb of each source operand (post-
C incrementing both pointers by 8), save ar.lc in r2 so it can be restored
C before returning, and dispatch on r14.
C NOTE(review): the excerpt this block was recovered from skips lines here.
C The instruction that sets r14, and any bundle templates or stop bits, are
C not visible -- TODO restore from the pristine file.  r14 is presumably
C n mod 4; confirm.
	ld8		r10 = [up], 8		C			M
	ld8		r11 = [vp], 8		C			M
	mov.i		r2 = ar.lc		C			I0
C p15 = (4 < n), p14 = its complement.  p15 selects the .grtN variants of the
C feed-in code below.
	cmp.lt		p15, p14 = 4, n		C			M I
	cmp.eq		p6, p0 = 1, r14		C			M I
	cmp.eq		p7, p0 = 2, r14		C			M I
	cmp.eq		p8, p0 = 3, r14		C			M I
C r14 == 1, 2 or 3 branches away; in the visible text any other value falls
C through to .Lb00.
   (p6)	br.dptk		.Lb01			C			B
   (p7)	br.dptk		.Lb10			C			B
   (p8)	br.dptk		.Lb11			C			B
C Feed-in path reached by fall-through from the dispatch.  Three more limb
C pairs are loaded (r17/r21, r18/r22, r19/r23) in addition to r10/r11.
.Lb00:	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
  (p15)	br.cond.dpnt	.grt4			C			B
C p15 clear: start the first two results without loading anything further.
C NOTE(review): whatever followed these three instructions (presumably a
C branch into the wind-down code) is not visible in this excerpt -- TODO
C restore from the pristine file.
	logop( r14, r10, r11)			C			M I
	logop( r15, r17, r21)			C			M I
	notormov( r8, r14)			C			M I
C p15 set: the same three operations, interleaved with loads that refill
C r16/r20 and r17/r21 with the limbs that follow.
C NOTE(review): the continuation after the last load (presumably loop-count
C setup and a jump into the main loop) is not visible in this excerpt.
.grt4:	logop( r14, r10, r11)			C			M I
	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	logop( r15, r17, r21)			C			M I
	ld8		r17 = [up], 8		C			M
	notormov( r8, r14)			C			M I
	ld8		r21 = [vp], 8		C			M
C Feed-in path taken when r14 == 1.  n is decremented and the first limb pair
C (r10/r11, already loaded at entry) goes through logop straight away.
.Lb01:	add		n = -1, n		C			M I
	logop( r15, r10, r11)			C			M I
  (p15)	br.cond.dpnt	.grt1			C			B
C p15 clear: finish the single result in r9.
C NOTE(review): the instruction after this one (presumably a branch to the
C final store) is not visible in this excerpt.
	notormov( r9, r15)			C			M I
C p15 set: load four more limb pairs (r16/r20 .. r19/r23).
C NOTE(review): the instruction that loads ar.lc for the br.cloop below is not
C visible in this excerpt -- TODO restore from the pristine file.
.grt1:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
C br.cloop branches while ar.lc is non-zero, decrementing it.
	br.cloop.dptk	.grt5			C			B
C Counter exhausted: no further loads are issued on this path.
	logop( r14, r16, r20)			C			M I
	notormov( r9, r15)			C			M I
C Counter not exhausted: same two operations, plus reloads of r16/r20.
C NOTE(review): the continuation (presumably a jump into the main loop) is
C not visible in this excerpt.
.grt5:	logop( r14, r16, r20)			C			M I
	ld8		r16 = [up], 8		C			M
	notormov( r9, r15)			C			M I
	ld8		r20 = [vp], 8		C			M
C Feed-in path taken when r14 == 2.  One more limb pair (r19/r23) is loaded on
C top of r10/r11.
.Lb10:	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
  (p15)	br.cond.dpnt	.grt2			C			B
C p15 clear: start both results without loading anything further.
C NOTE(review): whatever followed these three instructions (presumably a
C branch into the wind-down code) is not visible in this excerpt.
	logop( r14, r10, r11)			C			M I
	logop( r15, r19, r23)			C			M I
	notormov( r8, r14)			C			M I
C p15 set: load r16/r20, r17/r21 and r18/r22, perform the same three
C operations, refill r19/r23, then enter the main loop at its top while
C ar.lc is non-zero.
C NOTE(review): the instruction that loads ar.lc, and the code reached when
C the br.cloop is not taken, are not visible in this excerpt.
.grt2:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	logop( r14, r10, r11)			C			M I
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	logop( r15, r19, r23)			C			M I
	ld8		r19 = [up], 8		C			M
	notormov( r8, r14)			C			M I
	ld8		r23 = [vp], 8		C			M
	br.cloop.dptk	.Loop			C			B
C Feed-in path taken when r14 == 3.  Two more limb pairs (r18/r22, r19/r23)
C are loaded, and the first pair (r10/r11) goes through logop.
.Lb11:	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
	logop( r15, r10, r11)			C			M I
  (p15)	br.cond.dpnt	.grt3			C			B
C p15 clear: continue without loading anything further.
C NOTE(review): whatever followed these two instructions (presumably a branch
C into the wind-down code) is not visible in this excerpt.
	logop( r14, r18, r22)			C			M I
	notormov( r9, r15)			C			M I
C p15 set: load r16/r20 and r17/r21, perform the same two operations and
C refill r18/r22.
C NOTE(review): the continuation (presumably loop-count setup and a jump into
C the main loop) is not visible in this excerpt.
.grt3:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	logop( r14, r18, r22)			C			M I
	ld8		r18 = [up], 8		C			M
	notormov( r9, r15)			C			M I
	ld8		r22 = [vp], 8		C			M
C *** MAIN LOOP START ***
C Four stages per iteration.  Each stage stores one finished limb to rp
C (alternating r8 and r9), runs logop on a limb pair loaded earlier, runs
C notormov on the previous stage's logop result, and reloads the two source
C registers it has just consumed.
C .LL01, .LL00 and .LL11 label the later stages; nothing visible in this
C excerpt branches to them (presumably the feed-in code does -- confirm).
C NOTE(review): bundle templates and stop bits between the stages are not
C visible in this excerpt -- TODO restore from the pristine file.
.Loop:	st8		[rp] = r8, 8		C			M
	logop( r14, r16, r20)			C			M I
	notormov( r9, r15)			C			M I
	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
.LL01:	st8		[rp] = r9, 8		C			M
	logop( r15, r17, r21)			C			M I
	notormov( r8, r14)			C			M I
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
.LL00:	st8		[rp] = r8, 8		C			M
	logop( r14, r18, r22)			C			M I
	notormov( r9, r15)			C			M I
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
.LL11:	st8		[rp] = r9, 8		C			M
	logop( r15, r19, r23)			C			M I
	notormov( r8, r14)			C			M I
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
C Repeat while ar.lc is non-zero (br.cloop decrements it on each taken branch).
	br.cloop.dptk	.Loop ;;		C			B
C *** MAIN LOOP END ***
C Wind-down.  Same store / logop / notormov pattern as the main loop, but
C with no further loads: the limbs already held in r16..r23 are finished and
C stored.  .Lcj6 is reached by fall-through when the loop ends; nothing
C visible in this excerpt branches to .Lcj5 .. .Lcj1 (presumably the short
C feed-in paths do -- confirm).
C NOTE(review): bundle templates and stop bits are not visible in this
C excerpt -- TODO restore from the pristine file.
.Lcj6:	st8		[rp] = r8, 8		C			M
	logop( r14, r16, r20)			C			M I
	notormov( r9, r15)			C			M I
.Lcj5:	st8		[rp] = r9, 8		C			M
	logop( r15, r17, r21)			C			M I
	notormov( r8, r14)			C			M I
.Lcj4:	st8		[rp] = r8, 8		C			M
	logop( r14, r18, r22)			C			M I
	notormov( r9, r15)			C			M I
.Lcj3:	st8		[rp] = r9, 8		C			M
	logop( r15, r19, r23)			C			M I
	notormov( r8, r14)			C			M I
.Lcj2:	st8		[rp] = r8, 8		C			M
	notormov( r9, r15)			C			M I
C Final store, then put back the ar.lc value saved in r2 at entry and return
C through b0.
.Lcj1:	st8		[rp] = r9, 8		C			M
	mov.i		ar.lc = r2		C			I0
	br.ret.sptk.many b0			C			B