beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / ia64 / sec_tabselect.asm
blob9b11cde8c2f9227466cf5b1d6fd45a4fcaed7349
1 dnl IA-64 mpn_sec_tabselect.
3 dnl Copyright 2011 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C Itanium: ?
35 C Itanium 2: 2.5
37 C NOTES
38 C * Using software pipelining could trivially yield 2 c/l without unrolling,
39 C or 1+epsilon with unrolling. (This code was modelled after the powerpc64
40 C code, for simplicity.)
C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
C
C The table at tp is read as nents consecutive entries of n limbs each.  For
C every entry, each limb of rp is rewritten as
C	rp[i] = (entry[i] AND mask) OR (rp[i] AND NOT mask)
C where mask is all ones for entry number which (counting from 0) and zero
C for every other entry.  After the last entry rp therefore holds a copy of
C entry number which (provided 0 <= which < nents; otherwise rp keeps its old
C contents).  Every entry is loaded and rp is loaded and stored for every
C entry, whatever the value of which; the selection is done with the mask
C only, never with a branch on which.
C
C No check is made on the arguments; the loop setup below only works for
C n >= 1 and nents >= 1.
C
C Register use beyond the incoming arguments: r2 (saved ar.lc), r3, r8, r9,
C r14-r19, p6-p10 and ar.lc.  All of these except ar.lc are scratch in the
C Itanium software conventions.  ar.lc is a preserved register, so it is
C saved in r2 on entry and restored before returning.  The loop count lives
C in r3 and not in r4-r7, since r4-r7 are preserved registers as well.

define(`rp',     `r32')
define(`tp',     `r33')
define(`n',      `r34')
define(`nents',  `r35')
define(`which',  `r36')

define(`mask',   `r8')
define(`cnt',    `r3')

C rp1/tp1 are the incoming pointers themselves and handle limbs 0, 2, 4, ...
C of the unrolled loop; rp2/tp2 run one limb ahead and handle 1, 3, 5, ...
define(`rp1',    `r32')
define(`tp1',    `r33')
define(`rp2',    `r14')
define(`tp2',    `r15')

ASM_START()
PROLOGUE(mpn_sec_tabselect)
	.prologue
	.save	ar.lc, r2
{.mmi;	nop	0
	nop	0
	mov.i	r2 = ar.lc		C save ar.lc, restored before return
}
	.body
ifdef(`HAVE_ABI_32',`
{.mmi;	addp4	rp = 0, rp		C			M I
	addp4	tp = 0, tp		C			M I
	zxt4	n = n			C			I
}{.mii;	nop	0
	zxt4	nents = nents		C			I
	zxt4	which = which		C			I
	;;
}')
{.mmi;	add	rp2 = 8, rp1
	add	tp2 = 8, tp1
	add	cnt = -2, n
	;;
}{.mmi;	cmp.eq	p10, p0 = 1, n		C p10 set when n is exactly 1
	and	r9 = 1, n		C r9 = low bit of n (odd/even)
	shr.u	cnt = cnt, 1		C inner loop count
	;;
}{.mmi;	cmp.eq	p8, p0 = 0, r9		C p8 set when n is even
	sub	which = nents, which	C matched against the nents countdown
	shl	n = n, 3		C n becomes the entry size in bytes
	;;
}
L(outer):
{.mmi;	cmp.eq	p6, p7 = which, nents	C are we at the selected table entry?
	nop	0
	mov	ar.lc = cnt		C			I0
	;;
}{.mmb;
 (p6)	mov	mask = -1
 (p7)	mov	mask = 0
 (p8)	br.dptk	L(top)			C branch to loop entry if n even
	;;
C n is odd: handle one limb here so that the 2-way loop sees an even count
}{.mmi;	ld8	r16 = [tp1], 8
	add	tp2 = 8, tp2
	nop	0
	;;
}{.mmi;	ld8	r18 = [rp1]
	and	r16 = r16, mask
	nop	0
	;;
}{.mmi;	andcm	r18 = r18, mask
	;;
	or	r16 = r16, r18
	nop	0
	;;
}{.mmb;	st8	[rp1] = r16, 8
	add	rp2 = 8, rp2
 (p10)	br.dpnt	L(end)			C n = 1, nothing left for the loop
	;;
}
	ALIGN(32)
L(top):
{.mmi;	ld8	r16 = [tp1], 16
	ld8	r17 = [tp2], 16
	nop	0
	;;
}{.mmi;	ld8	r18 = [rp1]
	and	r16 = r16, mask
	nop	0
}{.mmi;	ld8	r19 = [rp2]
	and	r17 = r17, mask
	nop	0
	;;
}{.mmi;	andcm	r18 = r18, mask
	andcm	r19 = r19, mask
	nop	0
	;;
}{.mmi;	or	r16 = r16, r18
	or	r17 = r17, r19
	nop	0
	;;
}{.mmb;	st8	[rp1] = r16, 16
	st8	[rp2] = r17, 16
	br.cloop.dptk	L(top)
	;;
}
L(end):
{.mmi;	sub	rp1 = rp1, n		C move rp back to beginning
	sub	rp2 = rp2, n		C move rp back to beginning
	cmp.ne	p9, p0 = 1, nents	C p9 set while entries remain
}{.mmb;	add	nents = -1, nents
	nop	0
 (p9)	br.dptk	L(outer)
	;;
}{.mib;	nop	0
	mov.i	ar.lc = r2		C restore the ar.lc value saved on entry
	br.ret.sptk.many b0
}
EPILOGUE()