beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / sparc64 / ultrasparct3 / aorslsh_n.asm
blob1014b1ba23a531331466b3e388ce82f692595dbf
1 dnl SPARC v9 mpn_addlsh_n and mpn_sublsh_n for T3/T4/T5.
3 dnl Contributed to the GNU project by Torbjörn Granlund.
5 dnl Copyright 2013 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
8 dnl
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
11 dnl
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
15 dnl
16 dnl or
17 dnl
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
20 dnl later version.
21 dnl
22 dnl or both in parallel, as here.
23 dnl
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl for more details.
28 dnl
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
35 C cycles/limb
36 C UltraSPARC T3: 11
37 C UltraSPARC T4: 4
39 C For sublsh_n we combine the two shifted limbs using xnor, using the identity
40 C (a xor not b) = (not (a xor b)) which equals (not (a or b)) when (a and b) =
41 C 0 as it is in our usage. This gives us the ones complement for free.
42 C Unfortunately, the same trick will not work for rsblsh_n, which will instead
43 C require a separate negation.
45 C FIXME: Add rsblsh_n to this file.
C Symbolic names for the incoming arguments (in registers after the save in
C the prologue) and for the complementary shift count.
47 define(`rp', `%i0')    C destination limb pointer (stores go through it)
48 define(`up', `%i1')    C first source pointer (limbs added unshifted)
49 define(`vp', `%i2')    C second source pointer (limbs shifted left by cnt)
50 define(`n', `%i3')    C limb count on entry, turned into a byte index
51 define(`cnt',`%i4')    C left shift count applied to the vp limbs
53 define(`tnc',`%o5')    C set to 64 - cnt, used for the right shifts
C Variant selection, keyed on which OPERATION_* symbol is defined
C (presumably supplied by the build, one per generated object file):
C   INITCY  instruction that presets the carry flag: clear for the add
C           variant, set for the subtract variant (carry-in of 1 for the
C           complemented operand)
C   MERGE   how the two shifted halves of a vp limb are combined: plain or
C           for add, xnor for subtract so the result comes out complemented
C   func    name of the entry point being assembled
C Each ifdef body is a quoted argument and must be terminated by its own
C closing quote and parenthesis, otherwise m4 swallows the rest of the file.
55 ifdef(`OPERATION_addlsh_n',`
56 define(`INITCY', `subcc %g0, 0, %g0')
57 define(`MERGE', `or')
58 define(`func', `mpn_addlsh_n')
59 ')
60 ifdef(`OPERATION_sublsh_n',`
61 define(`INITCY', `subcc %g0, 1, %g0')
62 define(`MERGE', `xnor')
63 define(`func', `mpn_sublsh_n')
64 ')
C The loop handles two limbs per iteration, so each operand is addressed
C through a pair of base registers.  The 0 names alias the argument
C registers themselves; the 1 names are separate output registers that the
C prologue derives from the 0 names.
66 define(`rp0', `rp')    C same register as rp
67 define(`rp1', `%o2')    C second destination base
68 define(`up0', `up')    C same register as up
69 define(`up1', `%o3')    C up0 + 8
70 define(`vp0', `vp')    C same register as vp
71 define(`vp1', `%o4')    C vp0 + 8
C Assembler preamble.  MULFUNC_PROLOGUE lists both entry points this file
C can produce (presumably consumed by the build machinery, confirm against
C the GMP configure scripts).  The REGISTER lines declare %g2 and %g3 as
C scratch; of the two only %g3 is actually used as a temporary below.
73 MULFUNC_PROLOGUE(mpn_addlsh_n mpn_sublsh_n)
74 ASM_START()
75 REGISTER(%g2,#scratch)
76 REGISTER(%g3,#scratch)
C func(rp, up, vp, n, cnt)
C
C Computes, limb by limb with carry propagation,
C   addlsh_n:  rp[] = up[] + (vp[] << cnt)
C   sublsh_n:  rp[] = up[] - (vp[] << cnt)
C over n 64-bit limbs.  The value returned in %i0 (the %o0 of the caller
C after restore) is the bits shifted out of the top vp limb plus the final
C carry (add) or borrow (subtract).
C
C cnt is presumably in the range 1..63: tnc = 64 - cnt is used as a right
C shift count, which would not give the intended zero for cnt = 0.
C
C Register use inside the loop:
C   %l0, %l1   current vp limbs (even / odd stream)
C   %l2, %l3   bits shifted out of the previous vp limb (srlx by tnc)
C   %g1, %g3   current vp limb shifted left by cnt
C   %l4, %l5   current up limbs
C   %l6, %l7   merged shifted vp limb (complemented for sublsh_n)
C   %o0, %o1   result limbs about to be stored
C The carry lives in the condition codes from INITCY until the return value
C is formed, so nothing in between may write the condition codes except the
C addxccc macro invocations.
77 PROLOGUE(func)
78 save %sp, -176, %sp    C new register window, 176-byte frame
79 mov 64, tnc
80 sub tnc, cnt, tnc    C tnc = 64 - cnt
82 andcc n, 1, %g0    C Z set iff n is even; consumed by the be below
83 sllx n, 3, n    C limbs -> bytes
84 add n, -16, n    C n = 8*limbs - 16
85 add up, n, up0    C bases point 16 bytes before the operand ends
86 add vp, n, vp0
87 add rp, n, rp0
88 add up0, 8, up1    C odd-stream bases, one limb further on
89 add vp0, 8, vp1
90 add rp0, -8, rp1    C store bases are biased by -16 because stores
91 add rp0, -16, rp0    C are issued after n has been advanced by 16
92 neg n, n    C n = 16 - 8*limbs, index counting up
93 be L(evn)    C even limb count: enter at lo0
94 INITCY    C delay slot: preset carry for add / subtract
C Odd limb count: handle the first limb alone, then join the loop at lo1.
96 L(odd): ldx [vp0 + n], %l1    C first vp limb
97 mov 0, %l2    C nothing shifted in from below
98 ldx [up0 + n], %l5    C first up limb
99 sllx %l1, cnt, %g3    C needed by the wd1 path
100 brgez n, L(wd1)    C n >= 0 here means a single limb: wind down
101 add n, 8, n    C delay slot, executed on both paths
102 ldx [vp0 + n], %l0    C next vp limb
103 b L(lo1)
104 sllx %l1, cnt, %g3    C delay slot: same value again, fills the slot
C Even limb count: preload one limb pair and join the loop at lo0.
106 L(evn): ldx [vp0 + n], %l0
107 mov 0, %l3    C nothing shifted in from below
108 ldx [up0 + n], %l4
109 ldx [vp1 + n], %l1
110 b L(lo0)
111 sllx %l0, cnt, %g1    C delay slot
C Main loop, two limbs per iteration, software pipelined: each half adds
C the limb prepared by the other half while loading and shifting its own.
113 L(top): addxccc(%l6, %l4, %o0)    C even result = merged vp + up + carry
114 ldx [vp0 + n], %l0
115 sllx %l1, cnt, %g3
116 stx %o0, [rp0 + n]
117 L(lo1): srlx %l1, tnc, %l3    C high bits of odd vp limb, for next limb
118 MERGE %l2, %g3, %l7    C odd shifted limb, low bits from previous
119 ldx [up0 + n], %l4
120 addxccc(%l7, %l5, %o1)    C odd result = merged vp + up + carry
121 ldx [vp1 + n], %l1
122 sllx %l0, cnt, %g1
123 stx %o1, [rp1 + n]
124 L(lo0): srlx %l0, tnc, %l2    C high bits of even vp limb, for next limb
125 MERGE %l3, %g1, %l6    C even shifted limb, low bits from previous
126 ldx [up1 + n], %l5
127 brlz,pt n, L(top)    C loop while the index is still negative
128 add n, 16, n    C delay slot: advance by two limbs
C Wind-down: the last two limbs are already loaded, just finish them.
130 addxccc(%l6, %l4, %o0)
131 sllx %l1, cnt, %g3
132 stx %o0, [rp0 + n]
133 L(wd1): srlx %l1, tnc, %l3    C bits shifted out of the top vp limb
134 MERGE %l2, %g3, %l7
135 addxccc(%l7, %l5, %o1)
136 stx %o1, [rp1 + n]    C last store; %i0 (rp0) is free from here on
C Return value for add: shifted-out bits plus the final carry.
138 ifdef(`OPERATION_addlsh_n',
139 ` addxc( %l3, %g0, %i0)')
C Return value for subtract: %g1 = carry - 1, so %l3 - %g1 is the
C shifted-out bits plus the borrow (borrow = 1 - carry).
140 ifdef(`OPERATION_sublsh_n',
141 ` addxc( %g0, %g0, %g1)
142 add %g1, -1, %g1
143 sub %l3, %g1, %i0')
C Return to the caller; restore sits in the delay slot of ret and pops the
C register window.  Without the ret, execution would run past EPILOGUE.
145 ret
146 restore
147 EPILOGUE()