beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / sparc64 / ultrasparc1234 / sub_n.asm
blob9fb7f70747572280c19a09fc6322be0b3c26157f
1 dnl SPARC v9 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
2 dnl store difference in a third limb vector.
4 dnl Copyright 2001-2003, 2011 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
7 dnl
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
10 dnl
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
14 dnl
15 dnl or
16 dnl
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any
19 dnl later version.
20 dnl
21 dnl or both in parallel, as here.
22 dnl
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 dnl for more details.
27 dnl
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
32 include(`../config.m4')
34 C cycles/limb
35 C UltraSPARC 1&2: 4
36 C UltraSPARC 3: 4.5
38 C Compute carry-out from the most significant bits of u,v, and r, where
39 C r=u-v-carry_in, using logic operations.
41 C This code runs at 4 cycles/limb on UltraSPARC 1 and 2. It has a 4 insn
42 C recurrence, and on the UltraSPARC 1 and 2 the IE units are 100% saturated.
43 C Therefore, it seems futile to try to optimize this any further...
45 C INPUT PARAMETERS
C Operand registers.  The code stores result limbs through rp, loads source
C limbs through up and vp, and counts n down as limbs are consumed.
46 define(`rp',`%i0')
47 define(`up',`%i1')
48 define(`vp',`%i2')
49 define(`n',`%i3')
C Working registers for four limb pairs: u0..u3 receive limbs loaded via up
C (even-numbered locals), v0..v3 receive limbs loaded via vp (odd-numbered
C locals).
51 define(`u0',`%l0')
52 define(`u1',`%l2')
53 define(`u2',`%l4')
54 define(`u3',`%l6')
55 define(`v0',`%l1')
56 define(`v1',`%l3')
57 define(`v2',`%l5')
58 define(`v3',`%l7')
C Borrow passed from one limb to the next.  mpn_sub_n clears it on entry,
C mpn_sub_nc leaves the register as it arrived.
60 define(`cy',`%i4')
C Floating-point filler instructions used inside the main loop.  Their
C results in %f2 and %f4 are never read by this file.
62 define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
63 define(`fmnop',`fmuld %f0,%f0,%f4') dnl A quasi nop running in the FM pipe
C ASM_START and REGISTER are macros defined outside this file.  %g2 and %g3
C are used below as temporaries for the borrow computation.
C NOTE(review): REGISTER presumably emits the assembler declaration needed
C before these global registers may be used as scratch -- confirm in the
C macro definitions.
65 ASM_START()
66 REGISTER(%g2,#scratch)
67 REGISTER(%g3,#scratch)
C mpn_sub_nc -- entry point that does not initialise cy, so whatever value
C is already in %i4 is used as the incoming borrow.
C NOTE(review): presumably the caller supplies the carry-in as a fifth
C argument -- confirm against the C prototype.
C This stub contains no return of its own: both branch targets, .Loop0 and
C L(com), are labels inside mpn_sub_n.
68 PROLOGUE(mpn_sub_nc)
69 save %sp,-160,%sp C new register window, 160 bytes of stack
71 fitod %f0,%f0 C make sure f0 contains small, quiet number
72 subcc n,4,%g0 C set flags for n - 4, result discarded
73 bl,pn %xcc,.Loop0 C n < 4: go straight to the one-limb loop
74 nop C delay slot left empty so cy keeps its incoming value
75 b,a L(com) C n >= 4: join the common path, delay slot annulled
76 EPILOGUE()
C mpn_sub_n -- subtract the n-limb vector at vp from the n-limb vector at up,
C store the n difference limbs at rp, and hand the final borrow back in %i0.
C
C Borrow computation for one limb, with r = u - v - cy:
C   g2 = u | ~v        orn  u,v,g2
C   g2 = r | ~g2       orn  r,g2,g2     which equals r | (~u & v)
C   g3 = u & ~v        andn u,v,g3
C   g2 = g2 & ~g3      andn g2,g3,g2
C   cy = g2 >> 63      srlx g2,63,cy    top bit is the borrow out
C
C Control flow:
C   n < 4    .Loop0 only, one limb per iteration.
C   n >= 4   L(com) preloads four limb pairs and starts the first subtract.
C            .Loop stores four limbs and loads four more per iteration, and
C            repeats while the adjusted count stays non-negative.
C            .Lend4567 finishes the four limbs still held in registers.
C            Any 1..3 limbs left after that are handled by .Loop0.
C
C The instruction order inside .Loop is hand scheduled and must be kept.
78 PROLOGUE(mpn_sub_n)
79 save %sp,-160,%sp C new register window, 160 bytes of stack
81 fitod %f0,%f0 C make sure f0 contains small, quiet number
82 subcc n,4,%g0 C set flags for n - 4, result discarded
83 bl,pn %xcc,.Loop0 C n < 4: go straight to the one-limb loop
84 mov 0,cy C delay slot: start with no borrow
85 L(com):
C Preload four limb pairs and step up and vp past them.
86 ldx [up+0],u0
87 ldx [vp+0],v0
88 add up,32,up
89 ldx [up-24],u1
90 ldx [vp+8],v1
91 add vp,32,vp
92 ldx [up-16],u2
93 ldx [vp-16],v2
94 ldx [up-8],u3
95 ldx [vp-8],v3
96 subcc n,8,n C n -= 8; negative means fewer than 8 limbs in total
97 sub u0,v0,%g1 C main sub
98 sub %g1,cy,%g5 C carry sub
99 orn u0,v0,%g2 C first term of the borrow formula for limb 0
100 bl,pn %xcc,.Lend4567 C flags still from the subcc: skip the main loop
101 fanop
102 b,a .Loop C annulled branch over the alignment padding
104 .align 16
C On entry to .Loop, g5 holds the difference limb for u0,v0 and g2 holds
C u0 | ~v0.  Each pass finishes that limb, processes limbs 1..3, reloads all
C eight limb registers and starts limb 0 of the next pass.
C NOTE(review): the C -- markers split the loop into groups of four
C instructions, presumably one issue group each -- confirm against the
C UltraSPARC pipeline documentation.
105 C START MAIN LOOP
106 .Loop: orn %g5,%g2,%g2
107 andn u0,v0,%g3
108 ldx [up+0],u0 C u0 is free once the andn has read it
109 fanop
110 C --
111 andn %g2,%g3,%g2
112 ldx [vp+0],v0
113 add up,32,up
114 fanop
115 C --
116 srlx %g2,63,cy C borrow out of limb 0
117 sub u1,v1,%g1
118 stx %g5,[rp+0] C store limb 0 before g5 is overwritten
119 fanop
120 C --
121 sub %g1,cy,%g5
122 orn u1,v1,%g2
123 fmnop
124 fanop
125 C --
126 orn %g5,%g2,%g2
127 andn u1,v1,%g3
128 ldx [up-24],u1 C up was already advanced by 32
129 fanop
130 C --
131 andn %g2,%g3,%g2
132 ldx [vp+8],v1
133 add vp,32,vp
134 fanop
135 C --
136 srlx %g2,63,cy C borrow out of limb 1
137 sub u2,v2,%g1
138 stx %g5,[rp+8]
139 fanop
140 C --
141 sub %g1,cy,%g5
142 orn u2,v2,%g2
143 fmnop
144 fanop
145 C --
146 orn %g5,%g2,%g2
147 andn u2,v2,%g3
148 ldx [up-16],u2
149 fanop
150 C --
151 andn %g2,%g3,%g2
152 ldx [vp-16],v2
153 add rp,32,rp C later stores use negative offsets from the new rp
154 fanop
155 C --
156 srlx %g2,63,cy C borrow out of limb 2
157 sub u3,v3,%g1
158 stx %g5,[rp-16]
159 fanop
160 C --
161 sub %g1,cy,%g5
162 orn u3,v3,%g2
163 fmnop
164 fanop
165 C --
166 orn %g5,%g2,%g2
167 andn u3,v3,%g3
168 ldx [up-8],u3
169 fanop
170 C --
171 andn %g2,%g3,%g2
172 subcc n,4,n C four more limbs have been loaded
173 ldx [vp-8],v3
174 fanop
175 C --
176 srlx %g2,63,cy C borrow out of limb 3
177 sub u0,v0,%g1 C start limb 0 of the next group
178 stx %g5,[rp-8]
179 fanop
180 C --
181 sub %g1,cy,%g5
182 orn u0,v0,%g2
183 bge,pt %xcc,.Loop C flags from the subcc above: n >= 0 repeats
184 fanop
185 C END MAIN LOOP
C Wind-down: finish the four limb pairs already in registers, same steps as
C the loop body but without loads and without filler instructions.
186 .Lend4567:
187 orn %g5,%g2,%g2
188 andn u0,v0,%g3
189 andn %g2,%g3,%g2
190 srlx %g2,63,cy
191 sub u1,v1,%g1
192 stx %g5,[rp+0]
193 sub %g1,cy,%g5
194 orn u1,v1,%g2
195 orn %g5,%g2,%g2
196 andn u1,v1,%g3
197 andn %g2,%g3,%g2
198 srlx %g2,63,cy
199 sub u2,v2,%g1
200 stx %g5,[rp+8]
201 sub %g1,cy,%g5
202 orn u2,v2,%g2
203 orn %g5,%g2,%g2
204 andn u2,v2,%g3
205 andn %g2,%g3,%g2
206 add rp,32,rp
207 srlx %g2,63,cy
208 sub u3,v3,%g1
209 stx %g5,[rp-16]
210 sub %g1,cy,%g5
211 orn u3,v3,%g2
212 orn %g5,%g2,%g2
213 andn u3,v3,%g3
214 andn %g2,%g3,%g2
215 srlx %g2,63,cy
216 stx %g5,[rp-8]
218 addcc n,4,n C n is -4..-1 here, so n + 4 is the 0..3 limbs left over
219 bz,pn %xcc,.Lret C nothing left: return
220 fanop
C One limb per iteration.  Used for n < 4 and for the 1..3 leftover limbs.
C Requires n >= 1 on entry, since n is decremented before it is tested.
222 .Loop0: ldx [up],u0
223 add up,8,up
224 ldx [vp],v0
225 add vp,8,vp
226 add rp,8,rp
227 subcc n,1,n C sets the flags tested by the bnz below
228 sub u0,v0,%g1
229 orn u0,v0,%g2
230 sub %g1,cy,%g5
231 andn u0,v0,%g3
232 orn %g5,%g2,%g2
233 stx %g5,[rp-8] C rp was already advanced by 8
234 andn %g2,%g3,%g2
235 bnz,pt %xcc,.Loop0
236 srlx %g2,63,cy C delay slot: borrow out, executed on every pass
238 .Lret: mov cy,%i0 C return value is the final borrow
239 ret C return to the caller; without this, execution runs past EPILOGUE
240 restore C delay slot of ret: pop the register window
241 EPILOGUE(mpn_sub_n)