dnl  SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
dnl  sum in a third limb vector.

dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C INPUT PARAMETERS
define(res_ptr,%o0)
define(s1_ptr,%o1)
define(s2_ptr,%o2)
define(n,%o3)
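
C For orientation, a plain C sketch of the operation performed below.  It is
C a comment only, not part of the build, and assumes the usual GMP types
C (mp_limb_t is the 32-bit limb on sparc32, mp_size_t the count type):
C
C   mp_limb_t
C   mpn_add_n (mp_limb_t *rp, const mp_limb_t *s1p, const mp_limb_t *s2p,
C              mp_size_t n)
C   {
C     mp_limb_t cy = 0;
C     mp_size_t i;
C     for (i = 0; i < n; i++)
C       {
C         mp_limb_t a = s1p[i], b = s2p[i];
C         mp_limb_t t = a + b;           /* may wrap */
C         mp_limb_t s = t + cy;          /* add the incoming carry */
C         cy = (t < a) | (s < t);        /* carry out of this limb */
C         rp[i] = s;
C       }
C     return cy;                         /* carry out of the top limb */
C   }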

ASM_START()
PROLOGUE(mpn_add_n)
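C ldd/std move two limbs at a time but require 8-byte alignment, so the code
C dispatches on bit 2 of the (limb-aligned) pointers:
C   V1a - s2_ptr and res_ptr congruent mod 8: ldd from s2_ptr, std to
C         res_ptr, single ld loads from s1_ptr.
C   V1b - s1_ptr and res_ptr congruent mod 8: swap s1_ptr and s2_ptr,
C         then use V1a.
C   V2  - otherwise s1_ptr and s2_ptr are congruent mod 8: ldd from both
C         sources, results stored with single st.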
        xor     s2_ptr,res_ptr,%g1
        andcc   %g1,4,%g0
        bne     L(1)            C branch if alignment differs
        nop
C ** V1a **
L(0):   andcc   res_ptr,4,%g0   C res_ptr unaligned? Side effect: cy=0
        be      L(v1)           C if no, branch
        nop
C Add least significant limb separately to align res_ptr and s2_ptr
        ld      [s1_ptr],%g4
        add     s1_ptr,4,s1_ptr
        ld      [s2_ptr],%g2
        add     s2_ptr,4,s2_ptr
        add     n,-1,n
        addcc   %g4,%g2,%o4
        st      %o4,[res_ptr]
        add     res_ptr,4,res_ptr
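C Carry handling used throughout: addx %g0,%g0,%o4 materializes the carry
C flag as 0 or 1 in %o4, and a later subcc %g0,%o4,%g0 recreates it, since
C 0 - 1 sets the carry (borrow) flag while 0 - 0 clears it.  These subcc
C instructions, like the final addx after each retl, execute in delay slots.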
L(v1):  addx    %g0,%g0,%o4     C save cy in register
        cmp     n,2             C if n < 2 ...
        bl      L(end2)         C ... branch to tail code
        subcc   %g0,%o4,%g0     C restore cy

        ld      [s1_ptr+0],%g4
        addcc   n,-10,n
        ld      [s1_ptr+4],%g1
        ldd     [s2_ptr+0],%g2
        blt     L(fin1)
        subcc   %g0,%o4,%g0     C restore cy
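C The operand pair loaded just above (and the last pair loaded in each
C iteration below) is added only on the following round, so n carries an
C extra bias of 2 and the pair still in flight is finished at L(fin1)/L(end1).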
C Add blocks of 8 limbs until less than 8 limbs remain
L(loop1):
        addxcc  %g4,%g2,%o4
        ld      [s1_ptr+8],%g4
        addxcc  %g1,%g3,%o5
        ld      [s1_ptr+12],%g1
        ldd     [s2_ptr+8],%g2
        std     %o4,[res_ptr+0]
        addxcc  %g4,%g2,%o4
        ld      [s1_ptr+16],%g4
        addxcc  %g1,%g3,%o5
        ld      [s1_ptr+20],%g1
        ldd     [s2_ptr+16],%g2
        std     %o4,[res_ptr+8]
        addxcc  %g4,%g2,%o4
        ld      [s1_ptr+24],%g4
        addxcc  %g1,%g3,%o5
        ld      [s1_ptr+28],%g1
        ldd     [s2_ptr+24],%g2
        std     %o4,[res_ptr+16]
        addxcc  %g4,%g2,%o4
        ld      [s1_ptr+32],%g4
        addxcc  %g1,%g3,%o5
        ld      [s1_ptr+36],%g1
        ldd     [s2_ptr+32],%g2
        std     %o4,[res_ptr+24]
        addx    %g0,%g0,%o4     C save cy in register
        addcc   n,-8,n
        add     s1_ptr,32,s1_ptr
        add     s2_ptr,32,s2_ptr
        add     res_ptr,32,res_ptr
        bge     L(loop1)
        subcc   %g0,%o4,%g0     C restore cy

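C Fewer than 8 limbs remain to be loaded.  The counter n stays equal to the
C number of unstored limbs minus the block size minus the 2 limbs in flight;
C the block size drops from 8 to 2 here, hence the 8-2 adjustment.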
L(fin1):
        addcc   n,8-2,n
        blt     L(end1)
        subcc   %g0,%o4,%g0     C restore cy
C Add blocks of 2 limbs until less than 2 limbs remain
L(loope1):
        addxcc  %g4,%g2,%o4
        ld      [s1_ptr+8],%g4
        addxcc  %g1,%g3,%o5
        ld      [s1_ptr+12],%g1
        ldd     [s2_ptr+8],%g2
        std     %o4,[res_ptr+0]
        addx    %g0,%g0,%o4     C save cy in register
        addcc   n,-2,n
        add     s1_ptr,8,s1_ptr
        add     s2_ptr,8,s2_ptr
        add     res_ptr,8,res_ptr
        bge     L(loope1)
        subcc   %g0,%o4,%g0     C restore cy
L(end1):
        addxcc  %g4,%g2,%o4
        addxcc  %g1,%g3,%o5
        std     %o4,[res_ptr+0]
        addx    %g0,%g0,%o4     C save cy in register

        andcc   n,1,%g0
        be      L(ret1)
        subcc   %g0,%o4,%g0     C restore cy
C Add last limb
        ld      [s1_ptr+8],%g4
        ld      [s2_ptr+8],%g2
        addxcc  %g4,%g2,%o4
        st      %o4,[res_ptr+8]

L(ret1):
        retl
        addx    %g0,%g0,%o0     C return carry-out from most sign. limb

L(1):   xor     s1_ptr,res_ptr,%g1
        andcc   %g1,4,%g0
        bne     L(2)
C ** V1b **
        mov     s2_ptr,%g1
        mov     s1_ptr,s2_ptr
        b       L(0)
        mov     %g1,s1_ptr

C ** V2 **
C If we come here, the alignment of s1_ptr and res_ptr as well as the
C alignment of s2_ptr and res_ptr differ.  Since there are only two ways
C things can be aligned (that we care about) we now know that the alignments
C of s1_ptr and s2_ptr are the same.
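C Both sources can therefore be brought to 8-byte alignment below, but
C res_ptr then remains misaligned for std, so each sum is stored with a
C single st.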
L(2):   cmp     n,1
        be      L(jone)
        andcc   s1_ptr,4,%g0    C s1_ptr unaligned? Side effect: cy=0
        be      L(v2)           C if no, branch
C Add least significant limb separately to align s1_ptr and s2_ptr
        ld      [s1_ptr],%g4
        add     s1_ptr,4,s1_ptr
        ld      [s2_ptr],%g2
        add     s2_ptr,4,s2_ptr
        add     n,-1,n
        addcc   %g4,%g2,%o4
        st      %o4,[res_ptr]
        add     res_ptr,4,res_ptr

L(v2):  addx    %g0,%g0,%o4     C save cy in register
        addcc   n,-8,n
        blt     L(fin2)
        subcc   %g0,%o4,%g0     C restore cy
C Add blocks of 8 limbs until less than 8 limbs remain
L(loop2):
        ldd     [s1_ptr+0],%g2
        ldd     [s2_ptr+0],%o4
        addxcc  %g2,%o4,%g2
        st      %g2,[res_ptr+0]
        addxcc  %g3,%o5,%g3
        st      %g3,[res_ptr+4]
        ldd     [s1_ptr+8],%g2
        ldd     [s2_ptr+8],%o4
        addxcc  %g2,%o4,%g2
        st      %g2,[res_ptr+8]
        addxcc  %g3,%o5,%g3
        st      %g3,[res_ptr+12]
        ldd     [s1_ptr+16],%g2
        ldd     [s2_ptr+16],%o4
        addxcc  %g2,%o4,%g2
        st      %g2,[res_ptr+16]
        addxcc  %g3,%o5,%g3
        st      %g3,[res_ptr+20]
        ldd     [s1_ptr+24],%g2
        ldd     [s2_ptr+24],%o4
        addxcc  %g2,%o4,%g2
        st      %g2,[res_ptr+24]
        addxcc  %g3,%o5,%g3
        st      %g3,[res_ptr+28]
        addx    %g0,%g0,%o4     C save cy in register
        addcc   n,-8,n
        add     s1_ptr,32,s1_ptr
        add     s2_ptr,32,s2_ptr
        add     res_ptr,32,res_ptr
        bge     L(loop2)
        subcc   %g0,%o4,%g0     C restore cy

L(fin2):
        addcc   n,8-2,n
        blt     L(end2)
        subcc   %g0,%o4,%g0     C restore cy
L(loope2):
        ldd     [s1_ptr+0],%g2
        ldd     [s2_ptr+0],%o4
        addxcc  %g2,%o4,%g2
        st      %g2,[res_ptr+0]
        addxcc  %g3,%o5,%g3
        st      %g3,[res_ptr+4]
        addx    %g0,%g0,%o4     C save cy in register
        addcc   n,-2,n
        add     s1_ptr,8,s1_ptr
        add     s2_ptr,8,s2_ptr
        add     res_ptr,8,res_ptr
        bge     L(loope2)
        subcc   %g0,%o4,%g0     C restore cy
L(end2):
        andcc   n,1,%g0
        be      L(ret2)
        subcc   %g0,%o4,%g0     C restore cy
C Add last limb
L(jone):
        ld      [s1_ptr],%g4
        ld      [s2_ptr],%g2
        addxcc  %g4,%g2,%o4
        st      %o4,[res_ptr]

L(ret2):
        retl
        addx    %g0,%g0,%o0     C return carry-out from most sign. limb
EPILOGUE(mpn_add_n)