beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / sparc32 / v9 / sub_n.asm
blob636c73bf35c3d925b8a04f654f537024acba8223
1 dnl SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
2 dnl store difference in a third limb vector.
4 dnl Copyright 2001 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
7 dnl
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
10 dnl
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
14 dnl
15 dnl or
16 dnl
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any
19 dnl later version.
20 dnl
21 dnl or both in parallel, as here.
22 dnl
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 dnl for more details.
27 dnl
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
35 C INPUT PARAMETERS
C rp  (%o0): base address used for the result stores.
C s1p (%o1): base address of the limbs being subtracted from (minuend).
C s2p (%o2): base address of the limbs being subtracted (subtrahend).
C n   (%o3): limb count; the file header states it is greater than 0.
36 define(rp,%o0)
37 define(s1p,%o1)
38 define(s2p,%o2)
39 define(n,%o3)
C cy is not an input: %g1 is scratch holding the borrow carried from one
C limb to the next.
40 define(cy,%g1)
42 C This code uses 64-bit operations on `o' and `g' registers. It doesn't
43 C require that `o' registers' upper 32 bits are preserved by the operating
44 C system, but if they are not, they must be zeroed. That is indeed what
45 C happens at least on Slowaris 2.5 and 2.6.
47 C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
48 C about 10 cycles/limb from the Ecache.
C mpn_sub_n: limb-wise subtraction of the n limbs at s2p from the n limbs at
C s1p, storing each 32-bit difference at rp and leaving the final borrow in
C %o0.
C
C Register roles inside this routine:
C   %o4, %o5   pending s1 / s2 limb at an even index
C   %g2, %g3   pending s1 / s2 limb at an odd index
C   %g4        64-bit difference of the limb pair being processed
C   cy (%g1)   borrow out of the previous limb, 0 or 1
C
C Limbs are loaded with lduw (unsigned word load), subtracted with a plain
C 64-bit sub, and the borrow is recovered as bit 63 of the result via srlx.
C Only the low 32 bits are written back, with stw.
50 ASM_START()
51 PROLOGUE(mpn_sub_n)
C Preload limb 0 of both operands.
52 lduw [s1p+0],%o4
53 lduw [s2p+0],%o5
C n -= 2; the condition codes set here drive both branches below.
54 addcc n,-2,n
C Negative result (n was 1): go to the single-limb tail.
55 bl,pn %icc,L(end1)
C Delay slot of the branch above.
C NOTE(review): this delay slot is not annulled, so the load appears to
C execute even when n was 1, reading s1p[1] -- confirm that touching one
C limb past the operand is acceptable.
56 lduw [s1p+4],%g2
57 lduw [s2p+4],%g3
C Zero result (n was 2): go to the two-limb tail.  The loads in between do
C not write the condition codes.
58 be,pn %icc,L(end2)
C Delay slot: start with no borrow.
59 mov 0,cy
61 .align 16
C Main loop, two limbs per pass.  At the top, two limb pairs are loaded but
C not yet subtracted, and n counts the limbs not yet loaded (n > 0).
62 L(loop):
C Even limb: raw difference, then refill %o4 with the next even s1 limb.
63 sub %o4,%o5,%g4
C rp is advanced up front, so the stores of this pass use rp-8 and rp-4.
64 add rp,8,rp
65 lduw [s1p+8],%o4
C The fitod result in %f2 is never read in this file; presumably a filler to
C steer instruction grouping -- TODO confirm.  Note that it writes %f2.
66 fitod %f0,%f2
67 C ---
C Apply the borrow from the previous limb.
68 sub %g4,cy,%g4
C Account for the even limb just loaded; flags are tested by the be below.
69 addcc n,-1,n
70 lduw [s2p+8],%o5
71 fitod %f0,%f2
72 C ---
C New borrow = bit 63 of the 64-bit difference.
73 srlx %g4,63,cy
C s2p moves here, which is why the odd s2 load further down uses offset 4.
74 add s2p,8,s2p
75 stw %g4,[rp-8]
C n reached 0: the limb just loaded is the last one, leave through the odd
C exit.  The target is L(exito)+4 because the first instruction of L(exito)
C is the same sub that sits in the delay slot right after this branch.
76 be,pn %icc,L(exito)+4
77 C ---
C Odd limb (this sub is also the delay slot of the branch above).
78 sub %g2,%g3,%g4
C Account for the odd limb loaded next; flags are tested by the bne below.
79 addcc n,-1,n
C s1p has not been advanced yet in this pass, hence offset 12.
80 lduw [s1p+12],%g2
81 fitod %f0,%f2
82 C ---
83 sub %g4,cy,%g4
84 add s1p,8,s1p
85 lduw [s2p+4],%g3
86 fitod %f0,%f2
87 C ---
88 srlx %g4,63,cy
C Continue while the second addcc left n nonzero.
89 bne,pt %icc,L(loop)
C Delay slot: store the odd limb of this pass.
90 stw %g4,[rp-4]
91 C ---
C Fall-through exit: an even pair (%o4/%o5) and then an odd pair (%g2/%g3)
C are still pending; they go to rp+0 and rp+4.
92 L(exite):
93 sub %o4,%o5,%g4
94 sub %g4,cy,%g4
95 srlx %g4,63,cy
96 stw %g4,[rp+0]
97 sub %g2,%g3,%g4
98 sub %g4,cy,%g4
99 stw %g4,[rp+4]
100 retl
C Delay slot of retl: final borrow goes to %o0, which is also rp; rp is no
C longer needed after the store above.
101 srlx %g4,63,%o0
C Exit taken from the middle of a pass: the pending odd pair (%g2/%g3) comes
C before the even pair just loaded into %o4/%o5, so it is stored at rp-4 and
C the even one at rp+0.  The only branch to this label visible here enters
C at L(exito)+4, having already executed the first sub in its delay slot.
103 L(exito):
104 sub %g2,%g3,%g4
105 sub %g4,cy,%g4
106 srlx %g4,63,cy
107 stw %g4,[rp-4]
108 sub %o4,%o5,%g4
109 sub %g4,cy,%g4
110 stw %g4,[rp+0]
111 retl
C Delay slot of retl: final borrow into %o0.
112 srlx %g4,63,%o0
C Single-limb case: no incoming borrow, one difference stored at rp+0.
114 L(end1):
115 sub %o4,%o5,%g4
116 stw %g4,[rp+0]
117 retl
C Delay slot of retl: borrow of the only limb into %o0.
118 srlx %g4,63,%o0
C Two-limb case: limb 0 has no incoming borrow; its borrow feeds limb 1.
120 L(end2):
121 sub %o4,%o5,%g4
122 srlx %g4,63,cy
123 stw %g4,[rp+0]
124 sub %g2,%g3,%g4
125 sub %g4,cy,%g4
126 stw %g4,[rp+4]
127 retl
C Delay slot of retl: final borrow into %o0.
128 srlx %g4,63,%o0
129 EPILOGUE(mpn_sub_n)