beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / s390_32 / esame / aors_n.asm
blob98b0dbc7b08dfb345488e24e775d050687751d62
dnl  S/390-32 mpn_add_n and mpn_sub_n.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C                   cycles/limb
C z900               ?
C z990             2.75-3  (fast for even n, slow for odd n)
C z9                 ?
C z10                ?
C z196               ?

C TODO
C  * Optimise for small n
C  * Use r0 and save/restore one less register
C  * Using logops_n's v1 inner loop operand order makes the loop about 20%
C    faster, at the expense of highly alignment-dependent performance.

C INPUT PARAMETERS
C Symbolic names for the four argument registers used by the code below.
C n is only read while the entry code computes the loop count and the
C n mod 4 dispatch; after that its register (%r5) is reused for limb data.
define(`rp',    `%r2')
define(`up',    `%r3')
define(`vp',    `%r4')
define(`n',     `%r5')
C Addition variant, selected when OPERATION_add_n is defined.
C   ADSB     add logical, no carry in; used for the least significant limb
C   ADSBC    add logical with carry, storage operand
C   ADSBCR   add logical with carry, register operand (not referenced by the
C            code visible in this file)
C   RETVAL   turn the final carry into the return value: lhi clears %r2
C            without disturbing the condition code, then alcr computes
C            0 + 0 + carry
C   func     name of the entry point emitted by PROLOGUE
C   func_nc  name of the carry-in variant (not referenced by the code visible
C            in this file)
ifdef(`OPERATION_add_n', `
  define(ADSB,          al)
  define(ADSBCR,        alcr)
  define(ADSBC,         alc)
  define(RETVAL,`dnl
        lhi     %r2, 0
        alcr    %r2, %r2')
  define(func,          mpn_add_n)
  define(func_nc,       mpn_add_nc)')
C Subtraction variant, selected when OPERATION_sub_n is defined.
C   ADSB     subtract logical, no borrow in; used for the least significant
C            limb
C   ADSBC    subtract logical with borrow, storage operand
C   ADSBCR   subtract logical with borrow, register operand (not referenced
C            by the code visible in this file)
C   RETVAL   turn the final borrow into the return value: slbr of %r2 with
C            itself gives 0 or -1 depending on the borrow, and lcr negates
C            that into 0 or 1
C   func     name of the entry point emitted by PROLOGUE
C   func_nc  name of the borrow-in variant (not referenced by the code
C            visible in this file)
ifdef(`OPERATION_sub_n', `
  define(ADSB,          sl)
  define(ADSBCR,        slbr)
  define(ADSBC,         slb)
  define(RETVAL,`dnl
        slbr    %r2, %r2
        lcr     %r2, %r2')
  define(func,          mpn_sub_n)
  define(func_nc,       mpn_sub_nc)')
MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)

ASM_START()

C func(rp, up, vp, n) -- mpn_add_n or mpn_sub_n, depending on which
C OPERATION_* symbol selected the ADSB/ADSBC/RETVAL definitions.
C
C Combines the n 32-bit limbs at up with the n limbs at vp, least significant
C limb first, stores the n result limbs at rp and returns the final carry or
C borrow (0 or 1) in %r2.
C
C Register usage:
C   %r1      loop counter, (n+3)/4 = number of chunks of up to four limbs
C   %r7      (n+3) mod 4 while dispatching, limb data afterwards
C   %r5-%r8  limb data; %r5 is n on entry, already copied to %r1 by then
C   %r6-%r8  stored at 24(%r15) on entry and reloaded before returning
C
C The carry/borrow travels in the condition code from one ADSB/ADSBC to the
C next, so the instructions placed in between (l, lm, st, stm, la, brct, j)
C must not change the condition code.  That is why the pointers are advanced
C with la and the loop is closed with brct.
C
C n = 0 is not handled: dispatch would select L(b0) with a zero counter, four
C limbs would be processed and brct would then decrement the counter past
C zero.  Presumably callers guarantee n >= 1 -- confirm against the mpn
C interface contract.
PROLOGUE(func)
        stm     %r6, %r8, 24(%r15)      C save r6-r8, reloaded at L(end)

        ahi     n, 3
        lhi     %r7, 3
        lr      %r1, n
        srl     %r1, 2                  C r1 = (n+3)/4, the chunk count
        nr      %r7, n                  C r7 = (n+3) mod 4
        je      L(b1)                   C r7 = 0: n = 1 (mod 4)
        chi     %r7, 2
        jl      L(b2)                   C r7 = 1: n = 2 (mod 4)
        jne     L(b0)                   C r7 = 3: n = 0 (mod 4)
                                        C r7 = 2: n = 3 (mod 4), fall through

C Leading chunk of three limbs.
L(b3):  lm      %r5, %r7, 0(up)
        la      up, 12(up)
        ADSB    %r5, 0(vp)
        ADSBC   %r6, 4(vp)
        ADSBC   %r7, 8(vp)
        la      vp, 12(vp)
        stm     %r5, %r7, 0(rp)
        la      rp, 12(rp)
        brct    %r1, L(top)             C more chunks left: enter main loop
        j       L(end)

C Leading chunk of four limbs; joins the main loop halfway at L(m0).
L(b0):  lm      %r5, %r8, 0(up)         C These redundant insns are no mistake,
        la      up, 16(up)              C they are needed to make the main loop
        ADSB    %r5, 0(vp)              C run fast for n = 0 (mod 4).
        ADSBC   %r6, 4(vp)
        j       L(m0)

C Leading chunk of one limb.
L(b1):  l       %r5, 0(up)
        la      up, 4(up)
        ADSB    %r5, 0(vp)
        la      vp, 4(vp)
        st      %r5, 0(rp)
        la      rp, 4(rp)
        brct    %r1, L(top)             C more chunks left: enter main loop
        j       L(end)

C Leading chunk of two limbs.
L(b2):  lm      %r5, %r6, 0(up)
        la      up, 8(up)
        ADSB    %r5, 0(vp)
        ADSBC   %r6, 4(vp)
        la      vp, 8(vp)
        stm     %r5, %r6, 0(rp)
        la      rp, 8(rp)
        brct    %r1, L(top)             C more chunks left: enter main loop
        j       L(end)

C Main loop: four limbs per iteration, carry/borrow chained through the
C condition code from the previous chunk.
L(top): lm      %r5, %r8, 0(up)
        la      up, 16(up)
        ADSBC   %r5, 0(vp)
        ADSBC   %r6, 4(vp)
L(m0):  ADSBC   %r7, 8(vp)
        ADSBC   %r8, 12(vp)
        la      vp, 16(vp)
        stm     %r5, %r8, 0(rp)
        la      rp, 16(rp)
        brct    %r1, L(top)

C Convert the final carry/borrow into the 0/1 return value in r2, restore the
C saved registers and return.
L(end): RETVAL
        lm      %r6, %r8, 24(%r15)
        br      %r14
EPILOGUE()