beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / s390_32 / copyd.asm
blobff252bc1a6e1ae2acb21f606e22b20b500e94040
1 dnl S/390-32 mpn_copyd
3 dnl Copyright 2011 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
32 include(`../config.m4')
C            cycles/limb
C z900           1.65
C z990           1.125
C z9             ?
C z10            ?
C z196           ?
42 C FIXME:
43 C * Avoid saving/restoring callee-saves registers for n < 3. This could be
44 C done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
45 C We could then use r3...r10 in main loop.
C INPUT PARAMETERS
define(`rp_param',	`%r2')
define(`up_param',	`%r3')
define(`n',		`%r4')

C Working pointers.  The incoming %r2/%r3 (and %r4) are overwritten by the
C wider lm loads further down, so the pointers are kept in %r8/%r9 instead.
define(`rp',	`%r8')
define(`up',	`%r9')

C mpn_copyd: copy n limbs of 4 bytes each from up_param[] to rp_param[],
C starting at the highest address and moving towards the lowest.
C
C Register roles:
C   %r0..%r7	data window for lm/stm
C   %r7		holds n mod 8 during the dispatch, before it carries data
C   rp, up	biased so that 0..28(ptr) addresses the current 8-limb block
C   %r10	brct counter, floor(n/8) + 1
C   %r11	constant -32, the pointer step of the main loop
C
C The n mod 8 leftover limbs are the topmost ones; they are moved first, with
C displacements 32-4k..28 for k leftover limbs, after which the pointers are
C lowered by 4k so the main loop sees whole 8-limb blocks only.

ASM_START()
PROLOGUE(mpn_copyd)
	stm	%r6, %r11, 24(%r15)	C save %r6..%r11, reloaded at the exit

C Loop counter: one more than the number of whole 8-limb blocks, since brct
C decrements before it tests.
	la	%r10, 8(n)
	srl	%r10, 3
	lhi	%r11, -32

C Byte offset of the topmost 8-limb block: 4*n - 32 (negative for n < 8).
	lr	%r1, n
	sll	%r1, 2
	ahi	%r1, -32
	la	rp, 0(%r1,rp_param)	C FIXME use lay on z990 and later
	la	up, 0(%r1,up_param)	C FIXME use lay on z990 and later

C Dispatch on the number of leftover limbs.
	lhi	%r7, 7
	nr	%r7, n			C n mod 8
	chi	%r7, 2
	jh	L(rem3to7)
	chi	%r7, 1
	je	L(rem1)
	jh	L(rem2)

C No leftover limbs: enter the main loop only if a whole block exists.
L(rem0):
	brct	%r10, L(loop)
	j	L(done)

L(rem1):
	l	%r0, 28(up)
	ahi	up, -4
	st	%r0, 28(rp)
	ahi	rp, -4
	brct	%r10, L(loop)
	j	L(done)

L(rem2):
	lm	%r0, %r1, 24(up)
	ahi	up, -8
	stm	%r0, %r1, 24(rp)
	ahi	rp, -8
	brct	%r10, L(loop)
	j	L(done)

C Here %r7 is in 3..7; a value below 4 falls through to the 3-limb case.
L(rem3to7):
	chi	%r7, 4
	je	L(rem4)
	jh	L(rem5to7)

L(rem3):
	lm	%r0, %r2, 20(up)
	ahi	up, -12
	stm	%r0, %r2, 20(rp)
	ahi	rp, -12
	brct	%r10, L(loop)
	j	L(done)

L(rem4):
	lm	%r0, %r3, 16(up)
	ahi	up, -16
	stm	%r0, %r3, 16(rp)
	ahi	rp, -16
	brct	%r10, L(loop)
	j	L(done)

C Here %r7 is in 5..7; a value below 6 falls through to the 5-limb case.
L(rem5to7):
	chi	%r7, 6
	je	L(rem6)
	jh	L(rem7)

L(rem5):
	lm	%r0, %r4, 12(up)	C overwrites n, which is no longer needed
	ahi	up, -20
	stm	%r0, %r4, 12(rp)
	ahi	rp, -20
	brct	%r10, L(loop)
	j	L(done)

L(rem6):
	lm	%r0, %r5, 8(up)
	ahi	up, -24
	stm	%r0, %r5, 8(rp)
	ahi	rp, -24
	brct	%r10, L(loop)
	j	L(done)

L(rem7):
	lm	%r0, %r6, 4(up)
	ahi	up, -28
	stm	%r0, %r6, 4(rp)
	ahi	rp, -28
	brct	%r10, L(loop)
	j	L(done)

C Main loop: each pass moves one 8-limb block and lowers both pointers by 32
C bytes.  The whole block is loaded before any of it is stored.
L(loop):
	lm	%r0, %r7, 0(up)
	la	up, 0(%r11,up)
	stm	%r0, %r7, 0(rp)
	la	rp, 0(%r11,rp)
	brct	%r10, L(loop)

L(done):
	lm	%r6, %r11, 24(%r15)	C reload the registers saved on entry
	br	%r14
EPILOGUE()