beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc64 / lshiftc.asm
blob7cf6a83428c59b2e4dc2f673845909be6c2f1886
1 dnl PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt
3 dnl Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
C                   cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          ?
C POWER5                 2.25
C POWER6                 9.5
C POWER7                 2.15
C TODO
C  * Try to reduce the number of needed live registers
C  * Micro-optimise header code
C  * Keep in synch with lshift.asm and rshift.asm
C  * Could the long-scheduled std insns be less scheduled?
C INPUT PARAMETERS
define(`rp',  `r3')	C destination limb pointer
define(`up',  `r4')	C source limb pointer
define(`n',   `r5')	C number of limbs
define(`cnt', `r6')	C left shift count in bits

C INTERNAL REGISTER NAMES
define(`tnc',`r0')	C complementary shift count, 64 - cnt
define(`u0',`r30')	C source limb buffer, r30 is saved and restored by the code
define(`u1',`r31')	C source limb buffer, r31 is saved and restored by the code
define(`retval',`r5')	C return value, shares r5 with n
ASM_START()

C mpn_lshiftc -- shift the n-limb operand at up left by cnt bits and store
C the one-complement of the shifted value at rp.
C
C In:	rp	destination limb pointer
C	up	source limb pointer
C	n	number of limbs.  The top limb is loaded unconditionally, so
C		n >= 1 is required.
C	cnt	shift count in bits (presumably 1..63 as for the other mpn
C		shifts -- TODO confirm against the callers)
C Out:	the bits shifted out of the most significant limb, that is
C	up[n-1] >> (64-cnt).  This value is NOT complemented.
C
C Method: both pointers are first advanced past their top limb and the
C operand is then walked from the most significant limb downwards.  Every
C stored limb is nor(hi << cnt, lo >> tnc) for two adjacent source limbs hi
C and lo.  The least significant limb is nor(up[0] << cnt, up[0] << cnt),
C so the cnt vacated low bits come out as ones.
C
C The main loop handles four limbs per iteration with ctr = (n+3)/4.  The
C value n mod 4 selects one of four feed-in sequences (b00, b01, b10, b11)
C that prime the software pipeline and enter the loop at the matching
C point; the wind-down code after the loop has matching entry points (cj4,
C cj3, cj2) for operands too short to reach the loop.
C
C r30 and r31 (u0, u1) are saved below the stack pointer and restored
C before returning; r1 itself is never adjusted.

PROLOGUE(mpn_lshiftc)
	std	r31, -8(r1)
	std	r30, -16(r1)
	subfic	tnc, cnt, 64
	sldi	r7, n, 3	C byte count corresponding to n
	add	up, up, r7	C up = up + n
	add	rp, rp, r7	C rp = rp + n
	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
	cmpdi	cr6, r30, 2
	addi	r31, n, 3	C compute count...
	ld	r10, -8(up)	C load 1st limb for b00...b11
	srd	retval, r10, tnc	C return value; overwrites n, which is dead from here
C NOTE(review): in a HAVE_ABI_mode32 build the upper half of n is shifted
C into the loop count below without being masked -- confirm that it is
C guaranteed to be zero, or mirror what lshift.asm does at this point.
	srdi	r31, r31, 2	C ...for ctr
	mtctr	r31		C copy count into ctr
	beq	cr0, L(b00)	C n mod 4 = 0
	blt	cr6, L(b01)	C n mod 4 = 1
	ld	r11, -16(up)	C load 2nd limb for b10 and b11
	beq	cr6, L(b10)	C n mod 4 = 2, else fall into b11

C Feed-in for n mod 4 = 3
	ALIGN(16)
L(b11):	sld	r8, r10, cnt
	srd	r9, r11, tnc
	ld	u1, -24(up)
	addi	up, up, -24
	sld	r12, r11, cnt
	srd	r7, u1, tnc
	addi	rp, rp, 16
	bdnz	L(gt3)		C more than 3 limbs

	nor	r11, r8, r9	C exactly 3 limbs, finish in the wind-down code
	sld	r8, u1, cnt
	nor	r8, r8, r8	C least significant limb
	b	L(cj3)

	ALIGN(16)
L(gt3):	ld	u0, -8(up)
	nor	r11, r8, r9
	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -16(up)
	nor	r10, r12, r7
	b	L(L11)

C Feed-in for n mod 4 = 2
	ALIGN(32)
L(b10):	sld	r12, r10, cnt
	addi	rp, rp, 24
	srd	r7, r11, tnc
	bdnz	L(gt2)		C more than 2 limbs

	sld	r8, r11, cnt	C exactly 2 limbs
	nor	r10, r12, r7
	nor	r8, r8, r8	C least significant limb
	b	L(cj2)

L(gt2):	ld	u0, -24(up)
	sld	r8, r11, cnt
	srd	r9, u0, tnc
	ld	u1, -32(up)
	nor	r10, r12, r7
	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -40(up)
	nor	r11, r8, r9
	addi	up, up, -16
	b	L(L10)

C Feed-in for n mod 4 = 0
	ALIGN(16)
L(b00):	ld	u1, -16(up)
	sld	r12, r10, cnt
	srd	r7, u1, tnc
	ld	u0, -24(up)
	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -32(up)
	nor	r10, r12, r7
	sld	r12, u0, cnt
	srd	r7, u1, tnc
	addi	rp, rp, 8
	bdz	L(cj4)		C exactly 4 limbs

L(gt4):	addi	up, up, -32
	ld	u0, -8(up)
	nor	r11, r8, r9
	b	L(L00)

C Feed-in for n mod 4 = 1
	ALIGN(16)
L(b01):	bdnz	L(gt1)		C more than 1 limb
	sld	r8, r10, cnt	C exactly 1 limb
	nor	r8, r8, r8
	std	r8, -8(rp)
	b	L(ret)

L(gt1):	ld	u0, -16(up)
	sld	r8, r10, cnt
	srd	r9, u0, tnc
	ld	u1, -24(up)
	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -32(up)
	nor	r11, r8, r9
	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -40(up)
	addi	up, up, -40
	nor	r10, r12, r7
	bdz	L(end)		C exactly 5 limbs, skip the loop

C Main loop, four limbs per iteration.  r10 and r11 carry finished limbs
C waiting to be stored; r8, r9, r12 and r7 carry the partial shifts.
	ALIGN(32)
L(top):	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -8(up)
	std	r11, -8(rp)
	nor	r11, r8, r9
L(L00):	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -16(up)
	std	r10, -16(rp)
	nor	r10, r12, r7
L(L11):	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -24(up)
	std	r11, -24(rp)
	nor	r11, r8, r9
L(L10):	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -32(up)
	addi	up, up, -32
	std	r10, -32(rp)
	addi	rp, rp, -32
	nor	r10, r12, r7
	bdnz	L(top)

C Wind-down: drain the pipeline and store the remaining limbs.  The last
C store writes r8, the complemented least significant limb.
	ALIGN(32)
L(end):	sld	r12, u0, cnt
	srd	r7, u1, tnc
	std	r11, -8(rp)
L(cj4):	nor	r11, r8, r9
	sld	r8, u1, cnt
	std	r10, -16(rp)
	nor	r8, r8, r8	C least significant limb
L(cj3):	nor	r10, r12, r7
	std	r11, -24(rp)
L(cj2):	std	r10, -32(rp)
	std	r8, -40(rp)

C Restore the saved registers and hand back the shifted-out bits.  With
C HAVE_ABI_mode32 the 64-bit value is returned split over r3 (upper 32
C bits) and r4; otherwise it is returned in r3.
L(ret):	ld	r31, -8(r1)
	ld	r30, -16(r1)
ifdef(`HAVE_ABI_mode32',
`	srdi	r3, retval, 32
	mr	r4, retval
',`	mr	r3, retval')
	blr			C return to caller; without this, execution runs off the end
EPILOGUE()