beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / alpha / lshift.asm
blobc62a856aea3335bd8d67de9fc3f27dbc2a4cd994
1 dnl Alpha mpn_lshift -- Shift a number left.
3 dnl Copyright 1994, 1995, 2000, 2003, 2009 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C EV4: ?
35 C EV5: 3.25
36 C EV6: 1.75
38 C INPUT PARAMETERS
39 C rp r16
40 C up r17
41 C n r18
42 C cnt r19
45 ASM_START()
C mpn_lshift: shift the n-limb number at up left by cnt bits, store the n
C result limbs at rp, and return in r0 the bits shifted out of the top limb.
C Limbs are 64-bit quadwords; both operands are walked from the most
C significant limb (highest address) down to the least significant one.
C
C Each result limb is (limb[i] << cnt) | (limb[i-1] >> (64-cnt)).  The right
C shift count is kept as r20 = -cnt; Alpha shifts use only the low 6 bits of
C the count register, so this acts as 64-cnt.
C
C Register use:
C   r16       result pointer, moving downwards
C   r17       source pointer, moving downwards
C   r18       source limbs still to be consumed
C   r19, r20  cnt and -cnt
C   r28       trip count of the one-limb-at-a-time loop L(top0)
C   r1-r4     source limbs in flight
C   r5-r8     right-shifted parts, then the merged result limbs
C   r21-r24   left-shifted parts (r21 from r1, r22 from r2, r23 from r3, r24 from r4)
C   r0        return value
C
C NOTE(review): the top limb is loaded unconditionally and cnt = 0 would turn
C the right shifts into shifts by 0, so this presumably requires n >= 1 and
C 0 < cnt < 64 -- confirm against the documented mpn_lshift contract.
46 PROLOGUE(mpn_lshift)
47 s8addq r18,r17,r17 C r17 = up + 8*n: just past the most significant source limb
48 ldq r4,-8(r17) C r4 = most significant source limb
49 subq r31,r19,r20 C r20 = 0 - cnt, the right-shift count (r31 reads as zero)
50 s8addq r18,r16,r16 C r16 = rp + 8*n: just past the most significant result limb
51 subq r18,1,r18 C r18 = n-1 source limbs not yet loaded
52 and r18,4-1,r28 C r28 = (n-1) mod 4, limbs handled one at a time in L(top0)
53 srl r4,r20,r0 C r0 = return value: bits shifted out of the top limb
55 beq r28,L(L0) C n-1 is already a multiple of 4: skip the one-limb loop
56 subq r18,r28,r18 C r18 = limbs left for the 4-way code (a multiple of 4)
58 ALIGN(8)
59 L(top0): C one result limb per pass; r4 holds the current source limb
60 ldq r3,-16(r17) C r3 = next lower source limb
61 subq r16,8,r16 C step the result pointer down one limb
62 sll r4,r19,r5 C high part: current limb << cnt
63 subq r17,8,r17 C step the source pointer down one limb
64 subq r28,1,r28
65 srl r3,r20,r6 C low part: bits carried up from the next lower limb
66 bis r3,r3,r4 C r4 = r3: the lower limb becomes the current limb
67 bis r5,r6,r8 C merge high and low parts
68 stq r8,0(r16)
69 bne r28,L(top0)
71 L(L0): sll r4,r19,r24 C r24 = current limb << cnt, low bits still to be merged in
72 beq r18,L(end) C no source limbs left: r24 is the least significant result limb
73 C warm up phase 1: load the next four source limbs
74 ldq r1,-16(r17)
75 subq r18,4,r18 C account for the four limbs being loaded
76 ldq r2,-24(r17)
77 ldq r3,-32(r17)
78 ldq r4,-40(r17)
79 C warm up phase 2: start shifting, and load four more limbs if any remain
80 srl r1,r20,r7
81 sll r1,r19,r21
82 srl r2,r20,r8
83 beq r18,L(end1) C those four limbs were the last ones: finish in L(end1)
84 ldq r1,-48(r17)
85 sll r2,r19,r22
86 ldq r2,-56(r17)
87 srl r3,r20,r5
88 bis r7,r24,r7 C r7 = first result limb of this group, ready to store
89 sll r3,r19,r23
90 bis r8,r21,r8 C r8 = second result limb of this group, ready to store
91 srl r4,r20,r6
92 ldq r3,-64(r17)
93 sll r4,r19,r24
94 ldq r4,-72(r17)
95 subq r18,4,r18 C account for the second group of four loaded limbs
96 beq r18,L(end2) C nothing further to load: drain the pipeline in L(end2)
97 ALIGN(16)
98 C main loop: each pass stores four result limbs and loads four more source limbs
99 L(top): stq r7,-8(r16)
100 bis r5,r22,r5
101 stq r8,-16(r16)
102 bis r6,r23,r6
104 srl r1,r20,r7
105 subq r18,4,r18
106 sll r1,r19,r21
107 unop C ldq r31,-96(r17)
109 srl r2,r20,r8
110 ldq r1,-80(r17)
111 sll r2,r19,r22
112 ldq r2,-88(r17)
114 stq r5,-24(r16)
115 bis r7,r24,r7
116 stq r6,-32(r16)
117 bis r8,r21,r8
119 srl r3,r20,r5
120 unop C ldq r31,-96(r17)
121 sll r3,r19,r23
122 subq r16,32,r16 C all four stores of this pass are done: move rp down four limbs
124 srl r4,r20,r6
125 ldq r3,-96(r17)
126 sll r4,r19,r24
127 ldq r4,-104(r17)
129 subq r17,32,r17 C all four loads of this pass are done: move up down four limbs
130 bne r18,L(top)
131 C cool down phase 2/1: eight source limbs are in flight, none left to load
132 L(end2):
133 stq r7,-8(r16)
134 bis r5,r22,r5
135 stq r8,-16(r16)
136 bis r6,r23,r6
137 srl r1,r20,r7
138 sll r1,r19,r21
139 srl r2,r20,r8
140 sll r2,r19,r22
141 stq r5,-24(r16)
142 bis r7,r24,r7
143 stq r6,-32(r16)
144 bis r8,r21,r8
145 srl r3,r20,r5
146 sll r3,r19,r23
147 srl r4,r20,r6
148 sll r4,r19,r24
149 C cool down phase 2/2
150 stq r7,-40(r16)
151 bis r5,r22,r5
152 stq r8,-48(r16)
153 bis r6,r23,r6
154 stq r5,-56(r16)
155 stq r6,-64(r16)
156 C cool down phase 2/3
157 stq r24,-72(r16) C least significant result limb: lowest source limb << cnt
158 ret r31,(r26),1 C return to the address in r26; result is in r0
160 C cool down phase 1/1: four source limbs are in flight, none left to load
161 L(end1):
162 sll r2,r19,r22
163 srl r3,r20,r5
164 bis r7,r24,r7
165 sll r3,r19,r23
166 bis r8,r21,r8
167 srl r4,r20,r6
168 sll r4,r19,r24
169 C cool down phase 1/2
170 stq r7,-8(r16)
171 bis r5,r22,r5
172 stq r8,-16(r16)
173 bis r6,r23,r6
174 stq r5,-24(r16)
175 stq r6,-32(r16)
176 stq r24,-40(r16) C least significant result limb: lowest source limb << cnt
177 ret r31,(r26),1 C return to the address in r26; result is in r0
179 L(end): stq r24,-8(r16) C least significant result limb: lowest source limb << cnt
180 ret r31,(r26),1 C return to the address in r26; result is in r0
181 EPILOGUE(mpn_lshift)
182 ASM_END()