beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / alpha / rshift.asm
blob6e1e21455894351ae2722ec9b59da68dc7f08f4b
1 dnl Alpha mpn_rshift -- Shift a number right.
3 dnl Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C EV4: ?
35 C EV5: 3.25
36 C EV6: 1.75
38 C INPUT PARAMETERS
39 C rp r16
40 C up r17
41 C n r18
42 C cnt r19
45 ASM_START()
C mpn_rshift: shift the n-limb number at up right by cnt bits and store the
C n-limb result at rp.  The return value in r0 is the first source limb
C shifted left by -cnt, i.e. the bits that fall off the low end.
C
C Structure:
C   1. A one-limb-per-pass loop handles (n-1) mod 4 limbs.
C   2. A software-pipelined loop then produces four limbs per pass.
C   3. Each of the three exit paths stores the final result limb, which is
C      just the last source limb shifted right by cnt.
C
C Each other result limb is (limb[i] >> cnt) | (limb[i+1] << -cnt); below,
C "low part" means the srl term and "high part" means the sll term.
C
C NOTE(review): using -cnt as a left-shift count relies on Alpha shifts
C taking only the low 6 bits of the count, and the OR-combining assumes cnt
C is nonzero.  Presumably the caller guarantees 1 <= cnt <= 63 and n >= 1;
C confirm against the documented mpn_rshift contract.
46 PROLOGUE(mpn_rshift)
47 ldq r4,0(r17) C load first limb
48 subq r31,r19,r20 C r20 = 0 - cnt (r31 reads as zero), the left-shift count
49 subq r18,1,r18 C r18 = n - 1
50 and r18,4-1,r28 C number of limbs in first loop
51 sll r4,r20,r0 C compute function result
53 beq r28,L(L0) C (n-1) mod 4 is zero, skip the first loop
54 subq r18,r28,r18 C r18 = limbs left for the 4-way code, a multiple of 4
56 ALIGN(8)
57 L(top0): C first loop, one result limb per pass
58 ldq r3,8(r17) C r3 = next higher source limb
59 addq r16,8,r16 C advance rp
60 srl r4,r19,r5 C low part from the current limb
61 addq r17,8,r17 C advance up
62 subq r28,1,r28 C one fewer limb for this loop
63 sll r3,r20,r6 C high part from the next limb
64 bis r3,r3,r4 C next limb becomes the current limb
65 bis r5,r6,r8 C combine both parts
66 stq r8,-8(r16) C store at the slot rp pointed to before the increment
67 bne r28,L(top0)
69 L(L0): srl r4,r19,r24 C r24 = low part of the next result limb
70 beq r18,L(end) C no group of four left, only the final limb remains
71 C warm up phase 1
72 ldq r1,8(r17) C r1..r4 = the four source limbs of the first group
73 subq r18,4,r18 C count the group just claimed
74 ldq r2,16(r17)
75 ldq r3,24(r17)
76 ldq r4,32(r17)
77 C warm up phase 2
78 sll r1,r20,r7 C r7 = high part of result 0
79 srl r1,r19,r21 C r21 = low part of result 1
80 sll r2,r20,r8 C r8 = high part of result 1
81 beq r18,L(end1) C that was the only group, finish without the main loop
82 ldq r1,40(r17) C start loading the second group into r1..r4
83 srl r2,r19,r22 C r22 = low part of result 2
84 ldq r2,48(r17)
85 sll r3,r20,r5 C r5 = high part of result 2
86 bis r7,r24,r7 C r7 = finished result 0
87 srl r3,r19,r23 C r23 = low part of result 3
88 bis r8,r21,r8 C r8 = finished result 1
89 sll r4,r20,r6 C r6 = high part of result 3
90 ldq r3,56(r17)
91 srl r4,r19,r24 C r24 = low part of the limb following this group
92 ldq r4,64(r17)
93 subq r18,4,r18 C count the second group
94 beq r18,L(end2) C exactly two groups, drain the pipeline
95 ALIGN(16)
96 C main loop
C On entry to each pass: r7 and r8 are finished results 0 and 1 of the group
C being stored, r5/r22 and r6/r23 are the unmerged halves of results 2 and 3,
C r24 is the low part of the following limb, and r1..r4 hold the source limbs
C of the next group.  The loads in the loop fetch the group after that one.
97 L(top): stq r7,0(r16) C store result 0
98 bis r5,r22,r5 C finish result 2
99 stq r8,8(r16) C store result 1
100 bis r6,r23,r6 C finish result 3
102 sll r1,r20,r7 C next group: high part of its result 0
103 subq r18,4,r18 C count one more group
104 srl r1,r19,r21 C next group: low part of its result 1
105 unop C ldq r31,-96(r17)
107 sll r2,r20,r8 C next group: high part of its result 1
108 ldq r1,72(r17) C r1 is free again, reload for the group after next
109 srl r2,r19,r22 C next group: low part of its result 2
110 ldq r2,80(r17)
112 stq r5,16(r16) C store result 2
113 bis r7,r24,r7 C next group: finished result 0, uses r24 from the old group
114 stq r6,24(r16) C store result 3
115 bis r8,r21,r8 C next group: finished result 1
117 sll r3,r20,r5 C next group: high part of its result 2
118 unop C ldq r31,-96(r17)
119 srl r3,r19,r23 C next group: low part of its result 3
120 addq r16,32,r16 C advance rp by four limbs
122 sll r4,r20,r6 C next group: high part of its result 3
123 ldq r3,88(r17)
124 srl r4,r19,r24 C low part of the limb following the next group
125 ldq r4,96(r17)
127 addq r17,32,r17 C advance up by four limbs
128 bne r18,L(top)
129 C cool down phase 2/1
C Two groups are still in flight here: one with results computed or half
C computed in r7, r8, r5/r22, r6/r23, and one whose source limbs sit in r1..r4.
130 L(end2):
131 stq r7,0(r16) C store result 0 of the older group
132 bis r5,r22,r5 C finish its result 2
133 stq r8,8(r16) C store its result 1
134 bis r6,r23,r6 C finish its result 3
135 sll r1,r20,r7 C last group: high part of result 0
136 srl r1,r19,r21 C last group: low part of result 1
137 sll r2,r20,r8 C last group: high part of result 1
138 srl r2,r19,r22 C last group: low part of result 2
139 stq r5,16(r16) C store result 2 of the older group
140 bis r7,r24,r7 C last group: finished result 0
141 stq r6,24(r16) C store result 3 of the older group
142 bis r8,r21,r8 C last group: finished result 1
143 sll r3,r20,r5 C last group: high part of result 2
144 srl r3,r19,r23 C last group: low part of result 3
145 sll r4,r20,r6 C last group: high part of result 3
146 srl r4,r19,r24 C final result limb, no higher limb to merge in
147 C cool down phase 2/2
148 stq r7,32(r16) C rp was not advanced, so the last group goes at 32..56
149 bis r5,r22,r5 C finish result 2
150 stq r8,40(r16)
151 bis r6,r23,r6 C finish result 3
152 stq r5,48(r16)
153 stq r6,56(r16)
154 C cool down phase 2/3
155 stq r24,64(r16) C store the final result limb
156 ret r31,(r26),1 C return through the address in r26
158 C cool down phase 1/1
C Only one group was loaded: r7, r21 and r8 were computed in warm up phase 2,
C r24 is the low part of result 0, and r2..r4 still hold source limbs.
159 L(end1):
160 srl r2,r19,r22 C low part of result 2
161 sll r3,r20,r5 C high part of result 2
162 bis r7,r24,r7 C finished result 0
163 srl r3,r19,r23 C low part of result 3
164 bis r8,r21,r8 C finished result 1
165 sll r4,r20,r6 C high part of result 3
166 srl r4,r19,r24 C final result limb, no higher limb to merge in
167 C cool down phase 1/2
168 stq r7,0(r16)
169 bis r5,r22,r5 C finish result 2
170 stq r8,8(r16)
171 bis r6,r23,r6 C finish result 3
172 stq r5,16(r16)
173 stq r6,24(r16)
174 stq r24,32(r16) C store the final result limb
175 ret r31,(r26),1 C return through the address in r26
177 L(end): stq r24,0(r16) C no group of four: r24 is the final result limb
178 ret r31,(r26),1 C return through the address in r26
179 EPILOGUE(mpn_rshift)
180 ASM_END()