new beta-0.90.0
[luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc32 / rshift.asm
blobd86cdcbd6301c1b8e25d72d464d8fa3e03b69d59
1 dnl PowerPC-32 mpn_rshift -- Shift a number right.
3 dnl Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C 603e: ?
35 C 604e: 3.0
36 C 75x (G3): 3.0
37 C 7400,7410 (G4): 3.0
38 C 7445,7455 (G4+): 2.5
39 C 7447,7457 (G4+): 2.25
40 C power4/ppc970: 2.5
41 C power5: 2.5
43 C INPUT PARAMETERS
44 C rp r3
45 C up r4
46 C n r5
47 C cnt r6
49 ASM_START()
C ---------------------------------------------------------------------------
C mpn_rshift -- shift the n-limb number at up right by cnt bits and store
C the n-limb result at rp.  Limbs are 32-bit words, least significant first.
C
C In:	r3 = rp		destination limb pointer
C	r4 = up		source limb pointer
C	r5 = n		limb count
C	r6 = cnt	shift count in bits
C Out:	r3 = up[0] << (32-cnt), that is the bits shifted out at the low
C	     end, left-justified in the returned limb
C
C Scratch: r0, r4, r5, r7-r12, CTR, cr0.  The n > 30 path additionally uses
C r24-r31, which it saves in and restores from a 48-byte stack frame.
C
C NOTE(review): nothing here validates the arguments.  The code appears to
C assume n >= 1 and 1 <= cnt <= 31 -- confirm against the mpn_rshift
C contract in the GMP manual.
C ---------------------------------------------------------------------------
PROLOGUE(mpn_rshift)
	cmpwi	cr0, r5, 30	C more than 30 limbs?
	addi	r7, r3, -4	C r7 = rp-4, so update-form stores pre-increment
	bgt	L(BIG)		C n > 30: use the 4-way unrolled code

C Short operands (n <= 30): two limbs per loop pass, volatile registers only,
C no stack frame.
	mtctr	r5		C CTR = n
	subfic	r8, r6, 32	C r8 = 32-cnt, the complementary left shift
	lwz	r11, 0(r4)	C load first s1 limb
	slw	r3, r11, r8	C compute function return value
	bdz	L(end1)		C n = 1: only the top limb remains

C r10 and r11 alternate as "current" and "next" limb so that no register
C move is needed between the two halves of the loop.
L(oop):	lwzu	r10, 4(r4)	C next limb
	srw	r9, r11, r6	C low part from the current limb
	slw	r12, r10, r8	C high part from the next limb
	or	r9, r9, r12
	stwu	r9, 4(r7)
	bdz	L(end2)		C the limb in r10 was the last one
	lwzu	r11, 4(r4)
	srw	r9, r10, r6
	slw	r12, r11, r8
	or	r9, r9, r12
	stwu	r9, 4(r7)
	bdnz	L(oop)

L(end1):
	srw	r0, r11, r6	C most significant result limb, from r11
	stw	r0, 4(r7)
	blr
L(end2):
	srw	r0, r10, r6	C most significant result limb, from r10
	stw	r0, 4(r7)
	blr

C Long operands (n > 30).  Invariant from here on: r0 holds the previous
C source limb already shifted right by cnt, waiting to be combined with the
C left-shifted part of the following limb.
L(BIG):
	stwu	r1, -48(r1)	C allocate a 48-byte frame
	stmw	r24, 8(r1)	C save registers we are supposed to preserve
	lwz	r9, 0(r4)
	subfic	r8, r6, 32	C r8 = 32-cnt
	slw	r3, r9, r8	C compute function return value
	srw	r0, r9, r6	C pending low part of result limb 0
	addi	r5, r5, -1	C r5 = n-1 limbs still to be read

C Spill loop: emit (n-1) mod 4 result limbs one at a time so that the count
C left for the unrolled loop is a multiple of 4.
	andi.	r10, r5, 3	C count for spill loop
	beq	L(e)
	mtctr	r10
	lwzu	r28, 4(r4)
	bdz	L(xe0)		C exactly one spill limb

L(loop0):
	srw	r12, r28, r6
	slw	r24, r28, r8
	lwzu	r28, 4(r4)
	or	r24, r0, r24
	stwu	r24, 4(r7)
	mr	r0, r12
	bdnz	L(loop0)	C taken at most once!

L(xe0):	srw	r12, r28, r6	C last spill limb, already in r28
	slw	r24, r28, r8
	or	r24, r0, r24
	stwu	r24, 4(r7)
	mr	r0, r12

C Unrolled loop: four limbs per pass, software pipelined.  The loads for
C the next pass are issued while the current four limbs are being shifted,
C so the first group is loaded here and the last group is finished after
C the loop; hence the pass count is (n-1)/4 - 1.
L(e):	srwi	r5, r5, 2	C count for unrolled loop
	addi	r5, r5, -1
	mtctr	r5
	lwz	r28, 4(r4)
	lwz	r29, 8(r4)
	lwz	r30, 12(r4)
	lwzu	r31, 16(r4)

L(loopU):
	srw	r9, r28, r6
	slw	r24, r28, r8
	lwz	r28, 4(r4)
	srw	r10, r29, r6
	slw	r25, r29, r8
	lwz	r29, 8(r4)
	srw	r11, r30, r6
	slw	r26, r30, r8
	lwz	r30, 12(r4)
	srw	r12, r31, r6
	slw	r27, r31, r8
	lwzu	r31, 16(r4)
	or	r24, r0, r24
	stw	r24, 4(r7)
	or	r25, r9, r25
	stw	r25, 8(r7)
	or	r26, r10, r26
	stw	r26, 12(r7)
	or	r27, r11, r27
	stwu	r27, 16(r7)
	mr	r0, r12		C carry the pending low part into the next pass
	bdnz	L(loopU)

C Wind-down: process the four limbs already loaded, with no further loads.
	srw	r9, r28, r6
	slw	r24, r28, r8
	srw	r10, r29, r6
	slw	r25, r29, r8
	srw	r11, r30, r6
	slw	r26, r30, r8
	srw	r12, r31, r6
	slw	r27, r31, r8
	or	r24, r0, r24
	stw	r24, 4(r7)
	or	r25, r9, r25
	stw	r25, 8(r7)
	or	r26, r10, r26
	stw	r26, 12(r7)
	or	r27, r11, r27
	stw	r27, 16(r7)

	stw	r12, 20(r7)	C most significant result limb
	lmw	r24, 8(r1)	C restore registers
	addi	r1, r1, 48	C release the stack frame
	blr			C return to caller; r3 still holds the result
EPILOGUE()