beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc32 / lshift.asm
blobce85d4d33eb30037f9e611a611686396cc6e71c1
1 dnl PowerPC-32 mpn_lshift -- Shift a number left.
3 dnl Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C 603e: ?
35 C 604e: 3.0
36 C 75x (G3): 3.0
37 C 7400,7410 (G4): 3.0
38 C 7445,7455 (G4+): 2.5
39 C 7447,7457 (G4+): 2.25
40 C power4/ppc970: 2.5
41 C power5: 2.5
43 C INPUT PARAMETERS
44 C rp r3
45 C up r4
46 C n r5
47 C cnt r6
ASM_START()

C mpn_lshift -- shift an n-limb number left by cnt bits.
C
C On entry:
C	r3	rp	destination limb pointer
C	r4	up	source limb pointer
C	r5	n	number of limbs (code assumes n >= 1; n = 0 is not handled)
C	r6	cnt	shift count in bits (presumably 1..31 per the mpn_lshift
C			contract; r8 = 32-cnt is used as the complementary
C			right-shift count)
C On exit:
C	r3	the bits shifted out of the most significant source limb
C
C Both pointers are first advanced to one-past-the-end, and limbs are then
C processed from the highest address downwards using pre-decrementing
C loads/stores.  Each result limb is (limb << cnt) | (next_lower_limb >> (32-cnt)).
C
C Two code paths:
C   n <= 30:  a 2-way unrolled loop that uses only volatile registers and
C	      needs no stack frame.
C   n >  30:  L(BIG), a 4-way unrolled loop that uses r24-r31.  Those are
C	      saved to / restored from a 48-byte stack frame.

PROLOGUE(mpn_lshift)
	cmpwi	cr0, r5, 30	C more than 30 limbs?
	slwi	r0, r5, 2	C r0 = n * 4 (byte size of operands)
	add	r4, r4, r0	C make r4 point at end of s1
	add	r7, r3, r0	C make r7 point at end of res
	bgt	L(BIG)		C branch if more than 30 limbs

C ---- Small operands: n <= 30 ----
	mtctr	r5		C copy size into CTR
	subfic	r8, r6, 32	C r8 = 32 - cnt
	lwzu	r11, -4(r4)	C load first (most significant) s1 limb
	srw	r3, r11, r8	C compute function return value
	bdz	L(end1)		C n == 1: only the final store remains

C Two limbs per iteration; r11 and r10 alternate as "current limb" so no
C register move is needed between the two halves.
L(oop):	lwzu	r10, -4(r4)
	slw	r9, r11, r6
	srw	r12, r10, r8
	or	r9, r9, r12
	stwu	r9, -4(r7)
	bdz	L(end2)		C source exhausted with r10 as the last limb
	lwzu	r11, -4(r4)
	slw	r9, r10, r6
	srw	r12, r11, r8
	or	r9, r9, r12
	stwu	r9, -4(r7)
	bdnz	L(oop)

C Least significant result limb: only zeros are shifted in from below.
L(end1):
	slw	r0, r11, r6
	stw	r0, -4(r7)
	blr
L(end2):
	slw	r0, r10, r6
	stw	r0, -4(r7)
	blr

C ---- Large operands: n > 30 ----
L(BIG):
	stwu	r1, -48(r1)	C allocate 48-byte frame, store back chain
	stmw	r24, 8(r1)	C save registers we are supposed to preserve
	lwzu	r9, -4(r4)	C most significant s1 limb
	subfic	r8, r6, 32	C r8 = 32 - cnt
	srw	r3, r9, r8	C compute function return value
	slw	r0, r9, r6	C r0 = pending high part of next result limb
	addi	r5, r5, -1	C r5 = limbs still to be loaded

C Handle (n-1) mod 4 limbs one at a time so that the remaining count is a
C multiple of 4 for the unrolled loop.
	andi.	r10, r5, 3	C count for spill loop
	beq	L(e)
	mtctr	r10
	lwzu	r28, -4(r4)
	bdz	L(xe0)

L(loop0):
	slw	r12, r28, r6
	srw	r24, r28, r8
	lwzu	r28, -4(r4)
	or	r24, r0, r24
	stwu	r24, -4(r7)
	mr	r0, r12
	bdnz	L(loop0)	C taken at most once!

L(xe0):	slw	r12, r28, r6
	srw	r24, r28, r8
	or	r24, r0, r24
	stwu	r24, -4(r7)
	mr	r0, r12

C 4-way unrolled main loop.  CTR is one less than the number of 4-limb
C groups because the last group is processed by the wind-down code after
C the loop (which performs no further loads).
L(e):	srwi	r5, r5, 2	C count for unrolled loop
	addi	r5, r5, -1
	mtctr	r5
	lwz	r28, -4(r4)
	lwz	r29, -8(r4)
	lwz	r30, -12(r4)
	lwzu	r31, -16(r4)

C Invariant at loop top: r28-r31 hold the next four source limbs (high to
C low) and r0 holds the left-shifted part of the limb above them.
L(loopU):
	slw	r9, r28, r6
	srw	r24, r28, r8
	lwz	r28, -4(r4)
	slw	r10, r29, r6
	srw	r25, r29, r8
	lwz	r29, -8(r4)
	slw	r11, r30, r6
	srw	r26, r30, r8
	lwz	r30, -12(r4)
	slw	r12, r31, r6
	srw	r27, r31, r8
	lwzu	r31, -16(r4)
	or	r24, r0, r24
	stw	r24, -4(r7)
	or	r25, r9, r25
	stw	r25, -8(r7)
	or	r26, r10, r26
	stw	r26, -12(r7)
	or	r27, r11, r27
	stwu	r27, -16(r7)
	mr	r0, r12		C carry low limb's shifted part into next round
	bdnz	L(loopU)

C Wind-down: combine and store the last four loaded limbs.
	slw	r9, r28, r6
	srw	r24, r28, r8
	slw	r10, r29, r6
	srw	r25, r29, r8
	slw	r11, r30, r6
	srw	r26, r30, r8
	slw	r12, r31, r6
	srw	r27, r31, r8
	or	r24, r0, r24
	stw	r24, -4(r7)
	or	r25, r9, r25
	stw	r25, -8(r7)
	or	r26, r10, r26
	stw	r26, -12(r7)
	or	r27, r11, r27
	stw	r27, -16(r7)

	stw	r12, -20(r7)	C least significant result limb
	lmw	r24, 8(r1)	C restore registers
	addi	r1, r1, 48	C pop stack frame
	blr			C return; without this the n > 30 path falls off the end
EPILOGUE()