beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc32 / 750 / lshift.asm
blob3a1c1a7212e81ccf2e5a99953b6d7a1e4c039dd5
1 dnl PowerPC 750 mpn_lshift -- mpn left shift.
3 dnl Copyright 2002, 2003 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
34 C cycles/limb
35 C 750: 3.0
36 C 7400: 3.0
39 C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
40 C unsigned shift);
42 C This code is the same per-limb speed as mpn/powerpc32/lshift.asm, but
43 C smaller and saving about 30 or so cycles of overhead.
ASM_START()

C mpn_lshift -- shift {src,size} left by shift bits and store the low
C size limbs of the result at {dst,size}.
C
C Inputs:
C	r3	dst
C	r4	src
C	r5	size	must be at least 1: it is loaded straight into CTR and
C			src[size-1] is read unconditionally
C	r6	shift	GMP documents 1 <= shift <= 31 for 32-bit limbs
C
C Result:
C	r3	the bits shifted out of the top limb, i.e.
C		src[size-1] >> (32-shift)
C
C Registers written: r3-r12 and CTR.  No stack is used.
C
C Limbs are processed from the most significant end downwards, both
C pointers being walked with update-form loads and stores.  The main loop
C is unrolled twice, with r9 and r10 alternating as the holder of the limb
C most recently loaded.  Each of the three exit paths ends in its own blr;
C without them the tails would fall through into each other and corrupt
C both the stored limbs and the return value.

PROLOGUE(mpn_lshift)

	C r3	dst
	C r4	src
	C r5	size
	C r6	shift

	mtctr	r5		C size
	slwi	r5, r5, 2	C 4*size

	subfic	r7, r6, 32	C 32-shift
	add	r4, r4, r5	C &src[size]

	add	r5, r3, r5	C &dst[size]
	lwz	r8, -4(r4)	C src[size-1]
	bdz	L(one)		C size==1

	lwzu	r9, -8(r4)	C src[size-2]

	srw	r3, r8, r7	C return value
	slw	r8, r8, r6	C src[size-1] << shift
	bdz	L(two)		C size==2


L(top):
	C r3	return value
	C r4	src, decrementing
	C r5	dst, decrementing
	C r6	lshift
	C r7	32-shift
	C r8	src[i+1] << shift
	C r9	src[i]
	C r10

	lwzu	r10, -4(r4)	C next lower limb, src[i-1]
	srw	r11, r9, r7	C high bits of src[i]

	or	r8, r8, r11	C completed limb dst[i+1]
	stwu	r8, -4(r5)

	slw	r8, r9, r6	C src[i] << shift
	bdz	L(odd)		C last limb loaded is in r10

	C r8	src[i+1] << shift
	C r9
	C r10	src[i]

	lwzu	r9, -4(r4)	C next lower limb, src[i-1]
	srw	r11, r10, r7	C high bits of src[i]

	or	r8, r8, r11	C completed limb dst[i+1]
	stwu	r8, -4(r5)

	slw	r8, r10, r6	C src[i] << shift
	bdnz	L(top)

	C CTR exhausted with the last limb loaded in r9, fall into the
	C two limb tail


L(two):
	C r3	return value
	C r4
	C r5	&dst[2]
	C r6	shift
	C r7	32-shift
	C r8	src[1] << shift
	C r9	src[0]
	C r10

	srw	r11, r9, r7
	slw	r12, r9, r6	C src[0] << shift

	or	r8, r8, r11
	stw	r12, -8(r5)	C dst[0]

	stw	r8, -4(r5)	C dst[1]
	blr


L(odd):
	C r3	return value
	C r4
	C r5	one limb above the two limbs still to be stored
	C r6	shift
	C r7	32-shift
	C r8	upper of the last two source limbs << shift
	C r9
	C r10	lowest source limb, the last one loaded

	srw	r11, r10, r7
	slw	r12, r10, r6	C lowest limb << shift

	or	r8, r8, r11
	stw	r12, -8(r5)	C lowest destination limb

	stw	r8, -4(r5)	C limb above it
	blr


L(one):
	C r5	&dst[1]
	C r6	shift
	C r7	32-shift
	C r8	src[0]

	srw	r3, r8, r7	C return value
	slw	r8, r8, r6	C src[size-1] << shift

	stw	r8, -4(r5)	C dst[0]
	blr

EPILOGUE(mpn_lshift)