dnl  Repository listing header: luatex.git, ref beta-0.89.2
dnl    source/libs/gmp/gmp-src/mpn/powerpc64/p6/lshift.asm
dnl    blob 1a200fb3463669b5875fa06aebd12b68248911cc
dnl  PowerPC-64 mpn_lshift -- rp[] = up[] << cnt

dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          ?
C POWER5                 2.25
C POWER6                 4

C TODO
C  * Micro-optimise header code
C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4236
C    bytes, 4-way code would become about 50% larger.

C INPUT PARAMETERS
C   rp_param  r3  destination pointer as passed in
C   up        r4  source pointer
C   n         r5  number of 8-byte limbs
C   cnt       r6  shift count in bits
define(`rp_param',  `r3')
define(`up',  `r4')
define(`n',   `r5')
define(`cnt', `r6')

C WORKING REGISTERS
C   tnc       r0  64 - cnt, the complementary shift count
C   retval    r3  return value; it shares r3 with rp_param, which is why the
C                 destination pointer is moved to rp before r3 is overwritten
C   rp        r7  working copy of the destination pointer
define(`tnc',`r0')
define(`retval',`r3')
define(`rp',  `r7')
C mpn_lshift -- shift the n limbs at up[] left by cnt bits and store the
C result at rp[].
C
C On entry:  r3 = rp_param, r4 = up, r5 = n, r6 = cnt.
C On exit:   r3 = most significant source limb shifted right by 64-cnt, that
C            is, the bits shifted out at the top.  Under HAVE_ABI_mode32 the
C            value is split: r3 gets the upper 32 bits and r4 a copy of the
C            unsplit value.
C
C Limbs are processed from the most significant end downwards, two per loop
C iteration.  There is one copy of the loop for every shift count from 1 to
C 63, each exactly 64 bytes long and with the count as an immediate.  The
C header code computes the address of the right entry point in the right
C copy and reaches it with blr, so the caller return address is parked in
C r12 meanwhile.  A count outside 1..63 would branch outside that table.
C NOTE(review): n is presumably required to be at least 1, since limbs are
C loaded before n is tested against anything but 1 -- confirm with callers.

ASM_START()
PROLOGUE(mpn_lshift,toc)

ifdef(`HAVE_ABI_mode32',`
	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
')
	mflr	r12			C save return address, lr is reused below
	sldi	r8, n, 3		C operand size in bytes
	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block
	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
	add	up, up, r8		C make up point at end of up[]
	add	r11, r11, r10		C L(e1) + 64*cnt, one block past the target
	srdi	r10, n, 1		C number of two-limb iterations
	add	rp, rp_param, r8	C make rp point at end of rp[]
	subfic	tnc, cnt, 64		C tnc = 64 - cnt
	rlwinm.	r8, n, 0,31,31		C extract bit 0
	mtctr	r10			C loop count for bdnz
	beq	L(evn)			C bit 0 clear, n is even

C n odd.  L(oN) lies 5 instructions, 20 bytes, before L(eN), hence -20 - 64.
C up and rp are biased by one limb so that the offsets used from L(oN) on
C address the right limbs.
L(odd):	ld	r9, -8(up)		C most significant source limb
	cmpdi	cr0, n, 1		C n = 1?
	beq	L(1)
	ld	r8, -16(up)		C next lower source limb
	addi	r11, r11, -84		C L(o1) - L(e1) - 64
	mtlr	r11
	srd	r3, r9, tnc		C retval
	addi	up, up, 8
	addi	rp, rp, -8
	blr				C branch to L(oN)

C n even.  Step back one block to reach L(eN) for N = cnt.
L(evn):	ld	r8, -8(up)		C most significant source limb
	ld	r9, -16(up)		C next lower source limb
	addi	r11, r11, -64
	mtlr	r11
	srd	r3, r8, tnc		C retval
	blr				C branch to L(eN)

C Single limb: no loop needed, use the variable-count shifts directly.
L(1):	srd	r3, r9, tnc		C retval
	sld	r8, r9, cnt
	std	r8, -8(rp)
	mtlr	r12			C restore return address
ifdef(`HAVE_ABI_mode32',
`	mr	r4, r3
	srdi	r3, r3, 32
')
	blr


C SHIFT(N) expands to the two-limb loop for a shift count of N.  L(loN) is
C the loop top, L(oN) the entry point for odd n and L(eN) the entry point for
C even n.  r8 and r9 alternate as the higher and lower source limb, r10 and
C r11 receive the combined result limbs.  The block is 14 instructions plus
C 2 nop of padding, 16 * 4 = 64 bytes, which the address arithmetic in the
C header code relies on.  Do not add or remove instructions without
C adjusting the padding and the offsets above.
define(SHIFT,`
L(lo$1):ld	r8, -24(up)
	std	r11, -8(rp)
	addi	rp, rp, -16
L(o$1):	srdi	r10, r8, eval(64-$1)
	rldimi	r10, r9, $1, 0
	ld	r9, -32(up)
	addi	up, up, -16
	std	r10, 0(rp)
L(e$1):	srdi	r11, r9, eval(64-$1)
	rldimi	r11, r8, $1, 0
	bdnz	L(lo$1)
	std	r11, -8(rp)
	sldi	r10, r9, $1
	b	L(com)
	nop
	nop
')

	ALIGN(64)
forloop(`i',1,63,`SHIFT(i)')

C Common exit for all SHIFT blocks: r10 holds the least significant result
C limb, r3 already holds the return value.
L(com):	std	r10, -16(rp)
	mtlr	r12			C restore return address
ifdef(`HAVE_ABI_mode32',
`	mr	r4, r3
	srdi	r3, r3, 32
')
	blr
EPILOGUE()
ASM_END()