dnl  luatex.git beta-0.89.2
dnl  source/libs/gmp/gmp-src/mpn/powerpc64/p6/lshiftc.asm
dnl  blob e4b3caaab8a5d78c659380541233daf9b792c92a
dnl  PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt

dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          ?
C POWER5                 2.25
C POWER6                 4

C TODO
C  * Micro-optimise header code
C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4236
C    bytes, 4-way code would become about 50% larger.

C INPUT PARAMETERS
C  rp_param  destination pointer as received in r3
C  up        source pointer
C  n         limb count
C  cnt       shift count in bits
define(`rp_param',  `r3')
define(`up',  `r4')
define(`n',   `r5')
define(`cnt', `r6')

C WORKING REGISTERS
C  tnc     holds 64 - cnt
C  retval  is the same register as rp_param (r3)
C  rp      working destination pointer, computed from rp_param
define(`tnc',`r0')
define(`retval',`r3')
define(`rp',  `r7')

C mpn_lshiftc -- store the bitwise complement of {up,n} shifted left by cnt
C bits at {rp,n}, and return the bits shifted out of the most significant
C limb (the return value is not complemented).
C
C Limbs are processed from the most significant end downwards, two per loop
C iteration.  There is one copy of the loop for every shift count 1..63, each
C exactly 16 instructions (64 bytes) long, so the entry point for a given cnt
C is found by address arithmetic from L(e1) and reached through LR.  The
C caller return address is parked in r12 meanwhile.
C
C No check is made on the arguments; the code assumes n >= 1 and
C 1 <= cnt <= 63.

ASM_START()
PROLOGUE(mpn_lshiftc,toc)

ifdef(`HAVE_ABI_mode32',`
	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
')
	mflr	r12			C save return address, LR is used for dispatch
	sldi	r8, n, 3		C r8 = operand size in bytes
	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block
	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
	add	up, up, r8		C make up point at end of up[]
	add	r11, r11, r10		C L(e1) + 64*cnt, one block too high until fixed up
	srdi	r10, n, 1		C r10 = n/2, loop count for CTR
	add	rp, rp_param, r8	C make rp point at end of rp[]
	subfic	tnc, cnt, 64		C tnc = 64 - cnt
	rlwinm.	r8, n, 0,31,31		C extract bit 0
	mtctr	r10
	beq	L(evn)

C n odd: enter the loop body at its midpoint L(oN) so that one limb is
C produced before the first full 2-limb step.
L(odd):	ld	r9, -8(up)		C most significant limb
	cmpdi	cr0, n, 1		C n = 1?
	beq	L(1)
	ld	r8, -16(up)
	addi	r11, r11, -88		C L(o1) - L(e1) - 64
	mtlr	r11
	srd	r3, r9, tnc		C retval
	addi	up, up, 8		C bias pointers for the mid-loop entry
	addi	rp, rp, -8
	blr				C branch to L(oN)

C n even: enter at L(eN) with the two most significant limbs loaded.
L(evn):	ld	r8, -8(up)
	ld	r9, -16(up)
	addi	r11, r11, -64		C step back one block to L(eN)
	mtlr	r11
	srd	r3, r8, tnc		C retval
	blr				C branch to L(eN)

C n = 1: single limb, no loop needed.
L(1):	srd	r3, r9, tnc		C retval
	sld	r8, r9, cnt
	nor	r8, r8, r8		C complement
	std	r8, -8(rp)
	mtlr	r12			C restore return address
ifdef(`HAVE_ABI_mode32',
`	mr	r4, r3			C r4 = full result
	srdi	r3, r3, 32		C r3 = its upper 32 bits
')
	blr


C SHIFT(c): one copy of the 2-limb loop for shift count c.  Each result limb
C is (higher limb << c) | (lower limb >> (64-c)), complemented before the
C store.  The block must stay at exactly 16 instructions since the dispatch
C above assumes a 64-byte stride; the trailing nop provides the padding.
define(SHIFT,`
L(lo$1):ld	r8, -24(up)
	nor	r11, r11, r11
	std	r11, -8(rp)
	addi	rp, rp, -16
L(o$1):	srdi	r10, r8, eval(64-$1)
	rldimi	r10, r9, $1, 0
	ld	r9, -32(up)
	addi	up, up, -16
	nor	r10, r10, r10
	std	r10, 0(rp)
L(e$1):	srdi	r11, r9, eval(64-$1)
	rldimi	r11, r8, $1, 0
	bdnz	L(lo$1)
	sldi	r10, r9, $1		C least significant result limb
	b	L(com)
	nop				C pad block to 64 bytes
')

	ALIGN(64)
forloop(`i',1,63,`SHIFT(i)')

C Common tail: complement and store the two least significant result limbs.
L(com):	nor	r11, r11, r11
	nor	r10, r10, r10
	std	r11, -8(rp)
	std	r10, -16(rp)
	mtlr	r12			C restore return address
ifdef(`HAVE_ABI_mode32',
`	mr	r4, r3			C r4 = full result
	srdi	r3, r3, 32		C r3 = its upper 32 bits
')
	blr
EPILOGUE()
ASM_END()