dnl  Provenance (gitweb view): tag beta-0.89.2
dnl  [luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc64 / mode64 / rsh1aors_n.asm
dnl  blob 1f57bdf8d3f6ff0ee90aeb684c327ac6344224bf
dnl  PowerPC-64 mpn_rsh1add_n, mpn_rsh1sub_n

dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C                  cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          2.9
C POWER5                 ?
C POWER6                 3.5
C POWER7                 2.25

C Incoming argument registers, in the order the code below uses them:
C   rp  destination limb pointer
C   up  first source limb pointer
C   vp  second source limb pointer
C   n   limb count
define(`rp', `r3')
define(`up', `r4')
define(`vp', `r5')
define(`n',  `r6')

C Operation selection.  Exactly one of OPERATION_rsh1add_n and
C OPERATION_rsh1sub_n is expected to be defined by the build; it picks the
C carry-setting and carry-consuming instruction pair and the entry point name.
C With subfc/subfe the operand order used below (v0, u0) yields u - v.
C INITCY (set up an initial carry state in the register given as its
C argument) is defined for both variants but is not referenced by the code
C visible in this file.
ifdef(`OPERATION_rsh1add_n', `
  define(`ADDSUBC',     `addc')
  define(`ADDSUBE',     `adde')
  define(`INITCY',      `addic  $1, r1, 0')
  define(`func', mpn_rsh1add_n)')
ifdef(`OPERATION_rsh1sub_n', `
  define(`ADDSUBC',     `subfc')
  define(`ADDSUBE',     `subfe')
  define(`INITCY',      `addic  $1, r1, -1')
  define(`func', mpn_rsh1sub_n)')

C Scratch registers:
C   x0, x1  alternating raw sum/difference limbs (x0 shares r0 with the
C           temporary used for the loop-count setup)
C   s0, s1  alternating shifted limbs being assembled for the store
C   u0, v0  limbs most recently loaded from up and vp
define(`s0', `r9')
define(`s1', `r7')
define(`x0', `r0')
define(`x1', `r12')
define(`u0', `r8')
define(`v0', `r10')

MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)

ASM_START()
C func (mpn_rsh1add_n or mpn_rsh1sub_n, chosen by the OPERATION_* define)
C
C Combines the n limbs at up and vp limb by limb with ADDSUBC/ADDSUBE
C (up + vp, or up - vp for the subtract variant), shifts the multi-limb
C result right by one bit and stores n limbs at rp.
C
C   In:   rp, up, vp, n (see the register defines).  The first limbs are
C         loaded unconditionally, so n >= 1 is assumed.
C   Out:  r3 = bit shifted out at the bottom, i.e. the least significant bit
C         of the lowest sum/difference limb (collected in r11).
C         The carry (add) or borrow (sub) out of the top limb becomes the
C         most significant bit of the top stored limb.
C   Uses: r0, r7-r12, ctr, cr0, cr6, CA.
C
C The main loop is unrolled four ways.  ctr = (n + 1) / 4, and the feed-in
C code selects the loop entry point from n mod 4 after pre-biasing the
C pointers.  Each feed-in reads limbs ahead of the ones it has stored;
C n = 1 and n = 2 branch to L(n1)/L(n2) before any such read.
PROLOGUE(func)
        ld      u0, 0(up)
        ld      v0, 0(vp)

        cmpdi   cr6, n, 2               C cr6 tested later: n <= 2 uses short tails

        addi    r0, n, 1
        srdi    r0, r0, 2
        mtctr   r0                      C ctr = (n + 1) / 4

        andi.   r0, n, 1
        bne     cr0, L(bx1)             C odd n

C Even n: two limbs are combined before the n = 2 check.
L(bx0): ADDSUBC x1, v0, u0
        ld      u0, 8(up)
        ld      v0, 8(vp)
        ADDSUBE x0, v0, u0
        ble     cr6, L(n2)
        ld      u0, 16(up)
        ld      v0, 16(vp)
        srdi    s0, x1, 1
        rldicl  r11, x1, 0, 63          C return value
        ADDSUBE x1, v0, u0
        andi.   n, n, 2
        bne     cr0, L(b10)
L(b00): addi    rp, rp, -24
        b       L(lo0)
L(b10): addi    up, up, 16
        addi    vp, vp, 16
        addi    rp, rp, -8
        b       L(lo2)

C Odd n: one limb is combined before the n = 1 check.
        ALIGN(16)
L(bx1): ADDSUBC x0, v0, u0
        ble     cr6, L(n1)
        ld      u0, 8(up)
        ld      v0, 8(vp)
        ADDSUBE x1, v0, u0
        ld      u0, 16(up)
        ld      v0, 16(vp)
        srdi    s1, x0, 1
        rldicl  r11, x0, 0, 63          C return value
        ADDSUBE x0, v0, u0
        andi.   n, n, 2
        bne     cr0, L(b11)
L(b01): addi    up, up, 8
        addi    vp, vp, 8
        addi    rp, rp, -16
        b       L(lo1)
L(b11): addi    up, up, 24
        addi    vp, vp, 24
        bdz     L(end)

C Main loop.  Each quarter shifts the newest raw limb, inserts its low bit
C as bit 63 of the previous shifted limb, stores that limb, and combines the
C next pair of source limbs with the running carry.
        ALIGN(32)
L(top): ld      u0, 0(up)
        ld      v0, 0(vp)
        srdi    s0, x1, 1
        rldimi  s1, x1, 63, 0
        std     s1, 0(rp)
        ADDSUBE x1, v0, u0
L(lo2): ld      u0, 8(up)
        ld      v0, 8(vp)
        srdi    s1, x0, 1
        rldimi  s0, x0, 63, 0
        std     s0, 8(rp)
        ADDSUBE x0, v0, u0
L(lo1): ld      u0, 16(up)
        ld      v0, 16(vp)
        srdi    s0, x1, 1
        rldimi  s1, x1, 63, 0
        std     s1, 16(rp)
        ADDSUBE x1, v0, u0
L(lo0): ld      u0, 24(up)
        ld      v0, 24(vp)
        srdi    s1, x0, 1
        rldimi  s0, x0, 63, 0
        std     s0, 24(rp)
        ADDSUBE x0, v0, u0
        addi    up, up, 32
        addi    vp, vp, 32
        addi    rp, rp, 32
        bdnz    L(top)

C Wind-down: flush the two raw limbs still in flight, then place the final
C carry/borrow in bit 63 of the top limb.  ADDSUBE x1, x1, x1 leaves CA
C (adde) or its complement, the borrow (subfe), in bit 0 of x1.
L(end): srdi    s0, x1, 1
        rldimi  s1, x1, 63, 0
        std     s1, 0(rp)
L(cj2): srdi    s1, x0, 1
        rldimi  s0, x0, 63, 0
        std     s0, 8(rp)
L(cj1): ADDSUBE x1, x1, x1              C pseudo-depends on x1
        rldimi  s1, x1, 63, 0
        std     s1, 16(rp)
        mr      r3, r11
        blr                             C return; must not fall into L(n1)

C n = 1: single limb, top bit supplied by the carry/borrow.
L(n1):  srdi    s1, x0, 1
        rldicl  r11, x0, 0, 63          C return value
        ADDSUBE x1, x1, x1              C pseudo-depends on x1
        rldimi  s1, x1, 63, 0
        std     s1, 0(rp)
        mr      r3, r11
        blr                             C return; must not fall into L(n2)

C n = 2: bias rp so the shared tail stores at 0(rp) and 8(rp) of the caller.
L(n2):  addi    rp, rp, -8
        srdi    s0, x1, 1
        rldicl  r11, x1, 0, 63          C return value
        b       L(cj2)
EPILOGUE()