beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc64 / mode64 / p7 / aormul_2.asm
blob8731e01a8949230fb92c270e70db90fb09a89e1d
1 dnl PowerPC-64 mpn_mul_2 and mpn_addmul_2.
3 dnl Copyright 2013 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb cycles/limb
34 C mul_2 addmul_2
35 C POWER3/PPC630 ? ?
36 C POWER4/PPC970 ? ?
37 C POWER5 ? ?
38 C POWER6 ? ?
39 C POWER7-SMT4 3 3
40 C POWER7-SMT2 ? ?
41 C POWER7-SMT1 ? ?
43 C INPUT PARAMETERS
C Argument registers, in call order.
C   rp  destination limb pointer (written with std)
C   up  source limb pointer (read one limb at a time)
C   n   limb count of the up operand; its register is reused once the
C       loop counter has been set up
C   vp  pointer to the two-limb multiplier (offsets 0 and 8 are read)
define(`rp', `r3')
define(`up', `r4')
define(`n',  `r5')
define(`vp', `r6')

C Running carry limbs handed from one loop step to the next.  cy1 lives in
C EXTRA_REGISTER when the build supplies one and otherwise in r31, which the
C function body saves and restores around its use.
define(`cy0', `r10')
ifdef(`EXTRA_REGISTER',
`  define(`cy1', EXTRA_REGISTER)',
`  define(`cy1', `r31')')
C Pick the variant being assembled.
C   AM    wraps instructions that only the accumulating variant needs; it
C         expands to nothing for mul_2 and to its argument for addmul_2.
C   ADDX  the add that merges the two weight-1 partial products.  In mul_2
C         it opens the carry chain, so it is addc.  In addmul_2 it follows
C         the addc that folds in the old destination limb, so it must
C         consume that carry and is adde.
C   func  the exported symbol name.
ifdef(`OPERATION_mul_2',`
  define(`AM',		`')
  define(`ADDX',	`addc')
  define(`func',	`mpn_mul_2')
')
ifdef(`OPERATION_addmul_2',`
  define(`AM',		`$1')
  define(`ADDX',	`adde')
  define(`func',	`mpn_addmul_2')
')

MULFUNC_PROLOGUE(mpn_mul_2 mpn_addmul_2)

ASM_START()
C ---------------------------------------------------------------------------
C func (rp, up, n, vp)
C
C Multiplies the n-limb operand at up by the two-limb operand at vp, where
C v0 is read from offset 0 and v1 from offset 8.  Result limbs 0 through n
C are stored at rp and the most significant limb is returned in r3.  When
C built as mpn_addmul_2 the existing limbs 0 through n-1 at rp are added
C into the product before being stored back.
C
C n must be at least 1: the loop count is (n+1)/2 and a zero count would
C make bdnz wrap around.
C
C Registers: r11 = v0, r12 = v1, r5 = current u limb, r6..r9 = the four
C partial products, r0 = scratch, cy0 and cy1 = carry limbs between steps.
C
C The loop is software pipelined and handles two limbs per pass.  Each half
C issues the multiplies for the next limb and the following half folds those
C products into the result, so the instruction order must not be changed
C casually.
C ---------------------------------------------------------------------------
PROLOGUE(func)

C cy1 defaults to r31, a register the caller expects to be preserved.  It is
C parked just below the stack pointer and reloaded before returning.
ifdef(`EXTRA_REGISTER',,`
	std	r31, -8(r1)
')
	andi.	r12, n, 1	C sets cr0 from the low bit of n; r12 is reloaded below
	addi	r0, n, 1
	srdi	r0, r0, 1
	mtctr	r0		C ctr = (n+1)/2 passes
	ld	r11, 0(vp)	C v0
	li	cy0, 0
	ld	r12, 8(vp)	C v1
	li	cy1, 0
	ld	r5, 0(up)	C first u limb; overwrites n which is no longer needed
	beq	L(lo0)		C even n: enter at the first half
	addi	up, up, -8	C odd n: bias both pointers so that the
	addi	rp, rp, -8	C offsets used from lo1 onward line up
	b	L(lo1)

	ALIGN(32)
L(top):
C Fold in the products issued at lo1 and store the limb at -8(rp).
AM(`	ld	r0, -8(rp)')
	ld	r5, 0(up)
AM(`	addc	r6, r6, r0')
	ADDX	r7, r7, r8
	addze	r9, r9
	addc	r6, r6, cy0
	adde	cy0, r7, cy1
	std	r6, -8(rp)
	addze	cy1, r9
L(lo0):	mulld	r6, r11, r5	C v0 * u[i] weight 0
	mulhdu	r7, r11, r5	C v0 * u[i] weight 1
	mulld	r8, r12, r5	C v1 * u[i] weight 1
	mulhdu	r9, r12, r5	C v1 * u[i] weight 2
C Fold in the products issued at lo0 and store the limb at 0(rp).
AM(`	ld	r0, 0(rp)')
	ld	r5, 8(up)
AM(`	addc	r6, r6, r0')
	ADDX	r7, r7, r8
	addze	r9, r9
	addc	r6, r6, cy0
	adde	cy0, r7, cy1
	std	r6, 0(rp)
	addze	cy1, r9
L(lo1):	mulld	r6, r11, r5	C v0 * u[i] weight 0
	mulhdu	r7, r11, r5	C v0 * u[i] weight 1
	addi	up, up, 16
	addi	rp, rp, 16
	mulld	r8, r12, r5	C v1 * u[i] weight 1
	mulhdu	r9, r12, r5	C v1 * u[i] weight 2
	bdnz	L(top)

L(end):
C Drain the pipeline: fold in the last set of products, store limb n-1 at
C -8(rp) and limb n at 0(rp), and return the top limb.
AM(`	ld	r0, -8(rp)')
AM(`	addc	r6, r6, r0')
	ADDX	r7, r7, r8
	addze	r9, r9
	addc	r6, r6, cy0
	std	r6, -8(rp)
	adde	cy0, r7, cy1
	addze	cy1, r9
	std	cy0, 0(rp)
	mr	r3, cy1		C return value

ifdef(`EXTRA_REGISTER',,`
	ld	r31, -8(r1)
')
	blr
EPILOGUE()