dnl  beta-0.89.2
dnl  [luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc64 / mode64 / p7 / aors_n.asm
dnl  blob 857c701dec363c1b3fcff75143ea458a136f4ac9
dnl  PowerPC-64 mpn_add_n, mpn_sub_n optimised for POWER7.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')
C                   cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          ?
C POWER5                 ?
C POWER6                 ?
C POWER7                2.18

C This is a tad bit slower than the cnd_aors_n.asm code, which is of course an
C anomaly.
dnl  Macro set used when OPERATION_add_n is defined (addition build).
dnl
dnl    ADDSUBC  per-limb instruction: adde, add with carry-in from CA
dnl    ADDSUB   addc, add without carry-in; not referenced elsewhere in this file
dnl    func     name of the entry point without a carry-in argument
dnl    func_nc  name of the entry point with a carry-in argument
dnl    GENRVAL  the closing subfe leaves CA-1 in r3 (0 or -1); adding 1 turns
dnl             that into the carry-out, 1 or 0
dnl    SETCBR   reg + (-1) carries out exactly when reg is nonzero, so
dnl             CA := (reg != 0); r0 receives a throwaway result
dnl    CLRCB    r0 + 0 never carries out, so CA := 0
dnl
dnl  The closing quote-paren line below terminates the quoted ifdef body; without
dnl  it m4 would treat the remainder of the file as part of that argument.
ifdef(`OPERATION_add_n',`
  define(ADDSUBC,       adde)
  define(ADDSUB,        addc)
  define(func,          mpn_add_n)
  define(func_nc,       mpn_add_nc)
  define(GENRVAL,       `addi   r3, r3, 1')
  define(SETCBR,        `addic  r0, $1, -1')
  define(CLRCB,         `addic  r0, r0, 0')
')
dnl  Macro set used when OPERATION_sub_n is defined (subtraction build).
dnl  For the subtract-extended instructions CA = 1 means "no borrow".
dnl
dnl    ADDSUBC  per-limb instruction: subfe, subtract with borrow-in from CA
dnl    ADDSUB   subfc, subtract without borrow-in; not referenced elsewhere in
dnl             this file
dnl    func     name of the entry point without a borrow-in argument
dnl    func_nc  name of the entry point with a borrow-in argument
dnl    GENRVAL  the closing subfe leaves CA-1 in r3 (0 or -1); negating turns
dnl             that into the borrow-out, 0 or 1
dnl    SETCBR   0 - reg carries out exactly when reg is zero, so
dnl             CA := (reg == 0); r0 receives a throwaway result
dnl    CLRCB    r1 + (-1) carries out whenever r1 is nonzero, so CA := 1 (no
dnl             borrow).  NOTE(review): relies on r1 being nonzero; r1 is the
dnl             stack pointer in the PowerPC ELF ABIs - confirm for the target.
dnl
dnl  The closing quote-paren line below terminates the quoted ifdef body; without
dnl  it m4 would treat the remainder of the file as part of that argument.
ifdef(`OPERATION_sub_n',`
  define(ADDSUBC,       subfe)
  define(ADDSUB,        subfc)
  define(func,          mpn_sub_n)
  define(func_nc,       mpn_sub_nc)
  define(GENRVAL,       `neg    r3, r3')
  define(SETCBR,        `subfic r0, $1, 0')
  define(CLRCB,         `addic  r0, r1, -1')
')
dnl  Lists the four entry points this one source file can be built into.
dnl  NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably it
dnl  is what the GMP build scans to learn which functions the file provides.
MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

C INPUT PARAMETERS
C   rp  address the result limbs are stored to
C   up  address the first source limbs are loaded from
C   vp  address the second source limbs are loaded from
C   n   number of 8-byte limbs to process
C The carry-in entry point additionally reads its carry word from r7.
define(`rp',    `r3')
define(`up',    `r4')
define(`vp',    `r5')
define(`n',     `r6')
ASM_START()

C func_nc -- carry-in variant (mpn_add_nc or mpn_sub_nc, depending on which
C OPERATION_* macro set is active).
C
C In:    rp, up, vp, n as for func; r7 = carry/borrow-in word
C Out:   as for func
C Clobb: r0 and CA here, then everything func clobbers
C
C Primes the CA bit from r7 with SETCBR and then joins the shared body at
C the ent label inside func, skipping the CLRCB that func starts with.
PROLOGUE(func_nc)
        SETCBR(r7)
        b       L(ent)
EPILOGUE()
C func -- mpn_add_n or mpn_sub_n, depending on which OPERATION_* macro set
C is active.  Combines the n limbs at up with the n limbs at vp (up + vp, or
C up - vp) and stores the n result limbs at rp.
C
C In:    rp (r3), up (r4), vp (r5), n (r6)
C        n must be at least 1: with n = 0 the loop counter would be loaded
C        with 0 and bdnz would wrap around.
C Out:   r3 = carry-out (add) or borrow-out (sub), 0 or 1
C Clobb: r0, r4-r11, cr0, cr6, ctr, CA
C
C The carry chain lives in the CA bit for the whole function.  Only ADDSUBC
C and the closing subfe read or write CA; the pointer updates (addi), the
C count computation (addi, srdi, mtctr), the tests (andi., cmpldi) and the
C branches leave it untouched.
C
C Layout: an odd n is reduced by peeling one limb at bx1.  The main loop
C handles four limbs per trip in two halves; when the even remainder is
C 2 mod 4 the loop is entered at mid so that the first trip handles two.
PROLOGUE(func)
        CLRCB                           C neutral carry-in for the plain entry
L(ent):
        andi.   r7, n, 1                C odd limb count?
        beq     L(bx0)

L(bx1): ld      r7, 0(up)               C peel one limb
        ld      r9, 0(vp)
        ADDSUBC r11, r9, r7
        std     r11, 0(rp)
        cmpldi  cr6, n, 1
        beq     cr6, L(end)             C that was the only limb
        addi    up, up, 8
        addi    vp, vp, 8
        addi    rp, rp, 8

L(bx0): addi    r0, n, 2                C compute branch...
        srdi    r0, r0, 2               C ...count
        mtctr   r0

        andi.   r7, n, 2
        bne     L(mid)                  C remainder is 2 mod 4: start mid-loop

        addi    up, up, 16              C bias pointers so the top half can
        addi    vp, vp, 16              C address its limbs at -16 and -8
        addi    rp, rp, 16

        ALIGN(32)
L(top): ld      r6, -16(up)             C n is dead from here on; r6 is reused
        ld      r7, -8(up)
        ld      r8, -16(vp)
        ld      r9, -8(vp)
        ADDSUBC r10, r8, r6
        ADDSUBC r11, r9, r7
        std     r10, -16(rp)
        std     r11, -8(rp)
L(mid): ld      r6, 0(up)
        ld      r7, 8(up)
        ld      r8, 0(vp)
        ld      r9, 8(vp)
        ADDSUBC r10, r8, r6
        ADDSUBC r11, r9, r7
        std     r10, 0(rp)
        std     r11, 8(rp)
        addi    up, up, 32
        addi    vp, vp, 32
        addi    rp, rp, 32
        bdnz    L(top)

L(end): subfe   r3, r0, r0              C r3 = CA - 1, i.e. 0 or -1
        GENRVAL                         C map to the 0/1 return value
        blr                             C return to caller; without this the
                                        C code would run past the function end
EPILOGUE()