dnl  PowerPC-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
dnl  Copyright 1999-2001, 2003-2005, 2007, 2011 Free Software Foundation, Inc.
dnl  This file is part of the GNU MP Library.
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
dnl  Pull in the shared m4 configuration.  The quoted path has to sit on one
dnl  line; a line break inside the quotes would become part of the file name.
include(`../config.m4')
C This code is a little bit slower for POWER3/PPC630 than the simple code used
C previously, but it is much faster for POWER4/PPC970.  The reason for the
C POWER3/PPC630 slowdown can be attributed to the saving and restoring of 4
C registers.
dnl  Operation selection.  Each branch below takes effect only when its
dnl  OPERATION_ symbol is defined, and both branches supply the same set of
dnl  macros so that one body of code serves addition and subtraction.
dnl
dnl    ADDSUBC  limb operation that reads and writes the carry bit (CA):
dnl             adde for addition, subfe for subtraction.
dnl    func     name of the plain entry point.
dnl    func_nc  name of the _nc entry point (presumably the variant that
dnl             takes an incoming carry or borrow; confirm against callers).
dnl    GENRVAL  adjusts the value held in r3: adds 1 for addition, negates
dnl             for subtraction.
dnl    SETCBR   takes one register argument and sets CA from it, using r0 as
dnl             scratch.  Addition: CA is set when the register is nonzero.
dnl             Subtraction: CA is set when the register is zero.
dnl    CLRCB    puts CA in the neutral state, using r0 as scratch.  Addition
dnl             adds 0 to r0, which clears CA.  Subtraction adds -1 to r1,
dnl             which sets CA as long as r1 is nonzero.
dnl
dnl  Every macro name must be followed immediately by its opening
dnl  parenthesis, otherwise m4 does not treat it as a call.
dnl
ifdef(`OPERATION_add_n',`
  define(ADDSUBC,  adde)
  define(func,     mpn_add_n)
  define(func_nc,  mpn_add_nc)
  define(GENRVAL,  `addi r3, r3, 1')
  define(SETCBR,   `addic r0, $1, -1')
  define(CLRCB,    `addic r0, r0, 0')
')
ifdef(`OPERATION_sub_n',`
  define(ADDSUBC,  subfe)
  define(func,     mpn_sub_n)
  define(func_nc,  mpn_sub_nc)
  define(GENRVAL,  `neg r3, r3')
  define(SETCBR,   `subfic r0, $1, 0')
  define(CLRCB,    `addic r0, r1, -1')
')
C Names the four functions this source can provide: the add and subtract
C forms, each with an _nc companion.
C NOTE(review): MULFUNC_PROLOGUE is not defined in this file (presumably it
C comes from the included config.m4), so its exact effect cannot be confirmed
C from here.  The leading integer on the line looks like a line number fused
C in by a text-extraction step -- confirm against the upstream file.
70 MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
C Common entry sequence: spill r31, then derive the residue (n mod 4) and the
C loop count from n, which the existing comments place in r6.
C NOTE(review): each line here begins with an integer that looks like a source
C line number fused in by a text-extraction step, and the gaps in that
C numbering suggest instructions are missing from this view (for example the
C branches that would consume cr0).  Confirm against the upstream file.
C
C r31 is stored in the doubleword just below the stack pointer (r1).
80 L(ent): std r31, -8(r1)
85 rldicl. r0, r6, 0,62 C r0 = n & 3, set cr0
C ctr = (n + 3) >> 2, that is n/4 rounded up.
87 addi r6, r6, 3 C compute count...
88 srdi r6, r6, 2 C ...for ctr
89 mtctr r6 C copy count into ctr
C Entry paths that pre-load a partial group of limbs.  s1 limbs are read
C through r4 and s2 limbs through r5, as the per-line comments state.  The
C labels presumably correspond to n mod 4 = 3, 1 and 2, matching the number
C of limb pairs each path loads; the dispatching branches are not in this
C view.
C When ADDSUBC is subfe, passing the s2 limb before the s1 limb makes each
C operation compute the s1 limb minus the s2 limb, with the borrow carried
C in CA.
C NOTE(review): the leading integers look like fused line numbers and their
C gaps suggest dropped instructions (stores, pointer updates, branches) --
C confirm against the upstream file.
C
C b11: three limb pairs are loaded.  Only the operation on the middle pair
C (r10, r11) is visible here.
94 L(b11): ld r8, 0(r4) C load s1 limb
95 ld r9, 0(r5) C load s2 limb
96 ld r10, 8(r4) C load s1 limb
97 ld r11, 8(r5) C load s2 limb
98 ld r12, 16(r4) C load s1 limb
100 ld r0, 16(r5) C load s2 limb
103 ADDSUBC r30, r11, r10
C b01: a single limb pair, result placed in r31.
112 L(b01): ld r12, 0(r4) C load s1 limb
114 ld r0, 0(r5) C load s2 limb
116 ADDSUBC r31, r0, r12 C add
C b10: two limb pairs, results placed in r30 and r31.
122 L(b10): ld r10, 0(r4) C load s1 limb
123 ld r11, 0(r5) C load s2 limb
124 ld r12, 8(r4) C load s1 limb
126 ld r0, 8(r5) C load s2 limb
128 ADDSUBC r30, r11, r10 C add
129 ADDSUBC r31, r0, r12 C add
C Main path, handling four limb pairs per pass.
C
C b00 / go: pre-load four limb pairs from offsets 0, 8, 16 and 24 of the s1
C pointer (r4) and the s2 pointer (r5).  The INITCY on the b00 line sits
C behind the comment marker, so nothing is emitted for it.
136 L(b00): C INITCY C clear/set cy
137 L(go): ld r6, 0(r4) C load s1 limb
138 ld r7, 0(r5) C load s2 limb
139 ld r8, 8(r4) C load s1 limb
140 ld r9, 8(r5) C load s2 limb
141 ld r10, 16(r4) C load s1 limb
142 ld r11, 16(r5) C load s2 limb
143 ld r12, 24(r4) C load s1 limb
144 ld r0, 24(r5) C load s2 limb
C top: combine the limb pairs loaded previously and reload the same eight
C registers for the next pass; bdnz repeats while the decremented ctr is
C nonzero.
C NOTE(review): only the ADDSUBC for r6/r7 and r10/r11 appear in this view,
C and no stores or updates of r4/r5 are visible.  The leading integers look
C like fused line numbers and their gaps suggest those instructions were
C dropped -- confirm against the upstream file.
151 L(top): ADDSUBC r28, r7, r6
152 ld r6, 0(r4) C load s1 limb
153 ld r7, 0(r5) C load s2 limb
155 ld r8, 8(r4) C load s1 limb
156 ld r9, 8(r5) C load s2 limb
157 ADDSUBC r30, r11, r10
158 ld r10, 16(r4) C load s1 limb
159 ld r11, 16(r5) C load s2 limb
161 ld r12, 24(r4) C load s1 limb
162 ld r0, 24(r5) C load s2 limb
170 bdnz L(top) C decrement ctr and loop back
C Loop exit and return path.
C
C end: process the limb pairs still held in registers after the loop.  Only
C the r6/r7 and r10/r11 operations appear in this view.
172 L(end): ADDSUBC r28, r7, r6
174 ADDSUBC r30, r11, r10
C ret: reload r31 from the doubleword at -8(r1), then turn the final carry
C bit into a register value.  subfe r3, r0, r0 yields CA - 1, which is 0 when
C CA is set and -1 when it is clear.
C NOTE(review): a GENRVAL expansion and the return instruction presumably
C follow but are not in this view.  The leading integers look like fused
C line numbers -- confirm against the upstream file.
181 L(ret): ld r31, -8(r1)
186 subfe r3, r0, r0 C -cy