dnl  PowerPC-64 mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n.
dnl  Copyright 2003, 2005, 2009, 2010, 2013 Free Software Foundation, Inc.
dnl  This file is part of the GNU MP Library.
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
C POWER3/PPC630          1.83   (1.5 c/l should be possible)
C POWER4/PPC970          3      (2.0 c/l should be possible)
C  * Try combining upx+up, and vpx+vp.
C  * The worst case 47 c/l for POWER6 happens if the 3rd operand for ldx is
C    greater than the 2nd operand.  Yes, this addition is non-commutative wrt
C    performance.
dnl  Operation macros for the add flavour of this file.
dnl    ADDSUBC(d,a,b)  expands to addc d,a,b    d = a + b, carry-out recorded in CA
dnl    ADDSUBE(d,a,b)  expands to adde d,a,b    d = a + b + CA, carry-out recorded in CA
dnl    INITCY(d)       expands to addic d,r1,0  adding zero cannot carry, so CA becomes 0
dnl    RETVAL(x)       expands to addze r3,x    r3 = x + CA
dnl    func            expands to mpn_addlsh, then whatever LSH expands to, then _n
dnl  LSH is not defined in the visible part of the file.
dnl  NOTE(review): the close-paren and close-quote that follow the func
dnl  definition have no matching opener in this view; presumably they end an
dnl  enclosing m4 conditional whose first line is not shown -- confirm.
dnl  NOTE(review): several macro bodies below are split across lines and start
dnl  with numbers (51, 52, ...) that look like listing line numbers fused in by
dnl  an extraction step -- confirm against a pristine copy of the file.
51 define(`ADDSUBC', `addc
$1, $2, $3')
52 define(`ADDSUBE', `adde
$1, $2, $3')
53 define(INITCY, `addic $1, r1, 0')
54 define
(RETVAL
, `addze r3
, $1')
55 define(`func', mpn_addlsh`
'LSH`'_n
)')
dnl  Operation macros for the subtract flavour of this file.
dnl    ADDSUBC(d,a,b)  expands to subfc d,a,b    d = b - a, CA = 1 when there is no borrow
dnl    ADDSUBE(d,a,b)  expands to subfe d,a,b    d = NOT(a) + b + CA
dnl    INITCY(d)       expands to addic d,r1,-1  adding -1 sets CA whenever r1 is non-zero
dnl    RETVAL(x)       begins with subfze r3,x   r3 = NOT(x) + CA
dnl    func            expands to mpn_sublsh, then whatever LSH expands to, then _n
dnl  INITCY presumably relies on r1 being the stack pointer and therefore never
dnl  zero, so that the chain starts with no pending borrow -- TODO confirm.
dnl  NOTE(review): the RETVAL body is never closed in this view and the
dnl  embedded numbering skips from 60 to 62, so the remainder of that macro was
dnl  presumably lost in extraction -- restore it from a pristine copy.
dnl  NOTE(review): the close-paren and close-quote that follow the func
dnl  definition have no matching opener in this view; presumably they end an
dnl  enclosing m4 conditional whose first line is not shown -- confirm.
57 define
(`ADDSUBC
', `subfc $1, $2, $3')
58 define
(`ADDSUBE
', `subfe $1, $2, $3')
59 define
(INITCY
, `addic
$1, r1
, -1')
60 define(RETVAL, `subfze r3, $1
62 define
(`func
', mpn_sublsh`'LSH`
'_n)')
dnl  Operation macros for the reverse-subtract flavour of this file.  The
dnl  second and third macro arguments are passed to subfc/subfe in swapped
dnl  order compared with the sublsh definitions, which reverses the direction
dnl  of the subtraction.
dnl    ADDSUBC(d,a,b)  expands to subfc d,b,a    d = a - b, CA = 1 when there is no borrow
dnl    ADDSUBE(d,a,b)  expands to subfe d,b,a    d = NOT(b) + a + CA
dnl    INITCY(d)       expands to addic d,r1,-1  adding -1 sets CA whenever r1 is non-zero
dnl    RETVAL(x)       expands to addme r3,x     r3 = x + CA - 1
dnl    func            expands to mpn_rsblsh, then whatever LSH expands to, then _n
dnl  INITCY presumably relies on r1 being the stack pointer and therefore never
dnl  zero, so that the chain starts with no pending borrow -- TODO confirm.
dnl  NOTE(review): the close-paren and close-quote that follow the func
dnl  definition have no matching opener in this view; presumably they end an
dnl  enclosing m4 conditional whose first line is not shown -- confirm.
64 define(`ADDSUBC', `subfc
$1, $3, $2')
65 define(`ADDSUBE', `subfe
$1, $3, $2')
66 define(INITCY, `addic $1, r1, -1')
67 define
(RETVAL
, `addme r3
, $1')
68 define(`func', mpn_rsblsh`
'LSH`'_n
)')
dnl  Register aliases used by the code below:
dnl    s0 -> r0,  s1 -> r9    hold the shifted and merged v limbs, then the result limbs
dnl    v0 -> r10, v1 -> r11   hold the v limbs as loaded
dnl  The code also uses u0, rp, up, vp, n, rpx and upx, whose definitions are
dnl  not visible in this view.
dnl  NOTE(review): each quoted register name is followed by a line break before
dnl  its closing quote, so the expansion as written would include a newline;
dnl  this looks like extraction damage -- confirm against a pristine copy.
74 define(`s0', `r0
') define(`s1', `r9
')
76 define(`v0', `r10
') define(`v1', `r11
')
C Loop counted one limb per CTR step, two limbs per pass (presumably the
C short-operand path, since L(big) follows -- the selecting test is not
C visible in this view).
C CTR is loaded with n and the carry is initialised with INITCY.  up and rp
C are biased by -8 so that the loop can address limbs at offsets 8 and 16 and
C use the update forms ldu/stdu.  Each pass of L(lo0) stores two result limbs
C and decrements CTR twice: bdz leaves through L(ex0) after the first limb,
C bdnz falls out into L(ex1) after the second.  s0/s1 and v0/v1 swap roles
C between the two halves of the loop.
C NOTE(review): s1 is consumed by the first ADDSUBE and by L(ex1), but no
C instruction that sets it before the loop is visible; the embedded numbering
C jumps from 90 to 92, so a line was presumably lost -- confirm.
C NOTE(review): nothing visible after either final store returns or branches,
C and the numbering jumps from 111 to 115 -- presumably the return sequences
C were lost as well; confirm.
84 mtctr n C copy n in ctr
85 INITCY( r0) C clear cy
87 ld v0, 0(vp) C load v limb
88 ld u0, 0(up) C load u limb
89 addi up, up, -8 C update up
90 addi rp, rp, -8 C update rp
92 bdz L(ex1) C If done, skip loop
95 L(lo0): ld v1, 8(vp) C load v limb
96 ADDSUBE(s1, s1, u0) C add limbs with cy, set cy
97 ldu u0, 16(up) C load u limb and update up
98 srdi s0, v0, RSH C shift down previous v limb
99 std s1, 8(rp) C store result limb
100 rldimi s0, v1, LSH, 0 C left shift v limb and merge with prev v limb
101 bdz L(ex0) C decrement ctr and exit if done
102 ldu v0, 16(vp) C load v limb and update vp
103 ADDSUBE(s0, s0, u0) C add limbs with cy, set cy
104 ld u0, 8(up) C load u limb
105 srdi s1, v1, RSH C shift down previous v limb
106 stdu s0, 16(rp) C store result limb and update rp
107 rldimi s1, v0, LSH, 0 C left shift v limb and merge with prev v limb
108 bdnz L(lo0) C decrement ctr and loop back
110 L(ex1): ADDSUBE(r7, s1, u0) C last limb when s1 is pending: combine with u0 and the current carry
111 std r7, 8(rp) C store last result limb
115 L(ex0): ADDSUBE(r7, s0, u0) C last limb when s0 is pending: combine with u0 and the current carry
116 std r7, 16(rp) C store last result limb
C L(big): r0 receives n & 1 (the dot form also records the result in cr0) and
C CTR is set to (n - 1) >> 1.  The L(top)/L(mid) loop visible below performs
C two ADDSUBE steps per pass, i.e. two limbs per CTR count.
C The two ADDSUBC lines are the only carry-less operations in view; presumably
C each one starts the carry chain on one of two entry paths selected by the
C parity test -- the branches are not visible here, TODO confirm.
C NOTE(review): the embedded numbering has large gaps (125 to 133, 133 to 151,
C 151 to 164, ...), so most of this path, including its loads, shifts and
C stores, is missing from this view -- confirm against a pristine copy.
122 L(big): rldicl. r0, n, 0,63 C r0 = n & 1, set cr0
123 addi r6, n, -1 C ...for ctr
124 srdi r6, r6, 1 C ...for ctr
125 mtctr r6 C copy count into ctr
133 ADDSUBC(s1, s1, u0) C add limbs without cy, set cy
151 ADDSUBC(s0, s0, u0) C add limbs without cy, set cy
164 L(top): ldx u0, rp, up C load u limb from address rp + up
166 rldimi s1, v1, LSH, 0 C s1 = (v1 << LSH) merged with the low LSH bits already in s1
169 ADDSUBE(s1, s1, u0) C add limbs with cy, set cy
170 L(mid): ldx u0, rpx, upx C load u limb from address rpx + upx
172 rldimi s0, v0, LSH, 0 C s0 = (v0 << LSH) merged with the low LSH bits already in s0
175 ADDSUBE(s0, s0, u0) C add limbs with cy, set cy
176 bdnz L(top) C decrement CTR and loop back
179 rldimi s1, v1, LSH, 0 C after the loop: s1 = (v1 << LSH) merged with the low LSH bits already in s1
182 ADDSUBE(s1, s1, u0) C add limbs with cy, set cy