dnl  HP-PA 2.0 mpn_add_n, mpn_sub_n
dnl
dnl  Copyright 1997, 2000, 2002, 2003, 2009, 2010 Free Software Foundation,
dnl  Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
dnl
dnl  This runs at 2 cycles/limb on PA8000 and 1.6875 cycles/limb on PA8500.  It
dnl  should be possible to reach the cache bandwidth 1.5 cycles/limb at least
dnl  with PA8500.  The problem now is stalling of the first ADD,DC after LDO,
dnl  where the processor gets confused about where carry comes from.
dnl  Pull in the shared m4 configuration; the quoted path must sit on a single
dnl  line, otherwise the newlines become part of the file name.
include(`../config.m4')
dnl  Addition variant, active only when OPERATION_add_n is defined.
dnl    ADCSBC   instruction used for each limb pair in the unrolled loop
dnl             (add,dc: add with carry-in)
dnl    INITCY   adds -1 to %r22 and discards the result in %r0, leaving only
dnl             the carry bit as its effect (presumably %r22 holds the
dnl             incoming carry; its setup is not visible in this part)
dnl    func     name of the plain entry point
dnl    func_nc  name of the entry point that accepts a carry-in
dnl  Each define keeps its opening parenthesis directly after the macro name,
dnl  as m4 requires for a call with arguments.
ifdef(`OPERATION_add_n', `
	define(ADCSBC,	      `add,dc')
	define(INITCY,	      `addi -1,%r22,%r0')
	define(func,	      mpn_add_n)
	define(func_nc,	      mpn_add_nc)')
dnl  Subtraction variant, active only when OPERATION_sub_n is defined.
dnl    ADCSBC   instruction used for each limb pair in the unrolled loop
dnl             (sub,db: subtract with borrow-in)
dnl    INITCY   subtracts %r22 from 0 and discards the result in %r0, leaving
dnl             only the borrow bit as its effect (presumably %r22 holds the
dnl             incoming borrow; its setup is not visible in this part)
dnl    func     name of the plain entry point
dnl    func_nc  name of the entry point that accepts a borrow-in
dnl  Each define keeps its opening parenthesis directly after the macro name,
dnl  as m4 requires for a call with arguments.
ifdef(`OPERATION_sub_n', `
	define(ADCSBC,	      `sub,db')
	define(INITCY,	      `subi 0,%r22,%r0')
	define(func,	      mpn_sub_n)
	define(func_nc,	      mpn_sub_nc)')
dnl  Names the four entry points this single source can be built as.
dnl  NOTE(review): MULFUNC_PROLOGUE is defined outside this file; confirm its
dnl  exact role against the shared m4 definitions.
MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
59 ifdef(`HAVE_ABI_2_0w',
64 ifdef
(`HAVE_ABI_2_0w
',
75 depw,z %r21, 30, 3, %r28 C r28 = 2 * (-n & 7)
76 depw,z %r21, 28, 3, %r21 C r21 = 8 * (-n & 7)
77 sub up, %r21, up C offset up
78 sub vp, %r21, vp C offset vp
79 sub rp, %r21, rp C offset rp
80 blr %r28, %r0 C branch into loop
86 ADCSBC %r20, %r31, %r20
88 LDEF(7) ldd 8(up), %r21
90 ADCSBC %r21, %r19, %r21
92 LDEF(6) ldd 16(up), %r20
94 ADCSBC %r20, %r31, %r20
96 LDEF(5) ldd 24(up), %r21
98 ADCSBC %r21, %r19, %r21
100 LDEF(4) ldd 32(up), %r20
102 ADCSBC %r20, %r31, %r20
104 LDEF(3) ldd 40(up), %r21
106 ADCSBC %r21, %r19, %r21
108 LDEF(2) ldd 48(up), %r20
110 ADCSBC %r20, %r31, %r20
112 LDEF(1) ldd 56(up), %r21
114 ADCSBC %r21, %r19, %r21
118 addib,> -8, n, L(loop)
121 add,dc %r0, %r0, %r29
122 ifdef(`OPERATION_sub_n',`
126 ifdef(`HAVE_ABI_2_0w',