dnl  ARM mpn_add_n/mpn_sub_n optimised for A15.
dnl  Copyright 2013 Free Software Foundation, Inc.
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
dnl  Read ../config.m4.  The quoted file name must sit on a single line:
dnl  m4 takes the quoted text literally, so a line break inside it would
dnl  become part of the path.
include(`../config.m4')
C Cortex-A15	1.27	this
C This was a major improvement compared to the code we had before, but it might
C not be the best 8-way code possible.  We've tried some permutations of auto-
C increments and separate pointer updates, but they all ran at the same speed.
C Architecture requirements:
C Addition variant, selected when OPERATION_add_n is defined.
C   ADDSUBC  adcs (add with carry, setting flags)
C   SETCY    cmp $1, #1 -- compares the argument register with 1, so the
C            carry flag ends up set exactly when the register is non-zero
C            (ARM carry after a compare = no borrow)
C   RETVAL   adc r0, n, #0 -- r0 = n + 0 + carry
C   RETVAL2  adc r0, n, #1 -- r0 = n + 1 + carry
C   func     mpn_add_n
C   func_nc  mpn_add_nc
C Each macro name is followed directly by its opening parenthesis and each
C definition ends on the same line as its text; m4 only recognises a call
C when the parenthesis is adjacent, and keeps trailing newlines in arguments.
ifdef(`OPERATION_add_n', `
  define(`ADDSUBC',	adcs)
  define(`SETCY',	`cmp	$1, #1')
  define(`RETVAL',	`adc	r0, n, #0')
  define(`RETVAL2',	`adc	r0, n, #1')
  define(`func',	mpn_add_n)
  define(`func_nc',	mpn_add_nc)')
C Subtraction variant, selected when OPERATION_sub_n is defined.
C   ADDSUBC  sbcs (subtract with carry, setting flags)
C   SETCY    rsbs $1, $1, #0 -- replaces the argument register with 0 minus
C            itself, so the carry flag ends up set exactly when the register
C            was zero (ARM carry after a subtract = no borrow)
C   RETVAL   sbc r0, r0, r0 leaves 0 when carry is set and all ones when it
C            is clear; the and with #1 reduces that to 0 or 1
C   RETVAL2  expands to the same sequence as RETVAL
C   func     mpn_sub_n
C   func_nc  mpn_sub_nc
C NOTE(review): the and instruction and the closing quote of RETVAL were
C absent, which left the m4 quoting of this whole ifdef unbalanced.  They are
C restored here on the basis that a 0/1 borrow value is wanted -- confirm.
ifdef(`OPERATION_sub_n', `
  define(`ADDSUBC',	sbcs)
  define(`SETCY',	`rsbs	$1, $1, #0')
  define(`RETVAL',	`sbc	r0, r0, r0
			and	r0, r0, #1')
  define(`RETVAL2',	`RETVAL')
  define(`func',	mpn_sub_n)
  define(`func_nc',	mpn_sub_nc)')
C Lists the four function names this source file covers.  MULFUNC_PROLOGUE is
C defined outside this file (presumably it is read by the build machinery to
C learn which entry points the file can produce -- confirm).  The name list
C is kept on one line with the parenthesis adjacent to the macro name.
MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
C Entry and initial-load paths.  up, vp and rp are register aliases defined
C outside the lines shown here.
C NOTE(review): each path below shows loads only, with no arithmetic or
C branch between them; nothing was added or reordered while rejoining the
C split lines -- confirm the fall-through order against the full routine.

C Save r4-r9, which the code below uses as working registers (r4-r11 are
C callee-saved under the ARM procedure call standard).
L(ent):	push	{ r4-r9 }

C b11: load one word from up, then advance up by 4 (post-indexed); load a
C doubleword from vp at offset 0 without changing vp.
L(b11):	ldr	r5, [up], #4
	ldrd	r6, r7, [vp, #0]

C b00: load a doubleword from each source, then step both pointers back by 8.
L(b00):	ldrd	r4, r5, [up], #-8
	ldrd	r6, r7, [vp], #-8

C b01: load one word from up, then step up back by 4.
L(b01):	ldr	r5, [up], #-4

C gt1: load the doublewords at byte offset 8 from each source pointer; the
C pointers themselves are not modified.
L(gt1):	ldrd	r4, r5, [up, #8]
	ldrd	r6, r7, [vp, #8]

C b10: load the doubleword at up, no pointer update.
L(b10):	ldrd	r4, r5, [up]
C Main pass.  Source doublewords are loaded into r4:r5 (from up) and r6:r7
C (from vp); r8:r9 are stored to rp.  ADDSUBC expands to adcs or sbcs
C depending on which OPERATION_ symbol is defined.
C Offsets 8, 16 and 24 use plain offset addressing; the final group at
C offset 32 uses pre-indexed writeback (the trailing !), so up, vp and rp
C each move forward by 32 bytes there.
C NOTE(review): only one ADDSUBC per label is visible here and no loop
C branch; nothing was added while rejoining the split lines -- confirm
C against the full routine.
L(top):	ldrd	r4, r5, [up, #8]
	ldrd	r6, r7, [vp, #8]
	strd	r8, r9, [rp, #8]
L(mid):	ADDSUBC	r8, r4, r6
	ldrd	r4, r5, [up, #16]
	ldrd	r6, r7, [vp, #16]
	strd	r8, r9, [rp, #16]
	ldrd	r4, r5, [up, #24]
	ldrd	r6, r7, [vp, #24]
	strd	r8, r9, [rp, #24]
	ldrd	r4, r5, [up, #32]!
	ldrd	r6, r7, [vp, #32]!
	strd	r8, r9, [rp, #32]!
L(lo):	ADDSUBC	r8, r4, r6
C Tail stores: write r8:r9 to rp at byte offset 8 (end) and at byte offset 24
C (dne).  Neither store updates rp.
C NOTE(review): no instruction is visible between the two stores, nor any
C register restore or return after them -- confirm against the full routine.
L(end):	strd	r8, r9, [rp, #8]
L(dne):	strd	r8, r9, [rp, #24]