beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / arm / v7a / cora15 / cnd_aors_n.asm
blobb9e5cd3f7934af0f7743dda7f7e7b95451e96899
1 dnl ARM mpn_cnd_add_n/mpn_cnd_sub_n optimised for A15.
3 dnl Copyright 2013 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb best
34 C StrongARM: -
35 C XScale ?
36 C Cortex-A7 ?
37 C Cortex-A8 ?
38 C Cortex-A9 3.75 3
39 C Cortex-A15 1.78 this
41 C This code does not run as well as one could have hoped, since 1.5 c/l seems
42 C realistic for this insn mix.
44 C Architecture requirements:
45 C v5 -
46 C v5t -
47 C v5te ldrd strd
48 C v6 -
49 C v6t2 -
50 C v7a -
C Symbolic register names used by the code below.  cnd, rp, up and vp name
C r0-r3; n names r12, which the function prologue fills with a word loaded
C from [sp].
C NOTE(review): this layout matches four arguments arriving in r0-r3 and a
C fifth on the stack under the ARM procedure call standard -- confirm
C against the C prototype.
52 define(`cnd',`r0') C condition word; rewritten into a bit mask on entry
53 define(`rp', `r1') C result limb pointer (stored through)
54 define(`up', `r2') C first source limb pointer (loaded from)
55 define(`vp', `r3') C second source limb pointer (loaded from, then masked)
56 define(`n', `r12') C limb count on entry; count of 4-limb groups afterwards
C Operation selection.  The block whose OPERATION_ symbol is defined supplies
C the mnemonics and helper sequences used by the function body.
C NOTE(review): presumably exactly one of the two symbols is defined per
C build of this file -- confirm against the build system.
C
C   ADDSUB   flag-setting add/subtract without carry-in (used on a lone
C            first limb)
C   ADDSUBC  flag-setting add/subtract with carry-in (used on limb pairs)
C   INITCY   puts the carry flag into its neutral state before the first
C            ADDSUBC when no ADDSUB has run
C   RETVAL   turns the final carry flag into the 0 or 1 returned in r0
C   func     name given to PROLOGUE
C
C IFADD, RETVAL2 and func_nc are defined in both blocks but are not
C referenced by any code visible in this file.
C
C Addition: cmn r0, #0 computes r0 + 0, which cannot carry, so C becomes 0.
C adc r0, n, #0 yields n + C; n is zero whenever control reaches the single
C RETVAL expansion at L(wd1), so r0 receives the carry.
58 ifdef(`OPERATION_cnd_add_n', `
59 define(`ADDSUB', adds)
60 define(`ADDSUBC', adcs)
61 define(`IFADD', `$1')
62 define(`INITCY', `cmn r0, #0')
63 define(`RETVAL', `adc r0, n, #0')
64 define(`RETVAL2', `adc r0, n, #1')
65 define(`func', mpn_cnd_add_n)
66 define(`func_nc', mpn_add_nc)')
C Subtraction: cmp r0, #0 computes r0 - 0, which cannot borrow, so C becomes
C 1 (on ARM, C = 1 means no borrow).  sbc r0, r0, r0 yields 0 when C = 1 and
C 0xffffffff when C = 0; the and with 1 reduces that to the borrow.
67 ifdef(`OPERATION_cnd_sub_n', `
68 define(`ADDSUB', subs)
69 define(`ADDSUBC', sbcs)
70 define(`IFADD', `')
71 define(`INITCY', `cmp r0, #0')
72 define(`RETVAL', `sbc r0, r0, r0
73 and r0, r0, #1')
74 define(`RETVAL2', `RETVAL')
75 define(`func', mpn_cnd_sub_n)
76 define(`func_nc', mpn_sub_nc)')
C NOTE(review): MULFUNC_PROLOGUE, ASM_START, PROLOGUE, EPILOGUE, ALIGN and L
C are macros defined outside this file (presumably reached through
C config.m4); their expansions are not visible here.
78 MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
80 ASM_START()
C func -- expands to mpn_cnd_add_n or mpn_cnd_sub_n.
C
C Computes rp[i] = up[i] + vp[i] (or up[i] - vp[i]) for i = 0 .. n-1 on
C 32-bit limbs, propagating the carry (borrow) from each limb to the next.
C Every vp limb is masked before use: when cnd is zero the vp limbs are
C cleared, so the result is up[] plus or minus zero; when cnd is nonzero the
C vp limbs are used unchanged.
C
C In:   r0 = cnd, r1 = rp, r2 = up, r3 = vp, word at [sp] = n
C Out:  r0 = carry (add) or borrow (sub) out of the last limb, 0 or 1
C Regs: r4-r9 are pushed on entry and popped before return; r12 and the
C       flags are overwritten.
C       r4,r5 = up limb pair   r6,r7 = vp limb pair   r8,r9 = result pair
C
C No branch in this routine depends on cnd; the same loads and stores are
C executed whatever its value.
C
C n = 0 is not handled: that case takes the L(b00) path and enters the loop
C at L(mid), where the group count is decremented before it is tested.
C NOTE(review): presumably callers guarantee n >= 1 -- confirm.
C
C Structure: n mod 4 selects one of four entry sequences (L(b00), L(b01),
C L(b10), L(b11)) that handle the odd limbs and bias the pointers; n / 4 is
C the number of passes through L(mid).  Each full trip L(top) -> L(mid) ->
C L(lo) handles four limbs as two pairs.  Between the flag-setting
C ADDSUB/ADDSUBC instructions only loads, stores, bic, sub (without s) and
C tst on an unshifted register are executed, none of which changes the carry
C flag, so the carry chain runs unbroken through the whole loop.
81 PROLOGUE(func)
82 ldr n, [sp] C fetch n before the push moves sp
83 push { r4-r9 }
85 cmp cnd, #1 C C = 1 iff cnd is nonzero
86 sbc cnd, cnd, cnd C cnd = 0xffffffff if cnd was 0, else 0; used as a bic mask on vp limbs
C Dispatch on n mod 4.  The mov has no s suffix, so the flags from ands are
C still intact at the beq.
88 ands r6, n, #3 C r6 = n mod 4
89 mov n, n, lsr #2 C n = number of 4-limb groups
90 beq L(b00) C n mod 4 = 0
91 cmp r6, #2
92 bcc L(b01) C n mod 4 = 1
93 beq L(b10) C n mod 4 = 2
C n mod 4 = 3: do limb 0 alone with ADDSUB, preload limbs 1 and 2, and join
C the loop at L(lo).
95 L(b11): ldr r5, [up], #4 C load limb 0, then advance up by one limb
96 ldr r7, [vp], #4
97 bic r7, r7, cnd
98 ADDSUB r9, r5, r7 C no carry-in; starts the carry chain
99 ldrd r4, r5, [up, #0]
100 ldrd r6, r7, [vp, #0]
101 bic r6, r6, cnd
102 bic r7, r7, cnd
103 str r9, [rp], #-4 C store limb 0, then bias rp so [rp, #8] is limb 1
104 b L(lo)
C n mod 4 = 0: preload limbs 0 and 1 and join the loop at L(mid).
106 L(b00): ldrd r4, r5, [up], #-8 C bias up so [up, #16]! at L(mid) reaches limb 2
107 ldrd r6, r7, [vp], #-8
108 bic r6, r6, cnd
109 bic r7, r7, cnd
110 INITCY C neutral carry for the first ADDSUBC
111 sub rp, rp, #16 C bias rp so [rp, #16]! at L(mid) is limb 0
112 b L(mid)
C n mod 4 = 1: do limb 0 alone with ADDSUB; if no 4-limb group remains, go
C straight to the return sequence, otherwise preload limbs 1 and 2 and join
C the loop at L(mid).
114 L(b01): ldr r5, [up], #-4 C load limb 0, then bias up so [up, #8] is limb 1
115 ldr r7, [vp], #-4
116 bic r7, r7, cnd
117 ADDSUB r9, r5, r7 C no carry-in; starts the carry chain
118 str r9, [rp], #-12 C store limb 0, then bias rp so [rp, #16]! at L(mid) is limb 1
119 tst n, n C any 4-limb groups?  (carry flag is left unchanged)
120 beq L(wd1) C n was 1: nothing more to do
121 L(gt1): ldrd r4, r5, [up, #8]
122 ldrd r6, r7, [vp, #8]
123 bic r6, r6, cnd
124 bic r7, r7, cnd
125 b L(mid)
C n mod 4 = 2: preload limbs 0 and 1 and join the loop at L(lo).
127 L(b10): ldrd r4, r5, [up]
128 ldrd r6, r7, [vp]
129 bic r6, r6, cnd
130 bic r7, r7, cnd
131 INITCY C neutral carry for the first ADDSUBC
132 sub rp, rp, #8 C bias rp so [rp, #8] is limb 0
133 b L(lo)
C Main loop.  At L(top) and L(mid) the pair about to be consumed has been
C loaded from [up, #8] and [vp, #8]; the loads for the next pair are issued
C before the store of the pair just computed.
135 ALIGN(16)
136 L(top): ldrd r6, r7, [vp, #8]
137 ldrd r4, r5, [up, #8]
138 bic r6, r6, cnd
139 bic r7, r7, cnd
140 strd r8, r9, [rp, #8] C store the pair computed at L(lo)
141 L(mid): ADDSUBC r8, r4, r6
142 ADDSUBC r9, r5, r7
143 ldrd r6, r7, [vp, #16]! C advance vp by four limbs and load the next pair
144 ldrd r4, r5, [up, #16]! C advance up by four limbs and load the next pair
145 bic r6, r6, cnd
146 bic r7, r7, cnd
147 sub n, n, #1 C one group consumed; no s suffix, so carry survives
148 strd r8, r9, [rp, #16]! C advance rp by four limbs and store the pair computed at L(mid)
149 L(lo): ADDSUBC r8, r4, r6
150 ADDSUBC r9, r5, r7
151 tst n, n C more groups?  (carry flag is left unchanged)
152 bne L(top)
C Tail: store the last pair, convert the carry flag into the return value,
C restore the saved registers and return through the link register.
154 L(end): strd r8, r9, [rp, #8]
155 L(wd1): RETVAL
156 pop { r4-r9 }
157 bx r14
158 EPILOGUE()