beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / arm / aorslsh1_n.asm
blob1cbd4ba1af4667f8fdf03f78c85776c3961f1622
1 dnl ARM mpn_addlsh1_n and mpn_sublsh1_n
3 dnl Contributed to the GNU project by Torbjörn Granlund.
5 dnl Copyright 2012 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
8 dnl
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
11 dnl
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
15 dnl
16 dnl or
17 dnl
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
20 dnl later version.
21 dnl
22 dnl or both in parallel, as here.
23 dnl
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl for more details.
28 dnl
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
C                addlsh1_n      sublsh1_n
C               cycles/limb    cycles/limb
C StrongARM          ?              ?
C XScale             ?              ?
C Cortex-A7          ?              ?
C Cortex-A8          ?              ?
C Cortex-A9         3.12           3.7
C Cortex-A15         ?              ?
44 C TODO
45 C * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1.
46 C The sublsh1_n code could surely be tweaked, its REVCY slows down things
47 C very much. If two insns are really needed, it might help to separate them
48 C for better micro-parallelism.
C Symbolic names for the registers that carry the four operands.
C   rp   pointer the result limbs are stored through (stmia/stm/str)
C   up   pointer the limbs of the first source are loaded through
C   vp   pointer the limbs of the source that gets doubled are loaded through
C   n    limb count; the main loop consumes it three limbs at a time
define(`rp', `r0')
define(`up', `r1')
define(`vp', `r2')
define(`n',  `r3')
C Macro set selected when OPERATION_addlsh1_n is defined.
C
C   ADDSUB, ADDSUBC  flag-setting add, without and with carry in
C   SETCY            carry flag becomes set exactly when the argument is
C                    nonzero (compare against 1)
C   RETVAL           r0 = argument + 2 + carry flag
C   SAVECY           arg1 = arg2 - 1 + carry flag; called with arg2 holding
C                    -1 this parks -1 for carry set and -2 for carry clear
C   RESTCY           carry flag becomes set exactly when the argument is -1,
C                    so it undoes SAVECY
C   REVCY            expands to nothing; the carry out of an adcs group can
C                    be used as it stands
C   INICYR           argument = 0, which RESTCY reads back as carry clear
C   r10r11           last register of the push/pop range; r11 here because
C                    this variant uses r11
C   func, func_nc    entry point names
C
C ADDSUB, SETCY and func_nc are not referenced by the code visible in this
C review.
ifdef(`OPERATION_addlsh1_n', `
  define(`ADDSUB',	adds)
  define(`ADDSUBC',	adcs)
  define(`SETCY',	`cmp	$1, #1')
  define(`RETVAL',	`adc	r0, $1, #2')
  define(`SAVECY',	`sbc	$1, $2, #0')
  define(`RESTCY',	`cmn	$1, #1')
  define(`REVCY',	`')
  define(`INICYR',	`mov	$1, #0')
  define(`r10r11',	`r11')
  define(`func',	mpn_addlsh1_n)
  define(`func_nc',	mpn_addlsh1_nc)')
C Macro set selected when OPERATION_sublsh1_n is defined.
C
C   ADDSUB, ADDSUBC  flag-setting subtract, without and with borrow in
C   SETCY            negates the argument in place; carry flag becomes set
C                    exactly when the argument was zero
C   RETVAL           r0 = argument + 1 + carry flag
C   SAVECY           arg1 = carry flag - 1, that is 0 for carry set and -1
C                    for carry clear; the second argument is ignored
C   RESTCY           carry flag becomes set exactly when the argument is -1.
C                    Paired with SAVECY above this hands back the INVERSE of
C                    the saved flag, turning a carry out of the doubling step
C                    into a borrow for the following sbcs group
C   REVCY            inverts the carry flag, overwriting the argument with
C                    old carry flag - 1 on the way.  It turns the borrow left
C                    by an sbcs group into a carry in for the next doubling
C   INICYR           argument = -1, which RESTCY reads back as carry set,
C                    meaning no borrow
C   r10r11           last register of the push/pop range; r10 here because
C                    this variant does not use r11
C   func, func_nc    entry point names
C
C ADDSUB, SETCY and func_nc are not referenced by the code visible in this
C review.
ifdef(`OPERATION_sublsh1_n', `
  define(`ADDSUB',	subs)
  define(`ADDSUBC',	sbcs)
  define(`SETCY',	`rsbs	$1, $1, #0')
  define(`RETVAL',	`adc	r0, $1, #1')
  define(`SAVECY',	`sbc	$1, $1, $1')
  define(`RESTCY',	`cmn	$1, #1')
  define(`REVCY',	`sbc	$1, $1, $1
	cmn	$1, #1')
  define(`INICYR',	`mvn	$1, #0')
  define(`r10r11',	`r10')
  define(`func',	mpn_sublsh1_n)
  define(`func_nc',	mpn_sublsh1_nc)')
C func (rp, up, vp, n)
C
C For every limb position the vp limb is doubled with adcs (a one bit left
C shift that ripples across limbs), the doubled value is combined with the
C up limb by ADDSUBC, and the outcome is stored through rp.  r0 returns the
C bit shifted out of the topmost doubled limb plus the final carry (add
C variant) or borrow (sub variant), as put together by RETVAL.
C NOTE(review): presumably the C level prototype is
C   mp_limb_t func (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
C confirm against the GMP headers.
C
C Carry handling.  There is a single carry flag but two things to carry:
C   * The carry out of a doubling group is parked in a register by SAVECY
C     and brought back by RESTCY right before the NEXT ADDSUBC group, where
C     it has the weight of that group's lowest limb.
C   * The carry or borrow out of an ADDSUBC group stays in the flags, is
C     passed through REVCY, and enters the next doubling group as the bit
C     shifted in at the bottom.
C Each parked value is consumed one group after it is produced, so r12 and
C r14 take turns as the parking register.
C
C Register use: r4-r6 up limbs and results, r8-r10 vp limbs, r7 scratch for
C the r12/r14 swap, r11 constant -1 (add variant only), r12 and r14 parked
C carries.
C
C n >= 1 is assumed.  With n = 0 nothing is stored and the add variant
C returns 2.
MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)

ASM_START()
PROLOGUE(func)
	push	{r4-r10r11, r14}

C Only the add variant needs the constant -1 in r11 as second SAVECY
C operand.  The sub variant neither saves nor writes r11.
ifdef(`OPERATION_addlsh1_n', `
	mvn	r11, #0
')
	INICYR(	r14)			C parked state meaning no carry, no borrow
	subs	n, n, #3
	blt	L(le2)			C carry clear on branch path

	cmn	r0, #0			C clear carry
	ldmia	vp!, {r8, r9, r10}
	b	L(mid)			C first group has been loaded, go double it

C Main loop, two groups of three limbs per round trip.
C First half: combine using the carry parked in r14, then double the next
C group and park its carry in r14.
L(top):	RESTCY(	r14)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	ldmia	vp!, {r8, r9, r10}
	stmia	rp!, {r4, r5, r6}
	REVCY(r14)
	adcs	r8, r8, r8
	adcs	r9, r9, r9
	adcs	r10, r10, r10
	ldmia	up!, {r4, r5, r6}
	SAVECY(	r14, r11)
	subs	n, n, #3
	blt	L(exi)			C r12 and r14 already have the roles L(exi) expects
C Second half: same steps with r12 as the parking register.
	RESTCY(	r12)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	ldmia	vp!, {r8, r9, r10}
	stmia	rp!, {r4, r5, r6}
	REVCY(r12)
L(mid):	adcs	r8, r8, r8
	adcs	r9, r9, r9
	adcs	r10, r10, r10
	ldmia	up!, {r4, r5, r6}
	SAVECY(	r12, r11)
	subs	n, n, #3
	bge	L(top)

C Leaving after the second half: exchange r12 and r14 so that, as on the
C other exit, r12 is the carry to consume and r14 the one parked last.
	mov	r7, r12			C swap alternating...
	mov	r12, r14		C ...carry-save...
	mov	r14, r7			C ...registers

C Finish the group that has been doubled but not yet combined.
L(exi):	RESTCY(	r12)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	stmia	rp!, {r4, r5, r6}

	REVCY(r12)
C Tail for the remaining 0, 1 or 2 limbs.  Here r14 holds the parked carry
C for the combine step and the carry flag is the bit to shift in.
L(le2):	tst	n, #1			C n = {-1,-2,-3} map to [2], [1], [0]
	beq	L(e1)

L(e02):	tst	n, #2
	beq	L(rt0)			C nothing left
	ldm	vp, {r8, r9}
	adcs	r8, r8, r8
	adcs	r9, r9, r9
	ldm	up, {r4, r5}
	SAVECY(	r12, r11)
	RESTCY(	r14)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	stm	rp, {r4, r5}
	b	L(rt1)

L(e1):	ldr	r8, [vp]
	adcs	r8, r8, r8
	ldr	r4, [up]
	SAVECY(	r12, r11)
	RESTCY(	r14)
	ADDSUBC	r4, r4, r8
	str	r4, [rp]

C Return value: RETVAL adds the parked carry of the last doubling step in
C r14 to the carry flag left by the last combine step (after REVCY).
L(rt1):	mov	r14, r12
	REVCY(r12)
L(rt0):	RETVAL(	r14)
	pop	{r4-r10r11, r14}
	bx	r14			C r14 was reloaded by the pop above
EPILOGUE()