beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / alpha / ev6 / aorslsh1_n.asm
blobcb966ce0213bf65895227d62a8792e991cc91ce3
1 dnl Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
3 dnl Copyright 2003, 2013 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C EV4: ?
35 C EV5: 7
36 C EV6: 4
38 C TODO
39 C * Tune to reach 3.75 c/l on ev6.
C Symbolic names for the registers used below.
C Operands: the routine stores through rp, loads through up and vp, and
C counts limbs in n.
41 define(`rp',`r16') C destination limb pointer
42 define(`up',`r17') C first source limb pointer
43 define(`vp',`r18') C second source limb pointer, the one shifted left by 1
44 define(`n', `r19') C limb count
C Current limbs.  The 0 and 1 pairs alternate between unrolled loop stages.
46 define(`u0', `r8')
47 define(`u1', `r1')
48 define(`v0', `r4')
49 define(`v1', `r5')
C Carry chain and temporaries.
51 define(`cy0', `r0') C carry out of a v0/u0 stage; the result is also left in r0
52 define(`cy1', `r20') C carry out of a v1/u1 stage
53 define(`cy', `r22') C carry from folding in the incoming carry
54 define(`rr', `r24') C result limb about to be stored
55 define(`ps', `r25') C partial sum before the incoming carry is applied
56 define(`sl', `r28') C v limb shifted left, with the previous top bit shifted in
C Select the operation being built.  The code below relies on three macros:
C   ADDSUB        the limb instruction, addq or subq
C   CARRY(a,b,d)  d = 1 when the ADDSUB that produced a from b wrapped around
C                 add: a < b unsigned      sub: b < a unsigned
C   func          the symbol name given to PROLOGUE
C NOTE(review): presumably the build defines exactly one of the two
C OPERATION_ symbols -- confirm against the build rules.
C Each ifdef body is a quoted m4 string and must be closed by a quote and
C parenthesis line of its own, otherwise m4 swallows the rest of the file.
58 ifdef(`OPERATION_addlsh1_n',`
59 define(ADDSUB, addq)
60 define(CARRY, `cmpult $1,$2,$3')
61 define(func, mpn_addlsh1_n)
62 ')
63 ifdef(`OPERATION_sublsh1_n',`
64 define(ADDSUB, subq)
65 define(CARRY, `cmpult $2,$1,$3')
66 define(func, mpn_sublsh1_n)
67 ')
69 MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
71 ASM_START()
C func(rp, up, vp, n)
C   rp[i] = up[i] ADDSUB ((vp[i] << 1) + top bit of vp[i-1]),  i = 0 .. n-1,
C   one 64-bit ldq/stq per limb.
C
C Result in r0: the carry or borrow out of the last limb plus the bit shifted
C out of the last v limb (cy0 + r2 at the end block).
C
C The loop body is unrolled four ways.  n mod 4 picks the entry point
C (b00 -> lo0, b10 -> lo2, b11 -> lo3, b01 -> top) and each entry biases
C rp/up/vp so that the fixed displacements inside the loop hit the right
C limbs.  Every path finishes in the end block, which handles the last limb.
C
C State carried from one stage to the next:
C   v0/u0 and v1/u1  alternate as the current limb pair; each stage loads
C                    the pair consumed by the following stage
C   r2 / r3          top bit of the previous v limb, added in at the bottom
C   cy0 / cy1        carry or borrow out of the previous limb
C
C NOTE(review): there is no n == 0 check before the first loads, so callers
C presumably guarantee n >= 1 -- confirm against the mpn interface.
C NOTE(review): the instruction order looks hand-scheduled (see the
C cycles/limb table near the top of the file); do not reorder without
C re-measuring.
72 PROLOGUE(func)
73 and n, 2, cy0 C cy0 = n & 2, used here only to pick the entry point
74 blbs n, L(bx1) C low bit of n set -> n is odd
75 L(bx0): ldq v1, 0(vp) C n even: limb 0 goes in the v1/u1 pair
76 ldq u1, 0(up)
77 lda r2, 0(r31) C r2 = 0: nothing is shifted in below limb 0
78 bne cy0, L(b10) C n mod 4 == 2
80 L(b00): lda vp, 48(vp) C n mod 4 == 0: bias pointers for entry at lo0
81 lda up, -16(up)
82 lda rp, -8(rp)
83 lda cy0, 0(r31) C no carry in
84 br r31, L(lo0)
86 L(b10): lda vp, 32(vp) C n mod 4 == 2: bias pointers for entry at lo2
87 lda rp, 8(rp)
88 lda cy0, 0(r31) C no carry in
89 br r31, L(lo2)
91 L(bx1): ldq v0, 0(vp) C n odd: limb 0 goes in the v0/u0 pair
92 ldq u0, 0(up)
93 lda r3, 0(r31) C r3 = 0: nothing is shifted in below limb 0
94 beq cy0, L(b01) C n mod 4 == 1
96 L(b11): lda vp, 40(vp) C n mod 4 == 3: bias pointers for entry at lo3
97 lda up, -24(up)
98 lda rp, 16(rp)
99 lda cy1, 0(r31) C no carry in
100 br r31, L(lo3)
102 L(b01): lda n, -4(n) C n mod 4 == 1: pre-decrement n for the loop test
103 lda cy1, 0(r31) C no carry in
104 ble n, L(end) C n was 1: only the final limb remains
105 lda vp, 24(vp) C bias pointers for entry at top
106 lda up, -8(up)
108 ALIGN(16)
109 L(top): addq v0, v0, r6 C stage 1: r6 = v0 << 1
110 ldq v1, -16(vp) C load the v limb for the next stage
111 addq r6, r3, sl C combined vlimb
112 ldq u1, 16(up) C load the u limb for the next stage
113 ADDSUB u0, sl, ps C ulimb + (vlimb << 1)
114 cmplt v0, r31, r2 C high v bits
115 ADDSUB ps, cy1, rr C consume carry from previous operation
116 CARRY( ps, u0, cy0) C carry out #2
117 stq rr, 0(rp)
118 CARRY( rr, ps, cy) C carry out #3
119 lda vp, 32(vp) C bookkeeping: advance vp by four limbs
120 addq cy, cy0, cy0 C final carry out
121 L(lo0): addq v1, v1, r7 C stage 2: same steps on the v1/u1 pair
122 ldq v0, -40(vp)
123 addq r7, r2, sl
124 ldq u0, 24(up)
125 ADDSUB u1, sl, ps
126 cmplt v1, r31, r3
127 ADDSUB ps, cy0, rr
128 CARRY( ps, u1, cy1)
129 stq rr, 8(rp)
130 CARRY( rr, ps, cy)
131 lda rp, 32(rp) C bookkeeping: advance rp by four limbs
132 addq cy, cy1, cy1
133 L(lo3): addq v0, v0, r6 C stage 3: same steps on the v0/u0 pair
134 ldq v1, -32(vp)
135 addq r6, r3, sl
136 ldq u1, 32(up)
137 ADDSUB u0, sl, ps
138 cmplt v0, r31, r2
139 ADDSUB ps, cy1, rr
140 CARRY( ps, u0, cy0)
141 stq rr, -16(rp)
142 CARRY( rr, ps, cy)
143 lda up, 32(up) C bookkeeping: advance up by four limbs
144 addq cy, cy0, cy0
145 L(lo2): addq v1, v1, r7 C stage 4: same steps on the v1/u1 pair
146 ldq v0, -24(vp)
147 addq r7, r2, sl
148 ldq u0, 8(up)
149 ADDSUB u1, sl, ps
150 cmplt v1, r31, r3
151 ADDSUB ps, cy0, rr
152 CARRY( ps, u1, cy1)
153 stq rr, -8(rp)
154 CARRY( rr, ps, cy)
155 lda n, -4(n) C bookkeeping: four limbs done
156 addq cy, cy1, cy1
157 bgt n, L(top) C loop while more than the final limb remains
159 L(end): addq v0, v0, r6 C final limb: same steps, no further loads
160 addq r6, r3, sl
161 ADDSUB u0, sl, ps
162 cmplt v0, r31, r2 C r2 = bit shifted out of the last v limb
163 ADDSUB ps, cy1, rr
164 CARRY( ps, u0, cy0)
165 stq rr, 0(rp)
166 CARRY( rr, ps, cy)
167 addq cy, cy0, cy0
168 addq cy0, r2, r0 C result = last carry + shifted-out bit
170 ret r31,(r26),1
171 EPILOGUE()
172 ASM_END()