beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / alpha / sub_n.asm
blob1bb72263f855c04310ba1dbac70aa4c9270dfd7c
1 dnl Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
2 dnl and store difference in a third limb vector.
4 dnl Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
7 dnl
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
10 dnl
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
14 dnl
15 dnl or
16 dnl
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any
19 dnl later version.
20 dnl
21 dnl or both in parallel, as here.
22 dnl
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 dnl for more details.
27 dnl
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
32 include(`../config.m4')
34 C cycles/limb
35 C EV4: ?
36 C EV5: 4.75
37 C EV6: 3
39 dnl INPUT PARAMETERS
40 dnl res_ptr r16
41 dnl s1_ptr r17
42 dnl s2_ptr r18
43 dnl size r19
45 ASM_START()
C mpn_sub_nc -- entry point that starts the subtraction with a preset borrow.
C It copies r20 into r25, the register the common code uses as the running
C borrow, and then branches to the com label inside mpn_sub_n.
C NOTE(review): r20 is not listed under INPUT PARAMETERS; presumably it is
C the fifth argument holding a borrow-in of 0 or 1 -- confirm against the
C C prototype.
46 PROLOGUE(mpn_sub_nc)
47 bis r31,r20,r25 C cy = r20 (r31 reads as zero)
48 br L(com) C join the common code in mpn_sub_n
49 EPILOGUE()
C mpn_sub_n -- subtract the limbs read through s2_ptr from the limbs read
C through s1_ptr, store the differences through res_ptr, and return the
C final borrow in r0.
C
C Register usage in this routine:
C r16 res_ptr, r17 s1_ptr, r18 s2_ptr, r19 loop count
C r25 running borrow (cy)
C r0-r3 limbs loaded through s2_ptr
C r4-r7 limbs loaded through s1_ptr
C r28 difference before the borrow is applied
C r8 borrow out of the main subtract
C r20-r23 finished result limbs waiting to be stored
C
C Every limb takes a main subtract (s1 limb minus s2 limb) followed by a
C carry subtract (minus cy). A cmpult after each of the two subtracts detects
C its borrow, and the two borrows are ORed together to form the next cy.
50 PROLOGUE(mpn_sub_n)
51 bis r31,r31,r25 C clear cy
52 L(com): subq r19,4,r19 C decr loop cnt; mpn_sub_nc branches here with cy preset
53 blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop
54 C Start software pipeline for 1st loop
C Load the first group of 4 limbs and finish limbs 1 and 2 of that group,
C leaving their results in r20 and r21 to be stored later.
55 ldq r0,0(r18) C s2 limb 1
56 ldq r4,0(r17) C s1 limb 1
57 ldq r1,8(r18) C s2 limb 2
58 ldq r5,8(r17) C s1 limb 2
59 addq r17,32,r17 C update s1_ptr
60 subq r4,r0,r28 C 1st main subtract
61 ldq r2,16(r18) C s2 limb 3
62 subq r28,r25,r20 C 1st carry subtract
63 ldq r3,24(r18) C s2 limb 4
64 cmpult r4,r0,r8 C borrow from 1st main subtract
65 ldq r6,-16(r17) C s1 limb 3 (s1_ptr already advanced)
66 cmpult r28,r25,r25 C borrow from 1st carry subtract
67 ldq r7,-8(r17) C s1 limb 4 (s1_ptr already advanced)
68 bis r8,r25,r25 C combine cy from the two subtracts
69 subq r19,4,r19 C decr loop cnt
70 subq r5,r1,r28 C 2nd main subtract
71 addq r18,32,r18 C update s2_ptr
72 subq r28,r25,r21 C 2nd carry subtract
73 cmpult r5,r1,r8 C borrow from 2nd main subtract
74 blt r19,$Lend1 C if less than 4 limbs remain, jump
75 C 1st loop handles groups of 4 limbs in a software pipeline
C On entry to each iteration limbs 1 and 2 of the current group are already
C computed (r20, r21) and r28, r25, r8 still hold the inputs for the borrow
C of limb 2. The iteration finishes limbs 3 and 4, stores all four results,
C and loads and starts the next group.
76 ALIGN(16)
77 $Loop: cmpult r28,r25,r25 C borrow from 2nd carry subtract
78 ldq r0,0(r18) C next group: s2 limb 1
79 bis r8,r25,r25 C combine cy from the two subtracts
80 ldq r1,8(r18) C next group: s2 limb 2
81 subq r6,r2,r28 C 3rd main subtract
82 ldq r4,0(r17) C next group: s1 limb 1
83 subq r28,r25,r22 C 3rd carry subtract
84 ldq r5,8(r17) C next group: s1 limb 2
85 cmpult r6,r2,r8 C borrow from 3rd main subtract
86 cmpult r28,r25,r25 C borrow from 3rd carry subtract
87 stq r20,0(r16) C store result limb 1
88 bis r8,r25,r25 C combine cy from the two subtracts
89 stq r21,8(r16) C store result limb 2
90 subq r7,r3,r28 C 4th main subtract
91 subq r28,r25,r23 C 4th carry subtract
92 cmpult r7,r3,r8 C borrow from 4th main subtract
93 cmpult r28,r25,r25 C borrow from 4th carry subtract
94 addq r17,32,r17 C update s1_ptr
95 bis r8,r25,r25 C combine cy from the two subtracts
96 addq r16,32,r16 C update res_ptr
97 subq r4,r0,r28 C 1st main subtract
98 ldq r2,16(r18) C next group: s2 limb 3
99 subq r28,r25,r20 C 1st carry subtract
100 ldq r3,24(r18) C next group: s2 limb 4
101 cmpult r4,r0,r8 C borrow from 1st main subtract
102 ldq r6,-16(r17) C next group: s1 limb 3 (s1_ptr already advanced)
103 cmpult r28,r25,r25 C borrow from 1st carry subtract
104 ldq r7,-8(r17) C next group: s1 limb 4 (s1_ptr already advanced)
105 bis r8,r25,r25 C combine cy from the two subtracts
106 subq r19,4,r19 C decr loop cnt
107 stq r22,-16(r16) C store result limb 3 (res_ptr already advanced)
108 subq r5,r1,r28 C 2nd main subtract
109 stq r23,-8(r16) C store result limb 4 (res_ptr already advanced)
110 subq r28,r25,r21 C 2nd carry subtract
111 addq r18,32,r18 C update s2_ptr
112 cmpult r5,r1,r8 C borrow from 2nd main subtract
113 bge r19,$Loop C repeat while another full group of 4 remains
114 C Finish software pipeline for 1st loop
C Drain the pipeline: finish limbs 3 and 4 of the last loaded group and
C store all four results. No further loads are issued here.
115 $Lend1: cmpult r28,r25,r25 C borrow from 2nd carry subtract
116 bis r8,r25,r25 C combine cy from the two subtracts
117 subq r6,r2,r28 C 3rd main subtract
118 subq r28,r25,r22 C 3rd carry subtract
119 cmpult r6,r2,r8 C borrow from 3rd main subtract
120 cmpult r28,r25,r25 C borrow from 3rd carry subtract
121 stq r20,0(r16) C store result limb 1
122 bis r8,r25,r25 C combine cy from the two subtracts
123 stq r21,8(r16) C store result limb 2
124 subq r7,r3,r28 C 4th main subtract
125 subq r28,r25,r23 C 4th carry subtract
126 cmpult r7,r3,r8 C borrow from 4th main subtract
127 cmpult r28,r25,r25 C borrow from 4th carry subtract
128 bis r8,r25,r25 C combine cy from the two subtracts
129 addq r16,32,r16 C update res_ptr
130 stq r22,-16(r16) C store result limb 3 (res_ptr already advanced)
131 stq r23,-8(r16) C store result limb 4 (res_ptr already advanced)
132 $Lend2: addq r19,4,r19 C restore loop cnt
133 beq r19,$Lret C no limbs left, return cy
134 C Start software pipeline for 2nd loop
135 ldq r0,0(r18) C s2 limb
136 ldq r4,0(r17) C s1 limb
137 subq r19,1,r19 C decr loop cnt
138 beq r19,$Lend0 C only one limb left, skip the loop
139 C 2nd loop handles remaining 1-3 limbs
C Each iteration finishes one limb and loads the following one; the last
C limb is finished at Lend0 so that no load runs past the end.
140 ALIGN(16)
141 $Loop0: subq r4,r0,r28 C main subtract
142 cmpult r4,r0,r8 C borrow from main subtract
143 ldq r0,8(r18) C next s2 limb
144 ldq r4,8(r17) C next s1 limb
145 subq r28,r25,r20 C carry subtract
146 addq r18,8,r18 C update s2_ptr
147 addq r17,8,r17 C update s1_ptr
148 stq r20,0(r16) C store result limb
149 cmpult r28,r25,r25 C borrow from carry subtract
150 subq r19,1,r19 C decr loop cnt
151 bis r8,r25,r25 C combine cy from the two subtracts
152 addq r16,8,r16 C update res_ptr
153 bne r19,$Loop0 C repeat until only the last loaded limb remains
154 $Lend0: subq r4,r0,r28 C main subtract
155 subq r28,r25,r20 C carry subtract
156 cmpult r4,r0,r8 C borrow from main subtract
157 cmpult r28,r25,r25 C borrow from carry subtract
158 stq r20,0(r16) C store final result limb
159 bis r8,r25,r25 C combine cy from the two subtracts
161 $Lret: bis r25,r31,r0 C return cy
162 ret r31,(r26),1 C return to the address in r26
163 EPILOGUE()
164 ASM_END()