1 dnl Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
2 dnl store sum in a third limb vector.
4 dnl Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any
21 dnl or both in parallel, as here.
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
32 include(`..
/config.m4
')
C Limb-vector add with carry propagation.  Register roles, as named by the
C comments in this code: r16 = res_ptr, r17 = s1_ptr, r18 = s2_ptr,
C r19 = loop cnt, r25 = running carry (cy), copied to r0 at $Lret.
C Every limb takes two adds: a main add of two source registers into r28,
C then a carry add of cy into that sum.  Either add can wrap, so each is
C followed by a cmpult (result below an addend means the add wrapped) and
C the two carry bits are ORed into r25 with bis.
C NOTE(review): nothing in this excerpt loads r0-r7, stores r20-r23, or
C branches to $Loop, $Loop0, $Lend0 or $Lret; presumably those instructions
C sit on lines outside the visible text -- confirm against the full file.
51 bis r31,r31,r25 C start with cy (r25) = 0
52 L(com): subq r19,4,r19 C loop cnt -= 4; a negative result means fewer than 4 limbs
53 blt r19,$Lend2 C fewer than 4 limbs: skip the 4-limb loop, go to the 2nd loop
54 C Start software pipeline for 1st loop: limbs 1 and 2 of the first group
59 addq r17,32,r17 C advance s1_ptr by 32 (one 4-limb group)
60 addq r0,r4,r28 C 1st main add: r28 = r0 + r4
62 addq r25,r28,r20 C 1st carry add: r20 = r28 + cy
64 cmpult r28,r4,r8 C r8 = 1 if the main add wrapped (r28 < r4)
66 cmpult r20,r28,r25 C r25 = 1 if the carry add wrapped (r20 < r28)
68 bis r8,r25,r25 C cy = r8 OR r25: carry out of limb 1
69 subq r19,4,r19 C loop cnt -= 4 for the next group
70 addq r1,r5,r28 C 2nd main add: r28 = r1 + r5
71 addq r18,32,r18 C advance s2_ptr by 32 (one 4-limb group)
72 addq r28,r25,r21 C 2nd carry add: r21 = r28 + cy
73 cmpult r28,r5,r8 C r8 = 1 if the main add wrapped (r28 < r5)
74 blt r19,$Lend1 C fewer than 4 limbs remain: finish the pipeline at $Lend1
75 C 1st loop handles groups of 4 limbs in a software pipeline: each pass finishes limbs 3-4 of one group, then starts limbs 1-2 of the next
77 $Loop: cmpult r21,r28,r25 C r25 = 1 if the 2nd carry add wrapped (r21 < r28)
79 bis r8,r25,r25 C cy = r8 OR r25: carry out of limb 2
81 addq r2,r6,r28 C 3rd main add: r28 = r2 + r6
83 addq r28,r25,r22 C 3rd carry add: r22 = r28 + cy
85 cmpult r28,r6,r8 C r8 = 1 if the main add wrapped (r28 < r6)
86 cmpult r22,r28,r25 C r25 = 1 if the carry add wrapped (r22 < r28)
88 bis r8,r25,r25 C cy = r8 OR r25: carry out of limb 3
90 addq r3,r7,r28 C 4th main add: r28 = r3 + r7
91 addq r28,r25,r23 C 4th carry add: r23 = r28 + cy
92 cmpult r28,r7,r8 C r8 = 1 if the main add wrapped (r28 < r7)
93 cmpult r23,r28,r25 C r25 = 1 if the carry add wrapped (r23 < r28)
94 addq r17,32,r17 C advance s1_ptr by 32 (one 4-limb group)
95 bis r8,r25,r25 C cy = r8 OR r25: carry out of limb 4
96 addq r16,32,r16 C advance res_ptr by 32 (one 4-limb group)
97 addq r0,r4,r28 C 1st main add of the next group: r28 = r0 + r4
99 addq r25,r28,r20 C 1st carry add: r20 = r28 + cy
101 cmpult r28,r4,r8 C r8 = 1 if the main add wrapped (r28 < r4)
103 cmpult r20,r28,r25 C r25 = 1 if the carry add wrapped (r20 < r28)
105 bis r8,r25,r25 C cy = r8 OR r25: carry out of limb 1
106 subq r19,4,r19 C loop cnt -= 4 for the next group
108 addq r1,r5,r28 C 2nd main add: r28 = r1 + r5
110 addq r25,r28,r21 C 2nd carry add: r21 = r28 + cy
111 addq r18,32,r18 C advance s2_ptr by 32 (one 4-limb group)
112 cmpult r28,r5,r8 C r8 = 1 if the main add wrapped (r28 < r5)
114 C Finish software pipeline for 1st loop: limbs 3 and 4 of the last group
115 $Lend1: cmpult r21,r28,r25 C r25 = 1 if the 2nd carry add wrapped (r21 < r28)
116 bis r8,r25,r25 C cy = r8 OR r25: carry out of limb 2
117 addq r2,r6,r28 C 3rd main add: r28 = r2 + r6
118 addq r28,r25,r22 C 3rd carry add: r22 = r28 + cy
119 cmpult r28,r6,r8 C r8 = 1 if the main add wrapped (r28 < r6)
120 cmpult r22,r28,r25 C r25 = 1 if the carry add wrapped (r22 < r28)
122 bis r8,r25,r25 C cy = r8 OR r25: carry out of limb 3
124 addq r3,r7,r28 C 4th main add: r28 = r3 + r7
125 addq r28,r25,r23 C 4th carry add: r23 = r28 + cy
126 cmpult r28,r7,r8 C r8 = 1 if the main add wrapped (r28 < r7)
127 cmpult r23,r28,r25 C r25 = 1 if the carry add wrapped (r23 < r28)
128 bis r8,r25,r25 C cy = r8 OR r25: carry out of limb 4
129 addq r16,32,r16 C advance res_ptr by 32 (one 4-limb group)
132 $Lend2: addq r19,4,r19 C undo the last subtract of 4 so r19 is the count of limbs still to do
134 C Start software pipeline for 2nd loop
139 C 2nd loop handles remaining 1-3 limbs, one limb per pass
141 $Loop0: addq r0,r4,r28 C main add: r28 = r0 + r4
143 cmpult r28,r4,r8 C r8 = 1 if the main add wrapped (r28 < r4)
145 addq r28,r25,r20 C carry add: r20 = r28 + cy
149 cmpult r20,r28,r25 C r25 = 1 if the carry add wrapped (r20 < r28)
150 subq r19,1,r19 C loop cnt -= 1
151 bis r8,r25,r25 C cy = r8 OR r25: carry out of this limb
154 $Lend0: addq r0,r4,r28 C main add: r28 = r0 + r4
155 addq r28,r25,r20 C carry add: r20 = r28 + cy
156 cmpult r28,r4,r8 C r8 = 1 if the main add wrapped (r28 < r4)
157 cmpult r20,r28,r25 C r25 = 1 if the carry add wrapped (r20 < r28)
159 bis r8,r25,r25 C cy = r8 OR r25: carry out of this limb
161 $Lret: bis r25,r31,r0 C return value: r0 = cy (r25 OR the zero register r31)