dnl  HP-PA 2.0 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and
dnl  add the result to a second limb vector.
dnl  Copyright 1998-2000, 2002, 2003 Free Software Foundation, Inc.
dnl  This file is part of the GNU MP Library.
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
32 include(`..
/config.m4
')
36 C 8500,8600,8700: 6.375
C  The feed-in and wind-down code has not yet been scheduled.  Many cycles
C  could be saved there per call.
C  The main loop "BIG" is 4-way unrolled, mainly to allow
C  effective use of ADD,DC.  Delays in moving data via the cache from the FP
C  registers to the IU registers, have demanded a deep software pipeline, and
C  a lot of stack slots for partial products in flight.
49 C do 0, 1, 2, or 3 limbs
50 C if done, restore-some-regs and return
C  HP-PA stack grows upwards.  We could allocate 8 fewer slots by using the
C  slots marked FREE, as well as some slots in the caller's "frame marker".
C -38/-138 vlimb home slot.  For 2.0N, the vlimb arg will arrive here.
97 include(`..
/config.m4
')
C Register aliases for the incoming arguments.  climb, the carry limb that the
C add,dc chains further down keep updating, is mapped to the same register
C as vlimb: %r23.
100 define(`rp',`
%r26
') C
101 define(`up',`
%r25
') C
103 define(`vlimb',`
%r23
') C
105 define(`climb',`
%r23
') C
C Function entry.  Under the 2.0w ABI vlimb is first spilled to its home stack
C slot.  %r3 is then saved with std,ma, which also advances %r30 by 0x100;
C that is why the same home slot is addressed as -0x138 when vlimb is
C reloaded into %fr8 further down.
C NOTE(review): both HAVE_ABI_2_0w ifdef forms are cut short in this view
C (no closing text is visible) -- confirm against the complete file.
107 ifdef(`HAVE_ABI_2_0w',
111 PROLOGUE
(mpn_addmul_1
)
113 ifdef
(`HAVE_ABI_2_0w
',
114 ` std vlimb, -0x38(%r30) C store vlimb into "home" slot
116 std,ma
%r3
, 0x100(%r30
)
C climb is the same register as vlimb (%r23), so it is zeroed only after
C the spill above; from here on vlimb is used from %fr8.
119 ldo
0(%r0
), climb C clear climb
120 fldd
-0x138(%r30
), %fr8 C put vlimb
in fp register
C Register aliases used by the code that handles the n mod 4 leftover limbs.
C Two of the names map to one register: m032 and ma064 are both %r20.
C r000 is %r3, the register saved on entry.
C These names are defined again, with different registers, ahead of the
C 4-way unrolled code.
122 define
(`p032a1
',`%r1') C
123 define
(`p032a2
',`%r19') C
125 define
(`m032
',`%r20') C
126 define
(`m096
',`%r21') C
128 define
(`p000a
',`%r22') C
129 define
(`p064a
',`%r29') C
131 define
(`s000
',`%r31') C
133 define
(`ma000
',`%r4') C
134 define
(`ma064
',`%r20') C
136 define
(`r000
',`%r3') C
C Peel off the n mod 4 leftover limbs.  %r5 receives the two low bits of n;
C when they are zero control goes straight to L(BIG).
138 extrd
,u n
, 63, 2, %r5
139 cmpb
,= %r5
, %r0
, L
(BIG
)
C First leftover limb: form the four 32x32 partial products of %fr8 (vlimb)
C and %fr4, and park them in stack slots so that the integer unit can reload
C them.  R*L and L*R are the two mid products, R*R the low one, L*L the high.
C NOTE(review): the load that fills %fr4 is not visible in this view.
144 xmpyu
%fr8R
, %fr4L
, %fr22
145 xmpyu
%fr8L
, %fr4R
, %fr23
146 fstd
%fr22
, -0x78(%r30
) C mid product to
-0x78..
-0x71
147 xmpyu
%fr8R
, %fr4R
, %fr24
148 xmpyu
%fr8L
, %fr4L
, %fr25
149 fstd
%fr23
, -0x70(%r30
) C mid product to
-0x70..
-0x69
150 fstd
%fr24
, -0x80(%r30
) C
low product to
-0x80..
-0x79
C Count the limb just started and go to L(two_or_more) while %r5 is still
C nonzero.  The fstd after the branch presumably sits in its delay slot, so
C the high product is stored on both paths.
151 addib
,<> -1, %r5
, L
(two_or_more
)
152 fstd
%fr25
, -0x68(%r30
) C
high product to
-0x68..
-0x61
C Exactly one leftover limb: reload its four partial products into the
C integer registers.
C NOTE(review): the embedded numbering skips 157, so a statement may be
C missing from this view.
154 ldd
-0x78(%r30
), p032a1
155 ldd
-0x70(%r30
), p032a2
156 ldd
-0x80(%r30
), p000a
158 ldd
-0x68(%r30
), p064a
C Second leftover limb (presumably the L(two_or_more) path; the label itself
C is not visible in this view).  The new products reuse the stack slots of
C the previous limb, so every ldd of an old product is issued before the
C fstd that overwrites the same slot.
163 xmpyu
%fr8R
, %fr4L
, %fr22
164 xmpyu
%fr8L
, %fr4R
, %fr23
165 ldd
-0x78(%r30
), p032a1
166 fstd
%fr22
, -0x78(%r30
) C mid product to
-0x78..
-0x71
167 xmpyu
%fr8R
, %fr4R
, %fr24
168 xmpyu
%fr8L
, %fr4L
, %fr25
169 ldd
-0x70(%r30
), p032a2
170 fstd
%fr23
, -0x70(%r30
) C mid product to
-0x70..
-0x69
171 ldd
-0x80(%r30
), p000a
172 fstd
%fr24
, -0x80(%r30
) C
low product to
-0x80..
-0x79
173 ldd
-0x68(%r30
), p064a
C Go to L(three_or_more) when a third leftover limb remains.
174 addib
,<> -1, %r5
, L
(three_or_more
)
175 fstd
%fr25
, -0x68(%r30
) C
high product to
-0x68..
-0x61
C Combine the two mid products of the previous limb: m032 is their sum and
C m096 the carry out.  ma000 gets m032 moved up by 32 bits, ma064 gets m032
C moved down by 32 bits with the carry deposited above it.  m032 and ma064
C are the same register, so ma000 is formed first.
C NOTE(review): the embedded numbering jumps from 180 to 183; statements
C may be missing from this view.
177 add p032a1
, p032a2
, m032
178 add,dc
%r0
, %r0
, m096
179 depd
,z m032
, 31, 32, ma000
180 extrd
,u m032
, 31, 32, ma064
183 depd m096
, 31, 32, ma064
C The same mid-product combination again, presumably at the start of the
C L(three_or_more) path (label not visible in this view).
187 add p032a1
, p032a2
, m032
188 add,dc
%r0
, %r0
, m096
189 depd
,z m032
, 31, 32, ma000
190 extrd
,u m032
, 31, 32, ma064
C The addib below is disabled by its C marker.
192 C addib
,= -1, %r5
, L
(0_out
)
193 depd m096
, 31, 32, ma064
C Every statement from here to the disabled branch to L(loop0) carries a C
C marker, i.e. it is switched off.  It is a one-limb-per-pass loop; the active
C code after it runs the same instruction sequence once.
195 C xmpyu
%fr8R
, %fr4L
, %fr22
196 C xmpyu
%fr8L
, %fr4R
, %fr23
197 C ldd
-0x78(%r30
), p032a1
198 C fstd
%fr22
, -0x78(%r30
) C mid product to
-0x78..
-0x71
200 C xmpyu
%fr8R
, %fr4R
, %fr24
201 C xmpyu
%fr8L
, %fr4L
, %fr25
202 C ldd
-0x70(%r30
), p032a2
203 C fstd
%fr23
, -0x70(%r30
) C mid product to
-0x70..
-0x69
206 C
add climb
, p000a
, s000
207 C ldd
-0x80(%r30
), p000a
208 C fstd
%fr24
, -0x80(%r30
) C
low product to
-0x80..
-0x79
210 C
add,dc p064a
, %r0
, climb
212 C ldd
-0x68(%r30
), p064a
213 C fstd
%fr25
, -0x68(%r30
) C
high product to
-0x68..
-0x61
215 C
add ma000
, s000
, s000
216 C
add,dc ma064
, climb
, climb
219 C
add r000
, s000
, s000
220 C
add,dc
%r0
, climb
, climb
223 C
add p032a1
, p032a2
, m032
224 C
add,dc
%r0
, %r0
, m096
226 C depd
,z m032
, 31, 32, ma000
227 C extrd
,u m032
, 31, 32, ma064
229 C addib
,<> -1, %r5
, L
(loop0
)
230 C depd m096
, 31, 32, ma064
C Products for the last leftover limb.  As before, each old product is
C reloaded from its slot before the new one is stored there.
C NOTE(review): the load that fills %fr4 is not visible in this view.
233 xmpyu
%fr8R
, %fr4L
, %fr22
234 xmpyu
%fr8L
, %fr4R
, %fr23
235 ldd
-0x78(%r30
), p032a1
236 fstd
%fr22
, -0x78(%r30
) C mid product to
-0x78..
-0x71
237 xmpyu
%fr8R
, %fr4R
, %fr24
238 xmpyu
%fr8L
, %fr4L
, %fr25
239 ldd
-0x70(%r30
), p032a2
240 fstd
%fr23
, -0x70(%r30
) C mid product to
-0x70..
-0x69
C Accumulate the oldest limb in flight: s000 = climb + low product, the carry
C plus the high product become the new climb, then the realigned mid halves
C ma000 and ma064 are added on top.
242 add climb
, p000a
, s000
243 ldd
-0x80(%r30
), p000a
244 fstd
%fr24
, -0x80(%r30
) C
low product to
-0x80..
-0x79
245 add,dc p064a
, %r0
, climb
246 ldd
-0x68(%r30
), p064a
247 fstd
%fr25
, -0x68(%r30
) C
high product to
-0x68..
-0x61
248 add ma000
, s000
, s000
249 add,dc ma064
, climb
, climb
C NOTE(review): the embedded numbering jumps from 249 to 251.  The disabled
C copy of this sequence has an add of r000 into s000 at this point, so a
C statement is probably missing from this view.
251 add,dc
%r0
, climb
, climb
C Realign the next pair of mid products: sum in m032, carry in m096, then
C split across ma000 and ma064.
253 add p032a1
, p032a2
, m032
254 add,dc
%r0
, %r0
, m096
255 depd
,z m032
, 31, 32, ma000
256 extrd
,u m032
, 31, 32, ma064
258 depd m096
, 31, 32, ma064
C Wind-down: no further multiplies, only reload and accumulate.
260 ldd
-0x78(%r30
), p032a1
261 ldd
-0x70(%r30
), p032a2
263 add climb
, p000a
, s000
264 ldd
-0x80(%r30
), p000a
265 add,dc p064a
, %r0
, climb
266 ldd
-0x68(%r30
), p064a
267 add ma000
, s000
, s000
268 add,dc ma064
, climb
, climb
C NOTE(review): numbering jumps from 268 to 270, as in the pass above.
270 add,dc
%r0
, climb
, climb
273 add p032a1
, p032a2
, m032
274 add,dc
%r0
, %r0
, m096
275 depd
,z m032
, 31, 32, ma000
276 extrd
,u m032
, 31, 32, ma064
278 depd m096
, 31, 32, ma064
C Final accumulation for the leftover limbs.
280 add climb
, p000a
, s000
281 add,dc p064a
, %r0
, climb
282 add ma000
, s000
, s000
283 add,dc ma064
, climb
, climb
C NOTE(review): numbering jumps from 283 to 285, as in the passes above.
285 add,dc
%r0
, climb
, climb
C Presumably branches to L(done) when 4 >= n, i.e. when no complete group of
C four limbs is left for the unrolled code.
288 cmpib
,>= 4, n
, L
(done
)
291 C
4-way unrolled code.
C Register aliases for the 4-way unrolled code.  Names already used by the
C leftover-limb path are defined again here with other registers (m032 moves
C from %r20 to %r4, s000 from %r31 to %r10, and so on).
C The groups overlap on purpose or by necessity: the mid-product names, the
C low/high product names and the r-limb names all start at %r1, and the ma
C names reuse the m registers shifted by one (ma064 is m032, ma128 is m096).
295 define
(`p032a1
',`%r1') C
296 define
(`p032a2
',`%r19') C
297 define
(`p096b1
',`%r20') C
298 define
(`p096b2
',`%r21') C
299 define
(`p160c1
',`%r22') C
300 define
(`p160c2
',`%r29') C
301 define
(`p224d1
',`%r31') C
302 define
(`p224d2
',`%r3') C
304 define
(`m032
',`%r4') C
305 define
(`m096
',`%r5') C
306 define
(`m160
',`%r6') C
307 define
(`m224
',`%r7') C
308 define
(`m288
',`%r8') C
310 define
(`p000a
',`%r1') C
311 define
(`p064a
',`%r19') C
312 define
(`p064b
',`%r20') C
313 define
(`p128b
',`%r21') C
314 define
(`p128c
',`%r22') C
315 define
(`p192c
',`%r29') C
316 define
(`p192d
',`%r31') C
317 define
(`p256d
',`%r3') C
319 define
(`s000
',`%r10') C
320 define
(`s064
',`%r11') C
321 define
(`s128
',`%r12') C
322 define
(`s192
',`%r13') C
324 define
(`ma000
',`%r9') C
325 define
(`ma064
',`%r4') C
326 define
(`ma128
',`%r5') C
327 define
(`ma192
',`%r6') C
328 define
(`ma256
',`%r7') C
330 define
(`r000
',`%r1') C
331 define
(`r064
',`%r19') C
332 define
(`r128
',`%r20') C
333 define
(`r192
',`%r21') C
C Save %r10-%r13, the registers behind s000..s192.  They are reloaded from
C the same four slots before the function returns.
339 std %r10
, -0xc8(%r30
)
340 std %r11
, -0xc0(%r30
)
341 std %r12
, -0xb8(%r30
)
342 std %r13
, -0xb0(%r30
)
C Turn n into a count of 4-limb groups.  The variant for ABIs other than 2.0w
C extracts only 30 bits, which zero extends the result.
C NOTE(review): the closing text of this ifdef is not visible in this view.
344 ifdef
(`HAVE_ABI_2_0w
',
345 ` extrd,u n, 61, 62, n C right shift 2
346 ',` extrd
,u n
, 61, 30, n C right shift
2, zero extend
C Feed-in for the 4-way code: all sixteen 32x32 partial products of vlimb
C (%fr8) with four limbs held in %fr4..%fr7.  The eight mid products go to
C %fr22..%fr29; %fr22..%fr27 are reused for low and high products once their
C mid products have been stored.
C NOTE(review): the loads that fill %fr4..%fr7 are not visible in this view.
354 xmpyu %fr8R, %fr4L, %fr22
355 xmpyu %fr8L, %fr4R, %fr23
356 xmpyu %fr8R, %fr5L, %fr24
357 xmpyu %fr8L, %fr5R, %fr25
358 xmpyu %fr8R, %fr6L, %fr26
359 xmpyu %fr8L, %fr6R, %fr27
360 fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
361 xmpyu %fr8R, %fr7L, %fr28
362 xmpyu %fr8L, %fr7R, %fr29
363 fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
364 xmpyu %fr8R, %fr4R, %fr30
365 xmpyu %fr8L, %fr4L, %fr31
366 fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
367 xmpyu %fr8R, %fr5R, %fr22
368 xmpyu %fr8L, %fr5L, %fr23
369 fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
370 xmpyu %fr8R, %fr6R, %fr24
371 xmpyu %fr8L, %fr6L, %fr25
372 fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
373 xmpyu %fr8R, %fr7R, %fr26
374 fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
C Count this group; go to L(8_or_more) when at least one more group follows.
C The xmpyu after the branch presumably sits in its delay slot.
375 addib,<> -1, n, L(8_or_more)
376 xmpyu %fr8L, %fr7L, %fr27
C Only one group: store the remaining products, then reload the eight mid
C products into integer registers.
377 fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
378 fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
379 fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
380 fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
381 fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
382 fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
383 fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
384 fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
385 fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
386 fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
387 ldd -0x78(%r30), p032a1
388 ldd -0x70(%r30), p032a2
389 ldd -0x38(%r30), p096b1
390 ldd -0x30(%r30), p096b2
391 ldd -0x58(%r30), p160c1
392 ldd -0x50(%r30), p160c2
393 ldd -0x18(%r30), p224d1
394 ldd -0x10(%r30), p224d2
C Two or more groups (presumably the L(8_or_more) target; the label is not
C visible in this view): store the remaining products of the first group.
399 fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
400 fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
402 fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
403 fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
404 fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
405 fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
406 fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
407 fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
408 fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
409 fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
C Multiply the second group while reloading the mid products of the first;
C each ldd of a slot comes before the fstd that overwrites it.
414 xmpyu %fr8R, %fr4L, %fr22
415 ldd -0x78(%r30), p032a1
416 xmpyu %fr8L, %fr4R, %fr23
417 xmpyu %fr8R, %fr5L, %fr24
418 ldd -0x70(%r30), p032a2
419 xmpyu %fr8L, %fr5R, %fr25
420 xmpyu %fr8R, %fr6L, %fr26
421 ldd -0x38(%r30), p096b1
422 xmpyu %fr8L, %fr6R, %fr27
423 fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
424 xmpyu %fr8R, %fr7L, %fr28
425 ldd -0x30(%r30), p096b2
426 xmpyu %fr8L, %fr7R, %fr29
427 fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
428 xmpyu %fr8R, %fr4R, %fr30
429 ldd -0x58(%r30), p160c1
430 xmpyu %fr8L, %fr4L, %fr31
431 fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
432 xmpyu %fr8R, %fr5R, %fr22
433 ldd -0x50(%r30), p160c2
434 xmpyu %fr8L, %fr5L, %fr23
435 fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
436 xmpyu %fr8R, %fr6R, %fr24
437 ldd -0x18(%r30), p224d1
438 xmpyu %fr8L, %fr6L, %fr25
439 fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
440 xmpyu %fr8R, %fr7R, %fr26
441 ldd -0x10(%r30), p224d2
442 fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
C Go to L(end2) when this was the last group.
443 addib,= -1, n, L(end2)
444 xmpyu %fr8L, %fr7L, %fr27
C Body of the 4-way unrolled loop (its closing branch targets L(loop)).  The
C integer work for one group of four limbs is done first:
C   1. add the two mid products of each limb, chaining carries m032..m288
C   2. realign those sums by 32 bits into ma000..ma256 (depd and extrd pairs)
C   3. chain low and high products plus the incoming climb into s000..s192
C   4. add the realigned mid sums, then r000..r192
C While that runs, the products of the following group are stored into the
C slots just read.  The multiplies for a later group come after it.
C NOTE(review): the instructions that load r000..r192 and %fr4..%fr7, and
C those that store s000..s192, are not visible in this view.
446 add p032a1, p032a2, m032
447 ldd -0x80(%r30), p000a
448 add,dc p096b1, p096b2, m096
449 fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
451 add,dc p160c1, p160c2, m160
452 ldd -0x68(%r30), p064a
453 add,dc p224d1, p224d2, m224
454 fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
456 add,dc %r0, %r0, m288
457 ldd -0x40(%r30), p064b
459 fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
461 depd,z m032, 31, 32, ma000
462 ldd -0x28(%r30), p128b
463 extrd,u m032, 31, 32, ma064
464 fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
466 depd m096, 31, 32, ma064
467 ldd -0x60(%r30), p128c
468 extrd,u m096, 31, 32, ma128
469 fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
471 depd m160, 31, 32, ma128
472 ldd -0x48(%r30), p192c
473 extrd,u m160, 31, 32, ma192
474 fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
476 depd m224, 31, 32, ma192
477 ldd -0x20(%r30), p192d
478 extrd,u m224, 31, 32, ma256
479 fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
481 depd m288, 31, 32, ma256
482 ldd -0x88(%r30), p256d
C Carry chain over the low and high products, seeded with the incoming climb;
C the top high product plus the final carry becomes the new climb.
483 add climb, p000a, s000
484 fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
486 add,dc p064a, p064b, s064
488 add,dc p128b, p128c, s128
489 fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
491 add,dc p192c, p192d, s192
493 add,dc p256d, %r0, climb
494 fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
497 add ma000, s000, s000 C accum mid 0
499 add,dc ma064, s064, s064 C accum mid 1
501 add,dc ma128, s128, s128 C accum mid 2
503 add,dc ma192, s192, s192 C accum mid 3
506 add,dc ma256, climb, climb
508 add r000, s000, s000 C accum rlimb 0
511 add,dc r064, s064, s064 C accum rlimb 1
512 add,dc r128, s128, s128 C accum rlimb 2
515 add,dc r192, s192, s192 C accum rlimb 3
516 add,dc %r0, climb, climb
C Multiplies for a later group, interleaved with reloading the mid products
C that were stored earlier in this pass.
519 xmpyu %fr8R, %fr4L, %fr22
520 ldd -0x78(%r30), p032a1
521 xmpyu %fr8L, %fr4R, %fr23
524 xmpyu %fr8R, %fr5L, %fr24
525 ldd -0x70(%r30), p032a2
526 xmpyu %fr8L, %fr5R, %fr25
529 xmpyu %fr8R, %fr6L, %fr26
530 ldd -0x38(%r30), p096b1
531 xmpyu %fr8L, %fr6R, %fr27
532 fstd %fr22, -0x78(%r30) C mid product to -0x78..-0x71
534 xmpyu %fr8R, %fr7L, %fr28
535 ldd -0x30(%r30), p096b2
536 xmpyu %fr8L, %fr7R, %fr29
537 fstd %fr23, -0x70(%r30) C mid product to -0x70..-0x69
539 xmpyu %fr8R, %fr4R, %fr30
540 ldd -0x58(%r30), p160c1
541 xmpyu %fr8L, %fr4L, %fr31
542 fstd %fr24, -0x38(%r30) C mid product to -0x38..-0x31
544 xmpyu %fr8R, %fr5R, %fr22
545 ldd -0x50(%r30), p160c2
546 xmpyu %fr8L, %fr5L, %fr23
547 fstd %fr25, -0x30(%r30) C mid product to -0x30..-0x29
549 xmpyu %fr8R, %fr6R, %fr24
550 ldd -0x18(%r30), p224d1
551 xmpyu %fr8L, %fr6L, %fr25
552 fstd %fr26, -0x58(%r30) C mid product to -0x58..-0x51
554 xmpyu %fr8R, %fr7R, %fr26
555 ldd -0x10(%r30), p224d2
556 fstd %fr27, -0x50(%r30) C mid product to -0x50..-0x49
557 xmpyu %fr8L, %fr7L, %fr27
C Next pass while groups remain.
C NOTE(review): whatever follows this branch in its delay slot is not
C visible in this view.
559 addib,<> -1, n, L(loop)
C Wind-down of the 4-way code, entered once no group is left to multiply.
C First pass: the same accumulation as the loop body, still storing the last
C group of products to their slots, but with no new multiplies.
C NOTE(review): the labels of this section and the stores of s000..s192 are
C not visible in this view.
563 add p032a1, p032a2, m032
564 ldd -0x80(%r30), p000a
565 add,dc p096b1, p096b2, m096
566 fstd %fr28, -0x18(%r30) C mid product to -0x18..-0x11
567 add,dc p160c1, p160c2, m160
568 ldd -0x68(%r30), p064a
569 add,dc p224d1, p224d2, m224
570 fstd %fr29, -0x10(%r30) C mid product to -0x10..-0x09
571 add,dc %r0, %r0, m288
572 ldd -0x40(%r30), p064b
573 fstd %fr30, -0x80(%r30) C low product to -0x80..-0x79
574 depd,z m032, 31, 32, ma000
575 ldd -0x28(%r30), p128b
576 extrd,u m032, 31, 32, ma064
577 fstd %fr31, -0x68(%r30) C high product to -0x68..-0x61
578 depd m096, 31, 32, ma064
579 ldd -0x60(%r30), p128c
580 extrd,u m096, 31, 32, ma128
581 fstd %fr22, -0x40(%r30) C low product to -0x40..-0x39
582 depd m160, 31, 32, ma128
583 ldd -0x48(%r30), p192c
584 extrd,u m160, 31, 32, ma192
585 fstd %fr23, -0x28(%r30) C high product to -0x28..-0x21
586 depd m224, 31, 32, ma192
587 ldd -0x20(%r30), p192d
588 extrd,u m224, 31, 32, ma256
589 fstd %fr24, -0x60(%r30) C low product to -0x60..-0x59
590 depd m288, 31, 32, ma256
591 ldd -0x88(%r30), p256d
592 add climb, p000a, s000
593 fstd %fr25, -0x48(%r30) C high product to -0x48..-0x41
594 add,dc p064a, p064b, s064
596 add,dc p128b, p128c, s128
597 fstd %fr26, -0x20(%r30) C low product to -0x20..-0x19
598 add,dc p192c, p192d, s192
600 add,dc p256d, %r0, climb
601 fstd %fr27, -0x88(%r30) C high product to -0x88..-0x81
603 add ma000, s000, s000 C accum mid 0
605 add,dc ma064, s064, s064 C accum mid 1
606 add,dc ma128, s128, s128 C accum mid 2
607 add,dc ma192, s192, s192 C accum mid 3
608 add,dc ma256, climb, climb
609 add r000, s000, s000 C accum rlimb 0
610 add,dc r064, s064, s064 C accum rlimb 1
611 add,dc r128, s128, s128 C accum rlimb 2
613 add,dc r192, s192, s192 C accum rlimb 3
614 add,dc %r0, climb, climb
C Reload the mid products of the final group.
616 ldd -0x78(%r30), p032a1
618 ldd -0x70(%r30), p032a2
620 ldd -0x38(%r30), p096b1
621 ldd -0x30(%r30), p096b2
622 ldd -0x58(%r30), p160c1
623 ldd -0x50(%r30), p160c2
624 ldd -0x18(%r30), p224d1
625 ldd -0x10(%r30), p224d2
C Second pass: accumulate the final group.  Nothing is left to store to the
C product slots, so only loads and adds remain.
629 add p032a1, p032a2, m032
630 ldd -0x80(%r30), p000a
631 add,dc p096b1, p096b2, m096
632 add,dc p160c1, p160c2, m160
633 ldd -0x68(%r30), p064a
634 add,dc p224d1, p224d2, m224
635 add,dc %r0, %r0, m288
636 ldd -0x40(%r30), p064b
637 depd,z m032, 31, 32, ma000
638 ldd -0x28(%r30), p128b
639 extrd,u m032, 31, 32, ma064
640 depd m096, 31, 32, ma064
641 ldd -0x60(%r30), p128c
642 extrd,u m096, 31, 32, ma128
643 depd m160, 31, 32, ma128
644 ldd -0x48(%r30), p192c
645 extrd,u m160, 31, 32, ma192
646 depd m224, 31, 32, ma192
647 ldd -0x20(%r30), p192d
648 extrd,u m224, 31, 32, ma256
649 depd m288, 31, 32, ma256
650 ldd -0x88(%r30), p256d
651 add climb, p000a, s000
652 add,dc p064a, p064b, s064
654 add,dc p128b, p128c, s128
655 add,dc p192c, p192d, s192
657 add,dc p256d, %r0, climb
659 add ma000, s000, s000 C accum mid 0
661 add,dc ma064, s064, s064 C accum mid 1
662 add,dc ma128, s128, s128 C accum mid 2
663 add,dc ma192, s192, s192 C accum mid 3
664 add,dc ma256, climb, climb
665 add r000, s000, s000 C accum rlimb 0
666 add,dc r064, s064, s064 C accum rlimb 1
667 add,dc r128, s128, s128 C accum rlimb 2
669 add,dc r192, s192, s192 C accum rlimb 3
670 add,dc %r0, climb, climb
C Reload %r13..%r10 from the four slots they were saved to before the 4-way
C code started.
675 ldd -0xb0(%r30), %r13
676 ldd -0xb8(%r30), %r12
677 ldd -0xc0(%r30), %r11
678 ldd -0xc8(%r30), %r10
C For ABIs other than 2.0w the final climb is split into two 32-bit halves:
C low half to %r29, high half to %r28 (presumably the return value).
C NOTE(review): the 2.0w branch of this ifdef, its closing text and the
C return instruction are not visible in this view.
684 ifdef(`HAVE_ABI_2_0w',
686 ',` extrd,u climb, 63, 32, %r29
687 extrd,u climb, 31, 32, %r28
C Step %r30 back by 0x100 and reload %r3, undoing the std,ma done on entry.
692 ldd
,mb
-0x100(%r30
), %r3
693 EPILOGUE
(mpn_addmul_1
)