1 dnl PowerPC-64 mpn_mul_basecase.
3 dnl Copyright 1999-2001, 2003-2006, 2008 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
20 dnl or both in parallel, as here.
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
dnl Include ../config.m4.  The quoted file name must sit on a single line:
dnl m4 keeps any newline found inside the quotes as part of the name.
31 include(`../config.m4')
dnl outer_rp and outer_up are symbolic names for r22 and r23.  Code further
dnl down forms rp and up from them with addi and steps outer_rp by 8.
dnl The macro bodies must not contain a newline, otherwise every expansion
dnl inside an operand list would be split across two lines.
47 define(`outer_rp', `r22')
48 define(`outer_up', `r23')
51 PROLOGUE(mpn_mul_basecase)
53 C Special code for un <= 2, for efficiency of these important cases,
54 C and since it simplifies the default code.
60 mulld r8, r5, r7 C weight 0
61 mulhdu r9, r5, r7 C weight 1
68 mulld r8, r0, r7 C weight 1
69 mulhdu r10, r0, r7 C weight 2
79 mulld r8, r5, r6 C weight 1
80 mulhdu r11, r5, r6 C weight 2
84 mulld r12, r0, r6 C weight 2
85 mulhdu r0, r0, r6 C weight 3
108 ld v0, 0(vp) C new v limb
112 rldicl. r0, un, 0,62 C r0 = n & 3, set cr0
114 addi un, un, 1 C compute count...
115 srdi un, un, 2 C ...for ctr
116 mtctr un C copy inner loop count into ctr
123 L(b3): mulld r0, r26, v0
180 mtctr un C copy inner loop count into ctr
183 addi outer_rp, outer_rp, 8
184 ld v0, 0(vp) C new v limb
196 ALIGN(16) C registers dying
199 mulhdu r10, r26, v0 C 26
203 mulhdu r8, r27, v0 C 27
206 adde r0, r0, r12 C 0 12
207 adde r24, r24, r10 C 24 10
209 mulhdu r10, r26, v0 C 26
213 mulhdu r12, r27, v0 C 27
216 adde r9, r9, r8 C 8 9
217 adde r11, r11, r10 C 10 11
219 addc r0, r0, r28 C 0 28
221 adde r24, r24, r29 C 7 29
223 adde r9, r9, r30 C 9 30
225 adde r11, r11, r31 C 11 31
322 mtctr un C copy inner loop count into ctr
323 addi rp, outer_rp, 16
325 addi outer_rp, outer_rp, 8
326 ld v0, 0(vp) C new v limb
348 ALIGN(16) C registers dying
351 mulhdu r10, r26, v0 C 26
355 mulhdu r8, r27, v0 C 27
358 adde r0, r0, r12 C 0 12
359 adde r24, r24, r10 C 24 10
361 mulhdu r10, r26, v0 C 26
365 mulhdu r12, r27, v0 C 27
368 adde r9, r9, r8 C 8 9
369 adde r11, r11, r10 C 10 11
371 addc r0, r0, r28 C 0 28
373 adde r24, r24, r29 C 7 29
375 adde r9, r9, r30 C 9 30
377 adde r11, r11, r31 C 11 31
479 mtctr un C copy inner loop count into ctr
480 addi rp, outer_rp, 24
481 addi up, outer_up, 16
482 addi outer_rp, outer_rp, 8
483 ld v0, 0(vp) C new v limb
510 ALIGN(16) C registers dying
513 mulhdu r10, r26, v0 C 26
517 mulhdu r8, r27, v0 C 27
520 adde r0, r0, r12 C 0 12
521 adde r24, r24, r10 C 24 10
523 mulhdu r10, r26, v0 C 26
527 mulhdu r12, r27, v0 C 27
530 adde r9, r9, r8 C 8 9
531 adde r11, r11, r10 C 10 11
533 addc r0, r0, r28 C 0 28
535 adde r24, r24, r29 C 7 29
537 adde r9, r9, r30 C 9 30
539 adde r11, r11, r31 C 11 31
627 mtctr un C copy inner loop count into ctr
629 addi up, outer_up, -8
630 addi outer_rp, outer_rp, 8
631 ld v0, 0(vp) C new v limb
638 ALIGN(16) C registers dying
641 mulhdu r10, r26, v0 C 26
645 mulhdu r8, r27, v0 C 27
648 adde r0, r0, r12 C 0 12
649 adde r24, r24, r10 C 24 10
651 mulhdu r10, r26, v0 C 26
655 mulhdu r12, r27, v0 C 27
658 adde r9, r9, r8 C 8 9
659 adde r11, r11, r10 C 10 11
661 addc r0, r0, r28 C 0 28
663 adde r24, r24, r29 C 7 29
665 adde r9, r9, r30 C 9 30
667 adde r11, r11, r31 C 11 31
697 L(ret): ld r31, -8(r1)