beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc64 / mode32 / sqr_diagonal.asm
blobff5f4b3cfba7142bd24d2444973cce26ab149073
1 dnl PowerPC-64 mpn_sqr_diagonal.
5 dnl Copyright 2001-2003, 2005, 2006, 2010 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C POWER3/PPC630 18
35 C POWER4/PPC970 ?
36 C POWER5 7.25
37 C POWER6 9.5
C INPUT PARAMETERS
C   rp  r3  destination pointer, two 64-bit words are stored per source word
C   up  r4  source pointer, one 64-bit word is loaded per square
C   n   r5  number of source words, must be nonzero (see below)
define(`rp', r3)
define(`up', r4)
define(`n', r5)

ASM_START()

C mpn_sqr_diagonal
C
C For each of the n 64-bit words at up, form the full 128-bit square with
C mulld (low half) and mulhdu (high half) and store it at rp, low word
C first.  16 bytes are written for every 8 bytes read.
C
C The main loop handles four source words per pass.  ctr is loaded with
C (n+3)/4 and n mod 4 picks an entry point part way into the store sequence
C for the first, possibly partial, pass.  On those entries rp is biased
C downwards so that the fixed store offsets of the loop land on the start
C of the destination.
C
C n = 0 is not handled: ctr would be loaded with 0 and bdnz would wrap.
C
C Registers written: r0, r3-r12, ctr, cr0, cr6.  No stack is used.
C NOTE(review): all of these look volatile under the 64-bit ELF ABI, please
C confirm for any other ABI this file is built for.

PROLOGUE(mpn_sqr_diagonal)
ifdef(`HAVE_ABI_mode32',
`	rldicl	n, n, 0, 32')		C zero extend n

	rldicl.	r0, n, 0,62		C r0 = n & 3, set cr0
	addi	n, n, 3			C compute count...
	cmpdi	cr6, r0, 2		C cr6 = (n & 3) compared with 2
	srdi	n, n, 2			C ...for ctr, (n+3)/4 passes
	mtctr	n			C copy count into ctr
	beq	cr0, L(b00)		C n mod 4 = 0, full first pass
	blt	cr6, L(b01)		C n mod 4 = 1
	beq	cr6, L(b10)		C n mod 4 = 2
					C otherwise n mod 4 = 3, fall through

C Three leading words.  Their squares go to r7-r12, the registers the loop
C stores from L(11) onwards.
L(b11):	ld	r0, 0(up)
	ld	r10, 8(up)
	ld	r12, 16(up)
	addi	rp, rp, -16		C make 16(rp) the first destination word
	mulld	r7, r0, r0
	mulhdu	r8, r0, r0
	mulld	r9, r10, r10
	mulhdu	r10, r10, r10
	mulld	r11, r12, r12
	mulhdu	r12, r12, r12
	addi	up, up, 24		C three source words consumed
	b	L(11)

C One leading word, square in r11,r12.
	ALIGN(16)
L(b01):	ld	r0, 0(up)
	addi	rp, rp, -48		C make 48(rp) the first destination word
	addi	up, up, 8		C one source word consumed
	mulld	r11, r0, r0
	mulhdu	r12, r0, r0
	b	L(01)

C Two leading words, squares in r9-r12.
	ALIGN(16)
L(b10):	ld	r0, 0(up)
	ld	r12, 8(up)
	addi	rp, rp, -32		C make 32(rp) the first destination word
	addi	up, up, 16		C two source words consumed
	mulld	r9, r0, r0
	mulhdu	r10, r0, r0
	mulld	r11, r12, r12
	mulhdu	r12, r12, r12
	b	L(10)

C Main loop, four squares per pass.  n (r5) is dead once ctr is loaded, so
C r5 is reused for the first low product.
	ALIGN(32)
L(b00):
L(top):	ld	r0, 0(up)
	ld	r8, 8(up)
	ld	r10, 16(up)
	ld	r12, 24(up)
	mulld	r5, r0, r0
	mulhdu	r6, r0, r0
	mulld	r7, r8, r8
	mulhdu	r8, r8, r8
	mulld	r9, r10, r10
	mulhdu	r10, r10, r10
	mulld	r11, r12, r12
	mulhdu	r12, r12, r12
	addi	up, up, 32
	std	r5, 0(rp)
	std	r6, 8(rp)
L(11):	std	r7, 16(rp)
	std	r8, 24(rp)
L(10):	std	r9, 32(rp)
	std	r10, 40(rp)
L(01):	std	r11, 48(rp)
	std	r12, 56(rp)
	addi	rp, rp, 64
	bdnz	L(top)

	blr				C all passes done, return to caller
EPILOGUE()