dnl beta-0.89.2
dnl [luatex.git] / source / libs / gmp / gmp-src / mpn / pa64 / umul.asm
dnl blob c3341ecfe60d72894fc0baf6fbe40d0ec47b9101
dnl Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.

dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of either:
dnl
dnl * the GNU Lesser General Public License as published by the Free
dnl Software Foundation; either version 3 of the License, or (at your
dnl option) any later version.
dnl
dnl or
dnl
dnl * the GNU General Public License as published by the Free Software
dnl Foundation; either version 2 of the License, or (at your option) any
dnl later version.
dnl
dnl or both in parallel, as here.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
dnl for more details.
dnl
dnl You should have received copies of the GNU General Public License and the
dnl GNU Lesser General Public License along with the GNU MP Library. If not,
dnl see https://www.gnu.org/licenses/.
dnl Optimizations:
dnl * Avoid skip instructions
dnl * Put carry-generating and carry-consuming insns consecutively
dnl * Don't allocate any stack, "home" positions for parameters could be used.
dnl Project-wide m4 configuration.  It is expected to supply the PROLOGUE,
dnl EPILOGUE and C comment macros used further down -- confirm against
dnl config.m4.
include(`../config.m4')

dnl Register aliases used by mpn_umul_ppmm_r:
dnl   p0   %r28   low 64 bits of the product
dnl   p1   %r29   high 64 bits of the product
dnl   t32  %r19   the constant 2^32
dnl   t0   %r20   low 32 bits of the middle sum, positioned for adding to p0
dnl   t1   %r21   high 32 bits of the middle sum, for adding to p1
dnl   x    %r22   sum of the two middle partial products
dnl   m0   %r23   first middle partial product
dnl   m1   %r24   second middle partial product
define(`p0',`%r28')
define(`p1',`%r29')
define(`t32',`%r19')
define(`t0',`%r20')
define(`t1',`%r21')
define(`x',`%r22')
define(`m0',`%r23')
define(`m1',`%r24')

dnl Emit the .level directive that matches the ABI: 2.0w when HAVE_ABI_2_0w
dnl is defined, plain 2.0 otherwise.
ifdef(`HAVE_ABI_2_0w',
`        .level 2.0w
',`        .level 2.0
')
C mpn_umul_ppmm_r: unsigned 64x64 -> 128 bit multiply.
C
C Operands:
C   HAVE_ABI_2_0w:  the two 64-bit factors are taken from %r26 and %r25, and
C                   the destination pointer from %r24.
C   otherwise:      each factor is assembled from a register pair (%r25 with
C                   %r26, %r23 with %r24) and the destination pointer is
C                   loaded from -180(%r30) after the frame adjustment.
C Result:
C   The low 64 bits of the product are stored through the destination pointer.
C   The high 64 bits are returned in %r28 (HAVE_ABI_2_0w) or split over
C   %r28,%r29 (otherwise).
C
C Method: xmpyu multiplies 32-bit register halves, so the four partial
C products are formed in the FPU, spilled to the scratch frame, reloaded into
C general registers and summed there with explicit carry handling.
C
C Instruction order matters here: add,l,*nuv conditionally nullifies the
C instruction that follows it, add / add,dc form a carry chain, and the ldo
C after bve executes in the branch delay slot.
PROLOGUE(mpn_umul_ppmm_r)
        ldo             128(%r30),%r30          C open a 128 byte scratch frame
ifdef(`HAVE_ABI_2_0w',
`       std             %r26,-64(%r30)          C first factor to memory
        std             %r25,-56(%r30)          C second factor to memory
        copy            %r24,%r31               C save pointer before m1 reuses %r24
',`
        depd            %r25,31,32,%r26         C combine %r25 and %r26 into one 64-bit factor
        std             %r26,-64(%r30)
        depd            %r23,31,32,%r24         C combine %r23 and %r24 into one 64-bit factor
        std             %r24,-56(%r30)
        ldw             -180(%r30),%r31         C %r31 = destination pointer
')

        fldd            -64(%r30),%fr4          C %fr4 = first factor
        fldd            -56(%r30),%fr5          C %fr5 = second factor

        xmpyu           %fr5R,%fr4R,%fr6        C right half x right half
        fstd            %fr6,-128(%r30)
        xmpyu           %fr5R,%fr4L,%fr7        C right half x left half
        fstd            %fr7,-120(%r30)
        xmpyu           %fr5L,%fr4R,%fr8        C left half x right half
        fstd            %fr8,-112(%r30)
        xmpyu           %fr5L,%fr4L,%fr9        C left half x left half
        fstd            %fr9,-104(%r30)

        depdi,z         1,31,1,t32              C t32 = 2^32

        ldd             -128(%r30),p0           C lo = low 64 bit of product
        ldd             -120(%r30),m0           C m0 = mid0 64 bit of product
        ldd             -112(%r30),m1           C m1 = mid1 64 bit of product
        ldd             -104(%r30),p1           C hi = high 64 bit of product

        add,l,*nuv      m0,m1,x                 C x = m1+m0, nullify next insn if no overflow
         add,l          t32,p1,p1               C propagate carry to mid of p1
        depd,z          x,31,32,t0              C lo32(m1+m0)
        add             t0,p0,p0                C carry out feeds the add,dc below
        extrd,u         x,31,32,t1              C hi32(m1+m0)
        add,dc          t1,p1,p1                C p1 += hi32 + carry

        std             p0,0(%r31)              C store low half of product
ifdef(`HAVE_ABI_2_0w',
`       copy            p1,%r28                 C return val in %r28
',`     extrd,u         p1,31,32,%r28           C return val in %r28,%r29
')
        bve             (%r2)                   C return via %r2
         ldo            -128(%r30),%r30         C delay slot: release the scratch frame
EPILOGUE(mpn_umul_ppmm_r)