dnl  HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and
dnl  subtract the result from a second limb vector.
dnl  Copyright 1995, 2000-2003 Free Software Foundation, Inc.
dnl  This file is part of the GNU MP Library.
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C Register assignments used by mpn_submul_1.
C
C %r26, %r25, %r24 and %r23 are the first four argument registers of the
C 32-bit PA-RISC calling convention and %r28 is its return value register.
C Each define is kept on one line so that the expansion is the bare register
C name with no embedded newline.
define(`res_ptr',`%r26')	C limb vector that is read and written back
define(`s1_ptr',`%r25')		C limb vector that is multiplied
define(`size_param',`%r24')	C limb count
define(`s2_limb',`%r23')	C multiplier limb

define(`cylimb',`%r28')		C carry limb

C hi1 shares %r23 with s2_limb.  The function body stores s2_limb to memory
C and reloads it into an FP register on entry; s2_limb is not read again in
C the code shown.
define(`hi1',`%r23')		C safe to reuse
C mpn_submul_1: multiply the limb vector at s1_ptr by s2_limb and subtract
C the result from the limb vector at res_ptr, as stated in the file header.
C size_param is counted down as limbs are processed and cylimb collects the
C carry limb.
C
C NOTE(review): this listing is incomplete.  The integer at the start of each
C line looks like a line number carried over from the original listing, and
C the gaps in that numbering mark lines that are not shown.  The labels
C L(0), L(loop), L(end), L(few_limbs), L(ret) and L(loop2) and the names
C s0-s3, lo0 and hi3 are used below but are not defined in the visible lines.
C The integer instructions that consume the xmpyu products are also absent.
55 PROLOGUE(mpn_submul_1)
56 C .callinfo frame=128,no_calls
C Move the multiplier into the FPU through memory: it is stored at
C -16(%r30) and reloaded into %fr31R, the operand that every xmpyu below uses.
59 stws s2_limb,-16(%r30)
60 add %r0,%r0,cylimb C clear cy and cylimb
C size_param -= 4 and branch to L(few_limbs) if the result is negative.
C The fldws directly after the branch executes in its delay slot.
61 addib,< -4,size_param,L(few_limbs)
62 fldws -16(%r30),%fr31R
C Bit 29 of s1_ptr is the bit of weight 4 (PA-RISC numbers bits from the most
C significant end).  L(0) is taken when that bit is clear, which skips the
C single-limb step below.  Presumably this makes s1_ptr 8-byte aligned for
C the fldds doubleword loads - TODO confirm against the full source.
71 bb,>=,n s1_ptr,29,L(0)
C Single-limb step: load one limb (s1_ptr += 4) and form its 64-bit product
C with the multiplier in %fr5.
73 fldws,ma 4(s1_ptr),%fr4
75 xmpyu %fr4,%fr31R,%fr5
C The sum is written to %r0 and so discarded; only the carry bit changes.
80 add s0,lo0,%r0 C invert cy
C One limb consumed: size_param -= 1, branch out if the result is negative.
81 addib,< -1,size_param,L(few_limbs)
84 C start software pipeline ----------------------------------------------------
C Each fldds,ma fetches a doubleword (two limbs) and advances s1_ptr by 8.
C The four xmpyu multiply each 32-bit half by the limb in %fr31R, leaving the
C products in %fr5, %fr6, %fr9 and %fr10.
86 fldds,ma 8(s1_ptr),%fr4
87 fldds,ma 8(s1_ptr),%fr8
89 xmpyu %fr4L,%fr31R,%fr5
90 xmpyu %fr4R,%fr31R,%fr6
91 xmpyu %fr8L,%fr31R,%fr9
92 xmpyu %fr8R,%fr31R,%fr10
C size_param -= 4 and branch to L(end) if the result is negative.  The addc
C directly after the branch executes in its delay slot.
113 addib,< -4,size_param,L(end)
114 addc %r0,hi3,cylimb C propagate carry into cylimb
115 C main loop ------------------------------------------------------------------
C Same load and multiply pattern as the pipeline start, for the next four
C limbs.  The embedded numbering suggests other instructions are interleaved
C between these lines in the full source.
117 fldds,ma 8(s1_ptr),%fr4
118 fldds,ma 8(s1_ptr),%fr8
121 xmpyu %fr4L,%fr31R,%fr5
123 xmpyu %fr4R,%fr31R,%fr6
125 xmpyu %fr8L,%fr31R,%fr9
127 xmpyu %fr8R,%fr31R,%fr10
137 subb %r0,%r0,lo0 C these two insns ...
138 add lo0,lo0,%r0 C ... just invert cy
C Store four result limbs, advancing res_ptr by 4 after each store.
150 stws,ma s0,4(res_ptr)
152 stws,ma s1,4(res_ptr)
154 stws,ma s2,4(res_ptr)
156 stws,ma s3,4(res_ptr)
C size_param -= 4 and loop while the result is still >= 0.  The addc directly
C after the branch executes in its delay slot.
158 addib,>= -4,size_param,L(loop)
159 addc %r0,hi3,cylimb C propagate carry into cylimb
160 C finish software pipeline ---------------------------------------------------
C Store the last four pipelined result limbs.
168 stws,ma s0,4(res_ptr)
170 stws,ma s1,4(res_ptr)
172 stws,ma s2,4(res_ptr)
174 stws,ma s3,4(res_ptr)
175 subb %r0,%r0,lo0 C these two insns ...
176 add lo0,lo0,%r0 C ... invert cy
178 C restore callee-saves registers ---------------------------------------------
C NOTE(review): no register restore instructions appear in the visible lines.
C Add 4 back to size_param; a result of zero branches to L(ret).
186 addib,=,n 4,size_param,L(ret)
C Leftover limbs, one per pass: load a limb (s1_ptr += 4), multiply it, store
C one result limb (res_ptr += 4) and count size_param down until it is zero.
189 fldws,ma 4(s1_ptr),%fr4
191 xmpyu %fr4,%fr31R,%fr5
198 add s0,lo0,%r0 C invert cy
199 stws,ma s0,4(res_ptr)
200 addib,<> -1,size_param,L(loop2)
C Fold the final carry bit into cylimb.
204 addc %r0,cylimb,cylimb
207 EPILOGUE(mpn_submul_1)