dnl  AMD64 mpn_cnd_add_n, mpn_cnd_sub_n
dnl  Copyright 2011-2013 Free Software Foundation, Inc.
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
dnl  Pull in the shared m4 definitions.  Names used later in this file that
dnl  are not defined here (e.g. C, IFDOS, R32, L, MULFUNC_PROLOGUE) are
dnl  presumably supplied by this include -- confirm against config.m4.
include(`../config.m4')
C  * It might seem natural to use the cmov insn here, but since this function
C    is supposed to have the exact same execution pattern for cnd true and
C    false, and since cmov's documentation is not clear about whether it
C    actually reads both source operands and writes the register for a false
C    condition, we cannot use it.
C  * Two cases could be optimised: (1) cnd_add_n could use ADCSBB-from-memory
C    to save one insn/limb, and (2) when up=rp cnd_add_n and cnd_sub_n could use
C    ADCSBB-to-memory, again saving 1 insn/limb.
C  * This runs optimally at decoder bandwidth on K10.  It has not been tuned
C    for any other processor.
C INPUT PARAMETERS
C Symbolic names for the registers that hold the five arguments.  The
C registers on the left are the first five System V AMD64 integer argument
C registers.  The register named in each trailing dnl comment matches the
C Microsoft x64 argument location for the same position (the fifth argument
C being on the stack); presumably these document the DOS64 build -- confirm
C against the ABI glue macros.
C Each define must have its opening parenthesis directly after the macro
C name, and each quoted name must stay on one line, for m4 to see a call.
define(`cnd',	`%rdi')	dnl rcx
define(`rp',	`%rsi')	dnl rdx
define(`up',	`%rdx')	dnl r8
define(`vp',	`%rcx')	dnl r9
define(`n',	`%r8')	dnl rsp+40
C Choose the entry-point name.  Each ifdef expands its second argument only
C when the corresponding OPERATION_ symbol is defined (presumably by the
C build system when it assembles this file once per operation -- confirm).
C NOTE(review): only func is defined in the lines visible here, but the notes
C in this file refer to ADCSBB; confirm whether further per-operation
C defines belong inside these two ifdef bodies.
ifdef(`OPERATION_cnd_add_n', `
	define(func,	      mpn_cnd_add_n)')
ifdef(`OPERATION_cnd_sub_n', `
	define(func,	      mpn_cnd_sub_n)')

C Lists both function names this file can produce; the macro itself is
C defined outside this file.
MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
83 IFDOS
(`
mov 56(%rsp
), R32
(%r8
)')
91 sbb cnd, cnd C make cnd mask
100 jz L(top) C carry-save reg rax = 0 in this arc
105 L(b3): mov (vp,n,8), %r12
120 sbb R32(%rax), R32(%rax) C save carry
125 L(b2): mov (vp,n,8), %r12
135 sbb R32(%rax), R32(%rax) C save carry
140 L(b1): mov (vp,n,8), %r12
145 sbb R32(%rax), R32(%rax) C save carry
150 L(top): mov (vp,n,8), %r12
162 add R32(%rax), R32(%rax) C restore carry
171 sbb R32(%rax), R32(%rax) C save carry
175 L(end): neg R32(%rax)