beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / alpha / submul_1.asm
blob2b63b52fa40dedb27d499f8b90c55f7b67496bc2
1 dnl Alpha mpn_submul_1 -- Multiply a limb vector with a limb and subtract
2 dnl the result from a second limb vector.
4 dnl Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
7 dnl
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
10 dnl
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
14 dnl
15 dnl or
16 dnl
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any
19 dnl later version.
20 dnl
21 dnl or both in parallel, as here.
22 dnl
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 dnl for more details.
27 dnl
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
32 include(`../config.m4')
34 C cycles/limb
35 C EV4: 42
36 C EV5: 18
37 C EV6: 7
39 C INPUT PARAMETERS
40 C rp r16
41 C up r17
42 C n r18
43 C limb r19
46 ASM_START()
C mpn_submul_1: for each of the n limbs, multiply up[i] by limb, subtract the
C product (plus the running carry) from rp[i] and store the difference back
C to rp[i].  The final carry limb is left in r0 at the ret.
C
C Register use, as read from the code below:
C   r0       carry limb passed between iterations; result at return
C   r2       current source limb (in the loop, reloaded with the next one)
C   r3       low product half, then product + carry, then the stored difference
C   r4       borrow from the first limb, afterwards the high product half
C   r5       limb loaded from rp, then the borrow out of the subtraction
C   r16-r19  rp, up, n, limb (see INPUT PARAMETERS)
C
C NOTE(review): up[0] and rp[0] are read before n is tested, so this looks
C like it requires n >= 1 -- confirm against the callers.
47 PROLOGUE(mpn_submul_1)
C First limb: start its product and fetch rp[0] before testing the size.
48 ldq r2,0(r17) C r2 = s1_limb
49 addq r17,8,r17 C s1_ptr++
50 subq r18,1,r18 C size--
51 mulq r2,r19,r3 C r3 = prod_low
52 ldq r5,0(r16) C r5 = *res_ptr
53 umulh r2,r19,r0 C r0 = prod_high
54 beq r18,$Lend1 C jump if size was == 1
C At least two limbs: fetch the second limb, then finish the first one.
55 ldq r2,0(r17) C r2 = s1_limb
56 addq r17,8,r17 C s1_ptr++
57 subq r18,1,r18 C size--
58 subq r5,r3,r3 C r3 = *res_ptr - prod_low
59 cmpult r5,r3,r4 C r4 = borrow (1 if the difference wrapped above r5)
60 stq r3,0(r16) C *res_ptr = difference
61 addq r16,8,r16 C res_ptr++
62 beq r18,$Lend2 C jump if size was == 2
64 ALIGN(8)
C Main loop.  On entry r2 holds the limb to multiply and r4 + r0 is the
C carry owed by the previous limb (first pass: borrow + prod_high; later
C passes: prod_high + combined carries).  Each pass finishes one limb and
C loads the following one; the last limb is left for $Lend2.
65 $Loop: mulq r2,r19,r3 C r3 = prod_low
66 ldq r5,0(r16) C r5 = *res_ptr
67 addq r4,r0,r0 C cy_limb = r4 + r0, the full carry into this limb
68 subq r18,1,r18 C size--
69 umulh r2,r19,r4 C r4 = cy_limb
70 ldq r2,0(r17) C r2 = next s1_limb (both multiplies above have read r2)
71 addq r17,8,r17 C s1_ptr++
72 addq r3,r0,r3 C r3 = cy_limb + prod_low
73 cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
74 subq r5,r3,r3 C r3 = *res_ptr - (cy_limb + prod_low)
75 cmpult r5,r3,r5 C r5 = borrow from that subtraction
76 stq r3,0(r16) C *res_ptr = difference
77 addq r16,8,r16 C res_ptr++
78 addq r5,r0,r0 C combine carries
79 bne r18,$Loop C repeat while size != 0
C Last limb (already in r2): same steps as the loop body without loading a
C following limb, then the final high half is folded into the returned carry.
81 $Lend2: mulq r2,r19,r3 C r3 = prod_low
82 ldq r5,0(r16) C r5 = *res_ptr
83 addq r4,r0,r0 C cy_limb = r4 + r0, the full carry into this limb
84 umulh r2,r19,r4 C r4 = cy_limb
85 addq r3,r0,r3 C r3 = cy_limb + prod_low
86 cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
87 subq r5,r3,r3 C r3 = *res_ptr - (cy_limb + prod_low)
88 cmpult r5,r3,r5 C r5 = borrow from that subtraction
89 stq r3,0(r16) C *res_ptr = difference
90 addq r5,r0,r0 C combine carries
91 addq r4,r0,r0 C cy_limb = prod_high + cy
92 ret r31,(r26),1 C return via r26 with the carry limb in r0
C size == 1: r3/r0 hold the product halves of the only limb, r5 holds rp[0].
93 $Lend1: subq r5,r3,r3 C r3 = *res_ptr - prod_low
94 cmpult r5,r3,r5 C r5 = borrow from that subtraction
95 stq r3,0(r16) C *res_ptr = difference
96 addq r0,r5,r0 C r0 = prod_high + borrow
97 ret r31,(r26),1 C return via r26 with the carry limb in r0
98 EPILOGUE(mpn_submul_1)
99 ASM_END()