beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / power / addmul_1.asm
blob76d8df3c76c174b8471c790753dd818348d83bfa
1 dnl IBM POWER mpn_addmul_1 -- Multiply a limb vector with a limb and add the
2 dnl result to a second limb vector.
4 dnl Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
7 dnl
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
10 dnl
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
14 dnl
15 dnl or
16 dnl
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any
19 dnl later version.
20 dnl
21 dnl or both in parallel, as here.
22 dnl
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 dnl for more details.
27 dnl
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
33 dnl INPUT PARAMETERS
34 dnl res_ptr r3
35 dnl s1_ptr r4
36 dnl size r5
37 dnl s2_limb r6
39 dnl The POWER architecture has no unsigned 32x32->64 bit multiplication
40 dnl instruction. To obtain that operation, we have to use the 32x32->64
41 dnl signed multiplication instruction, and add the appropriate compensation to
42 dnl the high limb of the result. We add the multiplicand if the multiplier
43 dnl has its most significant bit set, and we add the multiplier if the
44 dnl multiplicand has its most significant bit set. We need to preserve the
45 dnl carry flag between each iteration, so we have to compute the compensation
46 dnl carefully (the natural, srai+and doesn't work). Since all POWER can
47 dnl branch in zero cycles, we use conditional branches for the compensation.
49 include(`../config.m4')
51 ASM_START()
dnl  mpn_addmul_1 -- multiply the size limbs at s1_ptr by s2_limb, add the
dnl  products into the size limbs at res_ptr, and leave the carry-out limb in
dnl  r3.  The first limb is processed before CTR is tested, so size must be at
dnl  least 1.
dnl
dnl  Register use:
dnl    r0	current s1 limb (reused for the low product half in Lploop)
dnl    r3	res_ptr - 4, stepped by stu; receives the carry-out limb at exit
dnl    r4	s1_ptr, stepped by lu
dnl    r5	size, moved to CTR
dnl    r6	s2_limb
dnl    r7	scratch: compensation mask, low product half, loaded res limb
dnl    r8	result limb about to be stored
dnl    r9,r10	high product half / carry limb, alternating between the two
dnl		halves of each unrolled loop
dnl
dnl  Lploop runs when s2_limb is non-negative as a signed word, Lnloop when
dnl  it is negative; Lnloop additionally adds each s1 limb to the high
dnl  product half.  Both loops add s2_limb to the high half when the s1 limb
dnl  is negative.  The carry bit set by each "add res_limb" stays live until
dnl  the next ae, so everything in between must leave it untouched.
PROLOGUE(mpn_addmul_1)
	cal	3,-4(3)		C bias res_ptr so the current limb is at 4(r3)
	l	0,0(4)		C r0 = first s1 limb
	cmpi	0,6,0		C cr0 = sign of s2_limb, consumed by blt below
	mtctr	5		C CTR = size
	mul	9,0,6		C r9 = high half, MQ = low half of signed product
	srai	7,0,31		C r7 = all ones if the s1 limb is negative, else 0
	and	7,7,6		C r7 = s2_limb if the s1 limb is negative, else 0
	mfmq	8		C r8 = low half of the product
	cax	9,9,7		C compensate high half for a negative s1 limb
				C (srai+and is usable for this first limb only:
				C no carry is pending yet)
	l	7,4(3)		C r7 = first res limb
	a	8,8,7		C add res_limb
	blt	Lneg		C s2_limb negative: take the Lnloop variant
Lpos:	bdz	Lend		C only one limb: carry limb is already in r9

Lploop:	lu	0,4(4)		C r0 = next s1 limb, advance s1_ptr
	stu	8,4(3)		C store previous result limb, advance res_ptr
	cmpi	0,0,0		C cr0 = sign of s1 limb, consumed by bge below
	mul	10,0,6		C r10 = high half, MQ = low half
	mfmq	0		C r0 = low half (s1 limb no longer needed)
	ae	8,0,9		C low limb + old_cy_limb + old cy
	l	7,4(3)		C r7 = res limb
	aze	10,10		C propagate cy to new cy_limb
	a	8,8,7		C add res_limb
	bge	Lp0
	cax	10,10,6		C adjust high limb for negative limb from s1
Lp0:	bdz	Lend0		C done, with the carry limb in r10
	lu	0,4(4)		C second half: same steps with r9/r10 swapped
	stu	8,4(3)
	cmpi	0,0,0
	mul	9,0,6
	mfmq	0
	ae	8,0,10		C low limb + old_cy_limb + old cy
	l	7,4(3)
	aze	9,9		C propagate cy to new cy_limb
	a	8,8,7		C add res_limb
	bge	Lp1
	cax	9,9,6		C adjust high limb for negative limb from s1
Lp1:	bdn	Lploop

	b	Lend		C done, with the carry limb in r9

Lneg:	cax	9,9,0		C s2_limb negative: add first s1 limb to high half
	bdz	Lend		C only one limb: carry limb is already in r9
Lnloop:	lu	0,4(4)		C r0 = next s1 limb, advance s1_ptr
	stu	8,4(3)		C store previous result limb, advance res_ptr
	cmpi	0,0,0		C cr0 = sign of s1 limb, consumed by bge below
	mul	10,0,6		C r10 = high half, MQ = low half
	mfmq	7		C r7 = low half; r0 keeps the s1 limb for ae below
	ae	8,7,9		C low limb + old_cy_limb + old cy
	l	7,4(3)		C r7 = res limb
	ae	10,10,0		C propagate cy to new cy_limb
				C (also adds the s1 limb: s2_limb is negative)
	a	8,8,7		C add res_limb
	bge	Ln0
	cax	10,10,6		C adjust high limb for negative limb from s1
Ln0:	bdz	Lend0		C done, with the carry limb in r10
	lu	0,4(4)		C second half: same steps with r9/r10 swapped
	stu	8,4(3)
	cmpi	0,0,0
	mul	9,0,6
	mfmq	7
	ae	8,7,10		C low limb + old_cy_limb + old cy
	l	7,4(3)
	ae	9,9,0		C propagate cy to new cy_limb
	a	8,8,7		C add res_limb
	bge	Ln1
	cax	9,9,6		C adjust high limb for negative limb from s1
Ln1:	bdn	Lnloop
	b	Lend		C done, with the carry limb in r9

Lend0:	cal	9,0(10)		C exit from a first loop half: carry limb r10 -> r9
Lend:	st	8,4(3)		C store the final result limb
	aze	3,9		C r3 = carry limb + pending carry bit
	br			C return through the link register
EPILOGUE(mpn_addmul_1)