beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / mul_1.asm
blob421de622253c07e1bff8459055d9a6a12905d507
1 dnl x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector
2 dnl with a limb and store the result in a second limb vector.
4 dnl Copyright 1992, 1994, 1997-2002, 2005 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
7 dnl
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
10 dnl
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
14 dnl
15 dnl or
16 dnl
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any
19 dnl later version.
20 dnl
21 dnl or both in parallel, as here.
22 dnl
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 dnl for more details.
27 dnl
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
32 include(`../config.m4')
35 C cycles/limb
36 C P5 12.5
37 C P6 model 0-8,10-12 5.5
38 C P6 model 9 (Banias)
39 C P6 model 13 (Dothan) 5.25
40 C P4 model 0 (Willamette) 19.0
41 C P4 model 1 (?) 19.0
42 C P4 model 2 (Northwood) 19.0
43 C P4 model 3 (Prescott)
44 C P4 model 4 (Nocona)
45 C AMD K6 10.5
46 C AMD K7 4.5
47 C AMD K8
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                      mp_limb_t multiplier);
C
C Multiply the size-limb vector at src by multiplier, store the low limb of
C each step at dst, and return the final carry limb in %eax.
C
C Arguments are read from the stack through the PARAM_* offsets defined
C below.
C
C Register use:
C   %edi        dst pointer
C   %esi        src pointer
C   %ecx        loop counter
C   %ebx, %ebp  carry limb (the unrolled loop alternates between the two)
C   %edx:%eax   64-bit product delivered by mull
C
C %edi, %esi, %ebx and %ebp are pushed on entry and popped again before the
C function returns.  With size equal to 0 both loops are skipped and the
C return value is 0.

defframe(PARAM_MULTIPLIER,16)
defframe(PARAM_SIZE,      12)
defframe(PARAM_SRC,       8)
defframe(PARAM_DST,       4)

	TEXT
	ALIGN(8)
PROLOGUE(mpn_mul_1)
deflit(`FRAME',0)

	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp
C Four 4-byte pushes have been made since entry, hence FRAME is 16 from here.
deflit(`FRAME',16)

	movl	PARAM_DST,%edi
	movl	PARAM_SRC,%esi
	movl	PARAM_SIZE,%ecx

	xorl	%ebx,%ebx		C carry limb starts at zero
	andl	$3,%ecx			C size mod 4 limbs are done one at a time
	jz	L(end0)

C Simple loop: one limb per iteration, carry limb kept in %ebx.
L(oop0):
	movl	(%esi),%eax
	mull	PARAM_MULTIPLIER
	leal	4(%esi),%esi
	addl	%ebx,%eax		C low product limb + incoming carry limb
	movl	$0,%ebx			C movl leaves the carry flag intact
	adcl	%ebx,%edx		C fold the carry flag into the high limb
	movl	%eax,(%edi)
	movl	%edx,%ebx	C propagate carry into cylimb

	leal	4(%edi),%edi
	decl	%ecx
	jnz	L(oop0)

L(end0):
	movl	PARAM_SIZE,%ecx
	shrl	$2,%ecx			C number of remaining groups of 4 limbs
	jz	L(end)


C Unrolled loop: four limbs per iteration.  The carry limb alternates between
C %ebx and %ebp; each "movl $0" sits between an addl and its adcl, which is
C safe because movl does not modify the flags.
	ALIGN(8)
L(oop):	movl	(%esi),%eax
	mull	PARAM_MULTIPLIER
	addl	%eax,%ebx		C new lo + cylimb
	movl	$0,%ebp
	adcl	%edx,%ebp		C next carry limb = hi + carry flag

	movl	4(%esi),%eax
	mull	PARAM_MULTIPLIER
	movl	%ebx,(%edi)
	addl	%eax,%ebp	C new lo + cylimb
	movl	$0,%ebx
	adcl	%edx,%ebx

	movl	8(%esi),%eax
	mull	PARAM_MULTIPLIER
	movl	%ebp,4(%edi)
	addl	%eax,%ebx	C new lo + cylimb
	movl	$0,%ebp
	adcl	%edx,%ebp

	movl	12(%esi),%eax
	mull	PARAM_MULTIPLIER
	movl	%ebx,8(%edi)
	addl	%eax,%ebp	C new lo + cylimb
	movl	$0,%ebx
	adcl	%edx,%ebx

	movl	%ebp,12(%edi)

	leal	16(%esi),%esi
	leal	16(%edi),%edi
	decl	%ecx
	jnz	L(oop)

L(end):	movl	%ebx,%eax		C return value: final carry limb

	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret

EPILOGUE()