beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / pentium / mul_1.asm
bloba0858af2b41fb4b869294e4113ea1dcbf2744ca1
1 dnl Intel Pentium mpn_mul_1 -- mpn by limb multiplication.
3 dnl Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
4 dnl Inc.
6 dnl This file is part of the GNU MP Library.
7 dnl
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
10 dnl
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
14 dnl
15 dnl or
16 dnl
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any
19 dnl later version.
20 dnl
21 dnl or both in parallel, as here.
22 dnl
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 dnl for more details.
27 dnl
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
32 include(`../config.m4')
35 C P5: 12.0 cycles/limb
38 C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
39 C mp_limb_t multiplier);
40 C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
41 C mp_limb_t multiplier, mp_limb_t carry);
C Stack-passed arguments, listed in the order of the C prototype.
C NOTE(review): defframe comes from the included m4 definitions.  The
C values look like byte offsets from esp at function entry (4 is the
C first argument, directly above the return address), adjusted through
C FRAME as registers are pushed -- confirm against the macro definition.
defframe(PARAM_DST,       4)
defframe(PARAM_SRC,       8)
defframe(PARAM_SIZE,      12)
defframe(PARAM_MULTIPLIER,16)
defframe(PARAM_CARRY,     20)
C mp_limb_t mpn_mul_1c (dst, src, size, multiplier, carry)
C
C Entry point that takes an initial carry limb.  It establishes the same
C state as the first two instructions of mpn_mul_1 (carry in ecx, the
C caller esi saved on the stack) and then joins the shared code at
C L(start_1c), which lives inside mpn_mul_1.

	TEXT
	ALIGN(8)
PROLOGUE(mpn_mul_1c)
deflit(`FRAME',0)

	movl	PARAM_CARRY, %ecx	C ecx = carry-in limb
	pushl	%esi		FRAME_pushl()

	jmp	L(start_1c)		C continue in the common body

EPILOGUE()
C mp_limb_t mpn_mul_1 (dst, src, size, multiplier)
C
C Multiplies the size-limb operand at src by multiplier, stores the low
C size limbs of the product at dst and returns the carry-out limb in eax.
C This entry point starts with a zero carry; mpn_mul_1c joins at
C L(start_1c) with its carry argument in ecx and esi already pushed.
C
C Limbs are handled two per loop iteration.  An odd size is dealt with by
C processing the first limb on its own before entering the loop.
C
C The code depends on the carry flag surviving across several
C instructions, so any instruction inserted between a flag-setting
C instruction and its consumer must leave CF untouched (movl, leal, notl,
C pushl, popl and incl are used for exactly that reason).
C
C NOTE(review): a size of zero would fall into the one-limb path and
C read and write one limb; callers are presumably required to pass
C size >= 1 -- confirm.

	ALIGN(8)
PROLOGUE(mpn_mul_1)
deflit(`FRAME',0)

	xorl	%ecx, %ecx		C no carry-in
	pushl	%esi		FRAME_pushl()

L(start_1c):
	C ecx = carry-in, caller esi is saved on the stack

	movl	PARAM_SRC, %esi
	movl	PARAM_SIZE, %eax

	shrl	%eax			C eax = size/2, CF = 1 if size is odd
	jnz	L(two_or_more)


	C one limb only

	movl	(%esi), %eax

	mull	PARAM_MULTIPLIER	C edx:eax = src[0] * multiplier

	addl	%eax, %ecx		C low limb plus carry-in
	movl	PARAM_DST, %eax

	adcl	$0, %edx		C carry from the add goes into the high limb
	popl	%esi

	movl	%ecx, (%eax)		C dst[0]
	movl	%edx, %eax		C return value = carry-out limb

	ret


L(two_or_more):
	C eax	size/2
	C ebx
	C ecx	carry
	C edx
	C esi	src
	C edi
	C ebp
	C CF	set if size is odd, from the shrl above; nothing between
	C	here and the jnc below modifies the flags

	pushl	%edi		FRAME_pushl()
	pushl	%ebx		FRAME_pushl()

	movl	PARAM_DST, %edi
	leal	-1(%eax), %ebx		C size/2-1

	notl	%ebx			C -(size/2), notl preserves carry

	leal	(%esi,%eax,8), %esi	C src end, when size is even
	leal	(%edi,%eax,8), %edi	C dst end, when size is even

	pushl	%ebp		FRAME_pushl()
	jnc	L(top)


	C size was odd, process one limb separately

	movl	(%esi,%ebx,8), %eax	C src[0]
	addl	$4, %esi		C esi is now the true src end

	mull	PARAM_MULTIPLIER

	addl	%ecx, %eax		C add carry-in; CF is consumed at L(top)
	movl	%edx, %ecx

	movl	%eax, (%edi,%ebx,8)	C dst[0]
	leal	4(%edi), %edi		C true dst end; leal so that CF is kept


L(top):
	C eax
	C ebx	counter, negative
	C ecx	carry
	C edx
	C esi	src end
	C edi	dst end
	C ebp
	C CF	carry still to be added into ecx (clear when entered
	C	directly for an even size)

	adcl	$0, %ecx		C fold in the pending carry
	movl	(%esi,%ebx,8), %eax

	mull	PARAM_MULTIPLIER

	movl	%edx, %ebp
	addl	%eax, %ecx		C low limb of first product plus carry

	adcl	$0, %ebp		C ebp = carry into the second limb
	movl	4(%esi,%ebx,8), %eax

	mull	PARAM_MULTIPLIER

	movl	%ecx, (%edi,%ebx,8)
	addl	%ebp, %eax		C CF is picked up by the next adcl

	movl	%eax, 4(%edi,%ebx,8)
	incl	%ebx			C incl leaves CF alone, sets ZF at the end

	movl	%edx, %ecx
	jnz	L(top)


	adcl	$0, %ecx		C last pending carry
	popl	%ebp

	movl	%ecx, %eax		C return value = carry-out limb
	popl	%ebx

	popl	%edi
	popl	%esi

	ret

EPILOGUE()