beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / pentium / mul_basecase.asm
blob50e15d356720d0e8c585f429b03a0f0473f30867
1 dnl Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.
3 dnl Copyright 1996, 1998-2000, 2002 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
34 C P5: 14.2 cycles/crossproduct (approx)
37 C void mpn_mul_basecase (mp_ptr wp,
38 C mp_srcptr xp, mp_size_t xsize,
39 C mp_srcptr yp, mp_size_t ysize);
C Stack frame layout.  PARAM_* are the five C arguments.  VAR_COUNTER is a
C local holding the number of y limbs still to be processed; it lives in the
C slot reserved by the dummy push at function entry.

defframe(PARAM_YSIZE, 20)
defframe(PARAM_YP,    16)
defframe(PARAM_XSIZE, 12)
defframe(PARAM_XP,    8)
defframe(PARAM_WP,    4)

defframe(VAR_COUNTER, -4)

C -----------------------------------------------------------------------------
C mpn_mul_basecase (wp, xp, xsize, yp, ysize)
C
C Schoolbook multiplication: writes the xsize+ysize limb product of
C {xp,xsize} and {yp,ysize} to {wp}.
C
C Structure:
C   1. wp[0] = low limb of xp[0]*yp[0].  If xsize == 1 the high limb goes to
C      wp[1] and the function returns (this relies on xsize == 1 implying
C      ysize == 1; presumably callers guarantee xsize >= ysize >= 1).
C   2. L(oop1) finishes row 0: wp[1..xsize] = rest of {xp,xsize} * yp[0].
C   3. L(outer)/L(oop2) run once per remaining y limb, adding
C      {xp,xsize} * yp[i] into wp[i..i+xsize-1] and storing the carry-out
C      limb at wp[i+xsize].
C
C Register roles once the loops are entered:
C   %esi  xp + xsize limbs (one past the end of x)
C   %edi  address in wp that receives the top limb of the current row
C   %ebp  address of the current y limb
C   %ecx  negative limb index, counts up to zero
C   %ebx  carry limb (high half of the previous product)
C   %eax, %edx  mull operand and result
C
C %esi, %ebp, %edi and (on the multi-limb path) %ebx are saved and restored.
C
C Flag invariant used by both inner loops: movl, incl and jnz leave the
C carry flag alone, so the carry produced by the last addl of one iteration
C is still valid at the adcl on top of the next iteration.
C -----------------------------------------------------------------------------

	TEXT
	ALIGN(8)
PROLOGUE(mpn_mul_basecase)

	pushl	%eax			C dummy push, reserves the VAR_COUNTER slot
	pushl	%esi
	pushl	%ebp
	pushl	%edi
deflit(`FRAME',16)

	movl	PARAM_XP,%esi
	movl	PARAM_WP,%edi
	movl	PARAM_YP,%ebp

	movl	(%esi),%eax		C xp[0]
	mull	(%ebp)			C edx:eax = xp[0] * yp[0]
	movl	%eax,(%edi)		C wp[0] = low limb
	movl	PARAM_XSIZE,%ecx
	decl	%ecx			C ecx = xsize-1; zero means 1x1 product
	jz	L(done)			C (xsize = 1 implies ysize = 1 too)

	C Row 0, limbs 1 .. xsize-1 of x.

	movl	PARAM_XSIZE,%eax
	pushl	%ebx
FRAME_pushl()
	movl	%edx,%ebx		C carry limb = high half of xp[0]*yp[0]
	leal	(%esi,%eax,4),%esi	C esi = xp + xsize
	leal	(%edi,%eax,4),%edi	C edi = wp + xsize
	negl	%ecx			C ecx = -(xsize-1), so (%esi,%ecx,4) is xp[1]
	xorl	%eax,%eax		C clear carry flag for the first adcl

	ALIGN(8)
L(oop1):
	adcl	$0,%ebx			C fold in carry from the previous addl
	movl	(%esi,%ecx,4),%eax	C xp[j]
	mull	(%ebp)			C edx:eax = xp[j] * yp[0]
	addl	%ebx,%eax		C add carry limb, sets CF for next adcl
	movl	%eax,(%edi,%ecx,4)	C wp[j] = low limb
	incl	%ecx			C next j; does not touch CF
	movl	%edx,%ebx		C high limb becomes the next carry limb
	jnz	L(oop1)

	adcl	$0,%ebx			C final carry of row 0
	movl	PARAM_YSIZE,%eax
	movl	%ebx,(%edi)		C wp[xsize] = most significant limb of row 0
	addl	$4,%edi			C advance wp by one limb for the next row
	decl	%eax
	jz	L(skip)			C ysize = 1: row 0 was the whole product
	movl	%eax,VAR_COUNTER	C remaining rows = ysize-1

	C Rows 1 .. ysize-1: multiply x by the next y limb and accumulate.

L(outer):
	addl	$4,%ebp			C advance to the next y limb
	movl	PARAM_XSIZE,%ecx
	negl	%ecx			C ecx = -xsize, so (%esi,%ecx,4) is xp[0]
	xorl	%ebx,%ebx		C carry limb = 0, and clear carry flag

	C code at 0x61 here, close enough to aligned
L(oop2):
	adcl	$0,%ebx			C fold in carry from the previous accumulate
	movl	(%esi,%ecx,4),%eax	C xp[j]
	mull	(%ebp)			C edx:eax = xp[j] * yp[i]
	addl	%ebx,%eax		C low limb += carry limb
	movl	(%edi,%ecx,4),%ebx	C existing partial product limb
	adcl	$0,%edx			C propagate carry into the high limb
	addl	%eax,%ebx		C accumulate, sets CF for next adcl
	movl	%ebx,(%edi,%ecx,4)	C store updated limb
	incl	%ecx			C next j; does not touch CF
	movl	%edx,%ebx		C high limb becomes the next carry limb
	jnz	L(oop2)

	adcl	$0,%ebx			C final carry of this row

	movl	%ebx,(%edi)		C store the new top limb of the product
	addl	$4,%edi			C advance wp by one limb for the next row
	movl	VAR_COUNTER,%eax
	decl	%eax
	movl	%eax,VAR_COUNTER
	jnz	L(outer)

	C Exit for the multi-limb path (five words were pushed).

L(skip):
	popl	%ebx
	popl	%edi
	popl	%ebp
	popl	%esi
	addl	$4,%esp			C drop the VAR_COUNTER slot
	ret				C must return here; L(done) is a separate exit

	C Exit for the 1x1 path (four words were pushed, %ebx never saved).

L(done):
	movl	%edx,4(%edi)		C wp[1] = high limb of xp[0]*yp[0]
	popl	%edi
	popl	%ebp
	popl	%esi
	popl	%eax			C dummy pop, drops the VAR_COUNTER slot
	ret

EPILOGUE()