beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / mod_34lsub1.asm
blobe09e702c6f877c861b5122b7ea3bdbc5e6ff74fc
1 dnl Generic x86 mpn_mod_34lsub1 -- mpn remainder modulo 2^24-1.
3 dnl Copyright 2000-2002, 2004 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
34 C cycles/limb
35 C P5 3.0
36 C P6 3.66
37 C K6 3.0
38 C K7 1.3
39 C P4 9
C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
C
C Returns in eax a value congruent to {src,size} modulo 2^24-1.  The result
C is a plain sum of shifted pieces and is not reduced any further; for
C size==1 it is src[0] unchanged.
C
C Limb i has weight 2^(32*i), and 2^24 == 1 mod 2^24-1, hence
C	limbs with index 0 mod 3 have weight 2^0
C	limbs with index 1 mod 3 have weight 2^8
C	limbs with index 2 mod 3 have weight 2^16
C For three or more limbs, three accumulators (one per residue class) are
C summed with a single adc chain, then each accumulator is split at its
C 24-bit boundary and the pieces are added together.
C
C The arguments are read through the defframe slots declared below (offsets
C 4 and 8; the defframe macro comes from the included m4 files).  ebx, esi
C and edi are saved and restored, eax, ecx, edx and the flags are clobbered.
C
C NOTE(review): size is expected to be at least 1.  With size==0 the code
C still loads src[0] and returns it.

defframe(PARAM_SIZE, 8)
defframe(PARAM_SRC,  4)

dnl  re-use parameter space
define(SAVE_EBX, `PARAM_SRC')

	TEXT
	ALIGN(16)
PROLOGUE(mpn_mod_34lsub1)
deflit(`FRAME',0)

	movl	PARAM_SIZE, %ecx
	movl	PARAM_SRC, %edx

	subl	$2, %ecx		C flags: above if size>2, below if size<2
	ja	L(three_or_more)

	movl	(%edx), %eax		C src[0], movl leaves the subl flags intact
	jb	L(one)			C size<2, return src[0] as is

	C exactly two limbs

	movl	4(%edx), %ecx
	movl	%eax, %edx
	shrl	$24, %eax		C src[0] high 8 bits, weight 2^24 == 1

	andl	$0xFFFFFF, %edx		C src[0] low 24 bits
	addl	%edx, %eax
	movl	%ecx, %edx

	andl	$0xFFFF, %ecx
	shrl	$16, %edx		C src[1] high 16 bits, weight 2^48 == 1
	addl	%edx, %eax

	shll	$8, %ecx		C src[1] low 16 bits, weight 2^32 == 2^8
	addl	%ecx, %eax

L(one):
	ret


L(three_or_more):
	C eax
	C ebx
	C ecx	size-2
	C edx	src
	C esi
	C edi
	C ebp

	C src is already in edx, so its stack slot is free to hold ebx

	movl	%ebx, SAVE_EBX		C and arrange 16-byte loop alignment
	xorl	%ebx, %ebx

	pushl	%esi	FRAME_pushl()
	xorl	%esi, %esi

	pushl	%edi	FRAME_pushl()
	xorl	%eax, %eax		C and clear carry flag


	C offset 0x40 here
L(top):
	C eax	acc 0mod3
	C ebx	acc 1mod3
	C ecx	counter, limbs
	C edx	src
	C esi	acc 2mod3
	C edi
	C ebp
	C
	C The carry flag is live around the whole loop.  leal does not touch
	C the flags and decl leaves the carry alone, so the carry out of the
	C 2mod3 accumulator feeds the 0mod3 accumulator on the next pass
	C (weight 2^96 == 1).

	leal	12(%edx), %edx
	leal	-2(%ecx), %ecx

	adcl	-12(%edx), %eax
	adcl	-8(%edx), %ebx
	adcl	-4(%edx), %esi

	decl	%ecx			C counter drops by 3 per pass in total
	jg	L(top)


	C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively
	C
	C edi is set to minus the weight of the carry still pending in the
	C carry flag, which depends on the accumulator that was added to last:
	C	0xFFFFFFFF = -1     carry out of acc 2mod3
	C	0xFFFFFF00 = -2^8   carry out of acc 0mod3
	C	0xFFFF0000 = -2^16  carry out of acc 1mod3
	C movl, incl and decl all preserve the carry flag.

	movl	$0xFFFFFFFF, %edi
	incl	%ecx
	js	L(combine)

	adcl	(%edx), %eax
	movl	$0xFFFFFF00, %edi
	decl	%ecx
	js	L(combine)

	adcl	4(%edx), %ebx
	movl	$0xFFFF0000, %edi


L(combine):
	C eax	acc 0mod3
	C ebx	acc 1mod3
	C ecx
	C edx
	C esi	acc 2mod3
	C edi	mask
	C ebp

	sbbl	%ecx, %ecx		C carry, 0 or all ones
	movl	%eax, %edx		C 0mod3

	shrl	$24, %eax		C 0mod3 high
	andl	%edi, %ecx		C carry masked, 0 or minus its weight

	subl	%ecx, %eax		C apply carry
	movl	%ebx, %edi		C 1mod3

	shrl	$16, %ebx		C 1mod3 high
	andl	$0x00FFFFFF, %edx	C 0mod3 low

	addl	%edx, %eax		C apply 0mod3 low
	andl	$0xFFFF, %edi

	shll	$8, %edi		C 1mod3 low
	addl	%ebx, %eax		C apply 1mod3 high

	addl	%edi, %eax		C apply 1mod3 low
	movl	%esi, %edx		C 2mod3

	shrl	$8, %esi		C 2mod3 high
	andl	$0xFF, %edx		C 2mod3 low

	shll	$16, %edx		C 2mod3 low
	addl	%esi, %eax		C apply 2mod3 high

	addl	%edx, %eax		C apply 2mod3 low
	popl	%edi	FRAME_popl()

	movl	SAVE_EBX, %ebx
	popl	%esi	FRAME_popl()

	ret				C both pushes are popped, esp is as at entry

EPILOGUE()