/* Pentium optimized __mpn_rshift --
   Copyright (C) 1992,94,95,96,97,98,2000,2005 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <http://www.gnu.org/licenses/>.  */
#include "sysdep.h"
#include "asm-syntax.h"
#include "bp-sym.h"
#include "bp-asm.h"
#define PARMS	LINKAGE+16		/* space for 4 saved regs */
#define RES	PARMS
#define S	RES+PTR_SIZE
#define SIZE	S+PTR_SIZE
#define CNT	SIZE+4
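
/* For orientation, a rough C equivalent of what this routine computes
   (an illustrative sketch only, not part of the build; assumes 32-bit
   mp_limb_t and a shift count 1 <= cnt < 32):

     mp_limb_t
     __mpn_rshift (mp_limb_t *res_ptr, const mp_limb_t *s_ptr,
                   mp_size_t size, unsigned int cnt)
     {
       mp_limb_t retval = s_ptr[0] << (32 - cnt);
       mp_size_t i;
       for (i = 0; i < size - 1; i++)
         res_ptr[i] = (s_ptr[i] >> cnt) | (s_ptr[i + 1] << (32 - cnt));
       res_ptr[size - 1] = s_ptr[size - 1] >> cnt;
       return retval;
     }
*/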
	.text
ENTRY (BP_SYM (__mpn_rshift))
	pushl	%edi
	cfi_adjust_cfa_offset (4)
	pushl	%esi
	cfi_adjust_cfa_offset (4)
	pushl	%ebp
	cfi_adjust_cfa_offset (4)
	cfi_rel_offset (ebp, 0)
	pushl	%ebx
	cfi_adjust_cfa_offset (4)

	movl	RES(%esp),%edi
	cfi_rel_offset (edi, 12)
	movl	S(%esp),%esi
	cfi_rel_offset (esi, 8)
	movl	SIZE(%esp),%ebx
	cfi_rel_offset (ebx, 0)
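/* From here on: %edi = res_ptr, %esi = s_ptr, %ebx = size in limbs.  */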
#if __BOUNDED_POINTERS__
	shll	$2, %ebx		/* convert limbs to bytes */
	CHECK_BOUNDS_BOTH_WIDE (%edi, RES(%esp), %ebx)
	CHECK_BOUNDS_BOTH_WIDE (%esi, S(%esp), %ebx)
	shrl	$2, %ebx		/* convert bytes back to limbs */
#endif
	movl	CNT(%esp),%ecx
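/* %ecx now holds the shift count; mpn shift routines are specified
   for 1 <= cnt < BITS_PER_MP_LIMB.  */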
/* We can use faster code for shift-by-1 under certain conditions.  */
	cmp	$1,%ecx
	jne	L(normal)
	leal	4(%edi),%eax
	cmpl	%esi,%eax
	jnc	L(special)		/* jump if res_ptr + 1 >= s_ptr */
	leal	(%edi,%ebx,4),%eax
	cmpl	%eax,%esi
	jnc	L(special)		/* jump if s_ptr >= res_ptr + size */
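/* The shift-by-1 code at L(special) walks downward from the most
   significant limb, so it is entered only when that direction cannot
   clobber unread source limbs: res_ptr at or above s_ptr, or the two
   operands entirely disjoint.  */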
L(normal):
	movl	(%esi),%edx
	addl	$4,%esi
	xorl	%eax,%eax
	shrdl	%cl,%edx,%eax		/* compute carry limb */
	pushl	%eax			/* push carry limb onto stack */
	cfi_adjust_cfa_offset (4)

	decl	%ebx
	pushl	%ebx
	cfi_adjust_cfa_offset (4)
	shrl	$3,%ebx
	jz	L(end)
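/* shrdl %cl,%edx,%eax shifts %eax right by %cl bits while filling the
   vacated high bits from %edx; with %eax zeroed first this yields
   s_ptr[0] << (32-cnt), the bits shifted out at the low end, which is
   the function's return value.  size-1 was saved for the cleanup loop
   and %ebx now counts the 8-limb chunks of the unrolled loop below.  */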
	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
L(oop):	movl	28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	movl	(%esi),%eax
	movl	4(%esi),%edx
	shrdl	%cl,%eax,%ebp
	shrdl	%cl,%edx,%eax
	movl	%ebp,(%edi)
	movl	%eax,4(%edi)

	movl	8(%esi),%ebp
	movl	12(%esi),%eax
	shrdl	%cl,%ebp,%edx
	shrdl	%cl,%eax,%ebp
	movl	%edx,8(%edi)
	movl	%ebp,12(%edi)

	movl	16(%esi),%edx
	movl	20(%esi),%ebp
	shrdl	%cl,%edx,%eax
	shrdl	%cl,%ebp,%edx
	movl	%eax,16(%edi)
	movl	%edx,20(%edi)

	movl	24(%esi),%eax
	movl	28(%esi),%edx
	shrdl	%cl,%eax,%ebp
	shrdl	%cl,%edx,%eax
	movl	%ebp,24(%edi)
	movl	%eax,28(%edi)

	addl	$32,%esi
	addl	$32,%edi
	decl	%ebx
	jnz	L(oop)
L(end):	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	andl	$7,%ebx
	jz	L(end2)
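/* Fewer than eight limbs remain: the saved size-1 count, masked
   with 7, is the number of single limbs still to be shifted.  */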
L(oop2):
	movl	(%esi),%eax
	shrdl	%cl,%eax,%edx		/* compute result limb */
	movl	%edx,(%edi)
	movl	%eax,%edx
	addl	$4,%esi
	addl	$4,%edi
	decl	%ebx
	jnz	L(oop2)
L(end2):
	shrl	%cl,%edx		/* compute most significant limb */
	movl	%edx,(%edi)		/* store it */
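/* The carry limb pushed at L(normal) is popped into %eax below and
   becomes the return value.  */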
	popl	%eax			/* pop carry limb */
	cfi_adjust_cfa_offset (-4)

	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebx)
	popl	%ebp
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebp)
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	popl	%edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	LEAVE
	ret
/* We loop from the most significant end of the arrays, which is only
   permissible when the overlap checks made at entry have ruled out the
   cases a downward loop would clobber, since the function is documented
   to work for overlapping source and destination.  */
	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (edi, 12)
	cfi_rel_offset (esi, 8)
	cfi_rel_offset (ebp, 4)
	cfi_rel_offset (ebx, 0)
L(special):
	leal	-4(%edi,%ebx,4),%edi
	leal	-4(%esi,%ebx,4),%esi

	movl	(%esi),%edx		/* load most significant limb */
	subl	$4,%esi
	decl	%ebx
	pushl	%ebx
	cfi_adjust_cfa_offset (4)
	shrl	$3,%ebx

	shrl	$1,%edx			/* shift top limb; CF = shifted-out bit */
	incl	%ebx
	decl	%ebx
	jz	L(Lend)
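/* The incl/decl pair retests whether the chunk count is zero without
   touching the carry flag: inc and dec leave CF intact, and the carry
   produced by the shrl of %edx must survive into the rcrl-based loop
   below.  */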
	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
L(Loop):
	movl	-28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	movl	(%esi),%eax
	movl	-4(%esi),%edx
	rcrl	$1,%eax
	movl	%ebp,(%edi)
	rcrl	$1,%edx
	movl	%eax,-4(%edi)

	movl	-8(%esi),%ebp
	movl	-12(%esi),%eax
	rcrl	$1,%ebp
	movl	%edx,-8(%edi)
	rcrl	$1,%eax
	movl	%ebp,-12(%edi)

	movl	-16(%esi),%edx
	movl	-20(%esi),%ebp
	rcrl	$1,%edx
	movl	%eax,-16(%edi)
	rcrl	$1,%ebp
	movl	%edx,-20(%edi)

	movl	-24(%esi),%eax
	movl	-28(%esi),%edx
	rcrl	$1,%eax
	movl	%ebp,-24(%edi)
	rcrl	$1,%edx
	movl	%eax,-28(%edi)
	leal	-32(%esi),%esi		/* use leal not to clobber carry */
	leal	-32(%edi),%edi
	decl	%ebx
	jnz	L(Loop)
L(Lend):
	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	sbbl	%eax,%eax		/* save carry in %eax */
	andl	$7,%ebx
	jz	L(Lend2)
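/* The rcr loop left the last shifted-out bit in the carry flag.
   sbbl %eax,%eax turned CF into 0 or -1 in %eax before the
   flag-clobbering andl; addl %eax,%eax below regenerates CF from it
   so the rcrl cleanup loop can continue the bit chain.  */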
	addl	%eax,%eax		/* restore carry from eax */
L(Loop2):
	movl	%edx,%ebp
	movl	(%esi),%edx
	rcrl	$1,%edx
	movl	%ebp,(%edi)
	leal	-4(%esi),%esi		/* use leal not to clobber carry */
	leal	-4(%edi),%edi
	decl	%ebx
	jnz	L(Loop2)

	jmp	L(L1)
L(Lend2):
	addl	%eax,%eax		/* restore carry from eax */
L(L1):	movl	%edx,(%edi)		/* store last limb */

	movl	$0,%eax
	rcrl	$1,%eax
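/* The movl/rcrl pair above materializes the final carry flag as the
   most significant bit of %eax: the bit shifted out by the whole
   operation, returned in carry-limb position.  */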
	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebx)
	popl	%ebp
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebp)
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	popl	%edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	LEAVE
	ret
END (BP_SYM (__mpn_rshift))