1 /* Pentium optimized __mpn_rshift --
2 Copyright (C) 1992, 94, 95, 96, 97, 98, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU MP Library.
5 The GNU MP Library is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or (at your
8 option) any later version.
10 The GNU MP Library is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13 License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
17 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
18 MA 02111-1307, USA. */
21 #include "asm-syntax.h"
/* Offsets (relative to %esp after the prologue pushes) of the three
   incoming C arguments: res_ptr, s_ptr, size.  PARMS skips the return
   address plus the four callee-saved registers pushed on entry.
   NOTE(review): RES and LINKAGE/PTR_SIZE are defined on lines not
   visible in this sampled listing (RES is presumably PARMS itself) --
   confirm against the full file.  */
25 #define PARMS LINKAGE+16 /* space for 4 saved regs */
27 #define S RES+PTR_SIZE
28 #define SIZE S+PTR_SIZE
32 ENTRY (BP_SYM (__mpn_rshift))
/* mp_limb_t __mpn_rshift (mp_ptr res_ptr, mp_srcptr s_ptr,
                           mp_size_t size, unsigned cnt)

   Shift a multi-precision limb vector right by cnt bits and return
   the bits shifted out of the least-significant limb.

   Register roles as established by the visible instructions:
     %edi = res_ptr   (bounds-checked against RES(%esp) below)
     %esi = s_ptr     (bounds-checked against S(%esp) below)
     %ebx = size, in limbs (scaled by 4 for byte addressing)
     %cl  = shift count used by shrdl/shrl

   NOTE(review): this is a sampled listing -- the prologue that loads
   the registers above, the body of the 8-way unrolled loop, the loop
   control, and the epilogue/ret are on original lines not shown here.
   Comments below describe only the surviving fragments.  */
44 #if __BOUNDED_POINTERS__
/* Bounded-pointer builds: verify that both the destination and source
   arrays of %ebx limbs lie within their declared bounds.  */
45 shll $2, %ebx /* convert limbs to bytes */
46 CHECK_BOUNDS_BOTH_WIDE (%edi, RES(%esp), %ebx)
47 CHECK_BOUNDS_BOTH_WIDE (%esi, S(%esp), %ebx)
/* NOTE(review): the matching #endif /__BOUNDED_POINTERS__ and the
   restore of %ebx to a limb count are not visible in this listing.  */
51 /* We can use faster code for shift-by-1 under certain conditions. */
/* Overlap tests: the shift-by-1 fast path at L(special) walks the
   arrays in the opposite direction and is only legal when the regions
   do not overlap the wrong way (see the comment block further down).
   The compares that set the carry consumed by these jnc's are on
   lines not shown.  */
56 jnc L(special) /* jump if res_ptr + 1 >= s_ptr */
57 leal (%edi,%ebx,4),%eax /* %eax = res_ptr + size (one past the end) */
59 jnc L(special) /* jump if s_ptr >= res_ptr + size */
/* --- General path: shift by an arbitrary cnt in %cl --- */
/* The return value is the low cnt bits of the first source limb,
   moved up to the top of a limb; it is parked on the stack until the
   epilogue pops it into %eax.  */
65 shrdl %cl,%edx,%eax /* compute carry limb */
66 pushl %eax /* push carry limb onto stack */
/* Touch the destination line before the loop so the stores below hit
   a warm cache line (Pentium write-allocate behavior).  */
73 movl (%edi),%eax /* fetch destination cache line */
/* Main loop, unrolled 8 limbs (28(%edi) = last limb of the 32-byte
   block being primed).  Only the first prefetch and one shrdl of the
   unrolled body survive in this listing.  */
76 L(oop): movl 28(%edi),%eax /* fetch destination cache line */
117 shrdl %cl,%eax,%edx /* compute result limb */
/* Final limb: no limb beyond it to shift in, so a plain shrl.  */
126 shrl %cl,%edx /* compute most significant limb */
127 movl %edx,(%edi) /* store it */
129 popl %eax /* pop carry limb */
/* NOTE(review): register restores and ret are on lines not shown.  */
139 /* We loop from least significant end of the arrays, which is only
140 permissible if the source and destination don't overlap, since the
141 function is documented to work for overlapping source and destination.
/* --- L(special): shift-by-1 fast path --- */
/* Point %edi/%esi at the most significant limb; this path walks
   downward, using the CPU carry flag (rcrl, not visible here) to
   propagate the shifted-out bit between limbs.  */
145 leal -4(%edi,%ebx,4),%edi
146 leal -4(%esi,%ebx,4),%esi
160 movl (%edi),%eax /* fetch destination cache line */
/* Loop body (mostly elided): primes destination lines 32 bytes at a
   time while rotating limbs right through carry.  */
164 movl -28(%edi),%eax /* fetch destination cache line */
195 leal -32(%esi),%esi /* use leal not to clobber carry */
/* sbbl/addl pair: materialize the carry flag into %eax across the
   flag-clobbering loop-control instructions, then regenerate it.  */
202 sbbl %eax,%eax /* save carry in %eax */
205 addl %eax,%eax /* restore carry from eax */
212 leal -4(%esi),%esi /* use leal not to clobber carry */
219 addl %eax,%eax /* restore carry from eax */
220 L(L1): movl %edx,(%edi) /* store last limb */
/* NOTE(review): the computation of the return value (the bit shifted
   out, in the high bit of %eax) and the epilogue are not visible.  */
232 END (BP_SYM (__mpn_rshift))