1 /* Pentium optimized __mpn_rshift --
2 Copyright (C) 1992-2015 Free Software Foundation, Inc.
3 This file is part of the GNU MP Library.
5 The GNU MP Library is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or (at your
8 option) any later version.
10 The GNU MP Library is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13 License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with the GNU MP Library; see the file COPYING.LIB. If not,
17 see <http://www.gnu.org/licenses/>. */
20 #include "asm-syntax.h"
/* Excerpt of the Pentium-optimized __mpn_rshift named in the file header.
   Only some of the routine's lines are present in this view, so the notes
   below describe just what the visible lines show. */
/* PARMS: the 16 is the room taken by the four saved registers (see the four
   +4 CFA adjustments below); the leading 4 is presumably the return address,
   making PARMS the stack offset of the first argument -- TODO confirm. */
22 #define PARMS 4+16 /* space for 4 saved regs */
32 cfi_adjust_cfa_offset (4) /* CFI: CFA offset +4 (1st of four; 4 * 4 = the 16 in PARMS) */
34 cfi_adjust_cfa_offset (4) /* CFI: CFA offset +4 (2nd) */
36 cfi_adjust_cfa_offset (4) /* CFI: CFA offset +4 (3rd) */
37 cfi_rel_offset (ebp, 0) /* CFI: ebp recorded at offset 0 here (the later restatement lists it at 4) */
39 cfi_adjust_cfa_offset (4) /* CFI: CFA offset +4 (4th) */
42 cfi_rel_offset (edi, 12) /* CFI: edi recorded at offset 12 */
44 cfi_rel_offset (esi, 8) /* CFI: esi recorded at offset 8 */
46 cfi_rel_offset (ebx, 0) /* CFI: ebx recorded at offset 0 */
49 /* We can use faster code for shift-by-1 under certain conditions. */
/* NOTE(review): the compares that set the carry flag tested by the two jnc
   instructions below are not part of this excerpt. */
54 jnc L(special) /* jump if res_ptr + 1 >= s_ptr */
55 leal (%edi,%ebx,4),%eax /* eax = edi + 4*ebx, i.e. the "res_ptr + size" of the next test */
57 jnc L(special) /* jump if s_ptr >= res_ptr + size */
63 shrdl %cl,%edx,%eax /* compute carry limb: eax >>= cl, vacated high bits filled from edx */
64 pushl %eax /* push carry limb onto stack */
65 cfi_adjust_cfa_offset (4) /* CFI: accounts for the pushl above */
69 cfi_adjust_cfa_offset (4) /* CFI: 4 more bytes; presumably paired with the first -4 below -- confirm */
73 movl (%edi),%eax /* fetch destination cache line */
76 L(oop): movl 28(%edi),%eax /* fetch destination cache line (byte offset 28 = eighth 4-byte limb from edi) */
113 cfi_adjust_cfa_offset (-4)
118 shrdl %cl,%eax,%edx /* compute result limb: edx >>= cl, vacated high bits filled from eax */
127 shrl %cl,%edx /* compute most significant limb: plain shift, zeros enter at the top */
128 movl %edx,(%edi) /* store it */
130 popl %eax /* pop carry limb into eax (presumably the return value; the ret is not in this excerpt) */
131 cfi_adjust_cfa_offset (-4) /* CFI: accounts for the popl above */
/* CFI: four -4 adjustments balancing the four +4 at the top; the matching
   register restores are not part of this excerpt. */
134 cfi_adjust_cfa_offset (-4)
137 cfi_adjust_cfa_offset (-4)
140 cfi_adjust_cfa_offset (-4)
143 cfi_adjust_cfa_offset (-4)
148 /* We loop from the most significant end of the arrays. This is only
149 permissible if the source and destination don't overlap, which matters because
150 the function is documented to work for overlapping source and destination. */
/* Excerpt of a path that walks both arrays from the highest address
   downwards: edi and esi are first moved to the last limb, and the later
   accesses use negative displacements. The carry flag is kept alive across
   the pointer updates (leal) and parked in eax where needed; the instruction
   that produces the carry is not part of this excerpt. */
/* CFI: restate the 16 bytes and the slots of the four saved registers.
   NOTE(review): presumably needed because the preceding -4 adjustments
   unwound the CFI state while this code still runs with the registers
   saved -- confirm. */
153 cfi_adjust_cfa_offset (16)
154 cfi_rel_offset (edi, 12)
155 cfi_rel_offset (esi, 8)
156 cfi_rel_offset (ebp, 4)
157 cfi_rel_offset (ebx, 0)
159 leal -4(%edi,%ebx,4),%edi /* edi = edi + 4*ebx - 4: last 4-byte limb of the destination if ebx is the limb count */
160 leal -4(%esi,%ebx,4),%esi /* esi = esi + 4*ebx - 4: same position in the source */
167 cfi_adjust_cfa_offset (4) /* CFI: +4; balanced by the lone -4 further down */
175 movl (%edi),%eax /* fetch destination cache line */
179 movl -28(%edi),%eax /* fetch destination cache line */
210 leal -32(%esi),%esi /* use leal not to clobber carry (steps esi down 32 bytes = eight 4-byte limbs) */
217 cfi_adjust_cfa_offset (-4) /* CFI: -4; balances the +4 above */
218 sbbl %eax,%eax /* save carry in %eax: eax = -CF, i.e. 0 or 0xffffffff */
221 addl %eax,%eax /* restore carry from eax: 0xffffffff + 0xffffffff carries out, 0 + 0 does not */
228 leal -4(%esi),%esi /* use leal not to clobber carry (steps esi down one limb) */
235 addl %eax,%eax /* restore carry from eax */
236 L(L1): movl %edx,(%edi) /* store last limb */
/* CFI: four -4 adjustments undoing the 16 restated at the top of this part;
   the matching register restores are not part of this excerpt. */
242 cfi_adjust_cfa_offset (-4)
245 cfi_adjust_cfa_offset (-4)
248 cfi_adjust_cfa_offset (-4)
251 cfi_adjust_cfa_offset (-4)