sysdeps/x86_64/lshift.S

   1 /* x86-64 __mpn_lshift --
   2    Copyright (C) 2007-2014 Free Software Foundation, Inc.
   3    This file is part of the GNU MP Library.
   4
   5    The GNU MP Library is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU Lesser General Public License as published by
   7    the Free Software Foundation; either version 2.1 of the License, or (at your
   8    option) any later version.
   9
  10    The GNU MP Library is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  13    License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public License
  16    along with the GNU MP Library; see the file COPYING.LIB.  If not,
  17    see <http://www.gnu.org/licenses/>.  */
  18
  19 #include "sysdep.h"
  20 #include "asm-syntax.h"
  21
  22 #define rp      %rdi            /* result pointer: every store in this file goes through rp */
  23 #define up      %rsi            /* source pointer: every load in this file goes through up */
  24 #define n       %rdx            /* limb count on entry; reused as the biased loop counter */
  25 #define cnt     %cl             /* shift count in bits, the %cl operand of every shld/shl */
  26
  27         .text
/* __mpn_lshift -- shift an n-limb number left by cnt bits.

   A limb here is one 64-bit word.  The source limbs are read through up,
   the result limbs are written through rp.

   In:   rp  (%rdi)  base of the result limb array
         up  (%rsi)  base of the source limb array
         n   (%rdx)  number of limbs.  Must be at least 1: n == 0 is not
                     handled, it would be dispatched to L(b00) as if it
                     were a multiple of 4.
         cnt (%cl)   shift count in bits.
                     NOTE(review): presumably 1 <= cnt <= 63 is required --
                     confirm against the C-level contract.
   Out:  %rax        the bits shifted out of the most significant source
                     limb, in the low cnt bits, zero above them.

   Modifies %rax, %rdx, %rsi, %rdi, %r8-%r11 and the flags.  The stack is
   not touched and nothing is called.

   Method: both pointers are first moved to the most significant limb and
   the operand is walked from high addresses to low ones.  Every result
   limb except the lowest is built by shld from a source limb and its lower
   neighbour; the lowest one is a plain shl.  The main loop at L(top)
   produces four limbs per iteration.  n mod 4 selects one of four
   preambles, three of which jump into the middle of the loop body
   (L(10), L(01), L(00)).  Each preamble biases n so that the loop's
   "sub $4, n" borrows exactly when three loaded limbs are left for
   L(end).  */
  28 ENTRY (__mpn_lshift)
  29         lea     -8(rp,n,8), rp          /* rp = &rp[n-1], the most significant result limb */
  30         lea     -8(up,n,8), up          /* up = &up[n-1], the most significant source limb */
  31
  32         mov     %edx, %eax              /* the low bits of n are enough to compute n mod 4 */
  33         and     $3, %eax                /* eax = n mod 4 */
  34         jne     L(nb00)
  35 L(b00): /* n = 4, 8, 12, ... */
  36         mov     (up), %r10              /* r10 = up[n-1] */
  37         mov     -8(up), %r11            /* r11 = up[n-2] */
  38         xor     %eax, %eax              /* clear rax before collecting the return value */
  39         shld    %cl, %r10, %rax         /* rax = bits shifted out of the top limb */
  40         mov     -16(up), %r8            /* r8 = up[n-3] */
  41         lea     24(rp), rp              /* bias rp: -24(rp) at L(00) is the top result limb */
  42         sub     $4, n                   /* bias the counter for the borrow test in the loop */
  43         jmp     L(00)
  44
  45 L(nb00):/* n mod 4 != 0; falls through to L(b01) when n = 1, 5, 9, ... */
  46         cmp     $2, %eax                /* split n mod 4 into 1, 2 and 3 */
  47         jae     L(nb01)                 /* n mod 4 is 2 or 3 */
  48 L(b01): mov     (up), %r9               /* r9 = up[n-1] */
  49         xor     %eax, %eax              /* clear rax before collecting the return value */
  50         shld    %cl, %r9, %rax          /* rax = bits shifted out of the top limb */
  51         sub     $2, n                   /* bias the counter; on this path it borrows only for n == 1 */
  52         jb      L(le1)
  53         mov     -8(up), %r10            /* r10 = up[n-2] */
  54         mov     -16(up), %r11           /* r11 = up[n-3] */
  55         lea     -8(up), up              /* bias up for the loads that follow L(01) */
  56         lea     16(rp), rp              /* bias rp: -16(rp) at L(01) is the top result limb */
  57         jmp     L(01)
  58 L(le1): shl     %cl, %r9                /* n == 1: the only limb, zeros enter from the right */
  59         mov     %r9, (rp)
  60         ret
  61
  62 L(nb01):/* n mod 4 >= 2; falls through to L(b10) when n = 2, 6, 10, ... */
  63         jne     L(b11)                  /* flags are still those of the cmp $2 above: n mod 4 == 3 */
  64 L(b10): mov     (up), %r8               /* r8 = up[n-1] */
  65         mov     -8(up), %r9             /* r9 = up[n-2] */
  66         xor     %eax, %eax              /* clear rax before collecting the return value */
  67         shld    %cl, %r8, %rax          /* rax = bits shifted out of the top limb */
  68         sub     $3, n                   /* bias the counter; on this path it borrows only for n == 2 */
  69         jb      L(le2)
  70         mov     -16(up), %r10           /* r10 = up[n-3] */
  71         lea     -16(up), up             /* bias up for the loads that follow L(10) */
  72         lea     8(rp), rp               /* bias rp: -8(rp) at L(10) is the top result limb */
  73         jmp     L(10)
  74 L(le2): shld    %cl, %r9, %r8           /* n == 2: high result limb takes its low bits from r9 */
  75         mov     %r8, (rp)
  76         shl     %cl, %r9                /* low result limb, zeros enter from the right */
  77         mov     %r9, -8(rp)
  78         ret
  79
  80         .p2align 4              /* performance critical! */
  81 L(b11): /* n = 3, 7, 11, ... */
  82         mov     (up), %r11              /* r11 = up[n-1] */
  83         mov     -8(up), %r8             /* r8 = up[n-2] */
  84         xor     %eax, %eax              /* clear rax before collecting the return value */
  85         shld    %cl, %r11, %rax         /* rax = bits shifted out of the top limb */
  86         mov     -16(up), %r9            /* r9 = up[n-3] */
  87         lea     -24(up), up             /* up = &up[n-4], the next limb L(top) will load */
  88         sub     $4, n                   /* on this path it borrows only for n == 3 */
  89         jb      L(end)                  /* n == 3: r11, r8, r9 already hold everything */
  90
/* Main loop, four result limbs per pass.  The loaded source limbs rotate
   through r11, r8, r9, r10: each step merges one limb with its lower
   neighbour, reloads a register that has already been stored, and stores
   the merged limb.  L(10), L(01) and L(00) are the entry points used by
   the preambles above.  */
  91         .p2align 4
  92 L(top): shld    %cl, %r8, %r11          /* r11 = r11 shifted left, low bits filled from the top of r8 */
  93         mov     (up), %r10              /* load the limb consumed at L(01) and L(00) */
  94         mov     %r11, (rp)
  95 L(10):  shld    %cl, %r9, %r8           /* r8 merged with the top bits of r9 */
  96         mov     -8(up), %r11            /* r11 was stored above (or not yet used), reload it */
  97         mov     %r8, -8(rp)
  98 L(01):  shld    %cl, %r10, %r9          /* r9 merged with the top bits of r10 */
  99         mov     -16(up), %r8
 100         mov     %r9, -16(rp)
 101 L(00):  shld    %cl, %r11, %r10         /* r10 merged with the top bits of r11 */
 102         mov     -24(up), %r9
 103         mov     %r10, -24(rp)
 104         add     $-32, up                /* step both pointers down by four limbs */
 105         lea     -32(rp), rp
 106         sub     $4, n                   /* sets the flags tested by the jnc below */
 107         jnc     L(top)                  /* no borrow: another group of four limbs is left to load */
 108
 109 L(end): shld    %cl, %r8, %r11          /* r11, r8, r9 = the last three source limbs, highest first */
 110         mov     %r11, (rp)
 111         shld    %cl, %r9, %r8
 112         mov     %r8, -8(rp)
 113         shl     %cl, %r9                /* least significant limb, zeros enter from the right */
 114         mov     %r9, -16(rp)
 115         ret                             /* rax still holds the bits shifted out at the top */
 116 END (__mpn_lshift)