sysdeps/x86_64/addmul_1.S

   1 /* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
   2    the result to a second limb vector.
   3    Copyright (C) 2003-2023 Free Software Foundation, Inc.
   4    This file is part of the GNU MP Library.
   5
   6    The GNU MP Library is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU Lesser General Public License as published by
   8    the Free Software Foundation; either version 2.1 of the License, or (at your
   9    option) any later version.
  10
  11    The GNU MP Library is distributed in the hope that it will be useful, but
  12    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  14    License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public License
  17    along with the GNU MP Library; see the file COPYING.LIB.  If not,
  18    see <https://www.gnu.org/licenses/>.  */
  19
  20 #include "sysdep.h"
  21 #include "asm-syntax.h"
  22
  23 #define rp      %rdi    /* limb vector that is loaded, combined with the product via ADDSUB, and stored back */
  24 #define up      %rsi    /* limb vector whose limbs are multiplied by the single limb */
  25 #define n       %rdx    /* limb count; the routine below names %rdx directly rather than using this alias */
  26 #define v0      %rcx    /* the single multiplier limb; the routine below names %rcx directly */
  27
     /* Defaults used only when `func' has not been defined before this point:
        the routine is named __mpn_addmul_1 and ADDSUB expands to `add'.
        NOTE(review): presumably a wrapper file pre-defines func and ADDSUB to
        build a differently named variant from the same body -- confirm
        against the other files in this directory.  */
  28 #ifndef func
  29 # define func __mpn_addmul_1
  30 # define ADDSUB add
  31 #endif
  32
  33         .text
     /* func (rp, up, n, v0)

        Multiplies each 8-byte limb of the vector at up by the limb v0 and
        combines the product into the corresponding limb of the vector at rp
        using ADDSUB (add by default), propagating the carry from limb to
        limb.  The final carry limb is returned in %rax.

        In:   rp = %rdi, up = %rsi, n = %rdx (limb count), v0 = %rcx.
        Out:  %rax = high limb of the last product plus the final carry.
        Uses: %rax, %rdx, %r8-%r11 as scratch; %rbx and %rbp are saved on
              entry and restored before returning.

        There is no check for n == 0: up[0] and rp[0] are loaded
        unconditionally, so callers must pass n >= 1.

        Register roles inside the routine:
          %rbx        negative limb index; starts at -n, is made even, then
                      advances by 2 until it reaches 0.
          rp, up      rebased to rp + 8*n - 16 and up + 8*n so that
                      (reg,%rbx,8) addressing walks forward as %rbx rises.
          %rdx:%rax   newest 128-bit product written by mul.
          %r8:%r9     one pending product (low:high).
          %r11:%rbp   the other pending product (low:high).
          %r10        limb currently loaded from rp.

        The carry flag is threaded through each ADDSUB/adc/adc group; the
        mov and lea instructions interleaved with them do not modify flags,
        which is what allows this instruction ordering.  */
  34 ENTRY (func)
  35         push    %rbx            /* %rbx and %rbp are overwritten below; restored before ret */
  36         push    %rbp
  37         lea     (%rdx), %rbx    /* %rbx = n (the mul below overwrites %rdx) */
  38         neg     %rbx            /* %rbx = -n */
  39
  40         mov     (up), %rax      /* %rax = up[0] */
  41         mov     (rp), %r10      /* %r10 = rp[0] */
  42
  43         lea     -16(rp,%rdx,8), rp      /* rp now points at the second-to-last limb */
  44         lea     (up,%rdx,8), up         /* up now points one limb past the end */
  45         mul     %rcx            /* %rdx:%rax = up[0] * v0 */
  46
  47         bt      $0, %ebx        /* CF = low bit of -n, i.e. set when n is odd */
  48         jc      L(odd)
  49
     /* n is even: form a second product before joining the loop.  */
  50         lea     (%rax), %r11    /* %r11 = low limb of up[0]*v0 */
  51         mov     8(up,%rbx,8), %rax      /* %rax = up[1] */
  52         lea     (%rdx), %rbp    /* %rbp = high limb of up[0]*v0 */
  53         mul     %rcx            /* %rdx:%rax = up[1] * v0 */
  54         add     $2, %rbx
  55         jns     L(n2)           /* index reached 0: n == 2, only the two-limb tail is left */
  56
  57         lea     (%rax), %r8     /* %r8 = low limb of up[1]*v0 */
  58         mov     (up,%rbx,8), %rax       /* %rax = up[2] */
  59         lea     (%rdx), %r9     /* %r9 = high limb of up[1]*v0 */
  60         jmp     L(mid)          /* join the loop at its second half */
  61
  62 L(odd): add     $1, %rbx        /* n is odd: make the index even */
  63         jns     L(n1)           /* index reached 0: n == 1, only the last-limb tail is left */
  64
  65         lea     (%rax), %r8     /* %r8 = low limb of up[0]*v0 */
  66         mov     (up,%rbx,8), %rax       /* %rax = up[1] */
  67         lea     (%rdx), %r9     /* %r9 = high limb of up[0]*v0 */
  68         mul     %rcx            /* %rdx:%rax = up[1] * v0 */
  69         lea     (%rax), %r11    /* %r11 = low limb of up[1]*v0 */
  70         mov     8(up,%rbx,8), %rax      /* %rax = up[2] */
  71         lea     (%rdx), %rbp    /* %rbp = high limb of up[1]*v0 */
  72         jmp     L(e)            /* join the loop at the index update */
  73
     /* Main loop, two limbs per pass.  At L(top) the older pending product is
        %r8:%r9 and the newer one is %r11:%rbp; at L(mid) the roles are
        swapped.  In each half: the older low limb is combined into the rp
        limb in %r10, the older high limb plus carry is added into the newer
        low limb, and the remaining carry goes into the newer high limb.  */
  74         .p2align 4
  75 L(top): mul     %rcx            /* %rdx:%rax = next product */
  76         ADDSUB  %r8, %r10       /* rp limb combined with older low limb; starts the carry chain */
  77         lea     (%rax), %r8     /* %r8 = low limb of the new product */
  78         mov     (up,%rbx,8), %rax       /* fetch the following up limb */
  79         adc     %r9, %r11       /* older high limb + carry into newer low limb */
  80         mov     %r10, -8(rp,%rbx,8)     /* store the finished rp limb */
  81         mov     (rp,%rbx,8), %r10       /* fetch the next rp limb */
  82         lea     (%rdx), %r9     /* %r9 = high limb of the new product */
  83         adc     $0, %rbp        /* carry into newer high limb */
  84 L(mid): mul     %rcx            /* %rdx:%rax = next product */
  85         ADDSUB  %r11, %r10      /* rp limb combined with older low limb; starts the carry chain */
  86         lea     (%rax), %r11    /* %r11 = low limb of the new product */
  87         mov     8(up,%rbx,8), %rax      /* fetch the following up limb */
  88         adc     %rbp, %r8       /* older high limb + carry into newer low limb */
  89         mov     %r10, (rp,%rbx,8)       /* store the finished rp limb */
  90         mov     8(rp,%rbx,8), %r10      /* fetch the next rp limb */
  91         lea     (%rdx), %rbp    /* %rbp = high limb of the new product */
  92         adc     $0, %r9         /* carry into newer high limb */
  93 L(e):   add     $2, %rbx        /* advance the index by two limbs */
  94         js      L(top)          /* keep looping while the index is still negative */
  95
     /* Tail: %rbx is 0 here, so rp addresses the last two limbs directly.
        L(n2) and L(n1) are also the direct entry points for n == 2 and
        n == 1.  */
  96         mul     %rcx            /* %rdx:%rax = product of the last up limb */
  97         ADDSUB  %r8, %r10       /* third-from-last rp limb combined with pending low limb */
  98         adc     %r9, %r11
  99         mov     %r10, -8(rp)    /* store the third-from-last rp limb */
 100         adc     $0, %rbp
 101 L(n2):  mov     (rp), %r10      /* second-to-last rp limb */
 102         ADDSUB  %r11, %r10
 103         adc     %rbp, %rax      /* pending high limb + carry into last product's low limb */
 104         mov     %r10, (rp)
 105         adc     $0, %rdx        /* carry into last product's high limb */
 106 L(n1):  mov     8(rp), %r10     /* last rp limb */
 107         ADDSUB  %rax, %r10
 108         mov     %r10, 8(rp)
 109         mov     %ebx, %eax      /* %rbx is 0 here, so this zeroes %rax without disturbing the carry flag */
 110         adc     %rdx, %rax      /* return value = last high limb + final carry */
 111         pop     %rbp
 112         pop     %rbx
 113         ret
 114 END (func)