sysdeps/x86_64/fpu/svml_d_sincos2_core.S

   1 /* Function sincos vectorized with SSE2.
   2    Copyright (C) 2014-2023 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <https://www.gnu.org/licenses/>.  */
  18
  19 #include <sysdep.h>
  20 #include "svml_d_wrapper_impl.h"
  21
  22         .text
        /* Entry point _ZGVbN2vl8l8_sincos.  Its whole body is the
           expansion of WRAPPER_IMPL_SSE2_fFF with sincos as argument.
           That macro is not defined in this file; presumably it comes
           from svml_d_wrapper_impl.h (TODO confirm).  */
  23 ENTRY (_ZGVbN2vl8l8_sincos)
  24 WRAPPER_IMPL_SSE2_fFF sincos
  25 END (_ZGVbN2vl8l8_sincos)
        /* Emitted unconditionally, unlike the hidden definition of
           _ZGVbN2vvv_sincos, which is guarded by USE_MULTIARCH.  */
  26 libmvec_hidden_def (_ZGVbN2vl8l8_sincos)
  27
  28 /* SSE2 ISA version as wrapper to scalar (for vector
  29    function declared with #pragma omp declare simd notinbranch).

        On entry %xmm0 holds two doubles, %xmm1 holds two pointers that
        receive the first result of \callee for lane 0 and lane 1, and
        %xmm2 holds two pointers that receive its second result.  \callee
        is called once per lane with the double in %xmm0 and the addresses
        of two stack slots in %rdi/%rsi (%edi/%esi for __ILP32__); the four
        results are then copied from the stack through the saved pointers.

        The frame is 88 bytes.  The spills use aligned moves, so %rsp is
        assumed to be 16-byte aligned after the adjustment, as it is when
        the caller follows the usual 16-byte call-site alignment.

        In the __ILP32__ branch pointers are 32 bits wide, so each pointer
        pair sits in the low 64 bits of its vector.  Lane 1 pointers are
        obtained by shifting the 64-bit copy in %rax right by 32 instead of
        with pextrd, which is an SSE4.1 instruction and must not appear in
        the SSE2 variant.  %rbp and %rbx are saved because they carry the
        slot addresses across the first call.  */
  30 .macro WRAPPER_IMPL_SSE2_fFF_vvv callee
  31 #ifndef __ILP32__
  32         subq      $88, %rsp
  33         cfi_adjust_cfa_offset(88)
  34         movaps    %xmm0, 64(%rsp)       /* Save both input doubles.  */
  35         lea       (%rsp), %rdi          /* Lane 0, first result slot.  */
  36         movdqa    %xmm1, 32(%rdi)       /* Save first-result pointers.  */
  37         lea       16(%rsp), %rsi        /* Lane 0, second result slot.  */
  38         movdqa    %xmm2, 32(%rsi)       /* Save second-result pointers.  */
  39         call      JUMPTARGET(\callee)   /* Lane 0 is still in %xmm0.  */
  40         movsd     72(%rsp), %xmm0       /* Lane 1 input.  */
  41         lea       8(%rsp), %rdi         /* Lane 1, first result slot.  */
  42         lea       24(%rsp), %rsi        /* Lane 1, second result slot.  */
  43         call      JUMPTARGET(\callee)
  44         movq      32(%rsp), %rdx        /* Lane 0 first-result pointer.  */
  45         movq      48(%rsp), %rsi        /* Lane 0 second-result pointer.  */
  46         movq      40(%rsp), %r8         /* Lane 1 first-result pointer.  */
  47         movq      56(%rsp), %r10        /* Lane 1 second-result pointer.  */
  48         movq      (%rsp), %rax          /* Lane 0 first result.  */
  49         movq      16(%rsp), %rcx        /* Lane 0 second result.  */
  50         movq      8(%rsp), %rdi         /* Lane 1 first result.  */
  51         movq      24(%rsp), %r9         /* Lane 1 second result.  */
  52         movq      %rax, (%rdx)
  53         movq      %rcx, (%rsi)
  54         movq      %rdi, (%r8)
  55         movq      %r9, (%r10)
  56         addq      $88, %rsp
  57         cfi_adjust_cfa_offset(-88)
  58         ret
  59 #else
  60         pushq   %rbp
  61         .cfi_def_cfa_offset 16
  62         .cfi_offset 6, -16
  63         pushq   %rbx
  64         .cfi_def_cfa_offset 24
  65         .cfi_offset 3, -24
  66         subl    $88, %esp
  67         .cfi_def_cfa_offset 112
  68         leal    64(%rsp), %esi          /* Lane 0, second result slot.  */
  69         movaps  %xmm1, 32(%esp)         /* Save first-result pointers.  */
  70         leal    48(%rsp), %edi          /* Lane 0, first result slot.  */
  71         movaps  %xmm2, 16(%esp)         /* Save second-result pointers.  */
  72         movq    %rsi, %rbp              /* Keep slot addresses across call.  */
  73         movq    %rdi, %rbx
  74         movaps  %xmm0, (%esp)           /* Save both input doubles.  */
  75         call    JUMPTARGET(\callee)     /* Lane 0 is still in %xmm0.  */
  76         movupd  8(%esp), %xmm0          /* Lane 1 input in the low half.  */
  77         leal    8(%rbp), %esi           /* Lane 1, second result slot.  */
  78         leal    8(%rbx), %edi           /* Lane 1, first result slot.  */
  79         call    JUMPTARGET(\callee)
  80         movdqa  32(%esp), %xmm1
  81         movsd   48(%esp), %xmm0         /* Lane 0 first result.  */
  82         movq    %xmm1, %rax             /* Both first-result pointers.  */
  83         movdqa  16(%esp), %xmm2
  84         movsd   %xmm0, (%eax)
  85         movsd   56(%esp), %xmm0         /* Lane 1 first result.  */
  86         shrq    $32, %rax               /* Lane 1 pointer, SSE2-safe.  */
  87         movsd   %xmm0, (%eax)
  88         movsd   64(%esp), %xmm0         /* Lane 0 second result.  */
  89         movq    %xmm2, %rax             /* Both second-result pointers.  */
  90         movsd   %xmm0, (%eax)
  91         movsd   72(%esp), %xmm0         /* Lane 1 second result.  */
  92         shrq    $32, %rax               /* Lane 1 pointer, SSE2-safe.  */
  93         movsd   %xmm0, (%eax)
  94         addl    $88, %esp
  95         .cfi_def_cfa_offset 24
  96         popq    %rbx
  97         .cfi_def_cfa_offset 16
  98         popq    %rbp
  99         .cfi_def_cfa_offset 8
 100         ret
 101 #endif
 102 .endm
 103
 104 ENTRY (_ZGVbN2vvv_sincos)
        /* The body is the expansion of WRAPPER_IMPL_SSE2_fFF_vvv with
           sincos as the scalar routine called for each of the two lanes.  */
 105 WRAPPER_IMPL_SSE2_fFF_vvv sincos
 106 END (_ZGVbN2vvv_sincos)
 107
        /* This file emits the hidden definition only when USE_MULTIARCH
           is not defined; presumably a multiarch build provides it
           elsewhere (TODO confirm).  */
 108 #ifndef USE_MULTIARCH
 109  libmvec_hidden_def (_ZGVbN2vvv_sincos)
 110 #endif