sysdeps/x86_64/fpu/svml_s_sincosf4_core.S

   1 /* Function sincosf vectorized with SSE2.
   2    Copyright (C) 2014-2017 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <http://www.gnu.org/licenses/>.  */
  18
  19 #include <sysdep.h>
  20 #include "svml_s_wrapper_impl.h"
  21
  22         .text
  /* Four-lane sincosf entry point.  The body is generated by
     WRAPPER_IMPL_SSE2_fFF, which is not defined in this file
     (presumably it comes from svml_s_wrapper_impl.h -- confirm),
     instantiated with sincosf as the scalar routine.  Going by the
     x86-64 vector function ABI name mangling, "vl4l4" denotes one
     vector argument followed by two linear arguments with step 4.  */
  ENTRY (_ZGVbN4vl4l4_sincosf)
          WRAPPER_IMPL_SSE2_fFF sincosf
  END (_ZGVbN4vl4l4_sincosf)
  libmvec_hidden_def (_ZGVbN4vl4l4_sincosf)
  27
  /* SSE2 ISA version as wrapper to scalar (for vector
     function declared with #pragma omp declare simd notinbranch).

     Input registers:
       %xmm0          four float inputs, one per lane.
       LP64:          %xmm1, %xmm2 hold four 64-bit destination pointers
                      for the value the callee writes through its first
                      pointer argument (%rdi); %xmm3, %xmm4 hold the four
                      destination pointers for the value written through
                      its second pointer argument (%rsi).
       x32 (ILP32):   pointers are 32 bits wide, so %xmm1 holds all four
                      first-result pointers and %xmm2 all four
                      second-result pointers.

     The scalar callee is invoked once per lane with %rdi/%rsi pointing
     at scratch slots in this frame; afterwards every scratch value is
     copied out through the matching caller-supplied pointer.  Stores are
     issued first-result lanes 0..3, then second-result lanes 0..3.

     Only SSE2 (and base x86-64) instructions may be used here, because
     the macro generates the SSE2 variant of the vector function.  */
  .macro WRAPPER_IMPL_SSE2_fFF_vvv callee
  #ifndef __ILP32__
          /* Frame layout (byte offsets from %rsp after the adjustment):
                 0..15    first results written by callee, one float per lane
                16..31    second results written by callee
                32..63    first-result destination pointers (%xmm1, %xmm2)
                64..95    second-result destination pointers (%xmm3, %xmm4)
                96..111   spilled input vector (%xmm0)
               112..119   padding
             120 bytes plus the 8-byte return address keep %rsp 16-byte
             aligned for the aligned spills and for the calls.  */
          subq      $120, %rsp
          cfi_adjust_cfa_offset(120)
          movaps    %xmm0, 96(%rsp)
          lea       (%rsp), %rdi
          movdqa    %xmm1, 32(%rdi)
          lea       16(%rsp), %rsi
          movdqa    %xmm2, 32(%rsi)
          movdqa    %xmm3, 48(%rsi)
          movdqa    %xmm4, 64(%rsi)
          /* Lane 0: %xmm0 still carries the input in its low element.  */
          call      JUMPTARGET(\callee)
          /* Lane 1.  */
          movss     100(%rsp), %xmm0
          lea       4(%rsp), %rdi
          lea       20(%rsp), %rsi
          call      JUMPTARGET(\callee)
          /* Lane 2.  */
          movss     104(%rsp), %xmm0
          lea       8(%rsp), %rdi
          lea       24(%rsp), %rsi
          call      JUMPTARGET(\callee)
          /* Lane 3.  */
          movss     108(%rsp), %xmm0
          lea       12(%rsp), %rdi
          lea       28(%rsp), %rsi
          call      JUMPTARGET(\callee)
          /* Scatter the first results: load the four destination
             pointers and the four scratch values, then store.  */
          movq      32(%rsp), %rdx
          movq      40(%rsp), %rsi
          movq      48(%rsp), %r8
          movq      56(%rsp), %r10
          movl      (%rsp), %eax
          movl      4(%rsp), %ecx
          movl      8(%rsp), %edi
          movl      12(%rsp), %r9d
          movl      %eax, (%rdx)
          movl      %ecx, (%rsi)
          /* %rax/%rcx are free again: start fetching the second-result
             pointers while the remaining first results are stored.  */
          movq      64(%rsp), %rax
          movq      72(%rsp), %rcx
          movl      %edi, (%r8)
          movl      %r9d, (%r10)
          /* Scatter the second results the same way.  */
          movq      80(%rsp), %rdi
          movq      88(%rsp), %r9
          movl      16(%rsp), %r11d
          movl      20(%rsp), %edx
          movl      24(%rsp), %esi
          movl      28(%rsp), %r8d
          movl      %r11d, (%rax)
          movl      %edx, (%rcx)
          movl      %esi, (%rdi)
          movl      %r8d, (%r9)
          addq      $120, %rsp
          cfi_adjust_cfa_offset(-120)
          ret
  #else
          /* x32 frame layout (byte offsets from %esp after the adjustment):
                 0..15    first-result destination pointers (%xmm1)
                16..31    second-result destination pointers (%xmm2)
                32..47    spilled input vector (%xmm0)
                48..63    first results written by callee (base kept in %rbx)
                64..79    second results written by callee (base kept in %rbp)
                80..87    padding
             Return address (8) + two pushes (16) + 88 = 112, which keeps
             the stack 16-byte aligned for movaps and for the calls.  */
          pushq   %rbp
          .cfi_def_cfa_offset 16
          .cfi_offset 6, -16
          pushq   %rbx
          .cfi_def_cfa_offset 24
          .cfi_offset 3, -24
          subl    $88, %esp
          .cfi_def_cfa_offset 112
          leal    64(%rsp), %esi
          movaps  %xmm1, (%esp)
          leal    48(%rsp), %edi
          movaps  %xmm2, 16(%esp)
          /* Keep the scratch bases in callee-saved registers so they
             survive the calls.  */
          movq    %rsi, %rbp
          movq    %rdi, %rbx
          movaps  %xmm0, 32(%esp)
          /* Lane 0: %xmm0 still carries the input in its low element.  */
          call    JUMPTARGET(\callee)
          /* Lanes 1..3: the scalar callee takes a single float, so a
             movss reload of just that lane is sufficient.  */
          movss   36(%esp), %xmm0
          leal    4(%rbp), %esi
          leal    4(%rbx), %edi
          call    JUMPTARGET(\callee)
          movss   40(%esp), %xmm0
          leal    8(%rbp), %esi
          leal    8(%rbx), %edi
          call    JUMPTARGET(\callee)
          movss   44(%esp), %xmm0
          leal    12(%rbp), %esi
          leal    12(%rbx), %edi
          call    JUMPTARGET(\callee)
          /* Scatter the results.  Each 32-bit destination pointer is
             reloaded from its spill slot with a plain movl: PEXTRD is an
             SSE4.1 instruction and must not appear in the SSE2 variant.  */
          movl    (%esp), %eax
          movss   48(%esp), %xmm0
          movss   %xmm0, (%eax)
          movl    4(%esp), %eax
          movss   52(%esp), %xmm0
          movss   %xmm0, (%eax)
          movl    8(%esp), %eax
          movss   56(%esp), %xmm0
          movss   %xmm0, (%eax)
          movl    12(%esp), %eax
          movss   60(%esp), %xmm0
          movss   %xmm0, (%eax)
          movl    16(%esp), %eax
          movss   64(%esp), %xmm0
          movss   %xmm0, (%eax)
          movl    20(%esp), %eax
          movss   68(%esp), %xmm0
          movss   %xmm0, (%eax)
          movl    24(%esp), %eax
          movss   72(%esp), %xmm0
          movss   %xmm0, (%eax)
          movl    28(%esp), %eax
          movss   76(%esp), %xmm0
          movss   %xmm0, (%eax)
          addl    $88, %esp
          .cfi_def_cfa_offset 24
          popq    %rbx
          .cfi_def_cfa_offset 16
          popq    %rbp
          .cfi_def_cfa_offset 8
          ret
  #endif
  .endm
 145
 /* Four-lane sincosf entry point whose body is generated by
    WRAPPER_IMPL_SSE2_fFF_vvv with sincosf as the scalar callee.  */
 ENTRY (_ZGVbN4vvv_sincosf)
         WRAPPER_IMPL_SSE2_fFF_vvv sincosf
 END (_ZGVbN4vvv_sincosf)

 /* The hidden definition is emitted only when USE_MULTIARCH is not
    defined.  */
 #if !defined USE_MULTIARCH
  libmvec_hidden_def (_ZGVbN4vvv_sincosf)
 #endif
 152 #endif