sysdeps/aarch64/fpu/sv_math.h

   1 /* Utilities for SVE libmvec routines.
   2    Copyright (C) 2023-2024 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <https://www.gnu.org/licenses/>.  */
  18
  19 #ifndef SV_MATH_H
  20 #define SV_MATH_H
  21
  22 #include <arm_sve.h>
  23 #include <stdbool.h>
  24
  25 #include "vecmath_config.h"
  26
/* Build the symbol name of an SVE vector routine by pasting FUN onto a
   fixed prefix.  The F variants additionally append an `f' suffix to
   FUN, the D variants do not.  The 1 variants use the `_ZGVsMxv_'
   prefix and the 2 variants use `_ZGVsMxvv_'.
   NOTE(review): the prefixes look like AArch64 vector function ABI
   manglings, presumably with one `v' per vector argument - confirm
   against the ABI document.  */
#define SV_NAME_F1(fun) _ZGVsMxv_##fun##f
#define SV_NAME_D1(fun) _ZGVsMxv_##fun
#define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f
#define SV_NAME_D2(fun) _ZGVsMxvv_##fun
  31
  32 /* Double precision.  */
  33 static inline svint64_t
  34 sv_s64 (int64_t x)
  35 {
  36   return svdup_n_s64 (x);
  37 }
  38
  39 static inline svuint64_t
  40 sv_u64 (uint64_t x)
  41 {
  42   return svdup_n_u64 (x);
  43 }
  44
  45 static inline svfloat64_t
  46 sv_f64 (double x)
  47 {
  48   return svdup_n_f64 (x);
  49 }
  50
  51 static inline svfloat64_t
  52 sv_call_f64 (double (*f) (double), svfloat64_t x, svfloat64_t y, svbool_t cmp)
  53 {
  54   svbool_t p = svpfirst (cmp, svpfalse ());
  55   while (svptest_any (cmp, p))
  56     {
  57       double elem = svclastb_n_f64 (p, 0, x);
  58       elem = (*f) (elem);
  59       svfloat64_t y2 = svdup_n_f64 (elem);
  60       y = svsel_f64 (p, y2, y);
  61       p = svpnext_b64 (cmp, p);
  62     }
  63   return y;
  64 }
  65
  66 static inline svfloat64_t
  67 sv_call2_f64 (double (*f) (double, double), svfloat64_t x1, svfloat64_t x2,
  68               svfloat64_t y, svbool_t cmp)
  69 {
  70   svbool_t p = svpfirst (cmp, svpfalse ());
  71   while (svptest_any (cmp, p))
  72     {
  73       double elem1 = svclastb_n_f64 (p, 0, x1);
  74       double elem2 = svclastb_n_f64 (p, 0, x2);
  75       double ret = (*f) (elem1, elem2);
  76       svfloat64_t y2 = svdup_n_f64 (ret);
  77       y = svsel_f64 (p, y2, y);
  78       p = svpnext_b64 (cmp, p);
  79     }
  80   return y;
  81 }
  82
  83 static inline svuint64_t
  84 sv_mod_n_u64_x (svbool_t pg, svuint64_t x, uint64_t y)
  85 {
  86   svuint64_t q = svdiv_n_u64_x (pg, x, y);
  87   return svmls_n_u64_x (pg, x, q, y);
  88 }
  89
  90 /* Single precision.  */
  91 static inline svint32_t
  92 sv_s32 (int32_t x)
  93 {
  94   return svdup_n_s32 (x);
  95 }
  96
  97 static inline svuint32_t
  98 sv_u32 (uint32_t x)
  99 {
 100   return svdup_n_u32 (x);
 101 }
 102
 103 static inline svfloat32_t
 104 sv_f32 (float x)
 105 {
 106   return svdup_n_f32 (x);
 107 }
 108
 109 static inline svfloat32_t
 110 sv_call_f32 (float (*f) (float), svfloat32_t x, svfloat32_t y, svbool_t cmp)
 111 {
 112   svbool_t p = svpfirst (cmp, svpfalse ());
 113   while (svptest_any (cmp, p))
 114     {
 115       float elem = svclastb_n_f32 (p, 0, x);
 116       elem = f (elem);
 117       svfloat32_t y2 = svdup_n_f32 (elem);
 118       y = svsel_f32 (p, y2, y);
 119       p = svpnext_b32 (cmp, p);
 120     }
 121   return y;
 122 }
 123
 124 static inline svfloat32_t
 125 sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2,
 126               svfloat32_t y, svbool_t cmp)
 127 {
 128   svbool_t p = svpfirst (cmp, svpfalse ());
 129   while (svptest_any (cmp, p))
 130     {
 131       float elem1 = svclastb_n_f32 (p, 0, x1);
 132       float elem2 = svclastb_n_f32 (p, 0, x2);
 133       float ret = f (elem1, elem2);
 134       svfloat32_t y2 = svdup_n_f32 (ret);
 135       y = svsel_f32 (p, y2, y);
 136       p = svpnext_b32 (cmp, p);
 137     }
 138   return y;
 139 }
 140
 141 #endif