target/arm/vec_internal.h

   1 /*
   2  * ARM AdvSIMD / SVE Vector Helpers
   3  *
   4  * Copyright (c) 2020 Linaro
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #ifndef TARGET_ARM_VEC_INTERNALS_H
  21 #define TARGET_ARM_VEC_INTERNALS_H
  22
  23 /*
  24  * Note that vector data is stored in host-endian 64-bit chunks,
  25  * so addressing units smaller than that needs a host-endian fixup.
  26  *
  27  * The H<N> macros are used when indexing an array of elements of size N.
  28  *
  29  * The H1_<N> macros are used when performing byte arithmetic and then
  30  * casting the final pointer to a type of size N.
  31  */
  32 #ifdef HOST_WORDS_BIGENDIAN
  33 #define H1(x)   ((x) ^ 7)
  34 #define H1_2(x) ((x) ^ 6)
  35 #define H1_4(x) ((x) ^ 4)
  36 #define H2(x)   ((x) ^ 3)
  37 #define H4(x)   ((x) ^ 1)
  38 #else
  39 #define H1(x)   (x)
  40 #define H1_2(x) (x)
  41 #define H1_4(x) (x)
  42 #define H2(x)   (x)
  43 #define H4(x)   (x)
  44 #endif
  45
  46
  47 static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
  48 {
  49     uint64_t *d = vd + opr_sz;
  50     uintptr_t i;
  51
  52     for (i = opr_sz; i < max_sz; i += 8) {
  53         *d++ = 0;
  54     }
  55 }
  56
  57 static inline int32_t do_sqrshl_bhs(int32_t src, int32_t shift, int bits,
  58                                     bool round, uint32_t *sat)
  59 {
  60     if (shift <= -bits) {
  61         /* Rounding the sign bit always produces 0. */
  62         if (round) {
  63             return 0;
  64         }
  65         return src >> 31;
  66     } else if (shift < 0) {
  67         if (round) {
  68             src >>= -shift - 1;
  69             return (src >> 1) + (src & 1);
  70         }
  71         return src >> -shift;
  72     } else if (shift < bits) {
  73         int32_t val = src << shift;
  74         if (bits == 32) {
  75             if (!sat || val >> shift == src) {
  76                 return val;
  77             }
  78         } else {
  79             int32_t extval = sextract32(val, 0, bits);
  80             if (!sat || val == extval) {
  81                 return extval;
  82             }
  83         }
  84     } else if (!sat || src == 0) {
  85         return 0;
  86     }
  87
  88     *sat = 1;
  89     return (1u << (bits - 1)) - (src >= 0);
  90 }
  91
  92 static inline uint32_t do_uqrshl_bhs(uint32_t src, int32_t shift, int bits,
  93                                      bool round, uint32_t *sat)
  94 {
  95     if (shift <= -(bits + round)) {
  96         return 0;
  97     } else if (shift < 0) {
  98         if (round) {
  99             src >>= -shift - 1;
 100             return (src >> 1) + (src & 1);
 101         }
 102         return src >> -shift;
 103     } else if (shift < bits) {
 104         uint32_t val = src << shift;
 105         if (bits == 32) {
 106             if (!sat || val >> shift == src) {
 107                 return val;
 108             }
 109         } else {
 110             uint32_t extval = extract32(val, 0, bits);
 111             if (!sat || val == extval) {
 112                 return extval;
 113             }
 114         }
 115     } else if (!sat || src == 0) {
 116         return 0;
 117     }
 118
 119     *sat = 1;
 120     return MAKE_64BIT_MASK(0, bits);
 121 }
 122
 123 static inline int32_t do_suqrshl_bhs(int32_t src, int32_t shift, int bits,
 124                                      bool round, uint32_t *sat)
 125 {
 126     if (sat && src < 0) {
 127         *sat = 1;
 128         return 0;
 129     }
 130     return do_uqrshl_bhs(src, shift, bits, round, sat);
 131 }
 132
 133 static inline int64_t do_sqrshl_d(int64_t src, int64_t shift,
 134                                   bool round, uint32_t *sat)
 135 {
 136     if (shift <= -64) {
 137         /* Rounding the sign bit always produces 0. */
 138         if (round) {
 139             return 0;
 140         }
 141         return src >> 63;
 142     } else if (shift < 0) {
 143         if (round) {
 144             src >>= -shift - 1;
 145             return (src >> 1) + (src & 1);
 146         }
 147         return src >> -shift;
 148     } else if (shift < 64) {
 149         int64_t val = src << shift;
 150         if (!sat || val >> shift == src) {
 151             return val;
 152         }
 153     } else if (!sat || src == 0) {
 154         return 0;
 155     }
 156
 157     *sat = 1;
 158     return src < 0 ? INT64_MIN : INT64_MAX;
 159 }
 160
 161 static inline uint64_t do_uqrshl_d(uint64_t src, int64_t shift,
 162                                    bool round, uint32_t *sat)
 163 {
 164     if (shift <= -(64 + round)) {
 165         return 0;
 166     } else if (shift < 0) {
 167         if (round) {
 168             src >>= -shift - 1;
 169             return (src >> 1) + (src & 1);
 170         }
 171         return src >> -shift;
 172     } else if (shift < 64) {
 173         uint64_t val = src << shift;
 174         if (!sat || val >> shift == src) {
 175             return val;
 176         }
 177     } else if (!sat || src == 0) {
 178         return 0;
 179     }
 180
 181     *sat = 1;
 182     return UINT64_MAX;
 183 }
 184
 185 static inline int64_t do_suqrshl_d(int64_t src, int64_t shift,
 186                                    bool round, uint32_t *sat)
 187 {
 188     if (sat && src < 0) {
 189         *sat = 1;
 190         return 0;
 191     }
 192     return do_uqrshl_d(src, shift, round, sat);
 193 }
 194
/*
 * Out-of-line helpers: only the prototypes appear in this header.
 *
 * There is one variant per element type (int8_t, int16_t, int32_t,
 * int64_t).  Each takes three operands of that type followed by two
 * bool flags, and returns a value of the same type.  The 16-bit and
 * 32-bit variants take an additional uint32_t pointer.
 *
 * NOTE(review): the names suggest SQRDMLAH (signed saturating rounding
 * doubling multiply-accumulate, high half) -- confirm against the
 * definitions.  The meaning of the two bool flags is not visible here.
 * The uint32_t pointer is presumably a saturation flag like the "sat"
 * argument of the shift helpers in this header -- TODO confirm.
 */
int8_t do_sqrdmlah_b(int8_t, int8_t, int8_t, bool, bool);
int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
 199
 200 #endif /* TARGET_ARM_VEC_INTERNALS_H */