target/arm/tcg/gengvec64.c

   1 /*
   2  *  AArch64 generic vector expansion
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "translate.h"
  22 #include "translate-a64.h"
  23
  24
  25 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
  26 {
  27     tcg_gen_rotli_i64(d, m, 1);
  28     tcg_gen_xor_i64(d, d, n);
  29 }
  30
  31 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
  32 {
  33     tcg_gen_rotli_vec(vece, d, m, 1);
  34     tcg_gen_xor_vec(vece, d, d, n);
  35 }
  36
  37 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
  38                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
  39 {
  40     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
  41     static const GVecGen3 op = {
  42         .fni8 = gen_rax1_i64,
  43         .fniv = gen_rax1_vec,
  44         .opt_opc = vecop_list,
  45         .fno = gen_helper_crypto_rax1,
  46         .vece = MO_64,
  47     };
  48     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
  49 }
  50
  51 static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
  52 {
  53     TCGv_i64 t = tcg_temp_new_i64();
  54     uint64_t mask = dup_const(MO_8, 0xff >> sh);
  55
  56     tcg_gen_xor_i64(t, n, m);
  57     tcg_gen_shri_i64(d, t, sh);
  58     tcg_gen_shli_i64(t, t, 8 - sh);
  59     tcg_gen_andi_i64(d, d, mask);
  60     tcg_gen_andi_i64(t, t, ~mask);
  61     tcg_gen_or_i64(d, d, t);
  62 }
  63
  64 static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
  65 {
  66     TCGv_i64 t = tcg_temp_new_i64();
  67     uint64_t mask = dup_const(MO_16, 0xffff >> sh);
  68
  69     tcg_gen_xor_i64(t, n, m);
  70     tcg_gen_shri_i64(d, t, sh);
  71     tcg_gen_shli_i64(t, t, 16 - sh);
  72     tcg_gen_andi_i64(d, d, mask);
  73     tcg_gen_andi_i64(t, t, ~mask);
  74     tcg_gen_or_i64(d, d, t);
  75 }
  76
  77 static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
  78 {
  79     tcg_gen_xor_i32(d, n, m);
  80     tcg_gen_rotri_i32(d, d, sh);
  81 }
  82
  83 static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
  84 {
  85     tcg_gen_xor_i64(d, n, m);
  86     tcg_gen_rotri_i64(d, d, sh);
  87 }
  88
  89 static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
  90                         TCGv_vec m, int64_t sh)
  91 {
  92     tcg_gen_xor_vec(vece, d, n, m);
  93     tcg_gen_rotri_vec(vece, d, d, sh);
  94 }
  95
  96 void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
  97                   uint32_t rm_ofs, int64_t shift,
  98                   uint32_t opr_sz, uint32_t max_sz)
  99 {
 100     static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
 101     static const GVecGen3i ops[4] = {
 102         { .fni8 = gen_xar8_i64,
 103           .fniv = gen_xar_vec,
 104           .fno = gen_helper_sve2_xar_b,
 105           .opt_opc = vecop,
 106           .vece = MO_8 },
 107         { .fni8 = gen_xar16_i64,
 108           .fniv = gen_xar_vec,
 109           .fno = gen_helper_sve2_xar_h,
 110           .opt_opc = vecop,
 111           .vece = MO_16 },
 112         { .fni4 = gen_xar_i32,
 113           .fniv = gen_xar_vec,
 114           .fno = gen_helper_sve2_xar_s,
 115           .opt_opc = vecop,
 116           .vece = MO_32 },
 117         { .fni8 = gen_xar_i64,
 118           .fniv = gen_xar_vec,
 119           .fno = gen_helper_gvec_xar_d,
 120           .opt_opc = vecop,
 121           .vece = MO_64 }
 122     };
 123     int esize = 8 << vece;
 124
 125     /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
 126     tcg_debug_assert(shift >= 0);
 127     tcg_debug_assert(shift <= esize);
 128     shift &= esize - 1;
 129
 130     if (shift == 0) {
 131         /* xar with no rotate devolves to xor. */
 132         tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
 133     } else {
 134         tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
 135                         shift, &ops[vece]);
 136     }
 137 }
 138
 139 static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
 140 {
 141     tcg_gen_xor_i64(d, n, m);
 142     tcg_gen_xor_i64(d, d, k);
 143 }
 144
 145 static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
 146                          TCGv_vec m, TCGv_vec k)
 147 {
 148     tcg_gen_xor_vec(vece, d, n, m);
 149     tcg_gen_xor_vec(vece, d, d, k);
 150 }
 151
 152 void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
 153                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
 154 {
 155     static const GVecGen4 op = {
 156         .fni8 = gen_eor3_i64,
 157         .fniv = gen_eor3_vec,
 158         .fno = gen_helper_sve2_eor3,
 159         .vece = MO_64,
 160         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
 161     };
 162     tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
 163 }
 164
 165 static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
 166 {
 167     tcg_gen_andc_i64(d, m, k);
 168     tcg_gen_xor_i64(d, d, n);
 169 }
 170
 171 static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
 172                          TCGv_vec m, TCGv_vec k)
 173 {
 174     tcg_gen_andc_vec(vece, d, m, k);
 175     tcg_gen_xor_vec(vece, d, d, n);
 176 }
 177
 178 void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
 179                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
 180 {
 181     static const GVecGen4 op = {
 182         .fni8 = gen_bcax_i64,
 183         .fniv = gen_bcax_vec,
 184         .fno = gen_helper_sve2_bcax,
 185         .vece = MO_64,
 186         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
 187     };
 188     tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
 189 }
 190
 191 /*
 192  * Set @res to the correctly saturated result.
 193  * Set @qc non-zero if saturation occured.
 194  */
 195 void gen_suqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
 196                     TCGv_i64 a, TCGv_i64 b, MemOp esz)
 197 {
 198     TCGv_i64 max = tcg_constant_i64((1ull << ((8 << esz) - 1)) - 1);
 199     TCGv_i64 t = tcg_temp_new_i64();
 200
 201     tcg_gen_add_i64(t, a, b);
 202     tcg_gen_smin_i64(res, t, max);
 203     tcg_gen_xor_i64(t, t, res);
 204     tcg_gen_or_i64(qc, qc, t);
 205 }
 206
 207 void gen_suqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
 208 {
 209     TCGv_i64 max = tcg_constant_i64(INT64_MAX);
 210     TCGv_i64 t = tcg_temp_new_i64();
 211
 212     /* Maximum value that can be added to @a without overflow. */
 213     tcg_gen_sub_i64(t, max, a);
 214
 215     /* Constrain addend so that the next addition never overflows. */
 216     tcg_gen_umin_i64(t, t, b);
 217     tcg_gen_add_i64(res, a, t);
 218
 219     tcg_gen_xor_i64(t, t, b);
 220     tcg_gen_or_i64(qc, qc, t);
 221 }
 222
 223 static void gen_suqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
 224                            TCGv_vec a, TCGv_vec b)
 225 {
 226     TCGv_vec max =
 227         tcg_constant_vec_matching(t, vece, (1ull << ((8 << vece) - 1)) - 1);
 228     TCGv_vec u = tcg_temp_new_vec_matching(t);
 229
 230     /* Maximum value that can be added to @a without overflow. */
 231     tcg_gen_sub_vec(vece, u, max, a);
 232
 233     /* Constrain addend so that the next addition never overflows. */
 234     tcg_gen_umin_vec(vece, u, u, b);
 235     tcg_gen_add_vec(vece, t, u, a);
 236
 237     /* Compute QC by comparing the adjusted @b. */
 238     tcg_gen_xor_vec(vece, u, u, b);
 239     tcg_gen_or_vec(vece, qc, qc, u);
 240 }
 241
 242 void gen_gvec_suqadd_qc(unsigned vece, uint32_t rd_ofs,
 243                         uint32_t rn_ofs, uint32_t rm_ofs,
 244                         uint32_t opr_sz, uint32_t max_sz)
 245 {
 246     static const TCGOpcode vecop_list[] = {
 247         INDEX_op_add_vec, INDEX_op_sub_vec, INDEX_op_umin_vec, 0
 248     };
 249     static const GVecGen4 ops[4] = {
 250         { .fniv = gen_suqadd_vec,
 251           .fno = gen_helper_gvec_suqadd_b,
 252           .opt_opc = vecop_list,
 253           .write_aofs = true,
 254           .vece = MO_8 },
 255         { .fniv = gen_suqadd_vec,
 256           .fno = gen_helper_gvec_suqadd_h,
 257           .opt_opc = vecop_list,
 258           .write_aofs = true,
 259           .vece = MO_16 },
 260         { .fniv = gen_suqadd_vec,
 261           .fno = gen_helper_gvec_suqadd_s,
 262           .opt_opc = vecop_list,
 263           .write_aofs = true,
 264           .vece = MO_32 },
 265         { .fniv = gen_suqadd_vec,
 266           .fni8 = gen_suqadd_d,
 267           .fno = gen_helper_gvec_suqadd_d,
 268           .opt_opc = vecop_list,
 269           .write_aofs = true,
 270           .vece = MO_64 },
 271     };
 272
 273     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
 274     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
 275                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
 276 }
 277
 278 void gen_usqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
 279                     TCGv_i64 a, TCGv_i64 b, MemOp esz)
 280 {
 281     TCGv_i64 max = tcg_constant_i64(MAKE_64BIT_MASK(0, 8 << esz));
 282     TCGv_i64 zero = tcg_constant_i64(0);
 283     TCGv_i64 tmp = tcg_temp_new_i64();
 284
 285     tcg_gen_add_i64(tmp, a, b);
 286     tcg_gen_smin_i64(res, tmp, max);
 287     tcg_gen_smax_i64(res, res, zero);
 288     tcg_gen_xor_i64(tmp, tmp, res);
 289     tcg_gen_or_i64(qc, qc, tmp);
 290 }
 291
 292 void gen_usqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
 293 {
 294     TCGv_i64 tmp = tcg_temp_new_i64();
 295     TCGv_i64 tneg = tcg_temp_new_i64();
 296     TCGv_i64 tpos = tcg_temp_new_i64();
 297     TCGv_i64 max = tcg_constant_i64(UINT64_MAX);
 298     TCGv_i64 zero = tcg_constant_i64(0);
 299
 300     tcg_gen_add_i64(tmp, a, b);
 301
 302     /* If @b is positive, saturate if (a + b) < a, aka unsigned overflow. */
 303     tcg_gen_movcond_i64(TCG_COND_LTU, tpos, tmp, a, max, tmp);
 304
 305     /* If @b is negative, saturate if a < -b, ie subtraction is negative. */
 306     tcg_gen_neg_i64(tneg, b);
 307     tcg_gen_movcond_i64(TCG_COND_LTU, tneg, a, tneg, zero, tmp);
 308
 309     /* Select correct result from sign of @b. */
 310     tcg_gen_movcond_i64(TCG_COND_LT, res, b, zero, tneg, tpos);
 311     tcg_gen_xor_i64(tmp, tmp, res);
 312     tcg_gen_or_i64(qc, qc, tmp);
 313 }
 314
 315 static void gen_usqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
 316                            TCGv_vec a, TCGv_vec b)
 317 {
 318     TCGv_vec u = tcg_temp_new_vec_matching(t);
 319     TCGv_vec z = tcg_constant_vec_matching(t, vece, 0);
 320
 321     /* Compute unsigned saturation of add for +b and sub for -b. */
 322     tcg_gen_neg_vec(vece, t, b);
 323     tcg_gen_usadd_vec(vece, u, a, b);
 324     tcg_gen_ussub_vec(vece, t, a, t);
 325
 326     /* Select the correct result depending on the sign of b. */
 327     tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, b, z, t, u);
 328
 329     /* Compute QC by comparing against the non-saturated result. */
 330     tcg_gen_add_vec(vece, u, a, b);
 331     tcg_gen_xor_vec(vece, u, u, t);
 332     tcg_gen_or_vec(vece, qc, qc, u);
 333 }
 334
 335 void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs,
 336                         uint32_t rn_ofs, uint32_t rm_ofs,
 337                         uint32_t opr_sz, uint32_t max_sz)
 338 {
 339     static const TCGOpcode vecop_list[] = {
 340         INDEX_op_neg_vec, INDEX_op_add_vec,
 341         INDEX_op_usadd_vec, INDEX_op_ussub_vec,
 342         INDEX_op_cmpsel_vec, 0
 343     };
 344     static const GVecGen4 ops[4] = {
 345         { .fniv = gen_usqadd_vec,
 346           .fno = gen_helper_gvec_usqadd_b,
 347           .opt_opc = vecop_list,
 348           .write_aofs = true,
 349           .vece = MO_8 },
 350         { .fniv = gen_usqadd_vec,
 351           .fno = gen_helper_gvec_usqadd_h,
 352           .opt_opc = vecop_list,
 353           .write_aofs = true,
 354           .vece = MO_16 },
 355         { .fniv = gen_usqadd_vec,
 356           .fno = gen_helper_gvec_usqadd_s,
 357           .opt_opc = vecop_list,
 358           .write_aofs = true,
 359           .vece = MO_32 },
 360         { .fniv = gen_usqadd_vec,
 361           .fni8 = gen_usqadd_d,
 362           .fno = gen_helper_gvec_usqadd_d,
 363           .opt_opc = vecop_list,
 364           .write_aofs = true,
 365           .vece = MO_64 },
 366     };
 367
 368     tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
 369     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
 370                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
 371 }