tcg/tcg-op-vec.c

   1 /*
   2  * Tiny Code Generator for QEMU
   3  *
   4  * Copyright (c) 2018 Linaro, Inc.
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "qemu-common.h"
  22 #include "cpu.h"
  23 #include "tcg.h"
  24 #include "tcg-op.h"
  25 #include "tcg-mo.h"
  26
/*
 * Reduce the number of ifdefs below.  This assumes that all uses of
 * TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
 * the compiler can eliminate.
 *
 * When TCG_TARGET_REG_BITS is 64 the two names are redirected to
 * functions that are only declared here; presumably no definition
 * exists anywhere, so a use that is not eliminated shows up as a link
 * error rather than as silently wrong code.
 */
#if TCG_TARGET_REG_BITS == 64
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW  TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif
  36
  37 void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
  38 {
  39     TCGOp *op = tcg_emit_op(opc);
  40     TCGOP_VECL(op) = type - TCG_TYPE_V64;
  41     TCGOP_VECE(op) = vece;
  42     op->args[0] = r;
  43     op->args[1] = a;
  44 }
  45
  46 void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
  47                TCGArg r, TCGArg a, TCGArg b)
  48 {
  49     TCGOp *op = tcg_emit_op(opc);
  50     TCGOP_VECL(op) = type - TCG_TYPE_V64;
  51     TCGOP_VECE(op) = vece;
  52     op->args[0] = r;
  53     op->args[1] = a;
  54     op->args[2] = b;
  55 }
  56
  57 void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
  58                TCGArg r, TCGArg a, TCGArg b, TCGArg c)
  59 {
  60     TCGOp *op = tcg_emit_op(opc);
  61     TCGOP_VECL(op) = type - TCG_TYPE_V64;
  62     TCGOP_VECE(op) = vece;
  63     op->args[0] = r;
  64     op->args[1] = a;
  65     op->args[2] = b;
  66     op->args[3] = c;
  67 }
  68
  69 static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
  70 {
  71     TCGTemp *rt = tcgv_vec_temp(r);
  72     TCGTemp *at = tcgv_vec_temp(a);
  73     TCGType type = rt->base_type;
  74
  75     /* Must enough inputs for the output.  */
  76     tcg_debug_assert(at->base_type >= type);
  77     vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
  78 }
  79
  80 static void vec_gen_op3(TCGOpcode opc, unsigned vece,
  81                         TCGv_vec r, TCGv_vec a, TCGv_vec b)
  82 {
  83     TCGTemp *rt = tcgv_vec_temp(r);
  84     TCGTemp *at = tcgv_vec_temp(a);
  85     TCGTemp *bt = tcgv_vec_temp(b);
  86     TCGType type = rt->base_type;
  87
  88     /* Must enough inputs for the output.  */
  89     tcg_debug_assert(at->base_type >= type);
  90     tcg_debug_assert(bt->base_type >= type);
  91     vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
  92 }
  93
  94 void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
  95 {
  96     if (r != a) {
  97         vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
  98     }
  99 }
 100
/* Element size selected by TCG_TARGET_REG_BITS: MO_64 for 64, else MO_32. */
#define MO_REG  (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32)
 102
 103 static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
 104 {
 105     TCGTemp *rt = tcgv_vec_temp(r);
 106     vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a);
 107 }
 108
 109 TCGv_vec tcg_const_zeros_vec(TCGType type)
 110 {
 111     TCGv_vec ret = tcg_temp_new_vec(type);
 112     do_dupi_vec(ret, MO_REG, 0);
 113     return ret;
 114 }
 115
 116 TCGv_vec tcg_const_ones_vec(TCGType type)
 117 {
 118     TCGv_vec ret = tcg_temp_new_vec(type);
 119     do_dupi_vec(ret, MO_REG, -1);
 120     return ret;
 121 }
 122
 123 TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
 124 {
 125     TCGTemp *t = tcgv_vec_temp(m);
 126     return tcg_const_zeros_vec(t->base_type);
 127 }
 128
 129 TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
 130 {
 131     TCGTemp *t = tcgv_vec_temp(m);
 132     return tcg_const_ones_vec(t->base_type);
 133 }
 134
 135 void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
 136 {
 137     if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
 138         do_dupi_vec(r, MO_32, a);
 139     } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
 140         do_dupi_vec(r, MO_64, a);
 141     } else {
 142         TCGv_i64 c = tcg_const_i64(a);
 143         tcg_gen_dup_i64_vec(MO_64, r, c);
 144         tcg_temp_free_i64(c);
 145     }
 146 }
 147
 148 void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
 149 {
 150     do_dupi_vec(r, MO_REG, dup_const(MO_32, a));
 151 }
 152
 153 void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
 154 {
 155     do_dupi_vec(r, MO_REG, dup_const(MO_16, a));
 156 }
 157
 158 void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
 159 {
 160     do_dupi_vec(r, MO_REG, dup_const(MO_8, a));
 161 }
 162
 163 void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
 164 {
 165     do_dupi_vec(r, MO_REG, dup_const(vece, a));
 166 }
 167
 168 void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
 169 {
 170     TCGArg ri = tcgv_vec_arg(r);
 171     TCGTemp *rt = arg_temp(ri);
 172     TCGType type = rt->base_type;
 173
 174     if (TCG_TARGET_REG_BITS == 64) {
 175         TCGArg ai = tcgv_i64_arg(a);
 176         vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 177     } else if (vece == MO_64) {
 178         TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
 179         TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
 180         vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
 181     } else {
 182         TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
 183         vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 184     }
 185 }
 186
 187 void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
 188 {
 189     TCGArg ri = tcgv_vec_arg(r);
 190     TCGArg ai = tcgv_i32_arg(a);
 191     TCGTemp *rt = arg_temp(ri);
 192     TCGType type = rt->base_type;
 193
 194     vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 195 }
 196
 197 static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
 198 {
 199     TCGArg ri = tcgv_vec_arg(r);
 200     TCGArg bi = tcgv_ptr_arg(b);
 201     TCGTemp *rt = arg_temp(ri);
 202     TCGType type = rt->base_type;
 203
 204     vec_gen_3(opc, type, 0, ri, bi, o);
 205 }
 206
 207 void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
 208 {
 209     vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
 210 }
 211
 212 void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
 213 {
 214     vec_gen_ldst(INDEX_op_st_vec, r, b, o);
 215 }
 216
 217 void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
 218 {
 219     TCGArg ri = tcgv_vec_arg(r);
 220     TCGArg bi = tcgv_ptr_arg(b);
 221     TCGTemp *rt = arg_temp(ri);
 222     TCGType type = rt->base_type;
 223
 224     tcg_debug_assert(low_type >= TCG_TYPE_V64);
 225     tcg_debug_assert(low_type <= type);
 226     vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
 227 }
 228
 229 void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 230 {
 231     vec_gen_op3(INDEX_op_add_vec, vece, r, a, b);
 232 }
 233
 234 void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 235 {
 236     vec_gen_op3(INDEX_op_sub_vec, vece, r, a, b);
 237 }
 238
 239 void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 240 {
 241     vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
 242 }
 243
 244 void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 245 {
 246     vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
 247 }
 248
 249 void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 250 {
 251     vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
 252 }
 253
 254 void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 255 {
 256     if (TCG_TARGET_HAS_andc_vec) {
 257         vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
 258     } else {
 259         TCGv_vec t = tcg_temp_new_vec_matching(r);
 260         tcg_gen_not_vec(0, t, b);
 261         tcg_gen_and_vec(0, r, a, t);
 262         tcg_temp_free_vec(t);
 263     }
 264 }
 265
 266 void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 267 {
 268     if (TCG_TARGET_HAS_orc_vec) {
 269         vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
 270     } else {
 271         TCGv_vec t = tcg_temp_new_vec_matching(r);
 272         tcg_gen_not_vec(0, t, b);
 273         tcg_gen_or_vec(0, r, a, t);
 274         tcg_temp_free_vec(t);
 275     }
 276 }
 277
 278 void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 279 {
 280     /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend supports it. */
 281     tcg_gen_and_vec(0, r, a, b);
 282     tcg_gen_not_vec(0, r, r);
 283 }
 284
 285 void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 286 {
 287     /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend supports it. */
 288     tcg_gen_or_vec(0, r, a, b);
 289     tcg_gen_not_vec(0, r, r);
 290 }
 291
 292 void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 293 {
 294     /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend supports it. */
 295     tcg_gen_xor_vec(0, r, a, b);
 296     tcg_gen_not_vec(0, r, r);
 297 }
 298
 299 void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
 300 {
 301     if (TCG_TARGET_HAS_not_vec) {
 302         vec_gen_op2(INDEX_op_not_vec, 0, r, a);
 303     } else {
 304         TCGv_vec t = tcg_const_ones_vec_matching(r);
 305         tcg_gen_xor_vec(0, r, a, t);
 306         tcg_temp_free_vec(t);
 307     }
 308 }
 309
 310 void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
 311 {
 312     if (TCG_TARGET_HAS_neg_vec) {
 313         vec_gen_op2(INDEX_op_neg_vec, vece, r, a);
 314     } else {
 315         TCGv_vec t = tcg_const_zeros_vec_matching(r);
 316         tcg_gen_sub_vec(vece, r, t, a);
 317         tcg_temp_free_vec(t);
 318     }
 319 }
 320
 321 static void do_shifti(TCGOpcode opc, unsigned vece,
 322                       TCGv_vec r, TCGv_vec a, int64_t i)
 323 {
 324     TCGTemp *rt = tcgv_vec_temp(r);
 325     TCGTemp *at = tcgv_vec_temp(a);
 326     TCGArg ri = temp_arg(rt);
 327     TCGArg ai = temp_arg(at);
 328     TCGType type = rt->base_type;
 329     int can;
 330
 331     tcg_debug_assert(at->base_type == type);
 332     tcg_debug_assert(i >= 0 && i < (8 << vece));
 333
 334     if (i == 0) {
 335         tcg_gen_mov_vec(r, a);
 336         return;
 337     }
 338
 339     can = tcg_can_emit_vec_op(opc, type, vece);
 340     if (can > 0) {
 341         vec_gen_3(opc, type, vece, ri, ai, i);
 342     } else {
 343         /* We leave the choice of expansion via scalar or vector shift
 344            to the target.  Often, but not always, dupi can feed a vector
 345            shift easier than a scalar.  */
 346         tcg_debug_assert(can < 0);
 347         tcg_expand_vec_op(opc, type, vece, ri, ai, i);
 348     }
 349 }
 350
 351 void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 352 {
 353     do_shifti(INDEX_op_shli_vec, vece, r, a, i);
 354 }
 355
 356 void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 357 {
 358     do_shifti(INDEX_op_shri_vec, vece, r, a, i);
 359 }
 360
 361 void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 362 {
 363     do_shifti(INDEX_op_sari_vec, vece, r, a, i);
 364 }
 365
 366 void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
 367                      TCGv_vec r, TCGv_vec a, TCGv_vec b)
 368 {
 369     TCGTemp *rt = tcgv_vec_temp(r);
 370     TCGTemp *at = tcgv_vec_temp(a);
 371     TCGTemp *bt = tcgv_vec_temp(b);
 372     TCGArg ri = temp_arg(rt);
 373     TCGArg ai = temp_arg(at);
 374     TCGArg bi = temp_arg(bt);
 375     TCGType type = rt->base_type;
 376     int can;
 377
 378     tcg_debug_assert(at->base_type >= type);
 379     tcg_debug_assert(bt->base_type >= type);
 380     can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
 381     if (can > 0) {
 382         vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
 383     } else {
 384         tcg_debug_assert(can < 0);
 385         tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
 386     }
 387 }
 388
 389 static void do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
 390                    TCGv_vec b, TCGOpcode opc)
 391 {
 392     TCGTemp *rt = tcgv_vec_temp(r);
 393     TCGTemp *at = tcgv_vec_temp(a);
 394     TCGTemp *bt = tcgv_vec_temp(b);
 395     TCGArg ri = temp_arg(rt);
 396     TCGArg ai = temp_arg(at);
 397     TCGArg bi = temp_arg(bt);
 398     TCGType type = rt->base_type;
 399     int can;
 400
 401     tcg_debug_assert(at->base_type >= type);
 402     tcg_debug_assert(bt->base_type >= type);
 403     can = tcg_can_emit_vec_op(opc, type, vece);
 404     if (can > 0) {
 405         vec_gen_3(opc, type, vece, ri, ai, bi);
 406     } else {
 407         tcg_debug_assert(can < 0);
 408         tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
 409     }
 410 }
 411
 412 void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 413 {
 414     do_op3(vece, r, a, b, INDEX_op_mul_vec);
 415 }
 416
 417 void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 418 {
 419     do_op3(vece, r, a, b, INDEX_op_ssadd_vec);
 420 }
 421
 422 void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 423 {
 424     do_op3(vece, r, a, b, INDEX_op_usadd_vec);
 425 }
 426
 427 void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 428 {
 429     do_op3(vece, r, a, b, INDEX_op_sssub_vec);
 430 }
 431
 432 void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 433 {
 434     do_op3(vece, r, a, b, INDEX_op_ussub_vec);
 435 }
 436
 437 void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 438 {
 439     do_op3(vece, r, a, b, INDEX_op_smin_vec);
 440 }
 441
 442 void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 443 {
 444     do_op3(vece, r, a, b, INDEX_op_umin_vec);
 445 }
 446
 447 void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 448 {
 449     do_op3(vece, r, a, b, INDEX_op_smax_vec);
 450 }
 451
 452 void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 453 {
 454     do_op3(vece, r, a, b, INDEX_op_umax_vec);
 455 }