tcg/optimize.c

   1 /*
   2  * Optimizations for Tiny Code Generator for QEMU
   3  *
   4  * Copyright (c) 2010 Samsung Electronics.
   5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a copy
   8  * of this software and associated documentation files (the "Software"), to deal
   9  * in the Software without restriction, including without limitation the rights
  10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11  * copies of the Software, and to permit persons to whom the Software is
  12  * furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included in
  15  * all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23  * THE SOFTWARE.
  24  */
  25
  26 #include "qemu/osdep.h"
  27 #include "qemu/int128.h"
  28 #include "tcg/tcg-op.h"
  29 #include "tcg-internal.h"
  30
/*
 * Expand to the case labels for the _i32 and _i64 variants of opcode @x.
 * The last label is emitted without its colon, so the macro is written
 * as "CASE_OP_32_64(add):" at the point of use.
 */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

/* As CASE_OP_32_64, additionally covering the _vec variant of @x. */
#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)
  39
/*
 * Per-temporary optimizer state, reached through TCGTemp.state_ptr
 * (see ts_info).
 */
typedef struct TempOptInfo {
    /* True when the temp is known to hold the constant in @val. */
    bool is_const;
    /*
     * Links of a circular, doubly-linked list of temps known to hold the
     * same value.  A temp with no copies points at itself in both fields.
     */
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    /* The known value; set together with @is_const. */
    uint64_t val;
    uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
    uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
} TempOptInfo;
  48
/* State carried by the optimizer while it walks the ops. */
typedef struct OptContext {
    /* The TCG context whose ops are being optimized. */
    TCGContext *tcg;
    /*
     * Cleared at the end of each basic block.
     * NOTE(review): presumably the most recent memory-barrier op; the
     * users are outside this part of the file -- confirm.
     */
    TCGOp *prev_mb;
    /* Bitmap of temps whose TempOptInfo has been initialized. */
    TCGTempSet temps_used;

    /* In flight values from optimization. */
    uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
    uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
    uint64_t s_mask;  /* mask of clrsb(value) bits */
    /* Type of the operation currently being folded. */
    TCGType type;
} OptContext;
  60
  61 /* Calculate the smask for a specific value. */
  62 static uint64_t smask_from_value(uint64_t value)
  63 {
  64     int rep = clrsb64(value);
  65     return ~(~0ull >> rep);
  66 }
  67
  68 /*
  69  * Calculate the smask for a given set of known-zeros.
  70  * If there are lots of zeros on the left, we can consider the remainder
  71  * an unsigned field, and thus the corresponding signed field is one bit
  72  * larger.
  73  */
  74 static uint64_t smask_from_zmask(uint64_t zmask)
  75 {
  76     /*
  77      * Only the 0 bits are significant for zmask, thus the msb itself
  78      * must be zero, else we have no sign information.
  79      */
  80     int rep = clz64(zmask);
  81     if (rep == 0) {
  82         return 0;
  83     }
  84     rep -= 1;
  85     return ~(~0ull >> rep);
  86 }
  87
  88 /*
  89  * Recreate a properly left-aligned smask after manipulation.
  90  * Some bit-shuffling, particularly shifts and rotates, may
  91  * retain sign bits on the left, but may scatter disconnected
  92  * sign bits on the right.  Retain only what remains to the left.
  93  */
  94 static uint64_t smask_from_smask(int64_t smask)
  95 {
  96     /* Only the 1 bits are significant for smask */
  97     return smask_from_zmask(~smask);
  98 }
  99
 100 static inline TempOptInfo *ts_info(TCGTemp *ts)
 101 {
 102     return ts->state_ptr;
 103 }
 104
 105 static inline TempOptInfo *arg_info(TCGArg arg)
 106 {
 107     return ts_info(arg_temp(arg));
 108 }
 109
 110 static inline bool ts_is_const(TCGTemp *ts)
 111 {
 112     return ts_info(ts)->is_const;
 113 }
 114
 115 static inline bool arg_is_const(TCGArg arg)
 116 {
 117     return ts_is_const(arg_temp(arg));
 118 }
 119
 120 static inline bool ts_is_copy(TCGTemp *ts)
 121 {
 122     return ts_info(ts)->next_copy != ts;
 123 }
 124
 125 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
 126 static void reset_ts(TCGTemp *ts)
 127 {
 128     TempOptInfo *ti = ts_info(ts);
 129     TempOptInfo *pi = ts_info(ti->prev_copy);
 130     TempOptInfo *ni = ts_info(ti->next_copy);
 131
 132     ni->prev_copy = ti->prev_copy;
 133     pi->next_copy = ti->next_copy;
 134     ti->next_copy = ts;
 135     ti->prev_copy = ts;
 136     ti->is_const = false;
 137     ti->z_mask = -1;
 138     ti->s_mask = 0;
 139 }
 140
 141 static void reset_temp(TCGArg arg)
 142 {
 143     reset_ts(arg_temp(arg));
 144 }
 145
 146 /* Initialize and activate a temporary.  */
 147 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
 148 {
 149     size_t idx = temp_idx(ts);
 150     TempOptInfo *ti;
 151
 152     if (test_bit(idx, ctx->temps_used.l)) {
 153         return;
 154     }
 155     set_bit(idx, ctx->temps_used.l);
 156
 157     ti = ts->state_ptr;
 158     if (ti == NULL) {
 159         ti = tcg_malloc(sizeof(TempOptInfo));
 160         ts->state_ptr = ti;
 161     }
 162
 163     ti->next_copy = ts;
 164     ti->prev_copy = ts;
 165     if (ts->kind == TEMP_CONST) {
 166         ti->is_const = true;
 167         ti->val = ts->val;
 168         ti->z_mask = ts->val;
 169         ti->s_mask = smask_from_value(ts->val);
 170     } else {
 171         ti->is_const = false;
 172         ti->z_mask = -1;
 173         ti->s_mask = 0;
 174     }
 175 }
 176
 177 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
 178 {
 179     TCGTemp *i, *g, *l;
 180
 181     /* If this is already readonly, we can't do better. */
 182     if (temp_readonly(ts)) {
 183         return ts;
 184     }
 185
 186     g = l = NULL;
 187     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
 188         if (temp_readonly(i)) {
 189             return i;
 190         } else if (i->kind > ts->kind) {
 191             if (i->kind == TEMP_GLOBAL) {
 192                 g = i;
 193             } else if (i->kind == TEMP_LOCAL) {
 194                 l = i;
 195             }
 196         }
 197     }
 198
 199     /* If we didn't find a better representation, return the same temp. */
 200     return g ? g : l ? l : ts;
 201 }
 202
 203 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
 204 {
 205     TCGTemp *i;
 206
 207     if (ts1 == ts2) {
 208         return true;
 209     }
 210
 211     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
 212         return false;
 213     }
 214
 215     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
 216         if (i == ts2) {
 217             return true;
 218         }
 219     }
 220
 221     return false;
 222 }
 223
 224 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
 225 {
 226     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
 227 }
 228
 229 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
 230 {
 231     TCGTemp *dst_ts = arg_temp(dst);
 232     TCGTemp *src_ts = arg_temp(src);
 233     TempOptInfo *di;
 234     TempOptInfo *si;
 235     TCGOpcode new_op;
 236
 237     if (ts_are_copies(dst_ts, src_ts)) {
 238         tcg_op_remove(ctx->tcg, op);
 239         return true;
 240     }
 241
 242     reset_ts(dst_ts);
 243     di = ts_info(dst_ts);
 244     si = ts_info(src_ts);
 245
 246     switch (ctx->type) {
 247     case TCG_TYPE_I32:
 248         new_op = INDEX_op_mov_i32;
 249         break;
 250     case TCG_TYPE_I64:
 251         new_op = INDEX_op_mov_i64;
 252         break;
 253     case TCG_TYPE_V64:
 254     case TCG_TYPE_V128:
 255     case TCG_TYPE_V256:
 256         /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
 257         new_op = INDEX_op_mov_vec;
 258         break;
 259     default:
 260         g_assert_not_reached();
 261     }
 262     op->opc = new_op;
 263     op->args[0] = dst;
 264     op->args[1] = src;
 265
 266     di->z_mask = si->z_mask;
 267     di->s_mask = si->s_mask;
 268
 269     if (src_ts->type == dst_ts->type) {
 270         TempOptInfo *ni = ts_info(si->next_copy);
 271
 272         di->next_copy = si->next_copy;
 273         di->prev_copy = src_ts;
 274         ni->prev_copy = dst_ts;
 275         si->next_copy = dst_ts;
 276         di->is_const = si->is_const;
 277         di->val = si->val;
 278     }
 279     return true;
 280 }
 281
 282 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
 283                              TCGArg dst, uint64_t val)
 284 {
 285     TCGTemp *tv;
 286
 287     if (ctx->type == TCG_TYPE_I32) {
 288         val = (int32_t)val;
 289     }
 290
 291     /* Convert movi to mov with constant temp. */
 292     tv = tcg_constant_internal(ctx->type, val);
 293     init_ts_info(ctx, tv);
 294     return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
 295 }
 296
 297 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
 298 {
 299     uint64_t l64, h64;
 300
 301     switch (op) {
 302     CASE_OP_32_64(add):
 303         return x + y;
 304
 305     CASE_OP_32_64(sub):
 306         return x - y;
 307
 308     CASE_OP_32_64(mul):
 309         return x * y;
 310
 311     CASE_OP_32_64_VEC(and):
 312         return x & y;
 313
 314     CASE_OP_32_64_VEC(or):
 315         return x | y;
 316
 317     CASE_OP_32_64_VEC(xor):
 318         return x ^ y;
 319
 320     case INDEX_op_shl_i32:
 321         return (uint32_t)x << (y & 31);
 322
 323     case INDEX_op_shl_i64:
 324         return (uint64_t)x << (y & 63);
 325
 326     case INDEX_op_shr_i32:
 327         return (uint32_t)x >> (y & 31);
 328
 329     case INDEX_op_shr_i64:
 330         return (uint64_t)x >> (y & 63);
 331
 332     case INDEX_op_sar_i32:
 333         return (int32_t)x >> (y & 31);
 334
 335     case INDEX_op_sar_i64:
 336         return (int64_t)x >> (y & 63);
 337
 338     case INDEX_op_rotr_i32:
 339         return ror32(x, y & 31);
 340
 341     case INDEX_op_rotr_i64:
 342         return ror64(x, y & 63);
 343
 344     case INDEX_op_rotl_i32:
 345         return rol32(x, y & 31);
 346
 347     case INDEX_op_rotl_i64:
 348         return rol64(x, y & 63);
 349
 350     CASE_OP_32_64_VEC(not):
 351         return ~x;
 352
 353     CASE_OP_32_64(neg):
 354         return -x;
 355
 356     CASE_OP_32_64_VEC(andc):
 357         return x & ~y;
 358
 359     CASE_OP_32_64_VEC(orc):
 360         return x | ~y;
 361
 362     CASE_OP_32_64_VEC(eqv):
 363         return ~(x ^ y);
 364
 365     CASE_OP_32_64_VEC(nand):
 366         return ~(x & y);
 367
 368     CASE_OP_32_64_VEC(nor):
 369         return ~(x | y);
 370
 371     case INDEX_op_clz_i32:
 372         return (uint32_t)x ? clz32(x) : y;
 373
 374     case INDEX_op_clz_i64:
 375         return x ? clz64(x) : y;
 376
 377     case INDEX_op_ctz_i32:
 378         return (uint32_t)x ? ctz32(x) : y;
 379
 380     case INDEX_op_ctz_i64:
 381         return x ? ctz64(x) : y;
 382
 383     case INDEX_op_ctpop_i32:
 384         return ctpop32(x);
 385
 386     case INDEX_op_ctpop_i64:
 387         return ctpop64(x);
 388
 389     CASE_OP_32_64(ext8s):
 390         return (int8_t)x;
 391
 392     CASE_OP_32_64(ext16s):
 393         return (int16_t)x;
 394
 395     CASE_OP_32_64(ext8u):
 396         return (uint8_t)x;
 397
 398     CASE_OP_32_64(ext16u):
 399         return (uint16_t)x;
 400
 401     CASE_OP_32_64(bswap16):
 402         x = bswap16(x);
 403         return y & TCG_BSWAP_OS ? (int16_t)x : x;
 404
 405     CASE_OP_32_64(bswap32):
 406         x = bswap32(x);
 407         return y & TCG_BSWAP_OS ? (int32_t)x : x;
 408
 409     case INDEX_op_bswap64_i64:
 410         return bswap64(x);
 411
 412     case INDEX_op_ext_i32_i64:
 413     case INDEX_op_ext32s_i64:
 414         return (int32_t)x;
 415
 416     case INDEX_op_extu_i32_i64:
 417     case INDEX_op_extrl_i64_i32:
 418     case INDEX_op_ext32u_i64:
 419         return (uint32_t)x;
 420
 421     case INDEX_op_extrh_i64_i32:
 422         return (uint64_t)x >> 32;
 423
 424     case INDEX_op_muluh_i32:
 425         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
 426     case INDEX_op_mulsh_i32:
 427         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
 428
 429     case INDEX_op_muluh_i64:
 430         mulu64(&l64, &h64, x, y);
 431         return h64;
 432     case INDEX_op_mulsh_i64:
 433         muls64(&l64, &h64, x, y);
 434         return h64;
 435
 436     case INDEX_op_div_i32:
 437         /* Avoid crashing on divide by zero, otherwise undefined.  */
 438         return (int32_t)x / ((int32_t)y ? : 1);
 439     case INDEX_op_divu_i32:
 440         return (uint32_t)x / ((uint32_t)y ? : 1);
 441     case INDEX_op_div_i64:
 442         return (int64_t)x / ((int64_t)y ? : 1);
 443     case INDEX_op_divu_i64:
 444         return (uint64_t)x / ((uint64_t)y ? : 1);
 445
 446     case INDEX_op_rem_i32:
 447         return (int32_t)x % ((int32_t)y ? : 1);
 448     case INDEX_op_remu_i32:
 449         return (uint32_t)x % ((uint32_t)y ? : 1);
 450     case INDEX_op_rem_i64:
 451         return (int64_t)x % ((int64_t)y ? : 1);
 452     case INDEX_op_remu_i64:
 453         return (uint64_t)x % ((uint64_t)y ? : 1);
 454
 455     default:
 456         fprintf(stderr,
 457                 "Unrecognized operation %d in do_constant_folding.\n", op);
 458         tcg_abort();
 459     }
 460 }
 461
 462 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
 463                                     uint64_t x, uint64_t y)
 464 {
 465     uint64_t res = do_constant_folding_2(op, x, y);
 466     if (type == TCG_TYPE_I32) {
 467         res = (int32_t)res;
 468     }
 469     return res;
 470 }
 471
 472 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
 473 {
 474     switch (c) {
 475     case TCG_COND_EQ:
 476         return x == y;
 477     case TCG_COND_NE:
 478         return x != y;
 479     case TCG_COND_LT:
 480         return (int32_t)x < (int32_t)y;
 481     case TCG_COND_GE:
 482         return (int32_t)x >= (int32_t)y;
 483     case TCG_COND_LE:
 484         return (int32_t)x <= (int32_t)y;
 485     case TCG_COND_GT:
 486         return (int32_t)x > (int32_t)y;
 487     case TCG_COND_LTU:
 488         return x < y;
 489     case TCG_COND_GEU:
 490         return x >= y;
 491     case TCG_COND_LEU:
 492         return x <= y;
 493     case TCG_COND_GTU:
 494         return x > y;
 495     default:
 496         tcg_abort();
 497     }
 498 }
 499
 500 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
 501 {
 502     switch (c) {
 503     case TCG_COND_EQ:
 504         return x == y;
 505     case TCG_COND_NE:
 506         return x != y;
 507     case TCG_COND_LT:
 508         return (int64_t)x < (int64_t)y;
 509     case TCG_COND_GE:
 510         return (int64_t)x >= (int64_t)y;
 511     case TCG_COND_LE:
 512         return (int64_t)x <= (int64_t)y;
 513     case TCG_COND_GT:
 514         return (int64_t)x > (int64_t)y;
 515     case TCG_COND_LTU:
 516         return x < y;
 517     case TCG_COND_GEU:
 518         return x >= y;
 519     case TCG_COND_LEU:
 520         return x <= y;
 521     case TCG_COND_GTU:
 522         return x > y;
 523     default:
 524         tcg_abort();
 525     }
 526 }
 527
 528 static bool do_constant_folding_cond_eq(TCGCond c)
 529 {
 530     switch (c) {
 531     case TCG_COND_GT:
 532     case TCG_COND_LTU:
 533     case TCG_COND_LT:
 534     case TCG_COND_GTU:
 535     case TCG_COND_NE:
 536         return 0;
 537     case TCG_COND_GE:
 538     case TCG_COND_GEU:
 539     case TCG_COND_LE:
 540     case TCG_COND_LEU:
 541     case TCG_COND_EQ:
 542         return 1;
 543     default:
 544         tcg_abort();
 545     }
 546 }
 547
 548 /*
 549  * Return -1 if the condition can't be simplified,
 550  * and the result of the condition (0 or 1) if it can.
 551  */
 552 static int do_constant_folding_cond(TCGType type, TCGArg x,
 553                                     TCGArg y, TCGCond c)
 554 {
 555     if (arg_is_const(x) && arg_is_const(y)) {
 556         uint64_t xv = arg_info(x)->val;
 557         uint64_t yv = arg_info(y)->val;
 558
 559         switch (type) {
 560         case TCG_TYPE_I32:
 561             return do_constant_folding_cond_32(xv, yv, c);
 562         case TCG_TYPE_I64:
 563             return do_constant_folding_cond_64(xv, yv, c);
 564         default:
 565             /* Only scalar comparisons are optimizable */
 566             return -1;
 567         }
 568     } else if (args_are_copies(x, y)) {
 569         return do_constant_folding_cond_eq(c);
 570     } else if (arg_is_const(y) && arg_info(y)->val == 0) {
 571         switch (c) {
 572         case TCG_COND_LTU:
 573             return 0;
 574         case TCG_COND_GEU:
 575             return 1;
 576         default:
 577             return -1;
 578         }
 579     }
 580     return -1;
 581 }
 582
 583 /*
 584  * Return -1 if the condition can't be simplified,
 585  * and the result of the condition (0 or 1) if it can.
 586  */
 587 static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
 588 {
 589     TCGArg al = p1[0], ah = p1[1];
 590     TCGArg bl = p2[0], bh = p2[1];
 591
 592     if (arg_is_const(bl) && arg_is_const(bh)) {
 593         tcg_target_ulong blv = arg_info(bl)->val;
 594         tcg_target_ulong bhv = arg_info(bh)->val;
 595         uint64_t b = deposit64(blv, 32, 32, bhv);
 596
 597         if (arg_is_const(al) && arg_is_const(ah)) {
 598             tcg_target_ulong alv = arg_info(al)->val;
 599             tcg_target_ulong ahv = arg_info(ah)->val;
 600             uint64_t a = deposit64(alv, 32, 32, ahv);
 601             return do_constant_folding_cond_64(a, b, c);
 602         }
 603         if (b == 0) {
 604             switch (c) {
 605             case TCG_COND_LTU:
 606                 return 0;
 607             case TCG_COND_GEU:
 608                 return 1;
 609             default:
 610                 break;
 611             }
 612         }
 613     }
 614     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
 615         return do_constant_folding_cond_eq(c);
 616     }
 617     return -1;
 618 }
 619
 620 /**
 621  * swap_commutative:
 622  * @dest: TCGArg of the destination argument, or NO_DEST.
 623  * @p1: first paired argument
 624  * @p2: second paired argument
 625  *
 626  * If *@p1 is a constant and *@p2 is not, swap.
 627  * If *@p2 matches @dest, swap.
 628  * Return true if a swap was performed.
 629  */
 630
 631 #define NO_DEST  temp_arg(NULL)
 632
 633 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 634 {
 635     TCGArg a1 = *p1, a2 = *p2;
 636     int sum = 0;
 637     sum += arg_is_const(a1);
 638     sum -= arg_is_const(a2);
 639
 640     /* Prefer the constant in second argument, and then the form
 641        op a, a, b, which is better handled on non-RISC hosts. */
 642     if (sum > 0 || (sum == 0 && dest == a2)) {
 643         *p1 = a2;
 644         *p2 = a1;
 645         return true;
 646     }
 647     return false;
 648 }
 649
 650 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 651 {
 652     int sum = 0;
 653     sum += arg_is_const(p1[0]);
 654     sum += arg_is_const(p1[1]);
 655     sum -= arg_is_const(p2[0]);
 656     sum -= arg_is_const(p2[1]);
 657     if (sum > 0) {
 658         TCGArg t;
 659         t = p1[0], p1[0] = p2[0], p2[0] = t;
 660         t = p1[1], p1[1] = p2[1], p2[1] = t;
 661         return true;
 662     }
 663     return false;
 664 }
 665
 666 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
 667 {
 668     for (int i = 0; i < nb_args; i++) {
 669         TCGTemp *ts = arg_temp(op->args[i]);
 670         if (ts) {
 671             init_ts_info(ctx, ts);
 672         }
 673     }
 674 }
 675
 676 static void copy_propagate(OptContext *ctx, TCGOp *op,
 677                            int nb_oargs, int nb_iargs)
 678 {
 679     TCGContext *s = ctx->tcg;
 680
 681     for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
 682         TCGTemp *ts = arg_temp(op->args[i]);
 683         if (ts && ts_is_copy(ts)) {
 684             op->args[i] = temp_arg(find_better_copy(s, ts));
 685         }
 686     }
 687 }
 688
 689 static void finish_folding(OptContext *ctx, TCGOp *op)
 690 {
 691     const TCGOpDef *def = &tcg_op_defs[op->opc];
 692     int i, nb_oargs;
 693
 694     /*
 695      * For an opcode that ends a BB, reset all temp data.
 696      * We do no cross-BB optimization.
 697      */
 698     if (def->flags & TCG_OPF_BB_END) {
 699         memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
 700         ctx->prev_mb = NULL;
 701         return;
 702     }
 703
 704     nb_oargs = def->nb_oargs;
 705     for (i = 0; i < nb_oargs; i++) {
 706         TCGTemp *ts = arg_temp(op->args[i]);
 707         reset_ts(ts);
 708         /*
 709          * Save the corresponding known-zero/sign bits mask for the
 710          * first output argument (only one supported so far).
 711          */
 712         if (i == 0) {
 713             ts_info(ts)->z_mask = ctx->z_mask;
 714             ts_info(ts)->s_mask = ctx->s_mask;
 715         }
 716     }
 717 }
 718
 719 /*
 720  * The fold_* functions return true when processing is complete,
 721  * usually by folding the operation to a constant or to a copy,
 722  * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
 723  * like collect information about the value produced, for use in
 724  * optimizing a subsequent operation.
 725  *
 726  * These first fold_* functions are all helpers, used by other
 727  * folders for more specific operations.
 728  */
 729
 730 static bool fold_const1(OptContext *ctx, TCGOp *op)
 731 {
 732     if (arg_is_const(op->args[1])) {
 733         uint64_t t;
 734
 735         t = arg_info(op->args[1])->val;
 736         t = do_constant_folding(op->opc, ctx->type, t, 0);
 737         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
 738     }
 739     return false;
 740 }
 741
 742 static bool fold_const2(OptContext *ctx, TCGOp *op)
 743 {
 744     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
 745         uint64_t t1 = arg_info(op->args[1])->val;
 746         uint64_t t2 = arg_info(op->args[2])->val;
 747
 748         t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
 749         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
 750     }
 751     return false;
 752 }
 753
 754 static bool fold_commutative(OptContext *ctx, TCGOp *op)
 755 {
 756     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
 757     return false;
 758 }
 759
 760 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
 761 {
 762     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
 763     return fold_const2(ctx, op);
 764 }
 765
 766 static bool fold_masks(OptContext *ctx, TCGOp *op)
 767 {
 768     uint64_t a_mask = ctx->a_mask;
 769     uint64_t z_mask = ctx->z_mask;
 770     uint64_t s_mask = ctx->s_mask;
 771
 772     /*
 773      * 32-bit ops generate 32-bit results, which for the purpose of
 774      * simplifying tcg are sign-extended.  Certainly that's how we
 775      * represent our constants elsewhere.  Note that the bits will
 776      * be reset properly for a 64-bit value when encountering the
 777      * type changing opcodes.
 778      */
 779     if (ctx->type == TCG_TYPE_I32) {
 780         a_mask = (int32_t)a_mask;
 781         z_mask = (int32_t)z_mask;
 782         s_mask |= MAKE_64BIT_MASK(32, 32);
 783         ctx->z_mask = z_mask;
 784         ctx->s_mask = s_mask;
 785     }
 786
 787     if (z_mask == 0) {
 788         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
 789     }
 790     if (a_mask == 0) {
 791         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 792     }
 793     return false;
 794 }
 795
 796 /*
 797  * Convert @op to NOT, if NOT is supported by the host.
 798  * Return true f the conversion is successful, which will still
 799  * indicate that the processing is complete.
 800  */
 801 static bool fold_not(OptContext *ctx, TCGOp *op);
 802 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
 803 {
 804     TCGOpcode not_op;
 805     bool have_not;
 806
 807     switch (ctx->type) {
 808     case TCG_TYPE_I32:
 809         not_op = INDEX_op_not_i32;
 810         have_not = TCG_TARGET_HAS_not_i32;
 811         break;
 812     case TCG_TYPE_I64:
 813         not_op = INDEX_op_not_i64;
 814         have_not = TCG_TARGET_HAS_not_i64;
 815         break;
 816     case TCG_TYPE_V64:
 817     case TCG_TYPE_V128:
 818     case TCG_TYPE_V256:
 819         not_op = INDEX_op_not_vec;
 820         have_not = TCG_TARGET_HAS_not_vec;
 821         break;
 822     default:
 823         g_assert_not_reached();
 824     }
 825     if (have_not) {
 826         op->opc = not_op;
 827         op->args[1] = op->args[idx];
 828         return fold_not(ctx, op);
 829     }
 830     return false;
 831 }
 832
 833 /* If the binary operation has first argument @i, fold to @i. */
 834 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 835 {
 836     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
 837         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 838     }
 839     return false;
 840 }
 841
 842 /* If the binary operation has first argument @i, fold to NOT. */
 843 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
 844 {
 845     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
 846         return fold_to_not(ctx, op, 2);
 847     }
 848     return false;
 849 }
 850
 851 /* If the binary operation has second argument @i, fold to @i. */
 852 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 853 {
 854     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 855         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 856     }
 857     return false;
 858 }
 859
 860 /* If the binary operation has second argument @i, fold to identity. */
 861 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
 862 {
 863     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 864         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 865     }
 866     return false;
 867 }
 868
 869 /* If the binary operation has second argument @i, fold to NOT. */
 870 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
 871 {
 872     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 873         return fold_to_not(ctx, op, 1);
 874     }
 875     return false;
 876 }
 877
 878 /* If the binary operation has both arguments equal, fold to @i. */
 879 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 880 {
 881     if (args_are_copies(op->args[1], op->args[2])) {
 882         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 883     }
 884     return false;
 885 }
 886
 887 /* If the binary operation has both arguments equal, fold to identity. */
 888 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
 889 {
 890     if (args_are_copies(op->args[1], op->args[2])) {
 891         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 892     }
 893     return false;
 894 }
 895
 896 /*
 897  * These outermost fold_<op> functions are sorted alphabetically.
 898  *
 899  * The ordering of the transformations should be:
 900  *   1) those that produce a constant
 901  *   2) those that produce a copy
 902  *   3) those that produce information about the result value.
 903  */
 904
 905 static bool fold_add(OptContext *ctx, TCGOp *op)
 906 {
 907     if (fold_const2_commutative(ctx, op) ||
 908         fold_xi_to_x(ctx, op, 0)) {
 909         return true;
 910     }
 911     return false;
 912 }
 913
 914 /* We cannot as yet do_constant_folding with vectors. */
 915 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
 916 {
 917     if (fold_commutative(ctx, op) ||
 918         fold_xi_to_x(ctx, op, 0)) {
 919         return true;
 920     }
 921     return false;
 922 }
 923
 924 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
 925 {
 926     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
 927         arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
 928         uint64_t al = arg_info(op->args[2])->val;
 929         uint64_t ah = arg_info(op->args[3])->val;
 930         uint64_t bl = arg_info(op->args[4])->val;
 931         uint64_t bh = arg_info(op->args[5])->val;
 932         TCGArg rl, rh;
 933         TCGOp *op2;
 934
 935         if (ctx->type == TCG_TYPE_I32) {
 936             uint64_t a = deposit64(al, 32, 32, ah);
 937             uint64_t b = deposit64(bl, 32, 32, bh);
 938
 939             if (add) {
 940                 a += b;
 941             } else {
 942                 a -= b;
 943             }
 944
 945             al = sextract64(a, 0, 32);
 946             ah = sextract64(a, 32, 32);
 947         } else {
 948             Int128 a = int128_make128(al, ah);
 949             Int128 b = int128_make128(bl, bh);
 950
 951             if (add) {
 952                 a = int128_add(a, b);
 953             } else {
 954                 a = int128_sub(a, b);
 955             }
 956
 957             al = int128_getlo(a);
 958             ah = int128_gethi(a);
 959         }
 960
 961         rl = op->args[0];
 962         rh = op->args[1];
 963
 964         /* The proper opcode is supplied by tcg_opt_gen_mov. */
 965         op2 = tcg_op_insert_before(ctx->tcg, op, 0);
 966
 967         tcg_opt_gen_movi(ctx, op, rl, al);
 968         tcg_opt_gen_movi(ctx, op2, rh, ah);
 969         return true;
 970     }
 971     return false;
 972 }
 973
 974 static bool fold_add2(OptContext *ctx, TCGOp *op)
 975 {
 976     /* Note that the high and low parts may be independently swapped. */
 977     swap_commutative(op->args[0], &op->args[2], &op->args[4]);
 978     swap_commutative(op->args[1], &op->args[3], &op->args[5]);
 979
 980     return fold_addsub2(ctx, op, true);
 981 }
 982
 983 static bool fold_and(OptContext *ctx, TCGOp *op)
 984 {
 985     uint64_t z1, z2;
 986
 987     if (fold_const2_commutative(ctx, op) ||
 988         fold_xi_to_i(ctx, op, 0) ||
 989         fold_xi_to_x(ctx, op, -1) ||
 990         fold_xx_to_x(ctx, op)) {
 991         return true;
 992     }
 993
 994     z1 = arg_info(op->args[1])->z_mask;
 995     z2 = arg_info(op->args[2])->z_mask;
 996     ctx->z_mask = z1 & z2;
 997
 998     /*
 999      * Sign repetitions are perforce all identical, whether they are 1 or 0.
1000      * Bitwise operations preserve the relative quantity of the repetitions.
1001      */
1002     ctx->s_mask = arg_info(op->args[1])->s_mask
1003                 & arg_info(op->args[2])->s_mask;
1004
1005     /*
1006      * Known-zeros does not imply known-ones.  Therefore unless
1007      * arg2 is constant, we can't infer affected bits from it.
1008      */
1009     if (arg_is_const(op->args[2])) {
1010         ctx->a_mask = z1 & ~z2;
1011     }
1012
1013     return fold_masks(ctx, op);
1014 }
1015
1016 static bool fold_andc(OptContext *ctx, TCGOp *op)
1017 {
1018     uint64_t z1;
1019
1020     if (fold_const2(ctx, op) ||
1021         fold_xx_to_i(ctx, op, 0) ||
1022         fold_xi_to_x(ctx, op, 0) ||
1023         fold_ix_to_not(ctx, op, -1)) {
1024         return true;
1025     }
1026
1027     z1 = arg_info(op->args[1])->z_mask;
1028
1029     /*
1030      * Known-zeros does not imply known-ones.  Therefore unless
1031      * arg2 is constant, we can't infer anything from it.
1032      */
1033     if (arg_is_const(op->args[2])) {
1034         uint64_t z2 = ~arg_info(op->args[2])->z_mask;
1035         ctx->a_mask = z1 & ~z2;
1036         z1 &= z2;
1037     }
1038     ctx->z_mask = z1;
1039
1040     ctx->s_mask = arg_info(op->args[1])->s_mask
1041                 & arg_info(op->args[2])->s_mask;
1042     return fold_masks(ctx, op);
1043 }
1044
1045 static bool fold_brcond(OptContext *ctx, TCGOp *op)
1046 {
1047     TCGCond cond = op->args[2];
1048     int i;
1049
1050     if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
1051         op->args[2] = cond = tcg_swap_cond(cond);
1052     }
1053
1054     i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
1055     if (i == 0) {
1056         tcg_op_remove(ctx->tcg, op);
1057         return true;
1058     }
1059     if (i > 0) {
1060         op->opc = INDEX_op_br;
1061         op->args[0] = op->args[3];
1062     }
1063     return false;
1064 }
1065
1066 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1067 {
1068     TCGCond cond = op->args[4];
1069     TCGArg label = op->args[5];
1070     int i, inv = 0;
1071
1072     if (swap_commutative2(&op->args[0], &op->args[2])) {
1073         op->args[4] = cond = tcg_swap_cond(cond);
1074     }
1075
1076     i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
1077     if (i >= 0) {
1078         goto do_brcond_const;
1079     }
1080
1081     switch (cond) {
1082     case TCG_COND_LT:
1083     case TCG_COND_GE:
1084         /*
1085          * Simplify LT/GE comparisons vs zero to a single compare
1086          * vs the high word of the input.
1087          */
1088         if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
1089             arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
1090             goto do_brcond_high;
1091         }
1092         break;
1093
1094     case TCG_COND_NE:
1095         inv = 1;
1096         QEMU_FALLTHROUGH;
1097     case TCG_COND_EQ:
1098         /*
1099          * Simplify EQ/NE comparisons where one of the pairs
1100          * can be simplified.
1101          */
1102         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1103                                      op->args[2], cond);
1104         switch (i ^ inv) {
1105         case 0:
1106             goto do_brcond_const;
1107         case 1:
1108             goto do_brcond_high;
1109         }
1110
1111         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1112                                      op->args[3], cond);
1113         switch (i ^ inv) {
1114         case 0:
1115             goto do_brcond_const;
1116         case 1:
1117             op->opc = INDEX_op_brcond_i32;
1118             op->args[1] = op->args[2];
1119             op->args[2] = cond;
1120             op->args[3] = label;
1121             break;
1122         }
1123         break;
1124
1125     default:
1126         break;
1127
1128     do_brcond_high:
1129         op->opc = INDEX_op_brcond_i32;
1130         op->args[0] = op->args[1];
1131         op->args[1] = op->args[3];
1132         op->args[2] = cond;
1133         op->args[3] = label;
1134         break;
1135
1136     do_brcond_const:
1137         if (i == 0) {
1138             tcg_op_remove(ctx->tcg, op);
1139             return true;
1140         }
1141         op->opc = INDEX_op_br;
1142         op->args[0] = label;
1143         break;
1144     }
1145     return false;
1146 }
1147
1148 static bool fold_bswap(OptContext *ctx, TCGOp *op)
1149 {
1150     uint64_t z_mask, s_mask, sign;
1151
1152     if (arg_is_const(op->args[1])) {
1153         uint64_t t = arg_info(op->args[1])->val;
1154
1155         t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
1156         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1157     }
1158
1159     z_mask = arg_info(op->args[1])->z_mask;
1160
1161     switch (op->opc) {
1162     case INDEX_op_bswap16_i32:
1163     case INDEX_op_bswap16_i64:
1164         z_mask = bswap16(z_mask);
1165         sign = INT16_MIN;
1166         break;
1167     case INDEX_op_bswap32_i32:
1168     case INDEX_op_bswap32_i64:
1169         z_mask = bswap32(z_mask);
1170         sign = INT32_MIN;
1171         break;
1172     case INDEX_op_bswap64_i64:
1173         z_mask = bswap64(z_mask);
1174         sign = INT64_MIN;
1175         break;
1176     default:
1177         g_assert_not_reached();
1178     }
1179     s_mask = smask_from_zmask(z_mask);
1180
1181     switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1182     case TCG_BSWAP_OZ:
1183         break;
1184     case TCG_BSWAP_OS:
1185         /* If the sign bit may be 1, force all the bits above to 1. */
1186         if (z_mask & sign) {
1187             z_mask |= sign;
1188             s_mask = sign << 1;
1189         }
1190         break;
1191     default:
1192         /* The high bits are undefined: force all bits above the sign to 1. */
1193         z_mask |= sign << 1;
1194         s_mask = 0;
1195         break;
1196     }
1197     ctx->z_mask = z_mask;
1198     ctx->s_mask = s_mask;
1199
1200     return fold_masks(ctx, op);
1201 }
1202
1203 static bool fold_call(OptContext *ctx, TCGOp *op)
1204 {
1205     TCGContext *s = ctx->tcg;
1206     int nb_oargs = TCGOP_CALLO(op);
1207     int nb_iargs = TCGOP_CALLI(op);
1208     int flags, i;
1209
1210     init_arguments(ctx, op, nb_oargs + nb_iargs);
1211     copy_propagate(ctx, op, nb_oargs, nb_iargs);
1212
1213     /* If the function reads or writes globals, reset temp data. */
1214     flags = tcg_call_flags(op);
1215     if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1216         int nb_globals = s->nb_globals;
1217
1218         for (i = 0; i < nb_globals; i++) {
1219             if (test_bit(i, ctx->temps_used.l)) {
1220                 reset_ts(&ctx->tcg->temps[i]);
1221             }
1222         }
1223     }
1224
1225     /* Reset temp data for outputs. */
1226     for (i = 0; i < nb_oargs; i++) {
1227         reset_temp(op->args[i]);
1228     }
1229
1230     /* Stop optimizing MB across calls. */
1231     ctx->prev_mb = NULL;
1232     return true;
1233 }
1234
1235 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1236 {
1237     uint64_t z_mask;
1238
1239     if (arg_is_const(op->args[1])) {
1240         uint64_t t = arg_info(op->args[1])->val;
1241
1242         if (t != 0) {
1243             t = do_constant_folding(op->opc, ctx->type, t, 0);
1244             return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1245         }
1246         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1247     }
1248
1249     switch (ctx->type) {
1250     case TCG_TYPE_I32:
1251         z_mask = 31;
1252         break;
1253     case TCG_TYPE_I64:
1254         z_mask = 63;
1255         break;
1256     default:
1257         g_assert_not_reached();
1258     }
1259     ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
1260     ctx->s_mask = smask_from_zmask(ctx->z_mask);
1261     return false;
1262 }
1263
1264 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1265 {
1266     if (fold_const1(ctx, op)) {
1267         return true;
1268     }
1269
1270     switch (ctx->type) {
1271     case TCG_TYPE_I32:
1272         ctx->z_mask = 32 | 31;
1273         break;
1274     case TCG_TYPE_I64:
1275         ctx->z_mask = 64 | 63;
1276         break;
1277     default:
1278         g_assert_not_reached();
1279     }
1280     ctx->s_mask = smask_from_zmask(ctx->z_mask);
1281     return false;
1282 }
1283
1284 static bool fold_deposit(OptContext *ctx, TCGOp *op)
1285 {
1286     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1287         uint64_t t1 = arg_info(op->args[1])->val;
1288         uint64_t t2 = arg_info(op->args[2])->val;
1289
1290         t1 = deposit64(t1, op->args[3], op->args[4], t2);
1291         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1292     }
1293
1294     ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
1295                             op->args[3], op->args[4],
1296                             arg_info(op->args[2])->z_mask);
1297     return false;
1298 }
1299
1300 static bool fold_divide(OptContext *ctx, TCGOp *op)
1301 {
1302     if (fold_const2(ctx, op) ||
1303         fold_xi_to_x(ctx, op, 1)) {
1304         return true;
1305     }
1306     return false;
1307 }
1308
1309 static bool fold_dup(OptContext *ctx, TCGOp *op)
1310 {
1311     if (arg_is_const(op->args[1])) {
1312         uint64_t t = arg_info(op->args[1])->val;
1313         t = dup_const(TCGOP_VECE(op), t);
1314         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1315     }
1316     return false;
1317 }
1318
1319 static bool fold_dup2(OptContext *ctx, TCGOp *op)
1320 {
1321     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1322         uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1323                                arg_info(op->args[2])->val);
1324         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1325     }
1326
1327     if (args_are_copies(op->args[1], op->args[2])) {
1328         op->opc = INDEX_op_dup_vec;
1329         TCGOP_VECE(op) = MO_32;
1330     }
1331     return false;
1332 }
1333
1334 static bool fold_eqv(OptContext *ctx, TCGOp *op)
1335 {
1336     if (fold_const2_commutative(ctx, op) ||
1337         fold_xi_to_x(ctx, op, -1) ||
1338         fold_xi_to_not(ctx, op, 0)) {
1339         return true;
1340     }
1341
1342     ctx->s_mask = arg_info(op->args[1])->s_mask
1343                 & arg_info(op->args[2])->s_mask;
1344     return false;
1345 }
1346
1347 static bool fold_extract(OptContext *ctx, TCGOp *op)
1348 {
1349     uint64_t z_mask_old, z_mask;
1350     int pos = op->args[2];
1351     int len = op->args[3];
1352
1353     if (arg_is_const(op->args[1])) {
1354         uint64_t t;
1355
1356         t = arg_info(op->args[1])->val;
1357         t = extract64(t, pos, len);
1358         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1359     }
1360
1361     z_mask_old = arg_info(op->args[1])->z_mask;
1362     z_mask = extract64(z_mask_old, pos, len);
1363     if (pos == 0) {
1364         ctx->a_mask = z_mask_old ^ z_mask;
1365     }
1366     ctx->z_mask = z_mask;
1367     ctx->s_mask = smask_from_zmask(z_mask);
1368
1369     return fold_masks(ctx, op);
1370 }
1371
1372 static bool fold_extract2(OptContext *ctx, TCGOp *op)
1373 {
1374     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1375         uint64_t v1 = arg_info(op->args[1])->val;
1376         uint64_t v2 = arg_info(op->args[2])->val;
1377         int shr = op->args[3];
1378
1379         if (op->opc == INDEX_op_extract2_i64) {
1380             v1 >>= shr;
1381             v2 <<= 64 - shr;
1382         } else {
1383             v1 = (uint32_t)v1 >> shr;
1384             v2 = (uint64_t)((int32_t)v2 << (32 - shr));
1385         }
1386         return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1387     }
1388     return false;
1389 }
1390
1391 static bool fold_exts(OptContext *ctx, TCGOp *op)
1392 {
1393     uint64_t s_mask_old, s_mask, z_mask, sign;
1394     bool type_change = false;
1395
1396     if (fold_const1(ctx, op)) {
1397         return true;
1398     }
1399
1400     z_mask = arg_info(op->args[1])->z_mask;
1401     s_mask = arg_info(op->args[1])->s_mask;
1402     s_mask_old = s_mask;
1403
1404     switch (op->opc) {
1405     CASE_OP_32_64(ext8s):
1406         sign = INT8_MIN;
1407         z_mask = (uint8_t)z_mask;
1408         break;
1409     CASE_OP_32_64(ext16s):
1410         sign = INT16_MIN;
1411         z_mask = (uint16_t)z_mask;
1412         break;
1413     case INDEX_op_ext_i32_i64:
1414         type_change = true;
1415         QEMU_FALLTHROUGH;
1416     case INDEX_op_ext32s_i64:
1417         sign = INT32_MIN;
1418         z_mask = (uint32_t)z_mask;
1419         break;
1420     default:
1421         g_assert_not_reached();
1422     }
1423
1424     if (z_mask & sign) {
1425         z_mask |= sign;
1426     }
1427     s_mask |= sign << 1;
1428
1429     ctx->z_mask = z_mask;
1430     ctx->s_mask = s_mask;
1431     if (!type_change) {
1432         ctx->a_mask = s_mask & ~s_mask_old;
1433     }
1434
1435     return fold_masks(ctx, op);
1436 }
1437
1438 static bool fold_extu(OptContext *ctx, TCGOp *op)
1439 {
1440     uint64_t z_mask_old, z_mask;
1441     bool type_change = false;
1442
1443     if (fold_const1(ctx, op)) {
1444         return true;
1445     }
1446
1447     z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
1448
1449     switch (op->opc) {
1450     CASE_OP_32_64(ext8u):
1451         z_mask = (uint8_t)z_mask;
1452         break;
1453     CASE_OP_32_64(ext16u):
1454         z_mask = (uint16_t)z_mask;
1455         break;
1456     case INDEX_op_extrl_i64_i32:
1457     case INDEX_op_extu_i32_i64:
1458         type_change = true;
1459         QEMU_FALLTHROUGH;
1460     case INDEX_op_ext32u_i64:
1461         z_mask = (uint32_t)z_mask;
1462         break;
1463     case INDEX_op_extrh_i64_i32:
1464         type_change = true;
1465         z_mask >>= 32;
1466         break;
1467     default:
1468         g_assert_not_reached();
1469     }
1470
1471     ctx->z_mask = z_mask;
1472     ctx->s_mask = smask_from_zmask(z_mask);
1473     if (!type_change) {
1474         ctx->a_mask = z_mask_old ^ z_mask;
1475     }
1476     return fold_masks(ctx, op);
1477 }
1478
1479 static bool fold_mb(OptContext *ctx, TCGOp *op)
1480 {
1481     /* Eliminate duplicate and redundant fence instructions.  */
1482     if (ctx->prev_mb) {
1483         /*
1484          * Merge two barriers of the same type into one,
1485          * or a weaker barrier into a stronger one,
1486          * or two weaker barriers into a stronger one.
1487          *   mb X; mb Y => mb X|Y
1488          *   mb; strl => mb; st
1489          *   ldaq; mb => ld; mb
1490          *   ldaq; strl => ld; mb; st
1491          * Other combinations are also merged into a strong
1492          * barrier.  This is stricter than specified but for
1493          * the purposes of TCG is better than not optimizing.
1494          */
1495         ctx->prev_mb->args[0] |= op->args[0];
1496         tcg_op_remove(ctx->tcg, op);
1497     } else {
1498         ctx->prev_mb = op;
1499     }
1500     return true;
1501 }
1502
1503 static bool fold_mov(OptContext *ctx, TCGOp *op)
1504 {
1505     return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1506 }
1507
1508 static bool fold_movcond(OptContext *ctx, TCGOp *op)
1509 {
1510     TCGCond cond = op->args[5];
1511     int i;
1512
1513     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1514         op->args[5] = cond = tcg_swap_cond(cond);
1515     }
1516     /*
1517      * Canonicalize the "false" input reg to match the destination reg so
1518      * that the tcg backend can implement a "move if true" operation.
1519      */
1520     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1521         op->args[5] = cond = tcg_invert_cond(cond);
1522     }
1523
1524     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1525     if (i >= 0) {
1526         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
1527     }
1528
1529     ctx->z_mask = arg_info(op->args[3])->z_mask
1530                 | arg_info(op->args[4])->z_mask;
1531     ctx->s_mask = arg_info(op->args[3])->s_mask
1532                 & arg_info(op->args[4])->s_mask;
1533
1534     if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1535         uint64_t tv = arg_info(op->args[3])->val;
1536         uint64_t fv = arg_info(op->args[4])->val;
1537         TCGOpcode opc;
1538
1539         switch (ctx->type) {
1540         case TCG_TYPE_I32:
1541             opc = INDEX_op_setcond_i32;
1542             break;
1543         case TCG_TYPE_I64:
1544             opc = INDEX_op_setcond_i64;
1545             break;
1546         default:
1547             g_assert_not_reached();
1548         }
1549
1550         if (tv == 1 && fv == 0) {
1551             op->opc = opc;
1552             op->args[3] = cond;
1553         } else if (fv == 1 && tv == 0) {
1554             op->opc = opc;
1555             op->args[3] = tcg_invert_cond(cond);
1556         }
1557     }
1558     return false;
1559 }
1560
1561 static bool fold_mul(OptContext *ctx, TCGOp *op)
1562 {
1563     if (fold_const2(ctx, op) ||
1564         fold_xi_to_i(ctx, op, 0) ||
1565         fold_xi_to_x(ctx, op, 1)) {
1566         return true;
1567     }
1568     return false;
1569 }
1570
1571 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
1572 {
1573     if (fold_const2_commutative(ctx, op) ||
1574         fold_xi_to_i(ctx, op, 0)) {
1575         return true;
1576     }
1577     return false;
1578 }
1579
1580 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
1581 {
1582     swap_commutative(op->args[0], &op->args[2], &op->args[3]);
1583
1584     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1585         uint64_t a = arg_info(op->args[2])->val;
1586         uint64_t b = arg_info(op->args[3])->val;
1587         uint64_t h, l;
1588         TCGArg rl, rh;
1589         TCGOp *op2;
1590
1591         switch (op->opc) {
1592         case INDEX_op_mulu2_i32:
1593             l = (uint64_t)(uint32_t)a * (uint32_t)b;
1594             h = (int32_t)(l >> 32);
1595             l = (int32_t)l;
1596             break;
1597         case INDEX_op_muls2_i32:
1598             l = (int64_t)(int32_t)a * (int32_t)b;
1599             h = l >> 32;
1600             l = (int32_t)l;
1601             break;
1602         case INDEX_op_mulu2_i64:
1603             mulu64(&l, &h, a, b);
1604             break;
1605         case INDEX_op_muls2_i64:
1606             muls64(&l, &h, a, b);
1607             break;
1608         default:
1609             g_assert_not_reached();
1610         }
1611
1612         rl = op->args[0];
1613         rh = op->args[1];
1614
1615         /* The proper opcode is supplied by tcg_opt_gen_mov. */
1616         op2 = tcg_op_insert_before(ctx->tcg, op, 0);
1617
1618         tcg_opt_gen_movi(ctx, op, rl, l);
1619         tcg_opt_gen_movi(ctx, op2, rh, h);
1620         return true;
1621     }
1622     return false;
1623 }
1624
1625 static bool fold_nand(OptContext *ctx, TCGOp *op)
1626 {
1627     if (fold_const2_commutative(ctx, op) ||
1628         fold_xi_to_not(ctx, op, -1)) {
1629         return true;
1630     }
1631
1632     ctx->s_mask = arg_info(op->args[1])->s_mask
1633                 & arg_info(op->args[2])->s_mask;
1634     return false;
1635 }
1636
1637 static bool fold_neg(OptContext *ctx, TCGOp *op)
1638 {
1639     uint64_t z_mask;
1640
1641     if (fold_const1(ctx, op)) {
1642         return true;
1643     }
1644
1645     /* Set to 1 all bits to the left of the rightmost.  */
1646     z_mask = arg_info(op->args[1])->z_mask;
1647     ctx->z_mask = -(z_mask & -z_mask);
1648
1649     /*
1650      * Because of fold_sub_to_neg, we want to always return true,
1651      * via finish_folding.
1652      */
1653     finish_folding(ctx, op);
1654     return true;
1655 }
1656
1657 static bool fold_nor(OptContext *ctx, TCGOp *op)
1658 {
1659     if (fold_const2_commutative(ctx, op) ||
1660         fold_xi_to_not(ctx, op, 0)) {
1661         return true;
1662     }
1663
1664     ctx->s_mask = arg_info(op->args[1])->s_mask
1665                 & arg_info(op->args[2])->s_mask;
1666     return false;
1667 }
1668
1669 static bool fold_not(OptContext *ctx, TCGOp *op)
1670 {
1671     if (fold_const1(ctx, op)) {
1672         return true;
1673     }
1674
1675     ctx->s_mask = arg_info(op->args[1])->s_mask;
1676
1677     /* Because of fold_to_not, we want to always return true, via finish. */
1678     finish_folding(ctx, op);
1679     return true;
1680 }
1681
1682 static bool fold_or(OptContext *ctx, TCGOp *op)
1683 {
1684     if (fold_const2_commutative(ctx, op) ||
1685         fold_xi_to_x(ctx, op, 0) ||
1686         fold_xx_to_x(ctx, op)) {
1687         return true;
1688     }
1689
1690     ctx->z_mask = arg_info(op->args[1])->z_mask
1691                 | arg_info(op->args[2])->z_mask;
1692     ctx->s_mask = arg_info(op->args[1])->s_mask
1693                 & arg_info(op->args[2])->s_mask;
1694     return fold_masks(ctx, op);
1695 }
1696
1697 static bool fold_orc(OptContext *ctx, TCGOp *op)
1698 {
1699     if (fold_const2(ctx, op) ||
1700         fold_xx_to_i(ctx, op, -1) ||
1701         fold_xi_to_x(ctx, op, -1) ||
1702         fold_ix_to_not(ctx, op, 0)) {
1703         return true;
1704     }
1705
1706     ctx->s_mask = arg_info(op->args[1])->s_mask
1707                 & arg_info(op->args[2])->s_mask;
1708     return false;
1709 }
1710
1711 static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
1712 {
1713     const TCGOpDef *def = &tcg_op_defs[op->opc];
1714     MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
1715     MemOp mop = get_memop(oi);
1716     int width = 8 * memop_size(mop);
1717
1718     if (width < 64) {
1719         ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
1720         if (!(mop & MO_SIGN)) {
1721             ctx->z_mask = MAKE_64BIT_MASK(0, width);
1722             ctx->s_mask <<= 1;
1723         }
1724     }
1725
1726     /* Opcodes that touch guest memory stop the mb optimization.  */
1727     ctx->prev_mb = NULL;
1728     return false;
1729 }
1730
1731 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
1732 {
1733     /* Opcodes that touch guest memory stop the mb optimization.  */
1734     ctx->prev_mb = NULL;
1735     return false;
1736 }
1737
1738 static bool fold_remainder(OptContext *ctx, TCGOp *op)
1739 {
1740     if (fold_const2(ctx, op) ||
1741         fold_xx_to_i(ctx, op, 0)) {
1742         return true;
1743     }
1744     return false;
1745 }
1746
1747 static bool fold_setcond(OptContext *ctx, TCGOp *op)
1748 {
1749     TCGCond cond = op->args[3];
1750     int i;
1751
1752     if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
1753         op->args[3] = cond = tcg_swap_cond(cond);
1754     }
1755
1756     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1757     if (i >= 0) {
1758         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1759     }
1760
1761     ctx->z_mask = 1;
1762     ctx->s_mask = smask_from_zmask(1);
1763     return false;
1764 }
1765
1766 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
1767 {
1768     TCGCond cond = op->args[5];
1769     int i, inv = 0;
1770
1771     if (swap_commutative2(&op->args[1], &op->args[3])) {
1772         op->args[5] = cond = tcg_swap_cond(cond);
1773     }
1774
1775     i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
1776     if (i >= 0) {
1777         goto do_setcond_const;
1778     }
1779
1780     switch (cond) {
1781     case TCG_COND_LT:
1782     case TCG_COND_GE:
1783         /*
1784          * Simplify LT/GE comparisons vs zero to a single compare
1785          * vs the high word of the input.
1786          */
1787         if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
1788             arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
1789             goto do_setcond_high;
1790         }
1791         break;
1792
1793     case TCG_COND_NE:
1794         inv = 1;
1795         QEMU_FALLTHROUGH;
1796     case TCG_COND_EQ:
1797         /*
1798          * Simplify EQ/NE comparisons where one of the pairs
1799          * can be simplified.
1800          */
1801         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1802                                      op->args[3], cond);
1803         switch (i ^ inv) {
1804         case 0:
1805             goto do_setcond_const;
1806         case 1:
1807             goto do_setcond_high;
1808         }
1809
1810         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
1811                                      op->args[4], cond);
1812         switch (i ^ inv) {
1813         case 0:
1814             goto do_setcond_const;
1815         case 1:
1816             op->args[2] = op->args[3];
1817             op->args[3] = cond;
1818             op->opc = INDEX_op_setcond_i32;
1819             break;
1820         }
1821         break;
1822
1823     default:
1824         break;
1825
1826     do_setcond_high:
1827         op->args[1] = op->args[2];
1828         op->args[2] = op->args[4];
1829         op->args[3] = cond;
1830         op->opc = INDEX_op_setcond_i32;
1831         break;
1832     }
1833
1834     ctx->z_mask = 1;
1835     ctx->s_mask = smask_from_zmask(1);
1836     return false;
1837
1838  do_setcond_const:
1839     return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1840 }
1841
1842 static bool fold_sextract(OptContext *ctx, TCGOp *op)
1843 {
1844     uint64_t z_mask, s_mask, s_mask_old;
1845     int pos = op->args[2];
1846     int len = op->args[3];
1847
1848     if (arg_is_const(op->args[1])) {
1849         uint64_t t;
1850
1851         t = arg_info(op->args[1])->val;
1852         t = sextract64(t, pos, len);
1853         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1854     }
1855
1856     z_mask = arg_info(op->args[1])->z_mask;
1857     z_mask = sextract64(z_mask, pos, len);
1858     ctx->z_mask = z_mask;
1859
1860     s_mask_old = arg_info(op->args[1])->s_mask;
1861     s_mask = sextract64(s_mask_old, pos, len);
1862     s_mask |= MAKE_64BIT_MASK(len, 64 - len);
1863     ctx->s_mask = s_mask;
1864
1865     if (pos == 0) {
1866         ctx->a_mask = s_mask & ~s_mask_old;
1867     }
1868
1869     return fold_masks(ctx, op);
1870 }
1871
1872 static bool fold_shift(OptContext *ctx, TCGOp *op)
1873 {
1874     uint64_t s_mask, z_mask, sign;
1875
1876     if (fold_const2(ctx, op) ||
1877         fold_ix_to_i(ctx, op, 0) ||
1878         fold_xi_to_x(ctx, op, 0)) {
1879         return true;
1880     }
1881
1882     s_mask = arg_info(op->args[1])->s_mask;
1883     z_mask = arg_info(op->args[1])->z_mask;
1884
1885     if (arg_is_const(op->args[2])) {
1886         int sh = arg_info(op->args[2])->val;
1887
1888         ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
1889
1890         s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
1891         ctx->s_mask = smask_from_smask(s_mask);
1892
1893         return fold_masks(ctx, op);
1894     }
1895
1896     switch (op->opc) {
1897     CASE_OP_32_64(sar):
1898         /*
1899          * Arithmetic right shift will not reduce the number of
1900          * input sign repetitions.
1901          */
1902         ctx->s_mask = s_mask;
1903         break;
1904     CASE_OP_32_64(shr):
1905         /*
1906          * If the sign bit is known zero, then logical right shift
1907          * will not reduced the number of input sign repetitions.
1908          */
1909         sign = (s_mask & -s_mask) >> 1;
1910         if (!(z_mask & sign)) {
1911             ctx->s_mask = s_mask;
1912         }
1913         break;
1914     default:
1915         break;
1916     }
1917
1918     return false;
1919 }
1920
1921 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
1922 {
1923     TCGOpcode neg_op;
1924     bool have_neg;
1925
1926     if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
1927         return false;
1928     }
1929
1930     switch (ctx->type) {
1931     case TCG_TYPE_I32:
1932         neg_op = INDEX_op_neg_i32;
1933         have_neg = TCG_TARGET_HAS_neg_i32;
1934         break;
1935     case TCG_TYPE_I64:
1936         neg_op = INDEX_op_neg_i64;
1937         have_neg = TCG_TARGET_HAS_neg_i64;
1938         break;
1939     case TCG_TYPE_V64:
1940     case TCG_TYPE_V128:
1941     case TCG_TYPE_V256:
1942         neg_op = INDEX_op_neg_vec;
1943         have_neg = (TCG_TARGET_HAS_neg_vec &&
1944                     tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
1945         break;
1946     default:
1947         g_assert_not_reached();
1948     }
1949     if (have_neg) {
1950         op->opc = neg_op;
1951         op->args[1] = op->args[2];
1952         return fold_neg(ctx, op);
1953     }
1954     return false;
1955 }
1956
1957 /* We cannot as yet do_constant_folding with vectors. */
1958 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
1959 {
1960     if (fold_xx_to_i(ctx, op, 0) ||
1961         fold_xi_to_x(ctx, op, 0) ||
1962         fold_sub_to_neg(ctx, op)) {
1963         return true;
1964     }
1965     return false;
1966 }
1967
1968 static bool fold_sub(OptContext *ctx, TCGOp *op)
1969 {
1970     return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
1971 }
1972
1973 static bool fold_sub2(OptContext *ctx, TCGOp *op)
1974 {
1975     return fold_addsub2(ctx, op, false);
1976 }
1977
1978 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
1979 {
1980     /* We can't do any folding with a load, but we can record bits. */
1981     switch (op->opc) {
1982     CASE_OP_32_64(ld8s):
1983         ctx->s_mask = MAKE_64BIT_MASK(8, 56);
1984         break;
1985     CASE_OP_32_64(ld8u):
1986         ctx->z_mask = MAKE_64BIT_MASK(0, 8);
1987         ctx->s_mask = MAKE_64BIT_MASK(9, 55);
1988         break;
1989     CASE_OP_32_64(ld16s):
1990         ctx->s_mask = MAKE_64BIT_MASK(16, 48);
1991         break;
1992     CASE_OP_32_64(ld16u):
1993         ctx->z_mask = MAKE_64BIT_MASK(0, 16);
1994         ctx->s_mask = MAKE_64BIT_MASK(17, 47);
1995         break;
1996     case INDEX_op_ld32s_i64:
1997         ctx->s_mask = MAKE_64BIT_MASK(32, 32);
1998         break;
1999     case INDEX_op_ld32u_i64:
2000         ctx->z_mask = MAKE_64BIT_MASK(0, 32);
2001         ctx->s_mask = MAKE_64BIT_MASK(33, 31);
2002         break;
2003     default:
2004         g_assert_not_reached();
2005     }
2006     return false;
2007 }
2008
2009 static bool fold_xor(OptContext *ctx, TCGOp *op)
2010 {
2011     if (fold_const2_commutative(ctx, op) ||
2012         fold_xx_to_i(ctx, op, 0) ||
2013         fold_xi_to_x(ctx, op, 0) ||
2014         fold_xi_to_not(ctx, op, -1)) {
2015         return true;
2016     }
2017
2018     ctx->z_mask = arg_info(op->args[1])->z_mask
2019                 | arg_info(op->args[2])->z_mask;
2020     ctx->s_mask = arg_info(op->args[1])->s_mask
2021                 & arg_info(op->args[2])->s_mask;
2022     return fold_masks(ctx, op);
2023 }
2024
2025 /* Propagate constants and copies, fold constant expressions. */
2026 void tcg_optimize(TCGContext *s)
2027 {
2028     int nb_temps, i;
2029     TCGOp *op, *op_next;
2030     OptContext ctx = { .tcg = s };
2031
2032     /* Array VALS has an element for each temp.
2033        If this temp holds a constant then its value is kept in VALS' element.
2034        If this temp is a copy of other ones then the other copies are
2035        available through the doubly linked circular list. */
2036
2037     nb_temps = s->nb_temps;
2038     for (i = 0; i < nb_temps; ++i) {
2039         s->temps[i].state_ptr = NULL;
2040     }
2041
2042     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2043         TCGOpcode opc = op->opc;
2044         const TCGOpDef *def;
2045         bool done = false;
2046
2047         /* Calls are special. */
2048         if (opc == INDEX_op_call) {
2049             fold_call(&ctx, op);
2050             continue;
2051         }
2052
2053         def = &tcg_op_defs[opc];
2054         init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
2055         copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
2056
2057         /* Pre-compute the type of the operation. */
2058         if (def->flags & TCG_OPF_VECTOR) {
2059             ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
2060         } else if (def->flags & TCG_OPF_64BIT) {
2061             ctx.type = TCG_TYPE_I64;
2062         } else {
2063             ctx.type = TCG_TYPE_I32;
2064         }
2065
2066         /* Assume all bits affected, no bits known zero, no sign reps. */
2067         ctx.a_mask = -1;
2068         ctx.z_mask = -1;
2069         ctx.s_mask = 0;
2070
2071         /*
2072          * Process each opcode.
2073          * Sorted alphabetically by opcode as much as possible.
2074          */
2075         switch (opc) {
2076         CASE_OP_32_64(add):
2077             done = fold_add(&ctx, op);
2078             break;
2079         case INDEX_op_add_vec:
2080             done = fold_add_vec(&ctx, op);
2081             break;
2082         CASE_OP_32_64(add2):
2083             done = fold_add2(&ctx, op);
2084             break;
2085         CASE_OP_32_64_VEC(and):
2086             done = fold_and(&ctx, op);
2087             break;
2088         CASE_OP_32_64_VEC(andc):
2089             done = fold_andc(&ctx, op);
2090             break;
2091         CASE_OP_32_64(brcond):
2092             done = fold_brcond(&ctx, op);
2093             break;
2094         case INDEX_op_brcond2_i32:
2095             done = fold_brcond2(&ctx, op);
2096             break;
2097         CASE_OP_32_64(bswap16):
2098         CASE_OP_32_64(bswap32):
2099         case INDEX_op_bswap64_i64:
2100             done = fold_bswap(&ctx, op);
2101             break;
2102         CASE_OP_32_64(clz):
2103         CASE_OP_32_64(ctz):
2104             done = fold_count_zeros(&ctx, op);
2105             break;
2106         CASE_OP_32_64(ctpop):
2107             done = fold_ctpop(&ctx, op);
2108             break;
2109         CASE_OP_32_64(deposit):
2110             done = fold_deposit(&ctx, op);
2111             break;
2112         CASE_OP_32_64(div):
2113         CASE_OP_32_64(divu):
2114             done = fold_divide(&ctx, op);
2115             break;
2116         case INDEX_op_dup_vec:
2117             done = fold_dup(&ctx, op);
2118             break;
2119         case INDEX_op_dup2_vec:
2120             done = fold_dup2(&ctx, op);
2121             break;
2122         CASE_OP_32_64_VEC(eqv):
2123             done = fold_eqv(&ctx, op);
2124             break;
2125         CASE_OP_32_64(extract):
2126             done = fold_extract(&ctx, op);
2127             break;
2128         CASE_OP_32_64(extract2):
2129             done = fold_extract2(&ctx, op);
2130             break;
2131         CASE_OP_32_64(ext8s):
2132         CASE_OP_32_64(ext16s):
2133         case INDEX_op_ext32s_i64:
2134         case INDEX_op_ext_i32_i64:
2135             done = fold_exts(&ctx, op);
2136             break;
2137         CASE_OP_32_64(ext8u):
2138         CASE_OP_32_64(ext16u):
2139         case INDEX_op_ext32u_i64:
2140         case INDEX_op_extu_i32_i64:
2141         case INDEX_op_extrl_i64_i32:
2142         case INDEX_op_extrh_i64_i32:
2143             done = fold_extu(&ctx, op);
2144             break;
2145         CASE_OP_32_64(ld8s):
2146         CASE_OP_32_64(ld8u):
2147         CASE_OP_32_64(ld16s):
2148         CASE_OP_32_64(ld16u):
2149         case INDEX_op_ld32s_i64:
2150         case INDEX_op_ld32u_i64:
2151             done = fold_tcg_ld(&ctx, op);
2152             break;
2153         case INDEX_op_mb:
2154             done = fold_mb(&ctx, op);
2155             break;
2156         CASE_OP_32_64_VEC(mov):
2157             done = fold_mov(&ctx, op);
2158             break;
2159         CASE_OP_32_64(movcond):
2160             done = fold_movcond(&ctx, op);
2161             break;
2162         CASE_OP_32_64(mul):
2163             done = fold_mul(&ctx, op);
2164             break;
2165         CASE_OP_32_64(mulsh):
2166         CASE_OP_32_64(muluh):
2167             done = fold_mul_highpart(&ctx, op);
2168             break;
2169         CASE_OP_32_64(muls2):
2170         CASE_OP_32_64(mulu2):
2171             done = fold_multiply2(&ctx, op);
2172             break;
2173         CASE_OP_32_64_VEC(nand):
2174             done = fold_nand(&ctx, op);
2175             break;
2176         CASE_OP_32_64(neg):
2177             done = fold_neg(&ctx, op);
2178             break;
2179         CASE_OP_32_64_VEC(nor):
2180             done = fold_nor(&ctx, op);
2181             break;
2182         CASE_OP_32_64_VEC(not):
2183             done = fold_not(&ctx, op);
2184             break;
2185         CASE_OP_32_64_VEC(or):
2186             done = fold_or(&ctx, op);
2187             break;
2188         CASE_OP_32_64_VEC(orc):
2189             done = fold_orc(&ctx, op);
2190             break;
2191         case INDEX_op_qemu_ld_i32:
2192         case INDEX_op_qemu_ld_i64:
2193             done = fold_qemu_ld(&ctx, op);
2194             break;
2195         case INDEX_op_qemu_st_i32:
2196         case INDEX_op_qemu_st8_i32:
2197         case INDEX_op_qemu_st_i64:
2198             done = fold_qemu_st(&ctx, op);
2199             break;
2200         CASE_OP_32_64(rem):
2201         CASE_OP_32_64(remu):
2202             done = fold_remainder(&ctx, op);
2203             break;
2204         CASE_OP_32_64(rotl):
2205         CASE_OP_32_64(rotr):
2206         CASE_OP_32_64(sar):
2207         CASE_OP_32_64(shl):
2208         CASE_OP_32_64(shr):
2209             done = fold_shift(&ctx, op);
2210             break;
2211         CASE_OP_32_64(setcond):
2212             done = fold_setcond(&ctx, op);
2213             break;
2214         case INDEX_op_setcond2_i32:
2215             done = fold_setcond2(&ctx, op);
2216             break;
2217         CASE_OP_32_64(sextract):
2218             done = fold_sextract(&ctx, op);
2219             break;
2220         CASE_OP_32_64(sub):
2221             done = fold_sub(&ctx, op);
2222             break;
2223         case INDEX_op_sub_vec:
2224             done = fold_sub_vec(&ctx, op);
2225             break;
2226         CASE_OP_32_64(sub2):
2227             done = fold_sub2(&ctx, op);
2228             break;
2229         CASE_OP_32_64_VEC(xor):
2230             done = fold_xor(&ctx, op);
2231             break;
2232         default:
2233             break;
2234         }
2235
2236         if (!done) {
2237             finish_folding(&ctx, op);
2238         }
2239     }
2240 }