tcg/optimize.c

   1 /*
   2  * Optimizations for Tiny Code Generator for QEMU
   3  *
   4  * Copyright (c) 2010 Samsung Electronics.
   5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a copy
   8  * of this software and associated documentation files (the "Software"), to deal
   9  * in the Software without restriction, including without limitation the rights
  10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11  * copies of the Software, and to permit persons to whom the Software is
  12  * furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included in
  15  * all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23  * THE SOFTWARE.
  24  */
  25
  26 #include "qemu/osdep.h"
  27 #include "qemu/int128.h"
  28 #include "tcg/tcg-op-common.h"
  29 #include "tcg-internal.h"
  30
  31 #define CASE_OP_32_64(x)                        \
  32         glue(glue(case INDEX_op_, x), _i32):    \
  33         glue(glue(case INDEX_op_, x), _i64)
  34
  35 #define CASE_OP_32_64_VEC(x)                    \
  36         glue(glue(case INDEX_op_, x), _i32):    \
  37         glue(glue(case INDEX_op_, x), _i64):    \
  38         glue(glue(case INDEX_op_, x), _vec)
  39
  40 typedef struct TempOptInfo {
  41     bool is_const;
  42     TCGTemp *prev_copy;
  43     TCGTemp *next_copy;
  44     uint64_t val;
  45     uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
  46     uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
  47 } TempOptInfo;
  48
  49 typedef struct OptContext {
  50     TCGContext *tcg;
  51     TCGOp *prev_mb;
  52     TCGTempSet temps_used;
  53
  54     /* In flight values from optimization. */
  55     uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
  56     uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
  57     uint64_t s_mask;  /* mask of clrsb(value) bits */
  58     TCGType type;
  59 } OptContext;
  60
  61 /* Calculate the smask for a specific value. */
  62 static uint64_t smask_from_value(uint64_t value)
  63 {
  64     int rep = clrsb64(value);
  65     return ~(~0ull >> rep);
  66 }
  67
  68 /*
  69  * Calculate the smask for a given set of known-zeros.
  70  * If there are lots of zeros on the left, we can consider the remainder
  71  * an unsigned field, and thus the corresponding signed field is one bit
  72  * larger.
  73  */
  74 static uint64_t smask_from_zmask(uint64_t zmask)
  75 {
  76     /*
  77      * Only the 0 bits are significant for zmask, thus the msb itself
  78      * must be zero, else we have no sign information.
  79      */
  80     int rep = clz64(zmask);
  81     if (rep == 0) {
  82         return 0;
  83     }
  84     rep -= 1;
  85     return ~(~0ull >> rep);
  86 }
  87
  88 /*
  89  * Recreate a properly left-aligned smask after manipulation.
  90  * Some bit-shuffling, particularly shifts and rotates, may
  91  * retain sign bits on the left, but may scatter disconnected
  92  * sign bits on the right.  Retain only what remains to the left.
  93  */
  94 static uint64_t smask_from_smask(int64_t smask)
  95 {
  96     /* Only the 1 bits are significant for smask */
  97     return smask_from_zmask(~smask);
  98 }
  99
 100 static inline TempOptInfo *ts_info(TCGTemp *ts)
 101 {
 102     return ts->state_ptr;
 103 }
 104
 105 static inline TempOptInfo *arg_info(TCGArg arg)
 106 {
 107     return ts_info(arg_temp(arg));
 108 }
 109
 110 static inline bool ts_is_const(TCGTemp *ts)
 111 {
 112     return ts_info(ts)->is_const;
 113 }
 114
 115 static inline bool arg_is_const(TCGArg arg)
 116 {
 117     return ts_is_const(arg_temp(arg));
 118 }
 119
 120 static inline bool ts_is_copy(TCGTemp *ts)
 121 {
 122     return ts_info(ts)->next_copy != ts;
 123 }
 124
 125 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
 126 static void reset_ts(TCGTemp *ts)
 127 {
 128     TempOptInfo *ti = ts_info(ts);
 129     TempOptInfo *pi = ts_info(ti->prev_copy);
 130     TempOptInfo *ni = ts_info(ti->next_copy);
 131
 132     ni->prev_copy = ti->prev_copy;
 133     pi->next_copy = ti->next_copy;
 134     ti->next_copy = ts;
 135     ti->prev_copy = ts;
 136     ti->is_const = false;
 137     ti->z_mask = -1;
 138     ti->s_mask = 0;
 139 }
 140
 141 static void reset_temp(TCGArg arg)
 142 {
 143     reset_ts(arg_temp(arg));
 144 }
 145
 146 /* Initialize and activate a temporary.  */
 147 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
 148 {
 149     size_t idx = temp_idx(ts);
 150     TempOptInfo *ti;
 151
 152     if (test_bit(idx, ctx->temps_used.l)) {
 153         return;
 154     }
 155     set_bit(idx, ctx->temps_used.l);
 156
 157     ti = ts->state_ptr;
 158     if (ti == NULL) {
 159         ti = tcg_malloc(sizeof(TempOptInfo));
 160         ts->state_ptr = ti;
 161     }
 162
 163     ti->next_copy = ts;
 164     ti->prev_copy = ts;
 165     if (ts->kind == TEMP_CONST) {
 166         ti->is_const = true;
 167         ti->val = ts->val;
 168         ti->z_mask = ts->val;
 169         ti->s_mask = smask_from_value(ts->val);
 170     } else {
 171         ti->is_const = false;
 172         ti->z_mask = -1;
 173         ti->s_mask = 0;
 174     }
 175 }
 176
 177 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
 178 {
 179     TCGTemp *i, *g, *l;
 180
 181     /* If this is already readonly, we can't do better. */
 182     if (temp_readonly(ts)) {
 183         return ts;
 184     }
 185
 186     g = l = NULL;
 187     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
 188         if (temp_readonly(i)) {
 189             return i;
 190         } else if (i->kind > ts->kind) {
 191             if (i->kind == TEMP_GLOBAL) {
 192                 g = i;
 193             } else if (i->kind == TEMP_TB) {
 194                 l = i;
 195             }
 196         }
 197     }
 198
 199     /* If we didn't find a better representation, return the same temp. */
 200     return g ? g : l ? l : ts;
 201 }
 202
 203 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
 204 {
 205     TCGTemp *i;
 206
 207     if (ts1 == ts2) {
 208         return true;
 209     }
 210
 211     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
 212         return false;
 213     }
 214
 215     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
 216         if (i == ts2) {
 217             return true;
 218         }
 219     }
 220
 221     return false;
 222 }
 223
 224 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
 225 {
 226     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
 227 }
 228
 229 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
 230 {
 231     TCGTemp *dst_ts = arg_temp(dst);
 232     TCGTemp *src_ts = arg_temp(src);
 233     TempOptInfo *di;
 234     TempOptInfo *si;
 235     TCGOpcode new_op;
 236
 237     if (ts_are_copies(dst_ts, src_ts)) {
 238         tcg_op_remove(ctx->tcg, op);
 239         return true;
 240     }
 241
 242     reset_ts(dst_ts);
 243     di = ts_info(dst_ts);
 244     si = ts_info(src_ts);
 245
 246     switch (ctx->type) {
 247     case TCG_TYPE_I32:
 248         new_op = INDEX_op_mov_i32;
 249         break;
 250     case TCG_TYPE_I64:
 251         new_op = INDEX_op_mov_i64;
 252         break;
 253     case TCG_TYPE_V64:
 254     case TCG_TYPE_V128:
 255     case TCG_TYPE_V256:
 256         /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
 257         new_op = INDEX_op_mov_vec;
 258         break;
 259     default:
 260         g_assert_not_reached();
 261     }
 262     op->opc = new_op;
 263     op->args[0] = dst;
 264     op->args[1] = src;
 265
 266     di->z_mask = si->z_mask;
 267     di->s_mask = si->s_mask;
 268
 269     if (src_ts->type == dst_ts->type) {
 270         TempOptInfo *ni = ts_info(si->next_copy);
 271
 272         di->next_copy = si->next_copy;
 273         di->prev_copy = src_ts;
 274         ni->prev_copy = dst_ts;
 275         si->next_copy = dst_ts;
 276         di->is_const = si->is_const;
 277         di->val = si->val;
 278     }
 279     return true;
 280 }
 281
 282 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
 283                              TCGArg dst, uint64_t val)
 284 {
 285     TCGTemp *tv;
 286
 287     if (ctx->type == TCG_TYPE_I32) {
 288         val = (int32_t)val;
 289     }
 290
 291     /* Convert movi to mov with constant temp. */
 292     tv = tcg_constant_internal(ctx->type, val);
 293     init_ts_info(ctx, tv);
 294     return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
 295 }
 296
 297 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
 298 {
 299     uint64_t l64, h64;
 300
 301     switch (op) {
 302     CASE_OP_32_64(add):
 303         return x + y;
 304
 305     CASE_OP_32_64(sub):
 306         return x - y;
 307
 308     CASE_OP_32_64(mul):
 309         return x * y;
 310
 311     CASE_OP_32_64_VEC(and):
 312         return x & y;
 313
 314     CASE_OP_32_64_VEC(or):
 315         return x | y;
 316
 317     CASE_OP_32_64_VEC(xor):
 318         return x ^ y;
 319
 320     case INDEX_op_shl_i32:
 321         return (uint32_t)x << (y & 31);
 322
 323     case INDEX_op_shl_i64:
 324         return (uint64_t)x << (y & 63);
 325
 326     case INDEX_op_shr_i32:
 327         return (uint32_t)x >> (y & 31);
 328
 329     case INDEX_op_shr_i64:
 330         return (uint64_t)x >> (y & 63);
 331
 332     case INDEX_op_sar_i32:
 333         return (int32_t)x >> (y & 31);
 334
 335     case INDEX_op_sar_i64:
 336         return (int64_t)x >> (y & 63);
 337
 338     case INDEX_op_rotr_i32:
 339         return ror32(x, y & 31);
 340
 341     case INDEX_op_rotr_i64:
 342         return ror64(x, y & 63);
 343
 344     case INDEX_op_rotl_i32:
 345         return rol32(x, y & 31);
 346
 347     case INDEX_op_rotl_i64:
 348         return rol64(x, y & 63);
 349
 350     CASE_OP_32_64_VEC(not):
 351         return ~x;
 352
 353     CASE_OP_32_64(neg):
 354         return -x;
 355
 356     CASE_OP_32_64_VEC(andc):
 357         return x & ~y;
 358
 359     CASE_OP_32_64_VEC(orc):
 360         return x | ~y;
 361
 362     CASE_OP_32_64_VEC(eqv):
 363         return ~(x ^ y);
 364
 365     CASE_OP_32_64_VEC(nand):
 366         return ~(x & y);
 367
 368     CASE_OP_32_64_VEC(nor):
 369         return ~(x | y);
 370
 371     case INDEX_op_clz_i32:
 372         return (uint32_t)x ? clz32(x) : y;
 373
 374     case INDEX_op_clz_i64:
 375         return x ? clz64(x) : y;
 376
 377     case INDEX_op_ctz_i32:
 378         return (uint32_t)x ? ctz32(x) : y;
 379
 380     case INDEX_op_ctz_i64:
 381         return x ? ctz64(x) : y;
 382
 383     case INDEX_op_ctpop_i32:
 384         return ctpop32(x);
 385
 386     case INDEX_op_ctpop_i64:
 387         return ctpop64(x);
 388
 389     CASE_OP_32_64(ext8s):
 390         return (int8_t)x;
 391
 392     CASE_OP_32_64(ext16s):
 393         return (int16_t)x;
 394
 395     CASE_OP_32_64(ext8u):
 396         return (uint8_t)x;
 397
 398     CASE_OP_32_64(ext16u):
 399         return (uint16_t)x;
 400
 401     CASE_OP_32_64(bswap16):
 402         x = bswap16(x);
 403         return y & TCG_BSWAP_OS ? (int16_t)x : x;
 404
 405     CASE_OP_32_64(bswap32):
 406         x = bswap32(x);
 407         return y & TCG_BSWAP_OS ? (int32_t)x : x;
 408
 409     case INDEX_op_bswap64_i64:
 410         return bswap64(x);
 411
 412     case INDEX_op_ext_i32_i64:
 413     case INDEX_op_ext32s_i64:
 414         return (int32_t)x;
 415
 416     case INDEX_op_extu_i32_i64:
 417     case INDEX_op_extrl_i64_i32:
 418     case INDEX_op_ext32u_i64:
 419         return (uint32_t)x;
 420
 421     case INDEX_op_extrh_i64_i32:
 422         return (uint64_t)x >> 32;
 423
 424     case INDEX_op_muluh_i32:
 425         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
 426     case INDEX_op_mulsh_i32:
 427         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
 428
 429     case INDEX_op_muluh_i64:
 430         mulu64(&l64, &h64, x, y);
 431         return h64;
 432     case INDEX_op_mulsh_i64:
 433         muls64(&l64, &h64, x, y);
 434         return h64;
 435
 436     case INDEX_op_div_i32:
 437         /* Avoid crashing on divide by zero, otherwise undefined.  */
 438         return (int32_t)x / ((int32_t)y ? : 1);
 439     case INDEX_op_divu_i32:
 440         return (uint32_t)x / ((uint32_t)y ? : 1);
 441     case INDEX_op_div_i64:
 442         return (int64_t)x / ((int64_t)y ? : 1);
 443     case INDEX_op_divu_i64:
 444         return (uint64_t)x / ((uint64_t)y ? : 1);
 445
 446     case INDEX_op_rem_i32:
 447         return (int32_t)x % ((int32_t)y ? : 1);
 448     case INDEX_op_remu_i32:
 449         return (uint32_t)x % ((uint32_t)y ? : 1);
 450     case INDEX_op_rem_i64:
 451         return (int64_t)x % ((int64_t)y ? : 1);
 452     case INDEX_op_remu_i64:
 453         return (uint64_t)x % ((uint64_t)y ? : 1);
 454
 455     default:
 456         g_assert_not_reached();
 457     }
 458 }
 459
 460 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
 461                                     uint64_t x, uint64_t y)
 462 {
 463     uint64_t res = do_constant_folding_2(op, x, y);
 464     if (type == TCG_TYPE_I32) {
 465         res = (int32_t)res;
 466     }
 467     return res;
 468 }
 469
 470 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
 471 {
 472     switch (c) {
 473     case TCG_COND_EQ:
 474         return x == y;
 475     case TCG_COND_NE:
 476         return x != y;
 477     case TCG_COND_LT:
 478         return (int32_t)x < (int32_t)y;
 479     case TCG_COND_GE:
 480         return (int32_t)x >= (int32_t)y;
 481     case TCG_COND_LE:
 482         return (int32_t)x <= (int32_t)y;
 483     case TCG_COND_GT:
 484         return (int32_t)x > (int32_t)y;
 485     case TCG_COND_LTU:
 486         return x < y;
 487     case TCG_COND_GEU:
 488         return x >= y;
 489     case TCG_COND_LEU:
 490         return x <= y;
 491     case TCG_COND_GTU:
 492         return x > y;
 493     default:
 494         g_assert_not_reached();
 495     }
 496 }
 497
 498 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
 499 {
 500     switch (c) {
 501     case TCG_COND_EQ:
 502         return x == y;
 503     case TCG_COND_NE:
 504         return x != y;
 505     case TCG_COND_LT:
 506         return (int64_t)x < (int64_t)y;
 507     case TCG_COND_GE:
 508         return (int64_t)x >= (int64_t)y;
 509     case TCG_COND_LE:
 510         return (int64_t)x <= (int64_t)y;
 511     case TCG_COND_GT:
 512         return (int64_t)x > (int64_t)y;
 513     case TCG_COND_LTU:
 514         return x < y;
 515     case TCG_COND_GEU:
 516         return x >= y;
 517     case TCG_COND_LEU:
 518         return x <= y;
 519     case TCG_COND_GTU:
 520         return x > y;
 521     default:
 522         g_assert_not_reached();
 523     }
 524 }
 525
 526 static bool do_constant_folding_cond_eq(TCGCond c)
 527 {
 528     switch (c) {
 529     case TCG_COND_GT:
 530     case TCG_COND_LTU:
 531     case TCG_COND_LT:
 532     case TCG_COND_GTU:
 533     case TCG_COND_NE:
 534         return 0;
 535     case TCG_COND_GE:
 536     case TCG_COND_GEU:
 537     case TCG_COND_LE:
 538     case TCG_COND_LEU:
 539     case TCG_COND_EQ:
 540         return 1;
 541     default:
 542         g_assert_not_reached();
 543     }
 544 }
 545
 546 /*
 547  * Return -1 if the condition can't be simplified,
 548  * and the result of the condition (0 or 1) if it can.
 549  */
 550 static int do_constant_folding_cond(TCGType type, TCGArg x,
 551                                     TCGArg y, TCGCond c)
 552 {
 553     if (arg_is_const(x) && arg_is_const(y)) {
 554         uint64_t xv = arg_info(x)->val;
 555         uint64_t yv = arg_info(y)->val;
 556
 557         switch (type) {
 558         case TCG_TYPE_I32:
 559             return do_constant_folding_cond_32(xv, yv, c);
 560         case TCG_TYPE_I64:
 561             return do_constant_folding_cond_64(xv, yv, c);
 562         default:
 563             /* Only scalar comparisons are optimizable */
 564             return -1;
 565         }
 566     } else if (args_are_copies(x, y)) {
 567         return do_constant_folding_cond_eq(c);
 568     } else if (arg_is_const(y) && arg_info(y)->val == 0) {
 569         switch (c) {
 570         case TCG_COND_LTU:
 571             return 0;
 572         case TCG_COND_GEU:
 573             return 1;
 574         default:
 575             return -1;
 576         }
 577     }
 578     return -1;
 579 }
 580
 581 /*
 582  * Return -1 if the condition can't be simplified,
 583  * and the result of the condition (0 or 1) if it can.
 584  */
 585 static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
 586 {
 587     TCGArg al = p1[0], ah = p1[1];
 588     TCGArg bl = p2[0], bh = p2[1];
 589
 590     if (arg_is_const(bl) && arg_is_const(bh)) {
 591         tcg_target_ulong blv = arg_info(bl)->val;
 592         tcg_target_ulong bhv = arg_info(bh)->val;
 593         uint64_t b = deposit64(blv, 32, 32, bhv);
 594
 595         if (arg_is_const(al) && arg_is_const(ah)) {
 596             tcg_target_ulong alv = arg_info(al)->val;
 597             tcg_target_ulong ahv = arg_info(ah)->val;
 598             uint64_t a = deposit64(alv, 32, 32, ahv);
 599             return do_constant_folding_cond_64(a, b, c);
 600         }
 601         if (b == 0) {
 602             switch (c) {
 603             case TCG_COND_LTU:
 604                 return 0;
 605             case TCG_COND_GEU:
 606                 return 1;
 607             default:
 608                 break;
 609             }
 610         }
 611     }
 612     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
 613         return do_constant_folding_cond_eq(c);
 614     }
 615     return -1;
 616 }
 617
 618 /**
 619  * swap_commutative:
 620  * @dest: TCGArg of the destination argument, or NO_DEST.
 621  * @p1: first paired argument
 622  * @p2: second paired argument
 623  *
 624  * If *@p1 is a constant and *@p2 is not, swap.
 625  * If *@p2 matches @dest, swap.
 626  * Return true if a swap was performed.
 627  */
 628
 629 #define NO_DEST  temp_arg(NULL)
 630
 631 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 632 {
 633     TCGArg a1 = *p1, a2 = *p2;
 634     int sum = 0;
 635     sum += arg_is_const(a1);
 636     sum -= arg_is_const(a2);
 637
 638     /* Prefer the constant in second argument, and then the form
 639        op a, a, b, which is better handled on non-RISC hosts. */
 640     if (sum > 0 || (sum == 0 && dest == a2)) {
 641         *p1 = a2;
 642         *p2 = a1;
 643         return true;
 644     }
 645     return false;
 646 }
 647
 648 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 649 {
 650     int sum = 0;
 651     sum += arg_is_const(p1[0]);
 652     sum += arg_is_const(p1[1]);
 653     sum -= arg_is_const(p2[0]);
 654     sum -= arg_is_const(p2[1]);
 655     if (sum > 0) {
 656         TCGArg t;
 657         t = p1[0], p1[0] = p2[0], p2[0] = t;
 658         t = p1[1], p1[1] = p2[1], p2[1] = t;
 659         return true;
 660     }
 661     return false;
 662 }
 663
 664 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
 665 {
 666     for (int i = 0; i < nb_args; i++) {
 667         TCGTemp *ts = arg_temp(op->args[i]);
 668         init_ts_info(ctx, ts);
 669     }
 670 }
 671
 672 static void copy_propagate(OptContext *ctx, TCGOp *op,
 673                            int nb_oargs, int nb_iargs)
 674 {
 675     TCGContext *s = ctx->tcg;
 676
 677     for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
 678         TCGTemp *ts = arg_temp(op->args[i]);
 679         if (ts_is_copy(ts)) {
 680             op->args[i] = temp_arg(find_better_copy(s, ts));
 681         }
 682     }
 683 }
 684
 685 static void finish_folding(OptContext *ctx, TCGOp *op)
 686 {
 687     const TCGOpDef *def = &tcg_op_defs[op->opc];
 688     int i, nb_oargs;
 689
 690     /*
 691      * We only optimize extended basic blocks.  If the opcode ends a BB
 692      * and is not a conditional branch, reset all temp data.
 693      */
 694     if (def->flags & TCG_OPF_BB_END) {
 695         ctx->prev_mb = NULL;
 696         if (!(def->flags & TCG_OPF_COND_BRANCH)) {
 697             memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
 698         }
 699         return;
 700     }
 701
 702     nb_oargs = def->nb_oargs;
 703     for (i = 0; i < nb_oargs; i++) {
 704         TCGTemp *ts = arg_temp(op->args[i]);
 705         reset_ts(ts);
 706         /*
 707          * Save the corresponding known-zero/sign bits mask for the
 708          * first output argument (only one supported so far).
 709          */
 710         if (i == 0) {
 711             ts_info(ts)->z_mask = ctx->z_mask;
 712             ts_info(ts)->s_mask = ctx->s_mask;
 713         }
 714     }
 715 }
 716
 717 /*
 718  * The fold_* functions return true when processing is complete,
 719  * usually by folding the operation to a constant or to a copy,
 720  * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
 721  * like collect information about the value produced, for use in
 722  * optimizing a subsequent operation.
 723  *
 724  * These first fold_* functions are all helpers, used by other
 725  * folders for more specific operations.
 726  */
 727
 728 static bool fold_const1(OptContext *ctx, TCGOp *op)
 729 {
 730     if (arg_is_const(op->args[1])) {
 731         uint64_t t;
 732
 733         t = arg_info(op->args[1])->val;
 734         t = do_constant_folding(op->opc, ctx->type, t, 0);
 735         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
 736     }
 737     return false;
 738 }
 739
 740 static bool fold_const2(OptContext *ctx, TCGOp *op)
 741 {
 742     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
 743         uint64_t t1 = arg_info(op->args[1])->val;
 744         uint64_t t2 = arg_info(op->args[2])->val;
 745
 746         t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
 747         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
 748     }
 749     return false;
 750 }
 751
 752 static bool fold_commutative(OptContext *ctx, TCGOp *op)
 753 {
 754     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
 755     return false;
 756 }
 757
 758 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
 759 {
 760     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
 761     return fold_const2(ctx, op);
 762 }
 763
 764 static bool fold_masks(OptContext *ctx, TCGOp *op)
 765 {
 766     uint64_t a_mask = ctx->a_mask;
 767     uint64_t z_mask = ctx->z_mask;
 768     uint64_t s_mask = ctx->s_mask;
 769
 770     /*
 771      * 32-bit ops generate 32-bit results, which for the purpose of
 772      * simplifying tcg are sign-extended.  Certainly that's how we
 773      * represent our constants elsewhere.  Note that the bits will
 774      * be reset properly for a 64-bit value when encountering the
 775      * type changing opcodes.
 776      */
 777     if (ctx->type == TCG_TYPE_I32) {
 778         a_mask = (int32_t)a_mask;
 779         z_mask = (int32_t)z_mask;
 780         s_mask |= MAKE_64BIT_MASK(32, 32);
 781         ctx->z_mask = z_mask;
 782         ctx->s_mask = s_mask;
 783     }
 784
 785     if (z_mask == 0) {
 786         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
 787     }
 788     if (a_mask == 0) {
 789         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 790     }
 791     return false;
 792 }
 793
 794 /*
 795  * Convert @op to NOT, if NOT is supported by the host.
 796  * Return true f the conversion is successful, which will still
 797  * indicate that the processing is complete.
 798  */
 799 static bool fold_not(OptContext *ctx, TCGOp *op);
 800 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
 801 {
 802     TCGOpcode not_op;
 803     bool have_not;
 804
 805     switch (ctx->type) {
 806     case TCG_TYPE_I32:
 807         not_op = INDEX_op_not_i32;
 808         have_not = TCG_TARGET_HAS_not_i32;
 809         break;
 810     case TCG_TYPE_I64:
 811         not_op = INDEX_op_not_i64;
 812         have_not = TCG_TARGET_HAS_not_i64;
 813         break;
 814     case TCG_TYPE_V64:
 815     case TCG_TYPE_V128:
 816     case TCG_TYPE_V256:
 817         not_op = INDEX_op_not_vec;
 818         have_not = TCG_TARGET_HAS_not_vec;
 819         break;
 820     default:
 821         g_assert_not_reached();
 822     }
 823     if (have_not) {
 824         op->opc = not_op;
 825         op->args[1] = op->args[idx];
 826         return fold_not(ctx, op);
 827     }
 828     return false;
 829 }
 830
 831 /* If the binary operation has first argument @i, fold to @i. */
 832 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 833 {
 834     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
 835         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 836     }
 837     return false;
 838 }
 839
 840 /* If the binary operation has first argument @i, fold to NOT. */
 841 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
 842 {
 843     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
 844         return fold_to_not(ctx, op, 2);
 845     }
 846     return false;
 847 }
 848
 849 /* If the binary operation has second argument @i, fold to @i. */
 850 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 851 {
 852     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 853         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 854     }
 855     return false;
 856 }
 857
 858 /* If the binary operation has second argument @i, fold to identity. */
 859 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
 860 {
 861     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 862         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 863     }
 864     return false;
 865 }
 866
 867 /* If the binary operation has second argument @i, fold to NOT. */
 868 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
 869 {
 870     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
 871         return fold_to_not(ctx, op, 1);
 872     }
 873     return false;
 874 }
 875
 876 /* If the binary operation has both arguments equal, fold to @i. */
 877 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
 878 {
 879     if (args_are_copies(op->args[1], op->args[2])) {
 880         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
 881     }
 882     return false;
 883 }
 884
 885 /* If the binary operation has both arguments equal, fold to identity. */
 886 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
 887 {
 888     if (args_are_copies(op->args[1], op->args[2])) {
 889         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
 890     }
 891     return false;
 892 }
 893
 894 /*
 895  * These outermost fold_<op> functions are sorted alphabetically.
 896  *
 897  * The ordering of the transformations should be:
 898  *   1) those that produce a constant
 899  *   2) those that produce a copy
 900  *   3) those that produce information about the result value.
 901  */
 902
 903 static bool fold_add(OptContext *ctx, TCGOp *op)
 904 {
 905     if (fold_const2_commutative(ctx, op) ||
 906         fold_xi_to_x(ctx, op, 0)) {
 907         return true;
 908     }
 909     return false;
 910 }
 911
 912 /* We cannot as yet do_constant_folding with vectors. */
 913 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
 914 {
 915     if (fold_commutative(ctx, op) ||
 916         fold_xi_to_x(ctx, op, 0)) {
 917         return true;
 918     }
 919     return false;
 920 }
 921
 922 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
 923 {
 924     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
 925         arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
 926         uint64_t al = arg_info(op->args[2])->val;
 927         uint64_t ah = arg_info(op->args[3])->val;
 928         uint64_t bl = arg_info(op->args[4])->val;
 929         uint64_t bh = arg_info(op->args[5])->val;
 930         TCGArg rl, rh;
 931         TCGOp *op2;
 932
 933         if (ctx->type == TCG_TYPE_I32) {
 934             uint64_t a = deposit64(al, 32, 32, ah);
 935             uint64_t b = deposit64(bl, 32, 32, bh);
 936
 937             if (add) {
 938                 a += b;
 939             } else {
 940                 a -= b;
 941             }
 942
 943             al = sextract64(a, 0, 32);
 944             ah = sextract64(a, 32, 32);
 945         } else {
 946             Int128 a = int128_make128(al, ah);
 947             Int128 b = int128_make128(bl, bh);
 948
 949             if (add) {
 950                 a = int128_add(a, b);
 951             } else {
 952                 a = int128_sub(a, b);
 953             }
 954
 955             al = int128_getlo(a);
 956             ah = int128_gethi(a);
 957         }
 958
 959         rl = op->args[0];
 960         rh = op->args[1];
 961
 962         /* The proper opcode is supplied by tcg_opt_gen_mov. */
 963         op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
 964
 965         tcg_opt_gen_movi(ctx, op, rl, al);
 966         tcg_opt_gen_movi(ctx, op2, rh, ah);
 967         return true;
 968     }
 969     return false;
 970 }
 971
 972 static bool fold_add2(OptContext *ctx, TCGOp *op)
 973 {
 974     /* Note that the high and low parts may be independently swapped. */
 975     swap_commutative(op->args[0], &op->args[2], &op->args[4]);
 976     swap_commutative(op->args[1], &op->args[3], &op->args[5]);
 977
 978     return fold_addsub2(ctx, op, true);
 979 }
 980
 981 static bool fold_and(OptContext *ctx, TCGOp *op)
 982 {
 983     uint64_t z1, z2;
 984
 985     if (fold_const2_commutative(ctx, op) ||
 986         fold_xi_to_i(ctx, op, 0) ||
 987         fold_xi_to_x(ctx, op, -1) ||
 988         fold_xx_to_x(ctx, op)) {
 989         return true;
 990     }
 991
 992     z1 = arg_info(op->args[1])->z_mask;
 993     z2 = arg_info(op->args[2])->z_mask;
 994     ctx->z_mask = z1 & z2;
 995
 996     /*
 997      * Sign repetitions are perforce all identical, whether they are 1 or 0.
 998      * Bitwise operations preserve the relative quantity of the repetitions.
 999      */
1000     ctx->s_mask = arg_info(op->args[1])->s_mask
1001                 & arg_info(op->args[2])->s_mask;
1002
1003     /*
1004      * Known-zeros does not imply known-ones.  Therefore unless
1005      * arg2 is constant, we can't infer affected bits from it.
1006      */
1007     if (arg_is_const(op->args[2])) {
1008         ctx->a_mask = z1 & ~z2;
1009     }
1010
1011     return fold_masks(ctx, op);
1012 }
1013
1014 static bool fold_andc(OptContext *ctx, TCGOp *op)
1015 {
1016     uint64_t z1;
1017
1018     if (fold_const2(ctx, op) ||
1019         fold_xx_to_i(ctx, op, 0) ||
1020         fold_xi_to_x(ctx, op, 0) ||
1021         fold_ix_to_not(ctx, op, -1)) {
1022         return true;
1023     }
1024
1025     z1 = arg_info(op->args[1])->z_mask;
1026
1027     /*
1028      * Known-zeros does not imply known-ones.  Therefore unless
1029      * arg2 is constant, we can't infer anything from it.
1030      */
1031     if (arg_is_const(op->args[2])) {
1032         uint64_t z2 = ~arg_info(op->args[2])->z_mask;
1033         ctx->a_mask = z1 & ~z2;
1034         z1 &= z2;
1035     }
1036     ctx->z_mask = z1;
1037
1038     ctx->s_mask = arg_info(op->args[1])->s_mask
1039                 & arg_info(op->args[2])->s_mask;
1040     return fold_masks(ctx, op);
1041 }
1042
1043 static bool fold_brcond(OptContext *ctx, TCGOp *op)
1044 {
1045     TCGCond cond = op->args[2];
1046     int i;
1047
1048     if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
1049         op->args[2] = cond = tcg_swap_cond(cond);
1050     }
1051
1052     i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
1053     if (i == 0) {
1054         tcg_op_remove(ctx->tcg, op);
1055         return true;
1056     }
1057     if (i > 0) {
1058         op->opc = INDEX_op_br;
1059         op->args[0] = op->args[3];
1060     }
1061     return false;
1062 }
1063
1064 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1065 {
1066     TCGCond cond = op->args[4];
1067     TCGArg label = op->args[5];
1068     int i, inv = 0;
1069
1070     if (swap_commutative2(&op->args[0], &op->args[2])) {
1071         op->args[4] = cond = tcg_swap_cond(cond);
1072     }
1073
1074     i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
1075     if (i >= 0) {
1076         goto do_brcond_const;
1077     }
1078
1079     switch (cond) {
1080     case TCG_COND_LT:
1081     case TCG_COND_GE:
1082         /*
1083          * Simplify LT/GE comparisons vs zero to a single compare
1084          * vs the high word of the input.
1085          */
1086         if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
1087             arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
1088             goto do_brcond_high;
1089         }
1090         break;
1091
1092     case TCG_COND_NE:
1093         inv = 1;
1094         QEMU_FALLTHROUGH;
1095     case TCG_COND_EQ:
1096         /*
1097          * Simplify EQ/NE comparisons where one of the pairs
1098          * can be simplified.
1099          */
1100         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1101                                      op->args[2], cond);
1102         switch (i ^ inv) {
1103         case 0:
1104             goto do_brcond_const;
1105         case 1:
1106             goto do_brcond_high;
1107         }
1108
1109         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1110                                      op->args[3], cond);
1111         switch (i ^ inv) {
1112         case 0:
1113             goto do_brcond_const;
1114         case 1:
1115             op->opc = INDEX_op_brcond_i32;
1116             op->args[1] = op->args[2];
1117             op->args[2] = cond;
1118             op->args[3] = label;
1119             break;
1120         }
1121         break;
1122
1123     default:
1124         break;
1125
1126     do_brcond_high:
1127         op->opc = INDEX_op_brcond_i32;
1128         op->args[0] = op->args[1];
1129         op->args[1] = op->args[3];
1130         op->args[2] = cond;
1131         op->args[3] = label;
1132         break;
1133
1134     do_brcond_const:
1135         if (i == 0) {
1136             tcg_op_remove(ctx->tcg, op);
1137             return true;
1138         }
1139         op->opc = INDEX_op_br;
1140         op->args[0] = label;
1141         break;
1142     }
1143     return false;
1144 }
1145
1146 static bool fold_bswap(OptContext *ctx, TCGOp *op)
1147 {
1148     uint64_t z_mask, s_mask, sign;
1149
1150     if (arg_is_const(op->args[1])) {
1151         uint64_t t = arg_info(op->args[1])->val;
1152
1153         t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
1154         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1155     }
1156
1157     z_mask = arg_info(op->args[1])->z_mask;
1158
1159     switch (op->opc) {
1160     case INDEX_op_bswap16_i32:
1161     case INDEX_op_bswap16_i64:
1162         z_mask = bswap16(z_mask);
1163         sign = INT16_MIN;
1164         break;
1165     case INDEX_op_bswap32_i32:
1166     case INDEX_op_bswap32_i64:
1167         z_mask = bswap32(z_mask);
1168         sign = INT32_MIN;
1169         break;
1170     case INDEX_op_bswap64_i64:
1171         z_mask = bswap64(z_mask);
1172         sign = INT64_MIN;
1173         break;
1174     default:
1175         g_assert_not_reached();
1176     }
1177     s_mask = smask_from_zmask(z_mask);
1178
1179     switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1180     case TCG_BSWAP_OZ:
1181         break;
1182     case TCG_BSWAP_OS:
1183         /* If the sign bit may be 1, force all the bits above to 1. */
1184         if (z_mask & sign) {
1185             z_mask |= sign;
1186             s_mask = sign << 1;
1187         }
1188         break;
1189     default:
1190         /* The high bits are undefined: force all bits above the sign to 1. */
1191         z_mask |= sign << 1;
1192         s_mask = 0;
1193         break;
1194     }
1195     ctx->z_mask = z_mask;
1196     ctx->s_mask = s_mask;
1197
1198     return fold_masks(ctx, op);
1199 }
1200
1201 static bool fold_call(OptContext *ctx, TCGOp *op)
1202 {
1203     TCGContext *s = ctx->tcg;
1204     int nb_oargs = TCGOP_CALLO(op);
1205     int nb_iargs = TCGOP_CALLI(op);
1206     int flags, i;
1207
1208     init_arguments(ctx, op, nb_oargs + nb_iargs);
1209     copy_propagate(ctx, op, nb_oargs, nb_iargs);
1210
1211     /* If the function reads or writes globals, reset temp data. */
1212     flags = tcg_call_flags(op);
1213     if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1214         int nb_globals = s->nb_globals;
1215
1216         for (i = 0; i < nb_globals; i++) {
1217             if (test_bit(i, ctx->temps_used.l)) {
1218                 reset_ts(&ctx->tcg->temps[i]);
1219             }
1220         }
1221     }
1222
1223     /* Reset temp data for outputs. */
1224     for (i = 0; i < nb_oargs; i++) {
1225         reset_temp(op->args[i]);
1226     }
1227
1228     /* Stop optimizing MB across calls. */
1229     ctx->prev_mb = NULL;
1230     return true;
1231 }
1232
1233 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1234 {
1235     uint64_t z_mask;
1236
1237     if (arg_is_const(op->args[1])) {
1238         uint64_t t = arg_info(op->args[1])->val;
1239
1240         if (t != 0) {
1241             t = do_constant_folding(op->opc, ctx->type, t, 0);
1242             return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1243         }
1244         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1245     }
1246
1247     switch (ctx->type) {
1248     case TCG_TYPE_I32:
1249         z_mask = 31;
1250         break;
1251     case TCG_TYPE_I64:
1252         z_mask = 63;
1253         break;
1254     default:
1255         g_assert_not_reached();
1256     }
1257     ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
1258     ctx->s_mask = smask_from_zmask(ctx->z_mask);
1259     return false;
1260 }
1261
1262 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1263 {
1264     if (fold_const1(ctx, op)) {
1265         return true;
1266     }
1267
1268     switch (ctx->type) {
1269     case TCG_TYPE_I32:
1270         ctx->z_mask = 32 | 31;
1271         break;
1272     case TCG_TYPE_I64:
1273         ctx->z_mask = 64 | 63;
1274         break;
1275     default:
1276         g_assert_not_reached();
1277     }
1278     ctx->s_mask = smask_from_zmask(ctx->z_mask);
1279     return false;
1280 }
1281
1282 static bool fold_deposit(OptContext *ctx, TCGOp *op)
1283 {
1284     TCGOpcode and_opc;
1285
1286     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1287         uint64_t t1 = arg_info(op->args[1])->val;
1288         uint64_t t2 = arg_info(op->args[2])->val;
1289
1290         t1 = deposit64(t1, op->args[3], op->args[4], t2);
1291         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1292     }
1293
1294     switch (ctx->type) {
1295     case TCG_TYPE_I32:
1296         and_opc = INDEX_op_and_i32;
1297         break;
1298     case TCG_TYPE_I64:
1299         and_opc = INDEX_op_and_i64;
1300         break;
1301     default:
1302         g_assert_not_reached();
1303     }
1304
1305     /* Inserting a value into zero at offset 0. */
1306     if (arg_is_const(op->args[1])
1307         && arg_info(op->args[1])->val == 0
1308         && op->args[3] == 0) {
1309         uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
1310
1311         op->opc = and_opc;
1312         op->args[1] = op->args[2];
1313         op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask));
1314         ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
1315         return false;
1316     }
1317
1318     /* Inserting zero into a value. */
1319     if (arg_is_const(op->args[2])
1320         && arg_info(op->args[2])->val == 0) {
1321         uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
1322
1323         op->opc = and_opc;
1324         op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask));
1325         ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
1326         return false;
1327     }
1328
1329     ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
1330                             op->args[3], op->args[4],
1331                             arg_info(op->args[2])->z_mask);
1332     return false;
1333 }
1334
1335 static bool fold_divide(OptContext *ctx, TCGOp *op)
1336 {
1337     if (fold_const2(ctx, op) ||
1338         fold_xi_to_x(ctx, op, 1)) {
1339         return true;
1340     }
1341     return false;
1342 }
1343
1344 static bool fold_dup(OptContext *ctx, TCGOp *op)
1345 {
1346     if (arg_is_const(op->args[1])) {
1347         uint64_t t = arg_info(op->args[1])->val;
1348         t = dup_const(TCGOP_VECE(op), t);
1349         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1350     }
1351     return false;
1352 }
1353
1354 static bool fold_dup2(OptContext *ctx, TCGOp *op)
1355 {
1356     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1357         uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1358                                arg_info(op->args[2])->val);
1359         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1360     }
1361
1362     if (args_are_copies(op->args[1], op->args[2])) {
1363         op->opc = INDEX_op_dup_vec;
1364         TCGOP_VECE(op) = MO_32;
1365     }
1366     return false;
1367 }
1368
1369 static bool fold_eqv(OptContext *ctx, TCGOp *op)
1370 {
1371     if (fold_const2_commutative(ctx, op) ||
1372         fold_xi_to_x(ctx, op, -1) ||
1373         fold_xi_to_not(ctx, op, 0)) {
1374         return true;
1375     }
1376
1377     ctx->s_mask = arg_info(op->args[1])->s_mask
1378                 & arg_info(op->args[2])->s_mask;
1379     return false;
1380 }
1381
1382 static bool fold_extract(OptContext *ctx, TCGOp *op)
1383 {
1384     uint64_t z_mask_old, z_mask;
1385     int pos = op->args[2];
1386     int len = op->args[3];
1387
1388     if (arg_is_const(op->args[1])) {
1389         uint64_t t;
1390
1391         t = arg_info(op->args[1])->val;
1392         t = extract64(t, pos, len);
1393         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1394     }
1395
1396     z_mask_old = arg_info(op->args[1])->z_mask;
1397     z_mask = extract64(z_mask_old, pos, len);
1398     if (pos == 0) {
1399         ctx->a_mask = z_mask_old ^ z_mask;
1400     }
1401     ctx->z_mask = z_mask;
1402     ctx->s_mask = smask_from_zmask(z_mask);
1403
1404     return fold_masks(ctx, op);
1405 }
1406
1407 static bool fold_extract2(OptContext *ctx, TCGOp *op)
1408 {
1409     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1410         uint64_t v1 = arg_info(op->args[1])->val;
1411         uint64_t v2 = arg_info(op->args[2])->val;
1412         int shr = op->args[3];
1413
1414         if (op->opc == INDEX_op_extract2_i64) {
1415             v1 >>= shr;
1416             v2 <<= 64 - shr;
1417         } else {
1418             v1 = (uint32_t)v1 >> shr;
1419             v2 = (uint64_t)((int32_t)v2 << (32 - shr));
1420         }
1421         return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1422     }
1423     return false;
1424 }
1425
1426 static bool fold_exts(OptContext *ctx, TCGOp *op)
1427 {
1428     uint64_t s_mask_old, s_mask, z_mask, sign;
1429     bool type_change = false;
1430
1431     if (fold_const1(ctx, op)) {
1432         return true;
1433     }
1434
1435     z_mask = arg_info(op->args[1])->z_mask;
1436     s_mask = arg_info(op->args[1])->s_mask;
1437     s_mask_old = s_mask;
1438
1439     switch (op->opc) {
1440     CASE_OP_32_64(ext8s):
1441         sign = INT8_MIN;
1442         z_mask = (uint8_t)z_mask;
1443         break;
1444     CASE_OP_32_64(ext16s):
1445         sign = INT16_MIN;
1446         z_mask = (uint16_t)z_mask;
1447         break;
1448     case INDEX_op_ext_i32_i64:
1449         type_change = true;
1450         QEMU_FALLTHROUGH;
1451     case INDEX_op_ext32s_i64:
1452         sign = INT32_MIN;
1453         z_mask = (uint32_t)z_mask;
1454         break;
1455     default:
1456         g_assert_not_reached();
1457     }
1458
1459     if (z_mask & sign) {
1460         z_mask |= sign;
1461     }
1462     s_mask |= sign << 1;
1463
1464     ctx->z_mask = z_mask;
1465     ctx->s_mask = s_mask;
1466     if (!type_change) {
1467         ctx->a_mask = s_mask & ~s_mask_old;
1468     }
1469
1470     return fold_masks(ctx, op);
1471 }
1472
1473 static bool fold_extu(OptContext *ctx, TCGOp *op)
1474 {
1475     uint64_t z_mask_old, z_mask;
1476     bool type_change = false;
1477
1478     if (fold_const1(ctx, op)) {
1479         return true;
1480     }
1481
1482     z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
1483
1484     switch (op->opc) {
1485     CASE_OP_32_64(ext8u):
1486         z_mask = (uint8_t)z_mask;
1487         break;
1488     CASE_OP_32_64(ext16u):
1489         z_mask = (uint16_t)z_mask;
1490         break;
1491     case INDEX_op_extrl_i64_i32:
1492     case INDEX_op_extu_i32_i64:
1493         type_change = true;
1494         QEMU_FALLTHROUGH;
1495     case INDEX_op_ext32u_i64:
1496         z_mask = (uint32_t)z_mask;
1497         break;
1498     case INDEX_op_extrh_i64_i32:
1499         type_change = true;
1500         z_mask >>= 32;
1501         break;
1502     default:
1503         g_assert_not_reached();
1504     }
1505
1506     ctx->z_mask = z_mask;
1507     ctx->s_mask = smask_from_zmask(z_mask);
1508     if (!type_change) {
1509         ctx->a_mask = z_mask_old ^ z_mask;
1510     }
1511     return fold_masks(ctx, op);
1512 }
1513
1514 static bool fold_mb(OptContext *ctx, TCGOp *op)
1515 {
1516     /* Eliminate duplicate and redundant fence instructions.  */
1517     if (ctx->prev_mb) {
1518         /*
1519          * Merge two barriers of the same type into one,
1520          * or a weaker barrier into a stronger one,
1521          * or two weaker barriers into a stronger one.
1522          *   mb X; mb Y => mb X|Y
1523          *   mb; strl => mb; st
1524          *   ldaq; mb => ld; mb
1525          *   ldaq; strl => ld; mb; st
1526          * Other combinations are also merged into a strong
1527          * barrier.  This is stricter than specified but for
1528          * the purposes of TCG is better than not optimizing.
1529          */
1530         ctx->prev_mb->args[0] |= op->args[0];
1531         tcg_op_remove(ctx->tcg, op);
1532     } else {
1533         ctx->prev_mb = op;
1534     }
1535     return true;
1536 }
1537
1538 static bool fold_mov(OptContext *ctx, TCGOp *op)
1539 {
1540     return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1541 }
1542
1543 static bool fold_movcond(OptContext *ctx, TCGOp *op)
1544 {
1545     TCGCond cond = op->args[5];
1546     int i;
1547
1548     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1549         op->args[5] = cond = tcg_swap_cond(cond);
1550     }
1551     /*
1552      * Canonicalize the "false" input reg to match the destination reg so
1553      * that the tcg backend can implement a "move if true" operation.
1554      */
1555     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1556         op->args[5] = cond = tcg_invert_cond(cond);
1557     }
1558
1559     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1560     if (i >= 0) {
1561         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
1562     }
1563
1564     ctx->z_mask = arg_info(op->args[3])->z_mask
1565                 | arg_info(op->args[4])->z_mask;
1566     ctx->s_mask = arg_info(op->args[3])->s_mask
1567                 & arg_info(op->args[4])->s_mask;
1568
1569     if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1570         uint64_t tv = arg_info(op->args[3])->val;
1571         uint64_t fv = arg_info(op->args[4])->val;
1572         TCGOpcode opc, negopc = 0;
1573
1574         switch (ctx->type) {
1575         case TCG_TYPE_I32:
1576             opc = INDEX_op_setcond_i32;
1577             if (TCG_TARGET_HAS_negsetcond_i32) {
1578                 negopc = INDEX_op_negsetcond_i32;
1579             }
1580             tv = (int32_t)tv;
1581             fv = (int32_t)fv;
1582             break;
1583         case TCG_TYPE_I64:
1584             opc = INDEX_op_setcond_i64;
1585             if (TCG_TARGET_HAS_negsetcond_i64) {
1586                 negopc = INDEX_op_negsetcond_i64;
1587             }
1588             break;
1589         default:
1590             g_assert_not_reached();
1591         }
1592
1593         if (tv == 1 && fv == 0) {
1594             op->opc = opc;
1595             op->args[3] = cond;
1596         } else if (fv == 1 && tv == 0) {
1597             op->opc = opc;
1598             op->args[3] = tcg_invert_cond(cond);
1599         } else if (negopc) {
1600             if (tv == -1 && fv == 0) {
1601                 op->opc = negopc;
1602                 op->args[3] = cond;
1603             } else if (fv == -1 && tv == 0) {
1604                 op->opc = negopc;
1605                 op->args[3] = tcg_invert_cond(cond);
1606             }
1607         }
1608     }
1609     return false;
1610 }
1611
1612 static bool fold_mul(OptContext *ctx, TCGOp *op)
1613 {
1614     if (fold_const2(ctx, op) ||
1615         fold_xi_to_i(ctx, op, 0) ||
1616         fold_xi_to_x(ctx, op, 1)) {
1617         return true;
1618     }
1619     return false;
1620 }
1621
1622 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
1623 {
1624     if (fold_const2_commutative(ctx, op) ||
1625         fold_xi_to_i(ctx, op, 0)) {
1626         return true;
1627     }
1628     return false;
1629 }
1630
1631 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
1632 {
1633     swap_commutative(op->args[0], &op->args[2], &op->args[3]);
1634
1635     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1636         uint64_t a = arg_info(op->args[2])->val;
1637         uint64_t b = arg_info(op->args[3])->val;
1638         uint64_t h, l;
1639         TCGArg rl, rh;
1640         TCGOp *op2;
1641
1642         switch (op->opc) {
1643         case INDEX_op_mulu2_i32:
1644             l = (uint64_t)(uint32_t)a * (uint32_t)b;
1645             h = (int32_t)(l >> 32);
1646             l = (int32_t)l;
1647             break;
1648         case INDEX_op_muls2_i32:
1649             l = (int64_t)(int32_t)a * (int32_t)b;
1650             h = l >> 32;
1651             l = (int32_t)l;
1652             break;
1653         case INDEX_op_mulu2_i64:
1654             mulu64(&l, &h, a, b);
1655             break;
1656         case INDEX_op_muls2_i64:
1657             muls64(&l, &h, a, b);
1658             break;
1659         default:
1660             g_assert_not_reached();
1661         }
1662
1663         rl = op->args[0];
1664         rh = op->args[1];
1665
1666         /* The proper opcode is supplied by tcg_opt_gen_mov. */
1667         op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
1668
1669         tcg_opt_gen_movi(ctx, op, rl, l);
1670         tcg_opt_gen_movi(ctx, op2, rh, h);
1671         return true;
1672     }
1673     return false;
1674 }
1675
1676 static bool fold_nand(OptContext *ctx, TCGOp *op)
1677 {
1678     if (fold_const2_commutative(ctx, op) ||
1679         fold_xi_to_not(ctx, op, -1)) {
1680         return true;
1681     }
1682
1683     ctx->s_mask = arg_info(op->args[1])->s_mask
1684                 & arg_info(op->args[2])->s_mask;
1685     return false;
1686 }
1687
1688 static bool fold_neg(OptContext *ctx, TCGOp *op)
1689 {
1690     uint64_t z_mask;
1691
1692     if (fold_const1(ctx, op)) {
1693         return true;
1694     }
1695
1696     /* Set to 1 all bits to the left of the rightmost.  */
1697     z_mask = arg_info(op->args[1])->z_mask;
1698     ctx->z_mask = -(z_mask & -z_mask);
1699
1700     /*
1701      * Because of fold_sub_to_neg, we want to always return true,
1702      * via finish_folding.
1703      */
1704     finish_folding(ctx, op);
1705     return true;
1706 }
1707
1708 static bool fold_nor(OptContext *ctx, TCGOp *op)
1709 {
1710     if (fold_const2_commutative(ctx, op) ||
1711         fold_xi_to_not(ctx, op, 0)) {
1712         return true;
1713     }
1714
1715     ctx->s_mask = arg_info(op->args[1])->s_mask
1716                 & arg_info(op->args[2])->s_mask;
1717     return false;
1718 }
1719
1720 static bool fold_not(OptContext *ctx, TCGOp *op)
1721 {
1722     if (fold_const1(ctx, op)) {
1723         return true;
1724     }
1725
1726     ctx->s_mask = arg_info(op->args[1])->s_mask;
1727
1728     /* Because of fold_to_not, we want to always return true, via finish. */
1729     finish_folding(ctx, op);
1730     return true;
1731 }
1732
1733 static bool fold_or(OptContext *ctx, TCGOp *op)
1734 {
1735     if (fold_const2_commutative(ctx, op) ||
1736         fold_xi_to_x(ctx, op, 0) ||
1737         fold_xx_to_x(ctx, op)) {
1738         return true;
1739     }
1740
1741     ctx->z_mask = arg_info(op->args[1])->z_mask
1742                 | arg_info(op->args[2])->z_mask;
1743     ctx->s_mask = arg_info(op->args[1])->s_mask
1744                 & arg_info(op->args[2])->s_mask;
1745     return fold_masks(ctx, op);
1746 }
1747
1748 static bool fold_orc(OptContext *ctx, TCGOp *op)
1749 {
1750     if (fold_const2(ctx, op) ||
1751         fold_xx_to_i(ctx, op, -1) ||
1752         fold_xi_to_x(ctx, op, -1) ||
1753         fold_ix_to_not(ctx, op, 0)) {
1754         return true;
1755     }
1756
1757     ctx->s_mask = arg_info(op->args[1])->s_mask
1758                 & arg_info(op->args[2])->s_mask;
1759     return false;
1760 }
1761
1762 static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
1763 {
1764     const TCGOpDef *def = &tcg_op_defs[op->opc];
1765     MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
1766     MemOp mop = get_memop(oi);
1767     int width = 8 * memop_size(mop);
1768
1769     if (width < 64) {
1770         ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
1771         if (!(mop & MO_SIGN)) {
1772             ctx->z_mask = MAKE_64BIT_MASK(0, width);
1773             ctx->s_mask <<= 1;
1774         }
1775     }
1776
1777     /* Opcodes that touch guest memory stop the mb optimization.  */
1778     ctx->prev_mb = NULL;
1779     return false;
1780 }
1781
1782 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
1783 {
1784     /* Opcodes that touch guest memory stop the mb optimization.  */
1785     ctx->prev_mb = NULL;
1786     return false;
1787 }
1788
1789 static bool fold_remainder(OptContext *ctx, TCGOp *op)
1790 {
1791     if (fold_const2(ctx, op) ||
1792         fold_xx_to_i(ctx, op, 0)) {
1793         return true;
1794     }
1795     return false;
1796 }
1797
1798 static bool fold_setcond(OptContext *ctx, TCGOp *op)
1799 {
1800     TCGCond cond = op->args[3];
1801     int i;
1802
1803     if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
1804         op->args[3] = cond = tcg_swap_cond(cond);
1805     }
1806
1807     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1808     if (i >= 0) {
1809         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1810     }
1811
1812     ctx->z_mask = 1;
1813     ctx->s_mask = smask_from_zmask(1);
1814     return false;
1815 }
1816
1817 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
1818 {
1819     TCGCond cond = op->args[3];
1820     int i;
1821
1822     if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
1823         op->args[3] = cond = tcg_swap_cond(cond);
1824     }
1825
1826     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1827     if (i >= 0) {
1828         return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
1829     }
1830
1831     /* Value is {0,-1} so all bits are repetitions of the sign. */
1832     ctx->s_mask = -1;
1833     return false;
1834 }
1835
1836
1837 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
1838 {
1839     TCGCond cond = op->args[5];
1840     int i, inv = 0;
1841
1842     if (swap_commutative2(&op->args[1], &op->args[3])) {
1843         op->args[5] = cond = tcg_swap_cond(cond);
1844     }
1845
1846     i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
1847     if (i >= 0) {
1848         goto do_setcond_const;
1849     }
1850
1851     switch (cond) {
1852     case TCG_COND_LT:
1853     case TCG_COND_GE:
1854         /*
1855          * Simplify LT/GE comparisons vs zero to a single compare
1856          * vs the high word of the input.
1857          */
1858         if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
1859             arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
1860             goto do_setcond_high;
1861         }
1862         break;
1863
1864     case TCG_COND_NE:
1865         inv = 1;
1866         QEMU_FALLTHROUGH;
1867     case TCG_COND_EQ:
1868         /*
1869          * Simplify EQ/NE comparisons where one of the pairs
1870          * can be simplified.
1871          */
1872         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1873                                      op->args[3], cond);
1874         switch (i ^ inv) {
1875         case 0:
1876             goto do_setcond_const;
1877         case 1:
1878             goto do_setcond_high;
1879         }
1880
1881         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
1882                                      op->args[4], cond);
1883         switch (i ^ inv) {
1884         case 0:
1885             goto do_setcond_const;
1886         case 1:
1887             op->args[2] = op->args[3];
1888             op->args[3] = cond;
1889             op->opc = INDEX_op_setcond_i32;
1890             break;
1891         }
1892         break;
1893
1894     default:
1895         break;
1896
1897     do_setcond_high:
1898         op->args[1] = op->args[2];
1899         op->args[2] = op->args[4];
1900         op->args[3] = cond;
1901         op->opc = INDEX_op_setcond_i32;
1902         break;
1903     }
1904
1905     ctx->z_mask = 1;
1906     ctx->s_mask = smask_from_zmask(1);
1907     return false;
1908
1909  do_setcond_const:
1910     return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1911 }
1912
1913 static bool fold_sextract(OptContext *ctx, TCGOp *op)
1914 {
1915     uint64_t z_mask, s_mask, s_mask_old;
1916     int pos = op->args[2];
1917     int len = op->args[3];
1918
1919     if (arg_is_const(op->args[1])) {
1920         uint64_t t;
1921
1922         t = arg_info(op->args[1])->val;
1923         t = sextract64(t, pos, len);
1924         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1925     }
1926
1927     z_mask = arg_info(op->args[1])->z_mask;
1928     z_mask = sextract64(z_mask, pos, len);
1929     ctx->z_mask = z_mask;
1930
1931     s_mask_old = arg_info(op->args[1])->s_mask;
1932     s_mask = sextract64(s_mask_old, pos, len);
1933     s_mask |= MAKE_64BIT_MASK(len, 64 - len);
1934     ctx->s_mask = s_mask;
1935
1936     if (pos == 0) {
1937         ctx->a_mask = s_mask & ~s_mask_old;
1938     }
1939
1940     return fold_masks(ctx, op);
1941 }
1942
1943 static bool fold_shift(OptContext *ctx, TCGOp *op)
1944 {
1945     uint64_t s_mask, z_mask, sign;
1946
1947     if (fold_const2(ctx, op) ||
1948         fold_ix_to_i(ctx, op, 0) ||
1949         fold_xi_to_x(ctx, op, 0)) {
1950         return true;
1951     }
1952
1953     s_mask = arg_info(op->args[1])->s_mask;
1954     z_mask = arg_info(op->args[1])->z_mask;
1955
1956     if (arg_is_const(op->args[2])) {
1957         int sh = arg_info(op->args[2])->val;
1958
1959         ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
1960
1961         s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
1962         ctx->s_mask = smask_from_smask(s_mask);
1963
1964         return fold_masks(ctx, op);
1965     }
1966
1967     switch (op->opc) {
1968     CASE_OP_32_64(sar):
1969         /*
1970          * Arithmetic right shift will not reduce the number of
1971          * input sign repetitions.
1972          */
1973         ctx->s_mask = s_mask;
1974         break;
1975     CASE_OP_32_64(shr):
1976         /*
1977          * If the sign bit is known zero, then logical right shift
1978          * will not reduced the number of input sign repetitions.
1979          */
1980         sign = (s_mask & -s_mask) >> 1;
1981         if (!(z_mask & sign)) {
1982             ctx->s_mask = s_mask;
1983         }
1984         break;
1985     default:
1986         break;
1987     }
1988
1989     return false;
1990 }
1991
1992 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
1993 {
1994     TCGOpcode neg_op;
1995     bool have_neg;
1996
1997     if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
1998         return false;
1999     }
2000
2001     switch (ctx->type) {
2002     case TCG_TYPE_I32:
2003         neg_op = INDEX_op_neg_i32;
2004         have_neg = TCG_TARGET_HAS_neg_i32;
2005         break;
2006     case TCG_TYPE_I64:
2007         neg_op = INDEX_op_neg_i64;
2008         have_neg = TCG_TARGET_HAS_neg_i64;
2009         break;
2010     case TCG_TYPE_V64:
2011     case TCG_TYPE_V128:
2012     case TCG_TYPE_V256:
2013         neg_op = INDEX_op_neg_vec;
2014         have_neg = (TCG_TARGET_HAS_neg_vec &&
2015                     tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2016         break;
2017     default:
2018         g_assert_not_reached();
2019     }
2020     if (have_neg) {
2021         op->opc = neg_op;
2022         op->args[1] = op->args[2];
2023         return fold_neg(ctx, op);
2024     }
2025     return false;
2026 }
2027
2028 /* We cannot as yet do_constant_folding with vectors. */
2029 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2030 {
2031     if (fold_xx_to_i(ctx, op, 0) ||
2032         fold_xi_to_x(ctx, op, 0) ||
2033         fold_sub_to_neg(ctx, op)) {
2034         return true;
2035     }
2036     return false;
2037 }
2038
2039 static bool fold_sub(OptContext *ctx, TCGOp *op)
2040 {
2041     return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
2042 }
2043
2044 static bool fold_sub2(OptContext *ctx, TCGOp *op)
2045 {
2046     return fold_addsub2(ctx, op, false);
2047 }
2048
2049 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2050 {
2051     /* We can't do any folding with a load, but we can record bits. */
2052     switch (op->opc) {
2053     CASE_OP_32_64(ld8s):
2054         ctx->s_mask = MAKE_64BIT_MASK(8, 56);
2055         break;
2056     CASE_OP_32_64(ld8u):
2057         ctx->z_mask = MAKE_64BIT_MASK(0, 8);
2058         ctx->s_mask = MAKE_64BIT_MASK(9, 55);
2059         break;
2060     CASE_OP_32_64(ld16s):
2061         ctx->s_mask = MAKE_64BIT_MASK(16, 48);
2062         break;
2063     CASE_OP_32_64(ld16u):
2064         ctx->z_mask = MAKE_64BIT_MASK(0, 16);
2065         ctx->s_mask = MAKE_64BIT_MASK(17, 47);
2066         break;
2067     case INDEX_op_ld32s_i64:
2068         ctx->s_mask = MAKE_64BIT_MASK(32, 32);
2069         break;
2070     case INDEX_op_ld32u_i64:
2071         ctx->z_mask = MAKE_64BIT_MASK(0, 32);
2072         ctx->s_mask = MAKE_64BIT_MASK(33, 31);
2073         break;
2074     default:
2075         g_assert_not_reached();
2076     }
2077     return false;
2078 }
2079
2080 static bool fold_xor(OptContext *ctx, TCGOp *op)
2081 {
2082     if (fold_const2_commutative(ctx, op) ||
2083         fold_xx_to_i(ctx, op, 0) ||
2084         fold_xi_to_x(ctx, op, 0) ||
2085         fold_xi_to_not(ctx, op, -1)) {
2086         return true;
2087     }
2088
2089     ctx->z_mask = arg_info(op->args[1])->z_mask
2090                 | arg_info(op->args[2])->z_mask;
2091     ctx->s_mask = arg_info(op->args[1])->s_mask
2092                 & arg_info(op->args[2])->s_mask;
2093     return fold_masks(ctx, op);
2094 }
2095
2096 /* Propagate constants and copies, fold constant expressions. */
2097 void tcg_optimize(TCGContext *s)
2098 {
2099     int nb_temps, i;
2100     TCGOp *op, *op_next;
2101     OptContext ctx = { .tcg = s };
2102
2103     /* Array VALS has an element for each temp.
2104        If this temp holds a constant then its value is kept in VALS' element.
2105        If this temp is a copy of other ones then the other copies are
2106        available through the doubly linked circular list. */
2107
2108     nb_temps = s->nb_temps;
2109     for (i = 0; i < nb_temps; ++i) {
2110         s->temps[i].state_ptr = NULL;
2111     }
2112
2113     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2114         TCGOpcode opc = op->opc;
2115         const TCGOpDef *def;
2116         bool done = false;
2117
2118         /* Calls are special. */
2119         if (opc == INDEX_op_call) {
2120             fold_call(&ctx, op);
2121             continue;
2122         }
2123
2124         def = &tcg_op_defs[opc];
2125         init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
2126         copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
2127
2128         /* Pre-compute the type of the operation. */
2129         if (def->flags & TCG_OPF_VECTOR) {
2130             ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
2131         } else if (def->flags & TCG_OPF_64BIT) {
2132             ctx.type = TCG_TYPE_I64;
2133         } else {
2134             ctx.type = TCG_TYPE_I32;
2135         }
2136
2137         /* Assume all bits affected, no bits known zero, no sign reps. */
2138         ctx.a_mask = -1;
2139         ctx.z_mask = -1;
2140         ctx.s_mask = 0;
2141
2142         /*
2143          * Process each opcode.
2144          * Sorted alphabetically by opcode as much as possible.
2145          */
2146         switch (opc) {
2147         CASE_OP_32_64(add):
2148             done = fold_add(&ctx, op);
2149             break;
2150         case INDEX_op_add_vec:
2151             done = fold_add_vec(&ctx, op);
2152             break;
2153         CASE_OP_32_64(add2):
2154             done = fold_add2(&ctx, op);
2155             break;
2156         CASE_OP_32_64_VEC(and):
2157             done = fold_and(&ctx, op);
2158             break;
2159         CASE_OP_32_64_VEC(andc):
2160             done = fold_andc(&ctx, op);
2161             break;
2162         CASE_OP_32_64(brcond):
2163             done = fold_brcond(&ctx, op);
2164             break;
2165         case INDEX_op_brcond2_i32:
2166             done = fold_brcond2(&ctx, op);
2167             break;
2168         CASE_OP_32_64(bswap16):
2169         CASE_OP_32_64(bswap32):
2170         case INDEX_op_bswap64_i64:
2171             done = fold_bswap(&ctx, op);
2172             break;
2173         CASE_OP_32_64(clz):
2174         CASE_OP_32_64(ctz):
2175             done = fold_count_zeros(&ctx, op);
2176             break;
2177         CASE_OP_32_64(ctpop):
2178             done = fold_ctpop(&ctx, op);
2179             break;
2180         CASE_OP_32_64(deposit):
2181             done = fold_deposit(&ctx, op);
2182             break;
2183         CASE_OP_32_64(div):
2184         CASE_OP_32_64(divu):
2185             done = fold_divide(&ctx, op);
2186             break;
2187         case INDEX_op_dup_vec:
2188             done = fold_dup(&ctx, op);
2189             break;
2190         case INDEX_op_dup2_vec:
2191             done = fold_dup2(&ctx, op);
2192             break;
2193         CASE_OP_32_64_VEC(eqv):
2194             done = fold_eqv(&ctx, op);
2195             break;
2196         CASE_OP_32_64(extract):
2197             done = fold_extract(&ctx, op);
2198             break;
2199         CASE_OP_32_64(extract2):
2200             done = fold_extract2(&ctx, op);
2201             break;
2202         CASE_OP_32_64(ext8s):
2203         CASE_OP_32_64(ext16s):
2204         case INDEX_op_ext32s_i64:
2205         case INDEX_op_ext_i32_i64:
2206             done = fold_exts(&ctx, op);
2207             break;
2208         CASE_OP_32_64(ext8u):
2209         CASE_OP_32_64(ext16u):
2210         case INDEX_op_ext32u_i64:
2211         case INDEX_op_extu_i32_i64:
2212         case INDEX_op_extrl_i64_i32:
2213         case INDEX_op_extrh_i64_i32:
2214             done = fold_extu(&ctx, op);
2215             break;
2216         CASE_OP_32_64(ld8s):
2217         CASE_OP_32_64(ld8u):
2218         CASE_OP_32_64(ld16s):
2219         CASE_OP_32_64(ld16u):
2220         case INDEX_op_ld32s_i64:
2221         case INDEX_op_ld32u_i64:
2222             done = fold_tcg_ld(&ctx, op);
2223             break;
2224         case INDEX_op_mb:
2225             done = fold_mb(&ctx, op);
2226             break;
2227         CASE_OP_32_64_VEC(mov):
2228             done = fold_mov(&ctx, op);
2229             break;
2230         CASE_OP_32_64(movcond):
2231             done = fold_movcond(&ctx, op);
2232             break;
2233         CASE_OP_32_64(mul):
2234             done = fold_mul(&ctx, op);
2235             break;
2236         CASE_OP_32_64(mulsh):
2237         CASE_OP_32_64(muluh):
2238             done = fold_mul_highpart(&ctx, op);
2239             break;
2240         CASE_OP_32_64(muls2):
2241         CASE_OP_32_64(mulu2):
2242             done = fold_multiply2(&ctx, op);
2243             break;
2244         CASE_OP_32_64_VEC(nand):
2245             done = fold_nand(&ctx, op);
2246             break;
2247         CASE_OP_32_64(neg):
2248             done = fold_neg(&ctx, op);
2249             break;
2250         CASE_OP_32_64_VEC(nor):
2251             done = fold_nor(&ctx, op);
2252             break;
2253         CASE_OP_32_64_VEC(not):
2254             done = fold_not(&ctx, op);
2255             break;
2256         CASE_OP_32_64_VEC(or):
2257             done = fold_or(&ctx, op);
2258             break;
2259         CASE_OP_32_64_VEC(orc):
2260             done = fold_orc(&ctx, op);
2261             break;
2262         case INDEX_op_qemu_ld_a32_i32:
2263         case INDEX_op_qemu_ld_a64_i32:
2264         case INDEX_op_qemu_ld_a32_i64:
2265         case INDEX_op_qemu_ld_a64_i64:
2266         case INDEX_op_qemu_ld_a32_i128:
2267         case INDEX_op_qemu_ld_a64_i128:
2268             done = fold_qemu_ld(&ctx, op);
2269             break;
2270         case INDEX_op_qemu_st8_a32_i32:
2271         case INDEX_op_qemu_st8_a64_i32:
2272         case INDEX_op_qemu_st_a32_i32:
2273         case INDEX_op_qemu_st_a64_i32:
2274         case INDEX_op_qemu_st_a32_i64:
2275         case INDEX_op_qemu_st_a64_i64:
2276         case INDEX_op_qemu_st_a32_i128:
2277         case INDEX_op_qemu_st_a64_i128:
2278             done = fold_qemu_st(&ctx, op);
2279             break;
2280         CASE_OP_32_64(rem):
2281         CASE_OP_32_64(remu):
2282             done = fold_remainder(&ctx, op);
2283             break;
2284         CASE_OP_32_64(rotl):
2285         CASE_OP_32_64(rotr):
2286         CASE_OP_32_64(sar):
2287         CASE_OP_32_64(shl):
2288         CASE_OP_32_64(shr):
2289             done = fold_shift(&ctx, op);
2290             break;
2291         CASE_OP_32_64(setcond):
2292             done = fold_setcond(&ctx, op);
2293             break;
2294         CASE_OP_32_64(negsetcond):
2295             done = fold_negsetcond(&ctx, op);
2296             break;
2297         case INDEX_op_setcond2_i32:
2298             done = fold_setcond2(&ctx, op);
2299             break;
2300         CASE_OP_32_64(sextract):
2301             done = fold_sextract(&ctx, op);
2302             break;
2303         CASE_OP_32_64(sub):
2304             done = fold_sub(&ctx, op);
2305             break;
2306         case INDEX_op_sub_vec:
2307             done = fold_sub_vec(&ctx, op);
2308             break;
2309         CASE_OP_32_64(sub2):
2310             done = fold_sub2(&ctx, op);
2311             break;
2312         CASE_OP_32_64_VEC(xor):
2313             done = fold_xor(&ctx, op);
2314             break;
2315         default:
2316             break;
2317         }
2318
2319         if (!done) {
2320             finish_folding(&ctx, op);
2321         }
2322     }
2323 }