1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2022 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "optabs.h"
35 #include "expmed.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 poly_uint64, poly_uint64,
56 machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 poly_uint64, poly_uint64,
61 rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67 unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT,
69 poly_uint64, poly_uint64,
70 rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, rtx,
74 machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76 unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79 unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83 unsigned HOST_WIDE_INT,
84 unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91 The mask is truncated if necessary to the width of mode MODE. The
92 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
94 static inline rtx
95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
97 return immed_wide_int_const
98 (wi::shifted_mask (bitpos, bitsize, complement,
99 GET_MODE_PRECISION (mode)), mode);
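/* As an illustrative example (not from the sources): mask_rtx
   (SImode, 8, 4, false) yields a mask with ones in bits 8..11,
   i.e. 0x00000f00, while the complemented form gives 0xfffff0ff.  */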
 102 /* Test whether a value is zero or a power of two. */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
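/* For example, 0, 1, 2 and 8 all satisfy this predicate (0 & -1 == 0,
   8 & 7 == 0), whereas 12 does not, since 12 & 11 == 8.  */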
106 struct init_expmed_rtl
108 rtx reg;
109 rtx plus;
110 rtx neg;
111 rtx mult;
112 rtx sdiv;
113 rtx udiv;
114 rtx sdiv_32;
115 rtx smod_32;
116 rtx wide_mult;
117 rtx wide_lshr;
118 rtx wide_trunc;
119 rtx shift;
120 rtx shift_mult;
121 rtx shift_add;
122 rtx shift_sub0;
123 rtx shift_sub1;
124 rtx zext;
125 rtx trunc;
127 rtx pow2[MAX_BITS_PER_WORD];
128 rtx cint[MAX_BITS_PER_WORD];
131 static void
132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133 scalar_int_mode from_mode, bool speed)
135 int to_size, from_size;
136 rtx which;
138 to_size = GET_MODE_PRECISION (to_mode);
139 from_size = GET_MODE_PRECISION (from_mode);
 141 /* Most partial integers have a precision less than the "full"
 142 integer they require for storage. If one doesn't, reduce its
 143 bit size by one here so that the size comparison below still
 144 works. */
145 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146 && pow2p_hwi (to_size))
147 to_size --;
148 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149 && pow2p_hwi (from_size))
150 from_size --;
152 /* Assume cost of zero-extend and sign-extend is the same. */
153 which = (to_size < from_size ? all->trunc : all->zext);
155 PUT_MODE (all->reg, from_mode);
156 set_convert_cost (to_mode, from_mode, speed,
157 set_src_cost (which, to_mode, speed));
158 /* Restore all->reg's mode. */
159 PUT_MODE (all->reg, to_mode);
162 static void
163 init_expmed_one_mode (struct init_expmed_rtl *all,
164 machine_mode mode, int speed)
166 int m, n, mode_bitsize;
167 machine_mode mode_from;
169 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
171 PUT_MODE (all->reg, mode);
172 PUT_MODE (all->plus, mode);
173 PUT_MODE (all->neg, mode);
174 PUT_MODE (all->mult, mode);
175 PUT_MODE (all->sdiv, mode);
176 PUT_MODE (all->udiv, mode);
177 PUT_MODE (all->sdiv_32, mode);
178 PUT_MODE (all->smod_32, mode);
179 PUT_MODE (all->wide_trunc, mode);
180 PUT_MODE (all->shift, mode);
181 PUT_MODE (all->shift_mult, mode);
182 PUT_MODE (all->shift_add, mode);
183 PUT_MODE (all->shift_sub0, mode);
184 PUT_MODE (all->shift_sub1, mode);
185 PUT_MODE (all->zext, mode);
186 PUT_MODE (all->trunc, mode);
188 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
189 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
190 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
191 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
192 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
194 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
195 <= 2 * add_cost (speed, mode)));
196 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
197 <= 4 * add_cost (speed, mode)));
199 set_shift_cost (speed, mode, 0, 0);
201 int cost = add_cost (speed, mode);
202 set_shiftadd_cost (speed, mode, 0, cost);
203 set_shiftsub0_cost (speed, mode, 0, cost);
204 set_shiftsub1_cost (speed, mode, 0, cost);
207 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
208 for (m = 1; m < n; m++)
210 XEXP (all->shift, 1) = all->cint[m];
211 XEXP (all->shift_mult, 1) = all->pow2[m];
213 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
214 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
215 speed));
216 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
217 speed));
218 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
219 speed));
222 scalar_int_mode int_mode_to;
223 if (is_a <scalar_int_mode> (mode, &int_mode_to))
225 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
226 mode_from = (machine_mode)(mode_from + 1))
227 init_expmed_one_conv (all, int_mode_to,
228 as_a <scalar_int_mode> (mode_from), speed);
230 scalar_int_mode wider_mode;
231 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
232 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
234 PUT_MODE (all->reg, mode);
235 PUT_MODE (all->zext, wider_mode);
236 PUT_MODE (all->wide_mult, wider_mode);
237 PUT_MODE (all->wide_lshr, wider_mode);
238 XEXP (all->wide_lshr, 1)
239 = gen_int_shift_amount (wider_mode, mode_bitsize);
241 set_mul_widen_cost (speed, wider_mode,
242 set_src_cost (all->wide_mult, wider_mode, speed));
243 set_mul_highpart_cost (speed, int_mode_to,
244 set_src_cost (all->wide_trunc,
245 int_mode_to, speed));
250 void
251 init_expmed (void)
253 struct init_expmed_rtl all;
254 machine_mode mode = QImode;
255 int m, speed;
257 memset (&all, 0, sizeof all);
258 for (m = 1; m < MAX_BITS_PER_WORD; m++)
260 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
261 all.cint[m] = GEN_INT (m);
264 /* Avoid using hard regs in ways which may be unsupported. */
265 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
266 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
267 all.neg = gen_rtx_NEG (mode, all.reg);
268 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
269 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
270 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
271 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
272 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
273 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
274 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
275 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
276 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
277 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
278 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
279 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
280 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
281 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
282 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
284 for (speed = 0; speed < 2; speed++)
286 crtl->maybe_hot_insn_p = speed;
287 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
289 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
290 mode = (machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
293 if (MIN_MODE_PARTIAL_INT != VOIDmode)
294 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
295 mode = (machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
298 if (MIN_MODE_VECTOR_INT != VOIDmode)
299 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
300 mode = (machine_mode)(mode + 1))
301 init_expmed_one_mode (&all, mode, speed);
304 if (alg_hash_used_p ())
306 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
307 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
309 else
310 set_alg_hash_used_p (true);
311 default_rtl_profile ();
313 ggc_free (all.trunc);
314 ggc_free (all.shift_sub1);
315 ggc_free (all.shift_sub0);
316 ggc_free (all.shift_add);
317 ggc_free (all.shift_mult);
318 ggc_free (all.shift);
319 ggc_free (all.wide_trunc);
320 ggc_free (all.wide_lshr);
321 ggc_free (all.wide_mult);
322 ggc_free (all.zext);
323 ggc_free (all.smod_32);
324 ggc_free (all.sdiv_32);
325 ggc_free (all.udiv);
326 ggc_free (all.sdiv);
327 ggc_free (all.mult);
328 ggc_free (all.neg);
329 ggc_free (all.plus);
330 ggc_free (all.reg);
333 /* Return an rtx representing minus the value of X.
334 MODE is the intended mode of the result,
335 useful if X is a CONST_INT. */
 337 rtx
 338 negate_rtx (machine_mode mode, rtx x)
340 rtx result = simplify_unary_operation (NEG, mode, x, mode);
342 if (result == 0)
343 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
345 return result;
348 /* Whether reverse storage order is supported on the target. */
349 static int reverse_storage_order_supported = -1;
351 /* Check whether reverse storage order is supported on the target. */
353 static void
354 check_reverse_storage_order_support (void)
356 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
358 reverse_storage_order_supported = 0;
359 sorry ("reverse scalar storage order");
361 else
362 reverse_storage_order_supported = 1;
365 /* Whether reverse FP storage order is supported on the target. */
366 static int reverse_float_storage_order_supported = -1;
368 /* Check whether reverse FP storage order is supported on the target. */
370 static void
371 check_reverse_float_storage_order_support (void)
373 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
375 reverse_float_storage_order_supported = 0;
376 sorry ("reverse floating-point scalar storage order");
378 else
379 reverse_float_storage_order_supported = 1;
 382 /* Return an rtx representing the value of X with reverse storage order.
 383 MODE is the intended mode of the result,
 384 useful if X is a CONST_INT. */
 386 rtx
 387 flip_storage_order (machine_mode mode, rtx x)
389 scalar_int_mode int_mode;
390 rtx result;
392 if (mode == QImode)
393 return x;
395 if (COMPLEX_MODE_P (mode))
397 rtx real = read_complex_part (x, false);
398 rtx imag = read_complex_part (x, true);
400 real = flip_storage_order (GET_MODE_INNER (mode), real);
401 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
403 return gen_rtx_CONCAT (mode, real, imag);
406 if (UNLIKELY (reverse_storage_order_supported < 0))
407 check_reverse_storage_order_support ();
409 if (!is_a <scalar_int_mode> (mode, &int_mode))
411 if (FLOAT_MODE_P (mode)
412 && UNLIKELY (reverse_float_storage_order_supported < 0))
413 check_reverse_float_storage_order_support ();
415 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
416 || !targetm.scalar_mode_supported_p (int_mode))
418 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
419 return x;
421 x = gen_lowpart (int_mode, x);
424 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
425 if (result == 0)
426 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
428 if (int_mode != mode)
429 result = gen_lowpart (mode, result);
431 return result;
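/* For instance, flipping the storage order of the SImode constant
   0x12345678 yields 0x78563412, which is exactly a byte swap; the
   code above relies on this by reducing the general case to BSWAP
   on an integer mode of the same size.  */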
434 /* If MODE is set, adjust bitfield memory MEM so that it points to the
435 first unit of mode MODE that contains a bitfield of size BITSIZE at
436 bit position BITNUM. If MODE is not set, return a BLKmode reference
437 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
438 of the field within the new memory. */
440 static rtx
441 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
442 unsigned HOST_WIDE_INT bitsize,
443 unsigned HOST_WIDE_INT bitnum,
444 unsigned HOST_WIDE_INT *new_bitnum)
446 scalar_int_mode imode;
447 if (mode.exists (&imode))
449 unsigned int unit = GET_MODE_BITSIZE (imode);
450 *new_bitnum = bitnum % unit;
451 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
452 return adjust_bitfield_address (mem, imode, offset);
454 else
456 *new_bitnum = bitnum % BITS_PER_UNIT;
457 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
458 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
459 / BITS_PER_UNIT);
460 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
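/* A worked example of the adjustment above: for a field at BITNUM == 70
   with MODE == SImode (32-bit units), *NEW_BITNUM becomes 70 % 32 == 6
   and the memory is advanced by (70 - 6) / 8 == 8 bytes, so the
   reference points to the SImode word containing the field.  */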
464 /* The caller wants to perform insertion or extraction PATTERN on a
465 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
466 BITREGION_START and BITREGION_END are as for store_bit_field
467 and FIELDMODE is the natural mode of the field.
469 Search for a mode that is compatible with the memory access
470 restrictions and (where applicable) with a register insertion or
471 extraction. Return the new memory on success, storing the adjusted
472 bit position in *NEW_BITNUM. Return null otherwise. */
474 static rtx
475 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
476 rtx op0, HOST_WIDE_INT bitsize,
477 HOST_WIDE_INT bitnum,
478 poly_uint64 bitregion_start,
479 poly_uint64 bitregion_end,
480 machine_mode fieldmode,
481 unsigned HOST_WIDE_INT *new_bitnum)
483 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
484 bitregion_end, MEM_ALIGN (op0),
485 MEM_VOLATILE_P (op0));
486 scalar_int_mode best_mode;
487 if (iter.next_mode (&best_mode))
489 /* We can use a memory in BEST_MODE. See whether this is true for
490 any wider modes. All other things being equal, we prefer to
491 use the widest mode possible because it tends to expose more
492 CSE opportunities. */
493 if (!iter.prefer_smaller_modes ())
495 /* Limit the search to the mode required by the corresponding
496 register insertion or extraction instruction, if any. */
497 scalar_int_mode limit_mode = word_mode;
498 extraction_insn insn;
499 if (get_best_reg_extraction_insn (&insn, pattern,
500 GET_MODE_BITSIZE (best_mode),
501 fieldmode))
502 limit_mode = insn.field_mode;
504 scalar_int_mode wider_mode;
505 while (iter.next_mode (&wider_mode)
506 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
507 best_mode = wider_mode;
509 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
510 new_bitnum);
512 return NULL_RTX;
515 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
516 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
517 offset is then BITNUM / BITS_PER_UNIT. */
519 static bool
520 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
521 machine_mode struct_mode)
523 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
524 if (BYTES_BIG_ENDIAN)
525 return (multiple_p (bitnum, BITS_PER_UNIT)
526 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
527 || multiple_p (bitnum + bitsize,
528 regsize * BITS_PER_UNIT)));
529 else
530 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
533 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
534 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
535 Return false if the access would touch memory outside the range
536 BITREGION_START to BITREGION_END for conformance to the C++ memory
537 model. */
539 static bool
540 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
541 unsigned HOST_WIDE_INT bitnum,
542 scalar_int_mode fieldmode,
543 poly_uint64 bitregion_start,
544 poly_uint64 bitregion_end)
546 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
548 /* -fstrict-volatile-bitfields must be enabled and we must have a
549 volatile MEM. */
550 if (!MEM_P (op0)
551 || !MEM_VOLATILE_P (op0)
552 || flag_strict_volatile_bitfields <= 0)
553 return false;
555 /* The bit size must not be larger than the field mode, and
556 the field mode must not be larger than a word. */
557 if (bitsize > modesize || modesize > BITS_PER_WORD)
558 return false;
560 /* Check for cases of unaligned fields that must be split. */
561 if (bitnum % modesize + bitsize > modesize)
562 return false;
 564 /* The memory must be sufficiently aligned for a MODESIZE access.
 565 This condition guarantees that the memory access will not
 566 touch anything after the end of the structure. */
567 if (MEM_ALIGN (op0) < modesize)
568 return false;
570 /* Check for cases where the C++ memory model applies. */
571 if (maybe_ne (bitregion_end, 0U)
572 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
573 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
574 bitregion_end)))
575 return false;
577 return true;
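/* As an illustration (hypothetical source, not from the testsuite):
   with -fstrict-volatile-bitfields, a field such as

     struct S { volatile unsigned int f : 8; };

   is accessed in the mode of its declared type (SImode on a typical
   32-bit-int target) when the checks above pass, rather than being
   narrowed to a QImode access.  */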
 580 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
 581 bit number BITNUM can be treated as a simple value of mode MODE.
 582 Store the byte offset in *BYTENUM if so. */
584 static bool
585 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
586 machine_mode mode, poly_uint64 *bytenum)
588 return (MEM_P (op0)
589 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
590 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
591 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
592 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
593 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
596 /* Try to use instruction INSV to store VALUE into a field of OP0.
597 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
598 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
599 are as for store_bit_field. */
601 static bool
602 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
603 opt_scalar_int_mode op0_mode,
604 unsigned HOST_WIDE_INT bitsize,
605 unsigned HOST_WIDE_INT bitnum,
606 rtx value, scalar_int_mode value_mode)
608 class expand_operand ops[4];
609 rtx value1;
610 rtx xop0 = op0;
611 rtx_insn *last = get_last_insn ();
612 bool copy_back = false;
614 scalar_int_mode op_mode = insv->field_mode;
615 unsigned int unit = GET_MODE_BITSIZE (op_mode);
616 if (bitsize == 0 || bitsize > unit)
617 return false;
619 if (MEM_P (xop0))
620 /* Get a reference to the first byte of the field. */
621 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
622 &bitnum);
623 else
625 /* Convert from counting within OP0 to counting in OP_MODE. */
626 if (BYTES_BIG_ENDIAN)
627 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
629 /* If xop0 is a register, we need it in OP_MODE
630 to make it acceptable to the format of insv. */
631 if (GET_CODE (xop0) == SUBREG)
633 /* If such a SUBREG can't be created, give up. */
634 if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
635 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
636 return false;
637 /* We can't just change the mode, because this might clobber op0,
638 and we will need the original value of op0 if insv fails. */
639 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
640 SUBREG_BYTE (xop0));
642 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
643 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
646 /* If the destination is a paradoxical subreg such that we need a
647 truncate to the inner mode, perform the insertion on a temporary and
648 truncate the result to the original destination. Note that we can't
649 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
650 X) 0)) is (reg:N X). */
651 if (GET_CODE (xop0) == SUBREG
652 && REG_P (SUBREG_REG (xop0))
653 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
654 op_mode))
656 rtx tem = gen_reg_rtx (op_mode);
657 emit_move_insn (tem, xop0);
658 xop0 = tem;
659 copy_back = true;
 662 /* There is a similar overflow check at the start of store_bit_field_1,
 663 but it only handles the case where the field lies completely
 664 outside the register. The field can also lie partially in the
 665 register, in which case we need to adjust BITSIZE for the partial
 666 overflow. Without this fix, pr48335-2.c would be broken on
 667 big-endian targets that have a bit-insert instruction, such as
 668 arm and aarch64. */
669 if (bitsize + bitnum > unit && bitnum < unit)
671 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
672 "destination object, data truncated into %wu-bit",
673 bitsize, unit - bitnum);
674 bitsize = unit - bitnum;
677 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
678 "backwards" from the size of the unit we are inserting into.
679 Otherwise, we count bits from the most significant on a
680 BYTES/BITS_BIG_ENDIAN machine. */
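/* For example, with UNIT == 32, BITSIZE == 4 and BITNUM == 8, the
   adjusted bit number below is 32 - 4 - 8 == 20: the same field
   counted from the opposite end of the unit.  */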
682 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
683 bitnum = unit - bitsize - bitnum;
685 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
686 value1 = value;
687 if (value_mode != op_mode)
689 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
691 rtx tmp;
692 /* Optimization: Don't bother really extending VALUE
693 if it has all the bits we will actually use. However,
694 if we must narrow it, be sure we do it correctly. */
696 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
698 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
699 if (! tmp)
700 tmp = simplify_gen_subreg (op_mode,
701 force_reg (value_mode, value1),
702 value_mode, 0);
704 else
706 tmp = gen_lowpart_if_possible (op_mode, value1);
707 if (! tmp)
708 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
710 value1 = tmp;
712 else if (CONST_INT_P (value))
713 value1 = gen_int_mode (INTVAL (value), op_mode);
714 else
 715 /* The parse phase is supposed to make VALUE's data type
716 match that of the component reference, which is a type
717 at least as wide as the field; so VALUE should have
718 a mode that corresponds to that type. */
719 gcc_assert (CONSTANT_P (value));
722 create_fixed_operand (&ops[0], xop0);
723 create_integer_operand (&ops[1], bitsize);
724 create_integer_operand (&ops[2], bitnum);
725 create_input_operand (&ops[3], value1, op_mode);
726 if (maybe_expand_insn (insv->icode, 4, ops))
728 if (copy_back)
729 convert_move (op0, xop0, true);
730 return true;
732 delete_insns_since (last);
733 return false;
736 /* A subroutine of store_bit_field, with the same arguments. Return true
737 if the operation could be implemented.
739 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
740 no other way of implementing the operation. If FALLBACK_P is false,
741 return false instead.
 743 If UNDEFINED_P is true then STR_RTX is undefined and may be set using
744 a subreg instead. */
746 static bool
747 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
748 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
749 machine_mode fieldmode,
750 rtx value, bool reverse, bool fallback_p, bool undefined_p)
752 rtx op0 = str_rtx;
754 while (GET_CODE (op0) == SUBREG)
756 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
757 op0 = SUBREG_REG (op0);
760 /* No action is needed if the target is a register and if the field
761 lies completely outside that register. This can occur if the source
762 code contains an out-of-bounds access to a small array. */
763 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
764 return true;
766 /* Use vec_set patterns for inserting parts of vectors whenever
767 available. */
768 machine_mode outermode = GET_MODE (op0);
769 scalar_mode innermode = GET_MODE_INNER (outermode);
770 poly_uint64 pos;
771 if (VECTOR_MODE_P (outermode)
772 && !MEM_P (op0)
773 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
774 && fieldmode == innermode
775 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
776 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
778 class expand_operand ops[3];
779 enum insn_code icode = optab_handler (vec_set_optab, outermode);
781 create_fixed_operand (&ops[0], op0);
782 create_input_operand (&ops[1], value, innermode);
783 create_integer_operand (&ops[2], pos);
784 if (maybe_expand_insn (icode, 3, ops))
785 return true;
788 /* If the target is a register, overwriting the entire object, or storing
789 a full-word or multi-word field can be done with just a SUBREG. */
790 if (!MEM_P (op0)
791 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
793 /* Use the subreg machinery either to narrow OP0 to the required
794 words or to cope with mode punning between equal-sized modes.
795 In the latter case, use subreg on the rhs side, not lhs. */
796 rtx sub;
797 poly_uint64 bytenum;
798 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
799 if (known_eq (bitnum, 0U)
800 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
802 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
803 if (sub)
805 if (reverse)
806 sub = flip_storage_order (GET_MODE (op0), sub);
807 emit_move_insn (op0, sub);
808 return true;
811 else if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
812 && (undefined_p
813 || (multiple_p (bitnum, regsize * BITS_PER_UNIT)
814 && multiple_p (bitsize, regsize * BITS_PER_UNIT)))
815 && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
817 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), bytenum);
818 if (sub)
820 if (reverse)
821 value = flip_storage_order (fieldmode, value);
822 emit_move_insn (sub, value);
823 return true;
828 /* If the target is memory, storing any naturally aligned field can be
829 done with a simple store. For targets that support fast unaligned
830 memory, any naturally sized, unit aligned field can be done directly. */
831 poly_uint64 bytenum;
832 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
834 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
835 if (reverse)
836 value = flip_storage_order (fieldmode, value);
837 emit_move_insn (op0, value);
838 return true;
841 /* It's possible we'll need to handle other cases here for
842 polynomial bitnum and bitsize. */
844 /* From here on we need to be looking at a fixed-size insertion. */
845 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
846 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
848 /* Make sure we are playing with integral modes. Pun with subregs
849 if we aren't. This must come after the entire register case above,
850 since that case is valid for any mode. The following cases are only
851 valid for integral modes. */
852 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
853 scalar_int_mode imode;
854 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
856 if (MEM_P (op0))
857 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
858 0, MEM_SIZE (op0));
859 else if (!op0_mode.exists ())
861 if (ibitnum == 0
862 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
863 && MEM_P (value)
864 && !reverse)
866 value = adjust_address (value, GET_MODE (op0), 0);
867 emit_move_insn (op0, value);
868 return true;
870 if (!fallback_p)
871 return false;
872 rtx temp = assign_stack_temp (GET_MODE (op0),
873 GET_MODE_SIZE (GET_MODE (op0)));
874 emit_move_insn (temp, op0);
875 store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
876 reverse, fallback_p, undefined_p);
877 emit_move_insn (op0, temp);
878 return true;
880 else
881 op0 = gen_lowpart (op0_mode.require (), op0);
884 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
885 bitregion_start, bitregion_end,
886 fieldmode, value, reverse, fallback_p);
889 /* Subroutine of store_bit_field_1, with the same arguments, except
890 that BITSIZE and BITNUM are constant. Handle cases specific to
891 integral modes. If OP0_MODE is defined, it is the mode of OP0,
892 otherwise OP0 is a BLKmode MEM. */
894 static bool
895 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
896 unsigned HOST_WIDE_INT bitsize,
897 unsigned HOST_WIDE_INT bitnum,
898 poly_uint64 bitregion_start,
899 poly_uint64 bitregion_end,
900 machine_mode fieldmode,
901 rtx value, bool reverse, bool fallback_p)
903 /* Storing an lsb-aligned field in a register
904 can be done with a movstrict instruction. */
906 if (!MEM_P (op0)
907 && !reverse
908 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
909 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
910 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
912 class expand_operand ops[2];
913 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
914 rtx arg0 = op0;
915 unsigned HOST_WIDE_INT subreg_off;
917 if (GET_CODE (arg0) == SUBREG)
919 /* Else we've got some float mode source being extracted into
920 a different float mode destination -- this combination of
921 subregs results in Severe Tire Damage. */
922 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
923 || GET_MODE_CLASS (fieldmode) == MODE_INT
924 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
925 arg0 = SUBREG_REG (arg0);
928 subreg_off = bitnum / BITS_PER_UNIT;
929 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
930 /* STRICT_LOW_PART must have a non-paradoxical subreg as
931 operand. */
932 && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
934 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
936 create_fixed_operand (&ops[0], arg0);
937 /* Shrink the source operand to FIELDMODE. */
938 create_convert_operand_to (&ops[1], value, fieldmode, false);
939 if (maybe_expand_insn (icode, 2, ops))
940 return true;
944 /* Handle fields bigger than a word. */
946 if (bitsize > BITS_PER_WORD)
948 /* Here we transfer the words of the field
949 in the order least significant first.
950 This is because the most significant word is the one which may
951 be less than full.
952 However, only do that if the value is not BLKmode. */
954 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
955 const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
956 rtx_insn *last;
958 /* This is the mode we must force value to, so that there will be enough
959 subwords to extract. Note that fieldmode will often (always?) be
960 VOIDmode, because that is what store_field uses to indicate that this
961 is a bit field, but passing VOIDmode to operand_subword_force
962 is not allowed.
964 The mode must be fixed-size, since insertions into variable-sized
965 objects are meant to be handled before calling this function. */
966 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
967 if (value_mode == VOIDmode)
968 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
970 last = get_last_insn ();
971 for (int i = 0; i < nwords; i++)
973 /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
974 except maybe for the last iteration. */
975 const unsigned HOST_WIDE_INT new_bitsize
976 = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
977 /* Bit offset from the starting bit number in the target. */
978 const unsigned int bit_offset
979 = backwards ^ reverse
980 ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
981 : i * BITS_PER_WORD;
982 /* Starting word number in the value. */
983 const unsigned int wordnum
984 = backwards
985 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
986 : i;
987 /* The chunk of the value in word_mode. We use bit-field extraction
988 in BLKmode to handle unaligned memory references and to shift the
989 last chunk right on big-endian machines if need be. */
990 rtx value_word
991 = fieldmode == BLKmode
992 ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
993 1, NULL_RTX, word_mode, word_mode, false,
994 NULL)
995 : operand_subword_force (value, wordnum, value_mode);
997 if (!store_bit_field_1 (op0, new_bitsize,
998 bitnum + bit_offset,
999 bitregion_start, bitregion_end,
1000 word_mode,
1001 value_word, reverse, fallback_p, false))
1003 delete_insns_since (last);
1004 return false;
1007 return true;
1010 /* If VALUE has a floating-point or complex mode, access it as an
1011 integer of the corresponding size. This can occur on a machine
1012 with 64 bit registers that uses SFmode for float. It can also
1013 occur for unaligned float or complex fields. */
1014 rtx orig_value = value;
1015 scalar_int_mode value_mode;
1016 if (GET_MODE (value) == VOIDmode)
1017 /* By this point we've dealt with values that are bigger than a word,
1018 so word_mode is a conservatively correct choice. */
1019 value_mode = word_mode;
1020 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1022 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1023 value = gen_reg_rtx (value_mode);
1024 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1027 /* If OP0 is a multi-word register, narrow it to the affected word.
1028 If the region spans two words, defer to store_split_bit_field.
1029 Don't do this if op0 is a single hard register wider than word
1030 such as a float or vector register. */
1031 if (!MEM_P (op0)
1032 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1033 && (!REG_P (op0)
1034 || !HARD_REGISTER_P (op0)
1035 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1037 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1039 if (!fallback_p)
1040 return false;
1042 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1043 bitregion_start, bitregion_end,
1044 value, value_mode, reverse);
1045 return true;
1047 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1048 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1049 gcc_assert (op0);
1050 op0_mode = word_mode;
1051 bitnum %= BITS_PER_WORD;
1054 /* From here on we can assume that the field to be stored in fits
1055 within a word. If the destination is a register, it too fits
1056 in a word. */
1058 extraction_insn insv;
1059 if (!MEM_P (op0)
1060 && !reverse
1061 && get_best_reg_extraction_insn (&insv, EP_insv,
1062 GET_MODE_BITSIZE (op0_mode.require ()),
1063 fieldmode)
1064 && store_bit_field_using_insv (&insv, op0, op0_mode,
1065 bitsize, bitnum, value, value_mode))
1066 return true;
1068 /* If OP0 is a memory, try copying it to a register and seeing if a
1069 cheap register alternative is available. */
1070 if (MEM_P (op0) && !reverse)
1072 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1073 fieldmode)
1074 && store_bit_field_using_insv (&insv, op0, op0_mode,
1075 bitsize, bitnum, value, value_mode))
1076 return true;
1078 rtx_insn *last = get_last_insn ();
1080 /* Try loading part of OP0 into a register, inserting the bitfield
1081 into that, and then copying the result back to OP0. */
1082 unsigned HOST_WIDE_INT bitpos;
1083 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1084 bitregion_start, bitregion_end,
1085 fieldmode, &bitpos);
1086 if (xop0)
1088 rtx tempreg = copy_to_reg (xop0);
1089 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1090 bitregion_start, bitregion_end,
1091 fieldmode, orig_value, reverse, false, false))
1093 emit_move_insn (xop0, tempreg);
1094 return true;
1096 delete_insns_since (last);
1100 if (!fallback_p)
1101 return false;
1103 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1104 bitregion_end, value, value_mode, reverse);
1105 return true;
1108 /* Generate code to store value from rtx VALUE
1109 into a bit-field within structure STR_RTX
1110 containing BITSIZE bits starting at bit BITNUM.
1112 BITREGION_START is bitpos of the first bitfield in this region.
1113 BITREGION_END is the bitpos of the ending bitfield in this region.
1114 These two fields are 0, if the C++ memory model does not apply,
1115 or we are not interested in keeping track of bitfield regions.
1117 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1119 If REVERSE is true, the store is to be done in reverse order.
1121 If UNDEFINED_P is true then STR_RTX is currently undefined. */
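/* A hypothetical call, purely for illustration: to store an 8-bit
   field starting at bit 3 of STR_RTX, with the C++ memory model
   constraints not in effect, an expander could do

     store_bit_field (str_rtx, 8, 3, 0, 0, QImode, value,
                      false, false);

   where VALUE holds the new field contents.  */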
1123 void
1124 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1125 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1126 machine_mode fieldmode,
1127 rtx value, bool reverse, bool undefined_p)
1129 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1130 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1131 scalar_int_mode int_mode;
1132 if (bitsize.is_constant (&ibitsize)
1133 && bitnum.is_constant (&ibitnum)
1134 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1135 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1136 bitregion_start, bitregion_end))
1138 /* Storing of a full word can be done with a simple store.
1139 We know here that the field can be accessed with one single
1140 instruction. For targets that support unaligned memory,
1141 an unaligned access may be necessary. */
1142 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1144 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1145 ibitnum / BITS_PER_UNIT);
1146 if (reverse)
1147 value = flip_storage_order (int_mode, value);
1148 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1149 emit_move_insn (str_rtx, value);
1151 else
1153 rtx temp;
1155 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1156 ibitnum, &ibitnum);
1157 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1158 temp = copy_to_reg (str_rtx);
1159 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1160 int_mode, value, reverse, true, undefined_p))
1161 gcc_unreachable ();
1163 emit_move_insn (str_rtx, temp);
1166 return;
1169 /* Under the C++0x memory model, we must not touch bits outside the
1170 bit region. Adjust the address to start at the beginning of the
1171 bit region. */
1172 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1174 scalar_int_mode best_mode;
1175 machine_mode addr_mode = VOIDmode;
1177 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1178 bitnum -= bitregion_start;
1179 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1180 bitregion_end -= bitregion_start;
1181 bitregion_start = 0;
1182 if (bitsize.is_constant (&ibitsize)
1183 && bitnum.is_constant (&ibitnum)
1184 && get_best_mode (ibitsize, ibitnum,
1185 bitregion_start, bitregion_end,
1186 MEM_ALIGN (str_rtx), INT_MAX,
1187 MEM_VOLATILE_P (str_rtx), &best_mode))
1188 addr_mode = best_mode;
1189 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1190 offset, size);
1193 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1194 bitregion_start, bitregion_end,
1195 fieldmode, value, reverse, true, undefined_p))
1196 gcc_unreachable ();
1199 /* Use shifts and boolean operations to store VALUE into a bit field of
1200 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1201 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1202 the mode of VALUE.
1204 If REVERSE is true, the store is to be done in reverse order. */
1206 static void
1207 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1208 unsigned HOST_WIDE_INT bitsize,
1209 unsigned HOST_WIDE_INT bitnum,
1210 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1211 rtx value, scalar_int_mode value_mode, bool reverse)
1213 /* There is a case not handled here:
1214 a structure with a known alignment of just a halfword
1215 and a field split across two aligned halfwords within the structure.
1216 Or likewise a structure with a known alignment of just a byte
1217 and a field split across two bytes.
1218 Such cases are not supposed to be able to occur. */
1220 scalar_int_mode best_mode;
1221 if (MEM_P (op0))
1223 unsigned int max_bitsize = BITS_PER_WORD;
1224 scalar_int_mode imode;
1225 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1226 max_bitsize = GET_MODE_BITSIZE (imode);
1228 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1229 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1230 &best_mode))
1232 /* The only way this should occur is if the field spans word
1233 boundaries. */
1234 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1235 bitregion_start, bitregion_end,
1236 value, value_mode, reverse);
1237 return;
1240 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1242 else
1243 best_mode = op0_mode.require ();
1245 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1246 value, value_mode, reverse);
1249 /* Helper function for store_fixed_bit_field, stores
1250 the bit field always using MODE, which is the mode of OP0. The other
1251 arguments are as for store_fixed_bit_field. */
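/* As a worked example of the shift-and-mask sequence below: storing
   VALUE == 0x5 into a 4-bit field at (lsb-relative) BITNUM == 8 of
   an SImode OP0 computes

     temp = (op0 & ~0x00000f00) | (0x5 << 8);

   i.e. the old bits are cleared with the complemented mask_rtx mask
   and the shifted value is OR'd in.  */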
1253 static void
1254 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1255 unsigned HOST_WIDE_INT bitsize,
1256 unsigned HOST_WIDE_INT bitnum,
1257 rtx value, scalar_int_mode value_mode, bool reverse)
1259 rtx temp;
1260 int all_zero = 0;
1261 int all_one = 0;
1263 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1264 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1266 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1267 /* BITNUM is the distance between our msb
1268 and that of the containing datum.
1269 Convert it to the distance from the lsb. */
1270 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1272 /* Now BITNUM is always the distance between our lsb
1273 and that of OP0. */
1275 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1276 we must first convert its mode to MODE. */
1278 if (CONST_INT_P (value))
1280 unsigned HOST_WIDE_INT v = UINTVAL (value);
1282 if (bitsize < HOST_BITS_PER_WIDE_INT)
1283 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1285 if (v == 0)
1286 all_zero = 1;
1287 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1288 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1289 || (bitsize == HOST_BITS_PER_WIDE_INT
1290 && v == HOST_WIDE_INT_M1U))
1291 all_one = 1;
1293 value = lshift_value (mode, v, bitnum);
1295 else
1297 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1298 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1300 if (value_mode != mode)
1301 value = convert_to_mode (mode, value, 1);
1303 if (must_and)
1304 value = expand_binop (mode, and_optab, value,
1305 mask_rtx (mode, 0, bitsize, 0),
1306 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1307 if (bitnum > 0)
1308 value = expand_shift (LSHIFT_EXPR, mode, value,
1309 bitnum, NULL_RTX, 1);
1312 if (reverse)
1313 value = flip_storage_order (mode, value);
1315 /* Now clear the chosen bits in OP0,
1316 except that if VALUE is -1 we need not bother. */
1317 /* We keep the intermediates in registers to allow CSE to combine
1318 consecutive bitfield assignments. */
1320 temp = force_reg (mode, op0);
1322 if (! all_one)
1324 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1325 if (reverse)
1326 mask = flip_storage_order (mode, mask);
1327 temp = expand_binop (mode, and_optab, temp, mask,
1328 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1329 temp = force_reg (mode, temp);
1332 /* Now logical-or VALUE into OP0, unless it is zero. */
1334 if (! all_zero)
1336 temp = expand_binop (mode, ior_optab, temp, value,
1337 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1338 temp = force_reg (mode, temp);
1341 if (op0 != temp)
1343 op0 = copy_rtx (op0);
1344 emit_move_insn (op0, temp);
1348 /* Store a bit field that is split across multiple accessible memory objects.
1350 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1351 BITSIZE is the field width; BITPOS the position of its first bit
1352 (within the word).
1353 VALUE is the value to store, which has mode VALUE_MODE.
1354 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1355 a BLKmode MEM.
1357 If REVERSE is true, the store is to be done in reverse order.
1359 This does not yet handle fields wider than BITS_PER_WORD. */
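/* For example (illustrative numbers): with byte-aligned memory and
   UNIT == 8, a 10-bit field at BITPOS == 6 is stored in two pieces:
   first the 2 bits that fit in the byte at offset 0 (THISSIZE ==
   8 - 6 == 2), then the remaining 8 bits in the byte at offset 1.  */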
1361 static void
1362 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1363 unsigned HOST_WIDE_INT bitsize,
1364 unsigned HOST_WIDE_INT bitpos,
1365 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1366 rtx value, scalar_int_mode value_mode, bool reverse)
1368 unsigned int unit, total_bits, bitsdone = 0;
1370 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1371 much at a time. */
1372 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1373 unit = BITS_PER_WORD;
1374 else
1375 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1377 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1378 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1379 again, and we will mutually recurse forever. */
1380 if (MEM_P (op0) && op0_mode.exists ())
1381 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1383 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1384 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1385 that VALUE might be a floating-point constant. */
1386 if (CONSTANT_P (value) && !CONST_INT_P (value))
1388 rtx word = gen_lowpart_common (word_mode, value);
1390 if (word && (value != word))
1391 value = word;
1392 else
1393 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1394 value_mode = word_mode;
1397 total_bits = GET_MODE_BITSIZE (value_mode);
1399 while (bitsdone < bitsize)
1401 unsigned HOST_WIDE_INT thissize;
1402 unsigned HOST_WIDE_INT thispos;
1403 unsigned HOST_WIDE_INT offset;
1404 rtx part;
1406 offset = (bitpos + bitsdone) / unit;
1407 thispos = (bitpos + bitsdone) % unit;
 1409 /* When the region of bytes we can touch is restricted, decrease
 1410 UNIT close to the end of the region as needed. If op0 is a REG
 1411 or SUBREG of REG, don't do this, as there can't be data races
 1412 on a register and we can expand shorter code in some cases. */
1413 if (maybe_ne (bitregion_end, 0U)
1414 && unit > BITS_PER_UNIT
1415 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1416 && !REG_P (op0)
1417 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1419 unit = unit / 2;
1420 continue;
1423 /* THISSIZE must not overrun a word boundary. Otherwise,
1424 store_fixed_bit_field will call us again, and we will mutually
1425 recurse forever. */
1426 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1427 thissize = MIN (thissize, unit - thispos);
1429 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1431 /* Fetch successively less significant portions. */
1432 if (CONST_INT_P (value))
1433 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1434 >> (bitsize - bitsdone - thissize))
1435 & ((HOST_WIDE_INT_1 << thissize) - 1));
1436 /* Likewise, but the source is little-endian. */
1437 else if (reverse)
1438 part = extract_fixed_bit_field (word_mode, value, value_mode,
1439 thissize,
1440 bitsize - bitsdone - thissize,
1441 NULL_RTX, 1, false);
1442 else
1443 /* The args are chosen so that the last part includes the
1444 lsb. Give extract_bit_field the value it needs (with
1445 endianness compensation) to fetch the piece we want. */
1446 part = extract_fixed_bit_field (word_mode, value, value_mode,
1447 thissize,
1448 total_bits - bitsize + bitsdone,
1449 NULL_RTX, 1, false);
1451 else
1453 /* Fetch successively more significant portions. */
1454 if (CONST_INT_P (value))
1455 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1456 >> bitsdone)
1457 & ((HOST_WIDE_INT_1 << thissize) - 1));
1458 /* Likewise, but the source is big-endian. */
1459 else if (reverse)
1460 part = extract_fixed_bit_field (word_mode, value, value_mode,
1461 thissize,
1462 total_bits - bitsdone - thissize,
1463 NULL_RTX, 1, false);
1464 else
1465 part = extract_fixed_bit_field (word_mode, value, value_mode,
1466 thissize, bitsdone, NULL_RTX,
1467 1, false);
1470 /* If OP0 is a register, then handle OFFSET here. */
1471 rtx op0_piece = op0;
1472 opt_scalar_int_mode op0_piece_mode = op0_mode;
1473 if (SUBREG_P (op0) || REG_P (op0))
1475 scalar_int_mode imode;
1476 if (op0_mode.exists (&imode)
1477 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1479 if (offset)
1480 op0_piece = const0_rtx;
1482 else
1484 op0_piece = operand_subword_force (op0,
1485 offset * unit / BITS_PER_WORD,
1486 GET_MODE (op0));
1487 op0_piece_mode = word_mode;
1489 offset &= BITS_PER_WORD / unit - 1;
 1492 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
 1493 it is just an out-of-bounds access. Ignore it. */
1494 if (op0_piece != const0_rtx)
1495 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1496 offset * unit + thispos, bitregion_start,
1497 bitregion_end, part, word_mode, reverse);
1498 bitsdone += thissize;
1502 /* A subroutine of extract_bit_field_1 that converts return value X
1503 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1504 to extract_bit_field. */
1506 static rtx
1507 convert_extracted_bit_field (rtx x, machine_mode mode,
1508 machine_mode tmode, bool unsignedp)
1510 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1511 return x;
 1513 /* If TMODE is not a scalar integral mode, first convert to the
 1514 integer mode of that size and then access it as a floating-point
 1515 value via a SUBREG. */
1516 if (!SCALAR_INT_MODE_P (tmode))
1518 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1519 x = convert_to_mode (int_mode, x, unsignedp);
1520 x = force_reg (int_mode, x);
1521 return gen_lowpart (tmode, x);
1524 return convert_to_mode (tmode, x, unsignedp);
1527 /* Try to use an ext(z)v pattern to extract a field from OP0.
1528 Return the extracted value on success, otherwise return null.
1529 EXTV describes the extraction instruction to use. If OP0_MODE
1530 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1531 The other arguments are as for extract_bit_field. */
1533 static rtx
1534 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1535 opt_scalar_int_mode op0_mode,
1536 unsigned HOST_WIDE_INT bitsize,
1537 unsigned HOST_WIDE_INT bitnum,
1538 int unsignedp, rtx target,
1539 machine_mode mode, machine_mode tmode)
1541 class expand_operand ops[4];
1542 rtx spec_target = target;
1543 rtx spec_target_subreg = 0;
1544 scalar_int_mode ext_mode = extv->field_mode;
1545 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1547 if (bitsize == 0 || unit < bitsize)
1548 return NULL_RTX;
1550 if (MEM_P (op0))
1551 /* Get a reference to the first byte of the field. */
1552 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1553 &bitnum);
1554 else
1556 /* Convert from counting within OP0 to counting in EXT_MODE. */
1557 if (BYTES_BIG_ENDIAN)
1558 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1560 /* If op0 is a register, we need it in EXT_MODE to make it
1561 acceptable to the format of ext(z)v. */
1562 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1563 return NULL_RTX;
1564 if (REG_P (op0) && op0_mode.require () != ext_mode)
1565 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1568 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1569 "backwards" from the size of the unit we are extracting from.
1570 Otherwise, we count bits from the most significant on a
1571 BYTES/BITS_BIG_ENDIAN machine. */
1573 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1574 bitnum = unit - bitsize - bitnum;
1576 if (target == 0)
1577 target = spec_target = gen_reg_rtx (tmode);
1579 if (GET_MODE (target) != ext_mode)
1581 rtx temp;
 1582 /* Don't use an LHS paradoxical subreg if explicit truncation is needed
 1583 between the mode of the extraction (EXT_MODE) and the target
 1584 mode. Instead, create a temporary and use convert_move to set
 1585 the target. */
1586 if (REG_P (target)
1587 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1588 && (temp = gen_lowpart_if_possible (ext_mode, target)))
1590 target = temp;
1591 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1592 spec_target_subreg = target;
1594 else
1595 target = gen_reg_rtx (ext_mode);
1598 create_output_operand (&ops[0], target, ext_mode);
1599 create_fixed_operand (&ops[1], op0);
1600 create_integer_operand (&ops[2], bitsize);
1601 create_integer_operand (&ops[3], bitnum);
1602 if (maybe_expand_insn (extv->icode, 4, ops))
1604 target = ops[0].value;
1605 if (target == spec_target)
1606 return target;
1607 if (target == spec_target_subreg)
1608 return spec_target;
1609 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1611 return NULL_RTX;
1614 /* See whether it would be valid to extract the part of OP0 with
1615 mode OP0_MODE described by BITNUM and BITSIZE into a value of
1616 mode MODE using a subreg operation.
1617 Return the subreg if so, otherwise return null. */
1619 static rtx
1620 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1621 machine_mode op0_mode,
1622 poly_uint64 bitsize, poly_uint64 bitnum)
1624 poly_uint64 bytenum;
1625 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1626 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1627 && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1628 && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1629 return simplify_gen_subreg (mode, op0, op0_mode, bytenum);
1630 return NULL_RTX;
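/* For instance, on a little-endian target the low 32 bits of a
   DImode register can be extracted as an SImode value with a simple
   lowpart subreg, (subreg:SI (reg:DI x) 0), which is what the
   checks above allow.  */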
1633 /* A subroutine of extract_bit_field, with the same arguments.
1634 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1635 if we can find no other means of implementing the operation.
 1636 If FALLBACK_P is false, return NULL instead. */
1638 static rtx
1639 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1640 int unsignedp, rtx target, machine_mode mode,
1641 machine_mode tmode, bool reverse, bool fallback_p,
1642 rtx *alt_rtl)
1644 rtx op0 = str_rtx;
1645 machine_mode mode1;
1647 if (tmode == VOIDmode)
1648 tmode = mode;
1650 while (GET_CODE (op0) == SUBREG)
1652 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1653 op0 = SUBREG_REG (op0);
1656 /* If we have an out-of-bounds access to a register, just return an
1657 uninitialized register of the required mode. This can occur if the
1658 source code contains an out-of-bounds access to a small array. */
1659 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1660 return gen_reg_rtx (tmode);
1662 if (REG_P (op0)
1663 && mode == GET_MODE (op0)
1664 && known_eq (bitnum, 0U)
1665 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1667 if (reverse)
1668 op0 = flip_storage_order (mode, op0);
1669 /* We're trying to extract a full register from itself. */
1670 return op0;
1673 /* First try to check for vector from vector extractions. */
1674 if (VECTOR_MODE_P (GET_MODE (op0))
1675 && !MEM_P (op0)
1676 && VECTOR_MODE_P (tmode)
1677 && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1678 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1680 machine_mode new_mode = GET_MODE (op0);
1681 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1683 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1684 poly_uint64 nunits;
1685 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1686 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1687 || !related_vector_mode (tmode, inner_mode,
1688 nunits).exists (&new_mode)
1689 || maybe_ne (GET_MODE_SIZE (new_mode),
1690 GET_MODE_SIZE (GET_MODE (op0))))
1691 new_mode = VOIDmode;
1693 poly_uint64 pos;
1694 if (new_mode != VOIDmode
1695 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1696 != CODE_FOR_nothing)
1697 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1699 class expand_operand ops[3];
1700 machine_mode outermode = new_mode;
1701 machine_mode innermode = tmode;
1702 enum insn_code icode
1703 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1705 if (new_mode != GET_MODE (op0))
1706 op0 = gen_lowpart (new_mode, op0);
1707 create_output_operand (&ops[0], target, innermode);
1708 ops[0].target = 1;
1709 create_input_operand (&ops[1], op0, outermode);
1710 create_integer_operand (&ops[2], pos);
1711 if (maybe_expand_insn (icode, 3, ops))
1713 if (alt_rtl && ops[0].target)
1714 *alt_rtl = target;
1715 target = ops[0].value;
1716 if (GET_MODE (target) != mode)
1717 return gen_lowpart (tmode, target);
1718 return target;
1723 /* See if we can get a better vector mode before extracting. */
1724 if (VECTOR_MODE_P (GET_MODE (op0))
1725 && !MEM_P (op0)
1726 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1728 machine_mode new_mode;
1730 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1731 new_mode = MIN_MODE_VECTOR_FLOAT;
1732 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1733 new_mode = MIN_MODE_VECTOR_FRACT;
1734 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1735 new_mode = MIN_MODE_VECTOR_UFRACT;
1736 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1737 new_mode = MIN_MODE_VECTOR_ACCUM;
1738 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1739 new_mode = MIN_MODE_VECTOR_UACCUM;
1740 else
1741 new_mode = MIN_MODE_VECTOR_INT;
1743 FOR_EACH_MODE_FROM (new_mode, new_mode)
1744 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1745 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1746 && targetm.vector_mode_supported_p (new_mode)
1747 && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1748 break;
1749 if (new_mode != VOIDmode)
1750 op0 = gen_lowpart (new_mode, op0);
1753 /* Use vec_extract patterns for extracting parts of vectors whenever
1754 available. If that fails, see whether the current modes and bitregion
1755 give a natural subreg. */
1756 machine_mode outermode = GET_MODE (op0);
1757 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1759 scalar_mode innermode = GET_MODE_INNER (outermode);
1760 enum insn_code icode
1761 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1762 poly_uint64 pos;
1763 if (icode != CODE_FOR_nothing
1764 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1765 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1767 class expand_operand ops[3];
1769 create_output_operand (&ops[0], target, innermode);
1770 ops[0].target = 1;
1771 create_input_operand (&ops[1], op0, outermode);
1772 create_integer_operand (&ops[2], pos);
1773 if (maybe_expand_insn (icode, 3, ops))
1775 if (alt_rtl && ops[0].target)
1776 *alt_rtl = target;
1777 target = ops[0].value;
1778 if (GET_MODE (target) != mode)
1779 return gen_lowpart (tmode, target);
1780 return target;
1783 /* Using subregs is useful if we're extracting one register vector
1784 from a multi-register vector. extract_bit_field_as_subreg checks
1785 for valid bitsize and bitnum, so we don't need to do that here. */
1786 if (VECTOR_MODE_P (mode))
1788 rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
1789 bitsize, bitnum);
1790 if (sub)
1791 return sub;
1795 /* Make sure we are playing with integral modes. Pun with subregs
1796 if we aren't. */
1797 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1798 scalar_int_mode imode;
1799 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1801 if (MEM_P (op0))
1802 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1803 0, MEM_SIZE (op0));
1804 else if (op0_mode.exists (&imode))
1806 op0 = gen_lowpart (imode, op0);
1808 /* If we got a SUBREG, force it into a register since we
1809 aren't going to be able to do another SUBREG on it. */
1810 if (GET_CODE (op0) == SUBREG)
1811 op0 = force_reg (imode, op0);
1813 else
1815 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1816 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1817 emit_move_insn (mem, op0);
1818 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1822 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1823 If that's wrong, the solution is to test for it and set TARGET to 0
1824 if needed. */
1826 /* Get the mode of the field to use for atomic access or subreg
1827 conversion. */
1828 if (!SCALAR_INT_MODE_P (tmode)
1829 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1830 mode1 = mode;
1831 gcc_assert (mode1 != BLKmode);
1833 /* Extraction of a full MODE1 value can be done with a subreg as long
1834 as the least significant bit of the value is the least significant
1835 bit of either OP0 or a word of OP0. */
1836 if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
1838 rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
1839 bitsize, bitnum);
1840 if (sub)
1841 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1844 /* Extraction of a full MODE1 value can be done with a load as long as
1845 the field is on a byte boundary and is sufficiently aligned. */
1846 poly_uint64 bytenum;
1847 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1849 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1850 if (reverse)
1851 op0 = flip_storage_order (mode1, op0);
1852 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1855 /* If we have a memory source and a non-constant bit offset, restrict
1856 the memory to the referenced bytes. This is a worst-case fallback
1857 but is useful for things like vector booleans. */
1858 if (MEM_P (op0) && !bitnum.is_constant ())
1860 bytenum = bits_to_bytes_round_down (bitnum);
1861 bitnum = num_trailing_bits (bitnum);
1862 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1863 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1864 op0_mode = opt_scalar_int_mode ();
1867 /* It's possible we'll need to handle other cases here for
1868 polynomial bitnum and bitsize. */
1870 /* From here on we need to be looking at a fixed-size extraction. */
1871 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1872 bitnum.to_constant (), unsignedp,
1873 target, mode, tmode, reverse, fallback_p);
1876 /* Subroutine of extract_bit_field_1, with the same arguments, except
1877 that BITSIZE and BITNUM are constant. Handle cases specific to
1878 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1879 otherwise OP0 is a BLKmode MEM. */
1881 static rtx
1882 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1883 unsigned HOST_WIDE_INT bitsize,
1884 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1885 rtx target, machine_mode mode, machine_mode tmode,
1886 bool reverse, bool fallback_p)
1888 /* Handle fields bigger than a word. */
1890 if (bitsize > BITS_PER_WORD)
1892 /* Here we transfer the words of the field
1893 in the order least significant first.
1894 This is because the most significant word is the one which may
1895 be less than full. */
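/* For example, a 40-bit field on a 32-bit target is moved as two
   32-bit words; the second (most significant) extraction supplies
   only the remaining 8 bits.  */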
1897 const bool backwards = WORDS_BIG_ENDIAN;
1898 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1899 unsigned int i;
1900 rtx_insn *last;
1902 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1903 target = gen_reg_rtx (mode);
1905 /* In case we're about to clobber a base register or something
1906 (see gcc.c-torture/execute/20040625-1.c). */
1907 if (reg_mentioned_p (target, op0))
1908 target = gen_reg_rtx (mode);
1910 /* Indicate for flow that the entire target reg is being set. */
1911 emit_clobber (target);
1913 /* The mode must be fixed-size, since extract_bit_field_1 handles
1914 extractions from variable-sized objects before calling this
1915 function. */
1916 unsigned int target_size
1917 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1918 last = get_last_insn ();
1919 for (i = 0; i < nwords; i++)
1921 /* If I is 0, use the low-order word in both field and target;
1922 if I is 1, use the next to lowest word; and so on. */
1923 /* Word number in TARGET to use. */
1924 unsigned int wordnum
1925 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1926 /* Offset from start of field in OP0. */
1927 unsigned int bit_offset = (backwards ^ reverse
1928 ? MAX ((int) bitsize - ((int) i + 1)
1929 * BITS_PER_WORD,
1930 0)
1931 : (int) i * BITS_PER_WORD);
1932 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1933 rtx result_part
1934 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1935 bitsize - i * BITS_PER_WORD),
1936 bitnum + bit_offset, 1, target_part,
1937 mode, word_mode, reverse, fallback_p, NULL);
1939 gcc_assert (target_part);
1940 if (!result_part)
1942 delete_insns_since (last);
1943 return NULL;
1946 if (result_part != target_part)
1947 emit_move_insn (target_part, result_part);
1950 if (unsignedp)
1952 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1953 need to be zeroed out. */
1954 if (target_size > nwords * UNITS_PER_WORD)
1956 unsigned int i, total_words;
1958 total_words = target_size / UNITS_PER_WORD;
1959 for (i = nwords; i < total_words; i++)
1960 emit_move_insn
1961 (operand_subword (target,
1962 backwards ? total_words - i - 1 : i,
1963 1, VOIDmode),
1964 const0_rtx);
1966 return target;
1969 /* Signed bit field: sign-extend with two arithmetic shifts. */
1970 target = expand_shift (LSHIFT_EXPR, mode, target,
1971 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1972 return expand_shift (RSHIFT_EXPR, mode, target,
1973 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1976 /* If OP0 is a multi-word register, narrow it to the affected word.
1977 If the region spans two words, defer to extract_split_bit_field. */
1978 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1980 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1982 if (!fallback_p)
1983 return NULL_RTX;
1984 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1985 unsignedp, reverse);
1986 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1988 /* If OP0 is a hard register, copy it to a pseudo before calling
1989 simplify_gen_subreg. */
1990 if (REG_P (op0) && HARD_REGISTER_P (op0))
1991 op0 = copy_to_reg (op0);
1992 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1993 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1994 op0_mode = word_mode;
1995 bitnum %= BITS_PER_WORD;
1998 /* From here on we know the desired field is smaller than a word.
1999 If OP0 is a register, it too fits within a word. */
2000 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
2001 extraction_insn extv;
2002 if (!MEM_P (op0)
2003 && !reverse
2004 /* ??? We could limit the structure size to the part of OP0 that
2005 contains the field, with appropriate checks for endianness
2006 and TARGET_TRULY_NOOP_TRUNCATION. */
2007 && get_best_reg_extraction_insn (&extv, pattern,
2008 GET_MODE_BITSIZE (op0_mode.require ()),
2009 tmode))
2011 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2012 bitsize, bitnum,
2013 unsignedp, target, mode,
2014 tmode);
2015 if (result)
2016 return result;
2019 /* If OP0 is a memory, try copying it to a register and seeing if a
2020 cheap register alternative is available. */
2021 if (MEM_P (op0) && !reverse)
2023 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2024 tmode))
2026 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2027 bitsize, bitnum,
2028 unsignedp, target, mode,
2029 tmode);
2030 if (result)
2031 return result;
2034 rtx_insn *last = get_last_insn ();
2036 /* Try loading part of OP0 into a register and extracting the
2037 bitfield from that. */
2038 unsigned HOST_WIDE_INT bitpos;
2039 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2040 0, 0, tmode, &bitpos);
2041 if (xop0)
2043 xop0 = copy_to_reg (xop0);
2044 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2045 unsignedp, target,
2046 mode, tmode, reverse, false, NULL);
2047 if (result)
2048 return result;
2049 delete_insns_since (last);
2053 if (!fallback_p)
2054 return NULL;
2056 /* Find a correspondingly-sized integer field, so we can apply
2057 shifts and masks to it. */
2058 scalar_int_mode int_mode;
2059 if (!int_mode_for_mode (tmode).exists (&int_mode))
2060 /* If this fails, we should probably push op0 out to memory and then
2061 do a load. */
2062 int_mode = int_mode_for_mode (mode).require ();
2064 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2065 bitnum, target, unsignedp, reverse);
2067 /* Complex values must be reversed piecewise, so we need to undo the global
2068 reversal, convert to the complex mode and reverse again. */
2069 if (reverse && COMPLEX_MODE_P (tmode))
2071 target = flip_storage_order (int_mode, target);
2072 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2073 target = flip_storage_order (tmode, target);
2075 else
2076 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2078 return target;
2081 /* Generate code to extract a bit-field from STR_RTX
2082 containing BITSIZE bits, starting at BITNUM,
2083 and put it in TARGET if possible (if TARGET is nonzero).
2084 Regardless of TARGET, we return the rtx for where the value is placed.
2086 STR_RTX is the structure containing the byte (a REG or MEM).
2087 UNSIGNEDP is nonzero if this is an unsigned bit field.
2088 MODE is the natural mode of the field value once extracted.
2089 TMODE is the mode the caller would like the value to have;
2090 but the value may be returned with type MODE instead.
2092 If REVERSE is true, the extraction is to be done in reverse order.
2094 If a TARGET is specified and we can store in it at no extra cost,
2095 we do so, and return TARGET.
2096 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2097 if they are equally easy.
2099 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2100 then *ALT_RTL is set to TARGET (before legitimization). */
2102 rtx
2103 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2104 int unsignedp, rtx target, machine_mode mode,
2105 machine_mode tmode, bool reverse, rtx *alt_rtl)
2107 machine_mode mode1;
2109 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2110 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2111 mode1 = GET_MODE (str_rtx);
2112 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2113 mode1 = GET_MODE (target);
2114 else
2115 mode1 = tmode;
2117 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2118 scalar_int_mode int_mode;
2119 if (bitsize.is_constant (&ibitsize)
2120 && bitnum.is_constant (&ibitnum)
2121 && is_a <scalar_int_mode> (mode1, &int_mode)
2122 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2123 int_mode, 0, 0))
2125 /* Extraction of a full INT_MODE value can be done with a simple load.
2126 We know here that the field can be accessed with one single
2127 instruction. For targets that support unaligned memory,
2128 an unaligned access may be necessary. */
2129 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2131 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2132 ibitnum / BITS_PER_UNIT);
2133 if (reverse)
2134 result = flip_storage_order (int_mode, result);
2135 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2136 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2139 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2140 &ibitnum);
2141 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2142 str_rtx = copy_to_reg (str_rtx);
2143 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2144 target, mode, tmode, reverse, true, alt_rtl);
2147 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2148 target, mode, tmode, reverse, true, alt_rtl);
2151 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2152 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2153 otherwise OP0 is a BLKmode MEM.
2155 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2156 If REVERSE is true, the extraction is to be done in reverse order.
2158 If TARGET is nonzero, attempts to store the value there
2159 and return TARGET, but this is not guaranteed.
2160 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2162 static rtx
2163 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2164 opt_scalar_int_mode op0_mode,
2165 unsigned HOST_WIDE_INT bitsize,
2166 unsigned HOST_WIDE_INT bitnum, rtx target,
2167 int unsignedp, bool reverse)
2169 scalar_int_mode mode;
2170 if (MEM_P (op0))
2172 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2173 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2174 /* The only way this should occur is if the field spans word
2175 boundaries. */
2176 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2177 unsignedp, reverse);
2179 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2181 else
2182 mode = op0_mode.require ();
2184 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2185 target, unsignedp, reverse);
2188 /* Helper function for extract_fixed_bit_field, extracts
2189 the bit field always using MODE, which is the mode of OP0.
2190 The other arguments are as for extract_fixed_bit_field. */
2192 static rtx
2193 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2194 unsigned HOST_WIDE_INT bitsize,
2195 unsigned HOST_WIDE_INT bitnum, rtx target,
2196 int unsignedp, bool reverse)
2198 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2199 for invalid input, such as extracting the equivalent of f5 from
2200 gcc.dg/pr48335-2.c. */
2202 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2203 /* BITNUM is the distance between our msb and that of OP0.
2204 Convert it to the distance from the lsb. */
2205 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2207 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2208 We have reduced the big-endian case to the little-endian case. */
2209 if (reverse)
2210 op0 = flip_storage_order (mode, op0);
2212 if (unsignedp)
2214 if (bitnum)
2216 /* If the field does not already start at the lsb,
2217 shift it so it does. */
2218 /* Maybe propagate the target for the shift. */
2219 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2220 if (tmode != mode)
2221 subtarget = 0;
2222 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2224 /* Convert the value to the desired mode. TMODE must also be a
2225 scalar integer for this conversion to make sense, since we
2226 shouldn't reinterpret the bits. */
2227 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2228 if (mode != new_mode)
2229 op0 = convert_to_mode (new_mode, op0, 1);
2231 /* Unless the msb of the field used to be the msb when we shifted,
2232 mask out the upper bits. */
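/* For example, a 5-bit field at bit 3 of an SImode value has been
   shifted right by 3 above and is masked with 0x1f here; had the field
   ended at the mode's msb, the shift alone would have cleared the
   upper bits.  */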
2234 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2235 return expand_binop (new_mode, and_optab, op0,
2236 mask_rtx (new_mode, 0, bitsize, 0),
2237 target, 1, OPTAB_LIB_WIDEN);
2238 return op0;
2241 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2242 then arithmetic-shift its lsb to the lsb of the word. */
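/* For example, a signed 5-bit field at bit 2 of a QImode value is
   shifted left by 1 so its msb lands in bit 7, then arithmetic-shifted
   right by 3 to sign-extend it into the full byte.  */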
2243 op0 = force_reg (mode, op0);
2245 /* Find the narrowest integer mode that contains the field. */
2247 opt_scalar_int_mode mode_iter;
2248 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2249 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2250 break;
2252 mode = mode_iter.require ();
2253 op0 = convert_to_mode (mode, op0, 0);
2255 if (mode != tmode)
2256 target = 0;
2258 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2260 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2261 /* Maybe propagate the target for the shift. */
2262 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2263 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2266 return expand_shift (RSHIFT_EXPR, mode, op0,
2267 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2270 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2271 VALUE << BITPOS. */
2273 static rtx
2274 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2275 int bitpos)
2277 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2280 /* Extract a bit field that is split across two words
2281 and return an RTX for the result.
2283 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2284 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2285 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2286 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2287 a BLKmode MEM.
2289 If REVERSE is true, the extraction is to be done in reverse order. */
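/* For example, on a 32-bit target a 16-bit field starting at bit 24
   crosses a word boundary: the loop below fetches 8 bits at position 24
   of the first word and 8 bits at position 0 of the second, shifts them
   into place, and combines them with IOR.  */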
2291 static rtx
2292 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2293 unsigned HOST_WIDE_INT bitsize,
2294 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2295 bool reverse)
2297 unsigned int unit;
2298 unsigned int bitsdone = 0;
2299 rtx result = NULL_RTX;
2300 int first = 1;
2302 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2303 much at a time. */
2304 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2305 unit = BITS_PER_WORD;
2306 else
2307 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2309 while (bitsdone < bitsize)
2311 unsigned HOST_WIDE_INT thissize;
2312 rtx part;
2313 unsigned HOST_WIDE_INT thispos;
2314 unsigned HOST_WIDE_INT offset;
2316 offset = (bitpos + bitsdone) / unit;
2317 thispos = (bitpos + bitsdone) % unit;
2319 /* THISSIZE must not overrun a word boundary. Otherwise,
2320 extract_fixed_bit_field will call us again, and we will mutually
2321 recurse forever. */
2322 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2323 thissize = MIN (thissize, unit - thispos);
2325 /* If OP0 is a register, then handle OFFSET here. */
2326 rtx op0_piece = op0;
2327 opt_scalar_int_mode op0_piece_mode = op0_mode;
2328 if (SUBREG_P (op0) || REG_P (op0))
2330 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2331 op0_piece_mode = word_mode;
2332 offset = 0;
2335 /* Extract the parts in bit-counting order,
2336 whose meaning is determined by BYTES_BIG_ENDIAN.
2337 OFFSET is in UNITs, and UNIT is in bits. */
2338 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2339 thissize, offset * unit + thispos,
2340 0, 1, reverse);
2341 bitsdone += thissize;
2343 /* Shift this part into place for the result. */
2344 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2346 if (bitsize != bitsdone)
2347 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2348 bitsize - bitsdone, 0, 1);
2350 else
2352 if (bitsdone != thissize)
2353 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2354 bitsdone - thissize, 0, 1);
2357 if (first)
2358 result = part;
2359 else
2360 /* Combine the parts with bitwise or. This works
2361 because we extracted each part as an unsigned bit field. */
2362 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2363 OPTAB_LIB_WIDEN);
2365 first = 0;
2368 /* Unsigned bit field: we are done. */
2369 if (unsignedp)
2370 return result;
2371 /* Signed bit field: sign-extend with two arithmetic shifts. */
2372 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2373 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2374 return expand_shift (RSHIFT_EXPR, word_mode, result,
2375 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2378 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2379 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2380 MODE, fill the upper bits with zeros. Fail if the layout of either
2381 mode is unknown (as for CC modes) or if the extraction would involve
2382 unprofitable mode punning. Return the value on success, otherwise
2383 return null.
2385 This is different from gen_lowpart* in these respects:
2387 - the returned value must always be considered an rvalue
2389 - when MODE is wider than SRC_MODE, the extraction involves
2390 a zero extension
2392 - when MODE is smaller than SRC_MODE, the extraction involves
2393 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2395 In other words, this routine performs a computation, whereas the
2396 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2397 operations. */
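/* For example, extract_low_bits (SImode, SFmode, x) reads the 32-bit
   pattern of the float as an integer rvalue (subject to the
   mode-tieability checks below), while extract_low_bits (DImode,
   SImode, x) zero-extends x to 64 bits.  */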
2399 rtx
2400 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2402 scalar_int_mode int_mode, src_int_mode;
2404 if (mode == src_mode)
2405 return src;
2407 if (CONSTANT_P (src))
2409 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2410 fails, it will happily create (subreg (symbol_ref)) or similar
2411 invalid SUBREGs. */
2412 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2413 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2414 if (ret)
2415 return ret;
2417 if (GET_MODE (src) == VOIDmode
2418 || !validate_subreg (mode, src_mode, src, byte))
2419 return NULL_RTX;
2421 src = force_reg (GET_MODE (src), src);
2422 return gen_rtx_SUBREG (mode, src, byte);
2425 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2426 return NULL_RTX;
2428 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2429 && targetm.modes_tieable_p (mode, src_mode))
2431 rtx x = gen_lowpart_common (mode, src);
2432 if (x)
2433 return x;
2436 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2437 || !int_mode_for_mode (mode).exists (&int_mode))
2438 return NULL_RTX;
2440 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2441 return NULL_RTX;
2442 if (!targetm.modes_tieable_p (int_mode, mode))
2443 return NULL_RTX;
2445 src = gen_lowpart (src_int_mode, src);
2446 if (!validate_subreg (int_mode, src_int_mode, src,
2447 subreg_lowpart_offset (int_mode, src_int_mode)))
2448 return NULL_RTX;
2450 src = convert_modes (int_mode, src_int_mode, src, true);
2451 src = gen_lowpart (mode, src);
2452 return src;
2455 /* Add INC into TARGET. */
2457 void
2458 expand_inc (rtx target, rtx inc)
2460 rtx value = expand_binop (GET_MODE (target), add_optab,
2461 target, inc,
2462 target, 0, OPTAB_LIB_WIDEN);
2463 if (value != target)
2464 emit_move_insn (target, value);
2467 /* Subtract DEC from TARGET. */
2469 void
2470 expand_dec (rtx target, rtx dec)
2472 rtx value = expand_binop (GET_MODE (target), sub_optab,
2473 target, dec,
2474 target, 0, OPTAB_LIB_WIDEN);
2475 if (value != target)
2476 emit_move_insn (target, value);
2479 /* Output a shift instruction for expression code CODE,
2480 with SHIFTED being the rtx for the value to shift,
2481 and AMOUNT the rtx for the amount to shift by.
2482 Store the result in the rtx TARGET, if that is convenient.
2483 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2484 Return the rtx for where the value is.
2485 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2486 in which case 0 is returned. */
2488 static rtx
2489 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2490 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2492 rtx op1, temp = 0;
2493 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2494 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2495 optab lshift_optab = ashl_optab;
2496 optab rshift_arith_optab = ashr_optab;
2497 optab rshift_uns_optab = lshr_optab;
2498 optab lrotate_optab = rotl_optab;
2499 optab rrotate_optab = rotr_optab;
2500 machine_mode op1_mode;
2501 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2502 int attempt;
2503 bool speed = optimize_insn_for_speed_p ();
2505 op1 = amount;
2506 op1_mode = GET_MODE (op1);
2508 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2509 shift amount is a vector, use the vector/vector shift patterns. */
2510 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2512 lshift_optab = vashl_optab;
2513 rshift_arith_optab = vashr_optab;
2514 rshift_uns_optab = vlshr_optab;
2515 lrotate_optab = vrotl_optab;
2516 rrotate_optab = vrotr_optab;
2519 /* We previously detected shift counts computed by NEGATE_EXPR
2520 and shifted in the other direction, but that does not work
2521 on all machines. */
2523 if (SHIFT_COUNT_TRUNCATED)
2525 if (CONST_INT_P (op1)
2526 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2527 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2528 op1 = gen_int_shift_amount (mode,
2529 (unsigned HOST_WIDE_INT) INTVAL (op1)
2530 % GET_MODE_BITSIZE (scalar_mode));
2531 else if (GET_CODE (op1) == SUBREG
2532 && subreg_lowpart_p (op1)
2533 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2534 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2535 op1 = SUBREG_REG (op1);
2538 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2539 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2540 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2541 amount instead. */
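/* For example, in SImode a rotate right by 31 is canonicalized to a
   rotate left by 1.  */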
2542 if (rotate
2543 && CONST_INT_P (op1)
2544 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2545 GET_MODE_BITSIZE (scalar_mode) - 1))
2547 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2548 - INTVAL (op1)));
2549 left = !left;
2550 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2553 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2554 Note that this is not the case for bigger values. For instance a rotation
2555 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2556 0x04030201 (bswapsi). */
2557 if (rotate
2558 && CONST_INT_P (op1)
2559 && INTVAL (op1) == BITS_PER_UNIT
2560 && GET_MODE_SIZE (scalar_mode) == 2
2561 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2562 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2564 if (op1 == const0_rtx)
2565 return shifted;
2567 /* Check whether it's cheaper to implement a left shift by a constant
2568 bit count by a sequence of additions. */
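/* For example, a shift left by 2 becomes two self-additions,
   t = x + x; t = t + t, when two adds cost less than the shift.  */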
2569 if (code == LSHIFT_EXPR
2570 && CONST_INT_P (op1)
2571 && INTVAL (op1) > 0
2572 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2573 && INTVAL (op1) < MAX_BITS_PER_WORD
2574 && (shift_cost (speed, mode, INTVAL (op1))
2575 > INTVAL (op1) * add_cost (speed, mode))
2576 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2578 int i;
2579 for (i = 0; i < INTVAL (op1); i++)
2581 temp = force_reg (mode, shifted);
2582 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2583 unsignedp, OPTAB_LIB_WIDEN);
2585 return shifted;
2588 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2590 enum optab_methods methods;
2592 if (attempt == 0)
2593 methods = OPTAB_DIRECT;
2594 else if (attempt == 1)
2595 methods = OPTAB_WIDEN;
2596 else
2597 methods = OPTAB_LIB_WIDEN;
2599 if (rotate)
2601 /* Widening does not work for rotation. */
2602 if (methods == OPTAB_WIDEN)
2603 continue;
2604 else if (methods == OPTAB_LIB_WIDEN)
2606 /* If we have been unable to open-code this by a rotation,
2607 do it as the IOR of two shifts. I.e., to rotate A
2608 by N bits, compute
2609 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2610 where C is the bitsize of A.
2612 It is theoretically possible that the target machine might
2613 not be able to perform either shift and hence we would
2614 be making two libcalls rather than just the one for the
2615 shift (similarly if IOR could not be done). We will allow
2616 this extremely unlikely lossage to avoid complicating the
2617 code below. */
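/* For example, rotating a 32-bit value left by a variable N computes
   (A << N) | ((unsigned) A >> ((-N) & 31)); the masking keeps the
   right-shift amount within range for every N.  */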
2619 rtx subtarget = target == shifted ? 0 : target;
2620 rtx new_amount, other_amount;
2621 rtx temp1;
2623 new_amount = op1;
2624 if (op1 == const0_rtx)
2625 return shifted;
2626 else if (CONST_INT_P (op1))
2627 other_amount = gen_int_shift_amount
2628 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2629 else
2631 other_amount
2632 = simplify_gen_unary (NEG, GET_MODE (op1),
2633 op1, GET_MODE (op1));
2634 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2635 other_amount
2636 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2637 gen_int_mode (mask, GET_MODE (op1)));
2640 shifted = force_reg (mode, shifted);
2642 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2643 mode, shifted, new_amount, 0, 1);
2644 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2645 mode, shifted, other_amount,
2646 subtarget, 1);
2647 return expand_binop (mode, ior_optab, temp, temp1, target,
2648 unsignedp, methods);
2651 temp = expand_binop (mode,
2652 left ? lrotate_optab : rrotate_optab,
2653 shifted, op1, target, unsignedp, methods);
2655 else if (unsignedp)
2656 temp = expand_binop (mode,
2657 left ? lshift_optab : rshift_uns_optab,
2658 shifted, op1, target, unsignedp, methods);
2660 /* Do arithmetic shifts.
2661 Also, if we are going to widen the operand, we can just as well
2662 use an arithmetic right-shift instead of a logical one. */
2663 if (temp == 0 && ! rotate
2664 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2666 enum optab_methods methods1 = methods;
2668 /* If trying to widen a log shift to an arithmetic shift,
2669 don't accept an arithmetic shift of the same size. */
2670 if (unsignedp)
2671 methods1 = OPTAB_MUST_WIDEN;
2673 /* Arithmetic shift */
2675 temp = expand_binop (mode,
2676 left ? lshift_optab : rshift_arith_optab,
2677 shifted, op1, target, unsignedp, methods1);
2680 /* We used to try extzv here for logical right shifts, but that was
2681 only useful for one machine, the VAX, and caused poor code
2682 generation there for lshrdi3, so the code was deleted and a
2683 define_expand for lshrsi3 was added to vax.md. */
2686 gcc_assert (temp != NULL_RTX || may_fail);
2687 return temp;
2690 /* Output a shift instruction for expression code CODE,
2691 with SHIFTED being the rtx for the value to shift,
2692 and AMOUNT the amount to shift by.
2693 Store the result in the rtx TARGET, if that is convenient.
2694 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2695 Return the rtx for where the value is. */
2697 rtx
2698 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2699 poly_int64 amount, rtx target, int unsignedp)
2701 return expand_shift_1 (code, mode, shifted,
2702 gen_int_shift_amount (mode, amount),
2703 target, unsignedp);
2706 /* Likewise, but return 0 if that cannot be done. */
2708 rtx
2709 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2710 int amount, rtx target, int unsignedp)
2712 return expand_shift_1 (code, mode,
2713 shifted, GEN_INT (amount), target, unsignedp, true);
2716 /* Output a shift instruction for expression code CODE,
2717 with SHIFTED being the rtx for the value to shift,
2718 and AMOUNT the tree for the amount to shift by.
2719 Store the result in the rtx TARGET, if that is convenient.
2720 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2721 Return the rtx for where the value is. */
2723 rtx
2724 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2725 tree amount, rtx target, int unsignedp)
2727 return expand_shift_1 (code, mode,
2728 shifted, expand_normal (amount), target, unsignedp);
2732 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2733 const struct mult_cost *, machine_mode mode);
2734 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2735 const struct algorithm *, enum mult_variant);
2736 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2737 static rtx extract_high_half (scalar_int_mode, rtx);
2738 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2739 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2740 int, int);
2741 /* Compute and return the best algorithm for multiplying by T.
2742 The algorithm must cost less than COST_LIMIT.
2743 If retval.cost >= COST_LIMIT, no algorithm was found and all
2744 other fields of the returned struct are undefined.
2745 MODE is the machine mode of the multiplication. */
2747 static void
2748 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2749 const struct mult_cost *cost_limit, machine_mode mode)
2751 int m;
2752 struct algorithm *alg_in, *best_alg;
2753 struct mult_cost best_cost;
2754 struct mult_cost new_limit;
2755 int op_cost, op_latency;
2756 unsigned HOST_WIDE_INT orig_t = t;
2757 unsigned HOST_WIDE_INT q;
2758 int maxm, hash_index;
2759 bool cache_hit = false;
2760 enum alg_code cache_alg = alg_zero;
2761 bool speed = optimize_insn_for_speed_p ();
2762 scalar_int_mode imode;
2763 struct alg_hash_entry *entry_ptr;
2765 /* Indicate that no algorithm is yet found. If no algorithm
2766 is found, this value will be returned and indicate failure. */
2767 alg_out->cost.cost = cost_limit->cost + 1;
2768 alg_out->cost.latency = cost_limit->latency + 1;
2770 if (cost_limit->cost < 0
2771 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2772 return;
2774 /* Be prepared for vector modes. */
2775 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2777 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2779 /* Restrict the bits of "t" to the multiplication's mode. */
2780 t &= GET_MODE_MASK (imode);
2782 /* t == 1 can be done in zero cost. */
2783 if (t == 1)
2785 alg_out->ops = 1;
2786 alg_out->cost.cost = 0;
2787 alg_out->cost.latency = 0;
2788 alg_out->op[0] = alg_m;
2789 return;
2792 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2793 fail now. */
2794 if (t == 0)
2796 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2797 return;
2798 else
2800 alg_out->ops = 1;
2801 alg_out->cost.cost = zero_cost (speed);
2802 alg_out->cost.latency = zero_cost (speed);
2803 alg_out->op[0] = alg_zero;
2804 return;
2808 /* We'll need a couple of extra algorithm structures now. */
2810 alg_in = XALLOCA (struct algorithm);
2811 best_alg = XALLOCA (struct algorithm);
2812 best_cost = *cost_limit;
2814 /* Compute the hash index. */
2815 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2817 /* See if we already know what to do for T. */
2818 entry_ptr = alg_hash_entry_ptr (hash_index);
2819 if (entry_ptr->t == t
2820 && entry_ptr->mode == mode
2821 && entry_ptr->speed == speed
2822 && entry_ptr->alg != alg_unknown)
2824 cache_alg = entry_ptr->alg;
2826 if (cache_alg == alg_impossible)
2828 /* The cache tells us that it's impossible to synthesize
2829 multiplication by T within entry_ptr->cost. */
2830 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2831 /* COST_LIMIT is at least as restrictive as the one
2832 recorded in the hash table, in which case we have no
2833 hope of synthesizing a multiplication. Just
2834 return. */
2835 return;
2837 /* If we get here, COST_LIMIT is less restrictive than the
2838 one recorded in the hash table, so we may be able to
2839 synthesize a multiplication. Proceed as if we didn't
2840 have the cache entry. */
2842 else
2844 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2845 /* The cached algorithm shows that this multiplication
2846 requires more cost than COST_LIMIT. Just return. This
2847 way, we don't clobber this cache entry with
2848 alg_impossible but retain useful information. */
2849 return;
2851 cache_hit = true;
2853 switch (cache_alg)
2855 case alg_shift:
2856 goto do_alg_shift;
2858 case alg_add_t_m2:
2859 case alg_sub_t_m2:
2860 goto do_alg_addsub_t_m2;
2862 case alg_add_factor:
2863 case alg_sub_factor:
2864 goto do_alg_addsub_factor;
2866 case alg_add_t2_m:
2867 goto do_alg_add_t2_m;
2869 case alg_sub_t2_m:
2870 goto do_alg_sub_t2_m;
2872 default:
2873 gcc_unreachable ();
2878 /* If we have a group of zero bits at the low-order part of T, try
2879 multiplying by the remaining bits and then doing a shift. */
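/* For example, t = 20 (binary 10100) has two low zero bits, so we
   synthesize q = 5 and append a final shift left by 2.  */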
2881 if ((t & 1) == 0)
2883 do_alg_shift:
2884 m = ctz_or_zero (t); /* m = number of low zero bits */
2885 if (m < maxm)
2887 q = t >> m;
2888 /* The function expand_shift will choose between a shift and
2889 a sequence of additions, so the observed cost is given as
2890 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2891 op_cost = m * add_cost (speed, mode);
2892 if (shift_cost (speed, mode, m) < op_cost)
2893 op_cost = shift_cost (speed, mode, m);
2894 new_limit.cost = best_cost.cost - op_cost;
2895 new_limit.latency = best_cost.latency - op_cost;
2896 synth_mult (alg_in, q, &new_limit, mode);
2898 alg_in->cost.cost += op_cost;
2899 alg_in->cost.latency += op_cost;
2900 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2902 best_cost = alg_in->cost;
2903 std::swap (alg_in, best_alg);
2904 best_alg->log[best_alg->ops] = m;
2905 best_alg->op[best_alg->ops] = alg_shift;
2908 /* See if treating ORIG_T as a signed number yields a better
2909 sequence. Try this sequence only for a negative ORIG_T
2910 as it would be useless for a non-negative ORIG_T. */
2911 if ((HOST_WIDE_INT) orig_t < 0)
2913 /* Shift ORIG_T as follows because a right shift of a
2914 negative-valued signed type is implementation
2915 defined. */
2916 q = ~(~orig_t >> m);
2917 /* The function expand_shift will choose between a shift
2918 and a sequence of additions, so the observed cost is
2919 given as MIN (m * add_cost(speed, mode),
2920 shift_cost(speed, mode, m)). */
2921 op_cost = m * add_cost (speed, mode);
2922 if (shift_cost (speed, mode, m) < op_cost)
2923 op_cost = shift_cost (speed, mode, m);
2924 new_limit.cost = best_cost.cost - op_cost;
2925 new_limit.latency = best_cost.latency - op_cost;
2926 synth_mult (alg_in, q, &new_limit, mode);
2928 alg_in->cost.cost += op_cost;
2929 alg_in->cost.latency += op_cost;
2930 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2932 best_cost = alg_in->cost;
2933 std::swap (alg_in, best_alg);
2934 best_alg->log[best_alg->ops] = m;
2935 best_alg->op[best_alg->ops] = alg_shift;
2939 if (cache_hit)
2940 goto done;
2943 /* If we have an odd number, add or subtract one. */
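/* For example, t = 5 (binary 101) recurses on t - 1 = 4 (one shift)
   and adds x, while t = 7 (binary 111) recurses on t + 1 = 8 and
   subtracts x, giving 8*x - x.  */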
2944 if ((t & 1) != 0)
2946 unsigned HOST_WIDE_INT w;
2948 do_alg_addsub_t_m2:
2949 for (w = 1; (w & t) != 0; w <<= 1)
2951 /* If T was -1, then W will be zero after the loop. This is another
2952 case where T ends with ...111. Handling this with (T + 1) and
2953 subtract 1 produces slightly better code and results in algorithm
2954 selection much faster than treating it like the ...0111 case
2955 below. */
2956 if (w == 0
2957 || (w > 2
2958 /* Reject the case where t is 3.
2959 Thus we prefer addition in that case. */
2960 && t != 3))
2962 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2964 op_cost = add_cost (speed, mode);
2965 new_limit.cost = best_cost.cost - op_cost;
2966 new_limit.latency = best_cost.latency - op_cost;
2967 synth_mult (alg_in, t + 1, &new_limit, mode);
2969 alg_in->cost.cost += op_cost;
2970 alg_in->cost.latency += op_cost;
2971 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2973 best_cost = alg_in->cost;
2974 std::swap (alg_in, best_alg);
2975 best_alg->log[best_alg->ops] = 0;
2976 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2979 else
2981 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2983 op_cost = add_cost (speed, mode);
2984 new_limit.cost = best_cost.cost - op_cost;
2985 new_limit.latency = best_cost.latency - op_cost;
2986 synth_mult (alg_in, t - 1, &new_limit, mode);
2988 alg_in->cost.cost += op_cost;
2989 alg_in->cost.latency += op_cost;
2990 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2992 best_cost = alg_in->cost;
2993 std::swap (alg_in, best_alg);
2994 best_alg->log[best_alg->ops] = 0;
2995 best_alg->op[best_alg->ops] = alg_add_t_m2;
2999 /* We may be able to calculate a * -7, a * -15, a * -31, etc
3000 quickly with a - a * n for some appropriate constant n. */
3001 m = exact_log2 (-orig_t + 1);
3002 if (m >= 0 && m < maxm)
3004 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3005 /* If the target has a cheap shift-and-subtract insn use
3006 that in preference to a shift insn followed by a sub insn.
3007 Assume that the shift-and-sub is "atomic" with a latency
3008 equal to its cost, otherwise assume that on superscalar
3009 hardware the shift may be executed concurrently with the
3010 earlier steps in the algorithm. */
3011 if (shiftsub1_cost (speed, mode, m) <= op_cost)
3013 op_cost = shiftsub1_cost (speed, mode, m);
3014 op_latency = op_cost;
3016 else
3017 op_latency = add_cost (speed, mode);
3019 new_limit.cost = best_cost.cost - op_cost;
3020 new_limit.latency = best_cost.latency - op_latency;
3021 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3022 &new_limit, mode);
3024 alg_in->cost.cost += op_cost;
3025 alg_in->cost.latency += op_latency;
3026 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3028 best_cost = alg_in->cost;
3029 std::swap (alg_in, best_alg);
3030 best_alg->log[best_alg->ops] = m;
3031 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3035 if (cache_hit)
3036 goto done;
3039 /* Look for factors of t of the form
3040 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3041 If we find such a factor, we can multiply by t using an algorithm that
3042 multiplies by q, shift the result by m and add/subtract it to itself.
3044 We search for large factors first and loop down, even if large factors
3045 are less probable than small; if we find a large factor we will find a
3046 good sequence quickly, and therefore be able to prune (by decreasing
3047 COST_LIMIT) the search. */
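/* For example, t = 45 factors as 9 * 5: we recurse on q = 5 and then
   multiply by 9 with a single shift-and-add, accum + (accum << 3).  */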
3049 do_alg_addsub_factor:
3050 for (m = floor_log2 (t - 1); m >= 2; m--)
3052 unsigned HOST_WIDE_INT d;
3054 d = (HOST_WIDE_INT_1U << m) + 1;
3055 if (t % d == 0 && t > d && m < maxm
3056 && (!cache_hit || cache_alg == alg_add_factor))
3058 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3059 if (shiftadd_cost (speed, mode, m) <= op_cost)
3060 op_cost = shiftadd_cost (speed, mode, m);
3062 op_latency = op_cost;
3065 new_limit.cost = best_cost.cost - op_cost;
3066 new_limit.latency = best_cost.latency - op_latency;
3067 synth_mult (alg_in, t / d, &new_limit, mode);
3069 alg_in->cost.cost += op_cost;
3070 alg_in->cost.latency += op_latency;
3071 if (alg_in->cost.latency < op_cost)
3072 alg_in->cost.latency = op_cost;
3073 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3075 best_cost = alg_in->cost;
3076 std::swap (alg_in, best_alg);
3077 best_alg->log[best_alg->ops] = m;
3078 best_alg->op[best_alg->ops] = alg_add_factor;
3080 /* Other factors will have been taken care of in the recursion. */
3081 break;
3084 d = (HOST_WIDE_INT_1U << m) - 1;
3085 if (t % d == 0 && t > d && m < maxm
3086 && (!cache_hit || cache_alg == alg_sub_factor))
3088 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3089 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3090 op_cost = shiftsub0_cost (speed, mode, m);
3092 op_latency = op_cost;
3094 new_limit.cost = best_cost.cost - op_cost;
3095 new_limit.latency = best_cost.latency - op_latency;
3096 synth_mult (alg_in, t / d, &new_limit, mode);
3098 alg_in->cost.cost += op_cost;
3099 alg_in->cost.latency += op_latency;
3100 if (alg_in->cost.latency < op_cost)
3101 alg_in->cost.latency = op_cost;
3102 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3104 best_cost = alg_in->cost;
3105 std::swap (alg_in, best_alg);
3106 best_alg->log[best_alg->ops] = m;
3107 best_alg->op[best_alg->ops] = alg_sub_factor;
3109 break;
3112 if (cache_hit)
3113 goto done;
3115 /* Try shift-and-add (load effective address) instructions,
3116 i.e. do a*3, a*5, a*9. */
3117 if ((t & 1) != 0)
3119 do_alg_add_t2_m:
3120 q = t - 1;
3121 m = ctz_hwi (q);
3122 if (q && m < maxm)
3124 op_cost = shiftadd_cost (speed, mode, m);
3125 new_limit.cost = best_cost.cost - op_cost;
3126 new_limit.latency = best_cost.latency - op_cost;
3127 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3129 alg_in->cost.cost += op_cost;
3130 alg_in->cost.latency += op_cost;
3131 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3133 best_cost = alg_in->cost;
3134 std::swap (alg_in, best_alg);
3135 best_alg->log[best_alg->ops] = m;
3136 best_alg->op[best_alg->ops] = alg_add_t2_m;
3139 if (cache_hit)
3140 goto done;
3142 do_alg_sub_t2_m:
3143 q = t + 1;
3144 m = ctz_hwi (q);
3145 if (q && m < maxm)
3147 op_cost = shiftsub0_cost (speed, mode, m);
3148 new_limit.cost = best_cost.cost - op_cost;
3149 new_limit.latency = best_cost.latency - op_cost;
3150 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3152 alg_in->cost.cost += op_cost;
3153 alg_in->cost.latency += op_cost;
3154 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3156 best_cost = alg_in->cost;
3157 std::swap (alg_in, best_alg);
3158 best_alg->log[best_alg->ops] = m;
3159 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3162 if (cache_hit)
3163 goto done;
3166 done:
3167 /* If best_cost has not decreased, we have not found any algorithm. */
3168 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3170 /* We failed to find an algorithm. Record alg_impossible for
3171 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3172 we are asked to find an algorithm for T within the same or
3173 lower COST_LIMIT, we can immediately return to the
3174 caller. */
3175 entry_ptr->t = t;
3176 entry_ptr->mode = mode;
3177 entry_ptr->speed = speed;
3178 entry_ptr->alg = alg_impossible;
3179 entry_ptr->cost = *cost_limit;
3180 return;
3183 /* Cache the result. */
3184 if (!cache_hit)
3186 entry_ptr->t = t;
3187 entry_ptr->mode = mode;
3188 entry_ptr->speed = speed;
3189 entry_ptr->alg = best_alg->op[best_alg->ops];
3190 entry_ptr->cost.cost = best_cost.cost;
3191 entry_ptr->cost.latency = best_cost.latency;
3194 /* If we are getting too long a sequence for `struct algorithm'
3195 to record, make this search fail. */
3196 if (best_alg->ops == MAX_BITS_PER_WORD)
3197 return;
3199 /* Copy the algorithm from temporary space to the space at alg_out.
3200 We avoid using structure assignment because the majority of
3201 best_alg is normally undefined, and this is a critical function. */
3202 alg_out->ops = best_alg->ops + 1;
3203 alg_out->cost = best_cost;
3204 memcpy (alg_out->op, best_alg->op,
3205 alg_out->ops * sizeof *alg_out->op);
3206 memcpy (alg_out->log, best_alg->log,
3207 alg_out->ops * sizeof *alg_out->log);
3210 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3211 Try three variations:
3213 - a shift/add sequence based on VAL itself
3214 - a shift/add sequence based on -VAL, followed by a negation
3215 - a shift/add sequence based on VAL - 1, followed by an addition.
3217 Return true if the cheapest of these cost less than MULT_COST,
3218 describing the algorithm in *ALG and final fixup in *VARIANT. */
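/* For example, on a target with a cheap negate, val == -3 is best done
   by synthesizing 3 as x + (x << 1) and negating the result.  */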
3220 bool
3221 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3222 struct algorithm *alg, enum mult_variant *variant,
3223 int mult_cost)
3225 struct algorithm alg2;
3226 struct mult_cost limit;
3227 int op_cost;
3228 bool speed = optimize_insn_for_speed_p ();
3230 /* Fail quickly for impossible bounds. */
3231 if (mult_cost < 0)
3232 return false;
3234 /* Ensure that mult_cost provides a reasonable upper bound.
3235 Any constant multiplication can be performed with fewer
3236 than 2 * bits additions. */
3237 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3238 if (mult_cost > op_cost)
3239 mult_cost = op_cost;
3241 *variant = basic_variant;
3242 limit.cost = mult_cost;
3243 limit.latency = mult_cost;
3244 synth_mult (alg, val, &limit, mode);
3246 /* This works only if the inverted value actually fits in an
3247 `unsigned int'. */
3248 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3250 op_cost = neg_cost (speed, mode);
3251 if (MULT_COST_LESS (&alg->cost, mult_cost))
3253 limit.cost = alg->cost.cost - op_cost;
3254 limit.latency = alg->cost.latency - op_cost;
3256 else
3258 limit.cost = mult_cost - op_cost;
3259 limit.latency = mult_cost - op_cost;
3262 synth_mult (&alg2, -val, &limit, mode);
3263 alg2.cost.cost += op_cost;
3264 alg2.cost.latency += op_cost;
3265 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3266 *alg = alg2, *variant = negate_variant;
3269 /* This proves very useful for division-by-constant. */
3270 op_cost = add_cost (speed, mode);
3271 if (MULT_COST_LESS (&alg->cost, mult_cost))
3273 limit.cost = alg->cost.cost - op_cost;
3274 limit.latency = alg->cost.latency - op_cost;
3276 else
3278 limit.cost = mult_cost - op_cost;
3279 limit.latency = mult_cost - op_cost;
3282 synth_mult (&alg2, val - 1, &limit, mode);
3283 alg2.cost.cost += op_cost;
3284 alg2.cost.latency += op_cost;
3285 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3286 *alg = alg2, *variant = add_variant;
3288 return MULT_COST_LESS (&alg->cost, mult_cost);
3291 /* A subroutine of expand_mult, used for constant multiplications.
3292 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3293 convenient. Use the shift/add sequence described by ALG and apply
3294 the final fixup specified by VARIANT. */
3296 static rtx
3297 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3298 rtx target, const struct algorithm *alg,
3299 enum mult_variant variant)
3301 unsigned HOST_WIDE_INT val_so_far;
3302 rtx_insn *insn;
3303 rtx accum, tem;
3304 int opno;
3305 machine_mode nmode;
3307 /* Avoid referencing memory over and over and invalid sharing
3308 on SUBREGs. */
3309 op0 = force_reg (mode, op0);
3311 /* ACCUM starts out either as OP0 or as a zero, depending on
3312 the first operation. */
3314 if (alg->op[0] == alg_zero)
3316 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3317 val_so_far = 0;
3319 else if (alg->op[0] == alg_m)
3321 accum = copy_to_mode_reg (mode, op0);
3322 val_so_far = 1;
3324 else
3325 gcc_unreachable ();
3327 for (opno = 1; opno < alg->ops; opno++)
3329 int log = alg->log[opno];
3330 rtx shift_subtarget = optimize ? 0 : accum;
3331 rtx add_target
3332 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3333 && !optimize)
3334 ? target : 0;
3335 rtx accum_target = optimize ? 0 : accum;
3336 rtx accum_inner;
3338 switch (alg->op[opno])
3340 case alg_shift:
3341 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3342 /* REG_EQUAL note will be attached to the following insn. */
3343 emit_move_insn (accum, tem);
3344 val_so_far <<= log;
3345 break;
3347 case alg_add_t_m2:
3348 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3349 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3350 add_target ? add_target : accum_target);
3351 val_so_far += HOST_WIDE_INT_1U << log;
3352 break;
3354 case alg_sub_t_m2:
3355 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3356 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3357 add_target ? add_target : accum_target);
3358 val_so_far -= HOST_WIDE_INT_1U << log;
3359 break;
3361 case alg_add_t2_m:
3362 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3363 log, shift_subtarget, 0);
3364 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3365 add_target ? add_target : accum_target);
3366 val_so_far = (val_so_far << log) + 1;
3367 break;
3369 case alg_sub_t2_m:
3370 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3371 log, shift_subtarget, 0);
3372 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3373 add_target ? add_target : accum_target);
3374 val_so_far = (val_so_far << log) - 1;
3375 break;
3377 case alg_add_factor:
3378 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3379 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3380 add_target ? add_target : accum_target);
3381 val_so_far += val_so_far << log;
3382 break;
3384 case alg_sub_factor:
3385 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3386 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3387 (add_target
3388 ? add_target : (optimize ? 0 : tem)));
3389 val_so_far = (val_so_far << log) - val_so_far;
3390 break;
3392 default:
3393 gcc_unreachable ();
3396 if (SCALAR_INT_MODE_P (mode))
3398 /* Write a REG_EQUAL note on the last insn so that we can cse
3399 multiplication sequences. Note that if ACCUM is a SUBREG,
3400 we've set the inner register and must properly indicate that. */
3401 tem = op0, nmode = mode;
3402 accum_inner = accum;
3403 if (GET_CODE (accum) == SUBREG)
3405 accum_inner = SUBREG_REG (accum);
3406 nmode = GET_MODE (accum_inner);
3407 tem = gen_lowpart (nmode, op0);
3410 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3411 In that case, only the low bits of accum would be guaranteed to
3412 be equal to the content of the REG_EQUAL note, the upper bits
3413 can be anything. */
3414 if (!paradoxical_subreg_p (tem))
3416 insn = get_last_insn ();
3417 wide_int wval_so_far
3418 = wi::uhwi (val_so_far,
3419 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3420 rtx c = immed_wide_int_const (wval_so_far, nmode);
3421 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3422 accum_inner);
3427 if (variant == negate_variant)
3429 val_so_far = -val_so_far;
3430 accum = expand_unop (mode, neg_optab, accum, target, 0);
3432 else if (variant == add_variant)
3434 val_so_far = val_so_far + 1;
3435 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3438 /* Compare only the bits of val and val_so_far that are significant
3439 in the result mode, to avoid sign-/zero-extension confusion. */
3440 nmode = GET_MODE_INNER (mode);
3441 val &= GET_MODE_MASK (nmode);
3442 val_so_far &= GET_MODE_MASK (nmode);
3443 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3445 return accum;
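/* Worked example, for illustration only (not from the original sources):
   for a synth_mult algorithm that computes x * 10 as ((x << 2) + x) << 1,
   the loop above sees alg_m (accum = x, val_so_far = 1), then
   alg_add_t2_m with log == 2 (accum = (accum << 2) + x, val_so_far = 5),
   then alg_shift with log == 1 (accum <<= 1, val_so_far = 10), after
   which the final assertion val == val_so_far holds.  */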
3448 /* Perform a multiplication and return an rtx for the result.
3449 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3450 TARGET is a suggestion for where to store the result (an rtx).
3452 We check specially for a constant integer as OP1.
3453 If you want this check for OP0 as well, then before calling
3454 you should swap the two operands if OP0 would be constant. */
3456 rtx
3457 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3458 int unsignedp, bool no_libcall)
3460 enum mult_variant variant;
3461 struct algorithm algorithm;
3462 rtx scalar_op1;
3463 int max_cost;
3464 bool speed = optimize_insn_for_speed_p ();
3465 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3467 if (CONSTANT_P (op0))
3468 std::swap (op0, op1);
3470 /* For vectors, there are several simplifications that can be made if
3471 all elements of the vector constant are identical. */
3472 scalar_op1 = unwrap_const_vec_duplicate (op1);
3474 if (INTEGRAL_MODE_P (mode))
3476 rtx fake_reg;
3477 HOST_WIDE_INT coeff;
3478 bool is_neg;
3479 int mode_bitsize;
3481 if (op1 == CONST0_RTX (mode))
3482 return op1;
3483 if (op1 == CONST1_RTX (mode))
3484 return op0;
3485 if (op1 == CONSTM1_RTX (mode))
3486 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3487 op0, target, 0);
3489 if (do_trapv)
3490 goto skip_synth;
3492 /* If mode is integer vector mode, check if the backend supports
3493 vector lshift (by scalar or vector) at all. If not, we can't use
3494 synthesized multiply. */
3495 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3496 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3497 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3498 goto skip_synth;
3500 /* These are the operations that are potentially turned into
3501 a sequence of shifts and additions. */
3502 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3504 /* synth_mult does an `unsigned HOST_WIDE_INT' multiply. As long as the
3505 mode is less than or equal in size to `unsigned HOST_WIDE_INT' this
3506 doesn't matter. If the mode is larger, then synth_mult works only if
3507 the constant value exactly fits in an `unsigned HOST_WIDE_INT' without
3508 any truncation. This means that multiplying by negative values does
3509 not work; the result would be off by 2^HOST_BITS_PER_WIDE_INT. */
3510 if (CONST_INT_P (scalar_op1))
3512 coeff = INTVAL (scalar_op1);
3513 is_neg = coeff < 0;
3515 #if TARGET_SUPPORTS_WIDE_INT
3516 else if (CONST_WIDE_INT_P (scalar_op1))
3517 #else
3518 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3519 #endif
3521 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3522 /* Perfect power of 2 (other than 1, which is handled above). */
3523 if (shift > 0)
3524 return expand_shift (LSHIFT_EXPR, mode, op0,
3525 shift, target, unsignedp);
3526 else
3527 goto skip_synth;
3529 else
3530 goto skip_synth;
3532 /* We used to test optimize here, on the grounds that it's better to
3533 produce a smaller program when -O is not used. But this causes
3534 such a terrible slowdown sometimes that it seems better to always
3535 use synth_mult. */
3537 /* Special case powers of two. */
3538 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3539 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3540 return expand_shift (LSHIFT_EXPR, mode, op0,
3541 floor_log2 (coeff), target, unsignedp);
3543 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3545 /* Attempt to handle multiplication of DImode values by negative
3546 coefficients, by performing the multiplication by a positive
3547 multiplier and then inverting the result. */
3548 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3550 /* It's safe to use -coeff even for INT_MIN, as the
3551 result is interpreted as an unsigned coefficient.
3552 Exclude cost of op0 from max_cost to match the cost
3553 calculation of the synth_mult. */
3554 coeff = -(unsigned HOST_WIDE_INT) coeff;
3555 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3556 mode, speed)
3557 - neg_cost (speed, mode));
3558 if (max_cost <= 0)
3559 goto skip_synth;
3561 /* Special case powers of two. */
3562 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3564 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3565 floor_log2 (coeff), target, unsignedp);
3566 return expand_unop (mode, neg_optab, temp, target, 0);
3569 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3570 max_cost))
3572 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3573 &algorithm, variant);
3574 return expand_unop (mode, neg_optab, temp, target, 0);
3576 goto skip_synth;
3579 /* Exclude cost of op0 from max_cost to match the cost
3580 calculation of the synth_mult. */
3581 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3582 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3583 return expand_mult_const (mode, op0, coeff, target,
3584 &algorithm, variant);
3586 skip_synth:
3588 /* Expand x*2.0 as x+x. */
3589 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3590 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3592 op0 = force_reg (GET_MODE (op0), op0);
3593 return expand_binop (mode, add_optab, op0, op0,
3594 target, unsignedp,
3595 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3598 /* This used to use umul_optab if unsigned, but for non-widening multiply
3599 there is no difference between signed and unsigned. */
3600 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3601 op0, op1, target, unsignedp,
3602 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3603 gcc_assert (op0 || no_libcall);
3604 return op0;
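/* Example, for illustration only (not from the original sources): for a
   mode wider than HOST_WIDE_INT, x * -5 is handled by the is_neg path
   above, which negates the coefficient, synthesizes x * 5 == (x << 2) + x,
   and negates the result, provided max_cost less neg_cost still leaves
   room for the synthetic sequence.  */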
3607 /* Return a cost estimate for multiplying a register by the given
3608 COEFFicient in the given MODE and SPEED. */
3610 int
3611 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3613 int max_cost;
3614 struct algorithm algorithm;
3615 enum mult_variant variant;
3617 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3618 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3619 mode, speed);
3620 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3621 return algorithm.cost.cost;
3622 else
3623 return max_cost;
3626 /* Perform a widening multiplication and return an rtx for the result.
3627 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3628 TARGET is a suggestion for where to store the result (an rtx).
3629 THIS_OPTAB is the optab we should use; it must be either umul_widen_optab
3630 or smul_widen_optab.
3632 We check specially for a constant integer as OP1, comparing the
3633 cost of a widening multiply against the cost of a sequence of shifts
3634 and adds. */
3636 rtx
3637 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3638 int unsignedp, optab this_optab)
3640 bool speed = optimize_insn_for_speed_p ();
3641 rtx cop1;
3643 if (CONST_INT_P (op1)
3644 && GET_MODE (op0) != VOIDmode
3645 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3646 this_optab == umul_widen_optab))
3647 && CONST_INT_P (cop1)
3648 && (INTVAL (cop1) >= 0
3649 || HWI_COMPUTABLE_MODE_P (mode)))
3651 HOST_WIDE_INT coeff = INTVAL (cop1);
3652 int max_cost;
3653 enum mult_variant variant;
3654 struct algorithm algorithm;
3656 if (coeff == 0)
3657 return CONST0_RTX (mode);
3659 /* Special case powers of two. */
3660 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3662 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3663 return expand_shift (LSHIFT_EXPR, mode, op0,
3664 floor_log2 (coeff), target, unsignedp);
3667 /* Exclude cost of op0 from max_cost to match the cost
3668 calculation of the synth_mult. */
3669 max_cost = mul_widen_cost (speed, mode);
3670 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3671 max_cost))
3673 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3674 return expand_mult_const (mode, op0, coeff, target,
3675 &algorithm, variant);
3678 return expand_binop (mode, this_optab, op0, op1, target,
3679 unsignedp, OPTAB_LIB_WIDEN);
3682 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3683 replace division by D, and put the least significant N bits of the result
3684 in *MULTIPLIER_PTR and return the most significant bit.
3686 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3687 needed precision is in PRECISION (should be <= N).
3689 PRECISION should be as small as possible so this function can choose
3690 the multiplier more freely.
3692 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3693 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3695 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3696 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3698 unsigned HOST_WIDE_INT
3699 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3700 unsigned HOST_WIDE_INT *multiplier_ptr,
3701 int *post_shift_ptr, int *lgup_ptr)
3703 int lgup, post_shift;
3704 int pow, pow2;
3706 /* lgup = ceil(log2(divisor)); */
3707 lgup = ceil_log2 (d);
3709 gcc_assert (lgup <= n);
3711 pow = n + lgup;
3712 pow2 = n + lgup - precision;
3714 /* mlow = 2^(N + lgup)/d */
3715 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3716 wide_int mlow = wi::udiv_trunc (val, d);
3718 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3719 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3720 wide_int mhigh = wi::udiv_trunc (val, d);
3722 /* If precision == N, then mlow, mhigh exceed 2^N
3723 (but they do not exceed 2^(N+1)). */
3725 /* Reduce to lowest terms. */
3726 for (post_shift = lgup; post_shift > 0; post_shift--)
3728 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3729 HOST_BITS_PER_WIDE_INT);
3730 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3731 HOST_BITS_PER_WIDE_INT);
3732 if (ml_lo >= mh_lo)
3733 break;
3735 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3736 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3739 *post_shift_ptr = post_shift;
3740 *lgup_ptr = lgup;
3741 if (n < HOST_BITS_PER_WIDE_INT)
3743 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3744 *multiplier_ptr = mhigh.to_uhwi () & mask;
3745 return mhigh.to_uhwi () > mask;
3747 else
3749 *multiplier_ptr = mhigh.to_uhwi ();
3750 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
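/* Worked example, for illustration only (not from the original sources):
   choose_multiplier (3, 32, 32, &m, &p, &l) starts from
     mlow  == 2^34 / 3       == 0x155555555
     mhigh == (2^34 + 4) / 3 == 0x155555556,
   halves both once, and returns with m == 0xAAAAAAAB, p == 1, l == 2 and
   a zero most significant bit, so for unsigned 32-bit x
     x / 3 == high32 (x * 0xAAAAAAAB) >> 1.
   When the returned bit is nonzero (the multiplier needs N + 1 bits),
   callers such as expand_divmod use the fixup
     q == (((x - t) >> 1) + t) >> (p - 1)   with t == high (x * m).  */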
3754 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3755 congruent to 1 (mod 2**N). */
3757 static unsigned HOST_WIDE_INT
3758 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3760 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3762 /* The algorithm notes that the choice y = x satisfies
3763 x*y == 1 mod 2^3, since x is assumed odd.
3764 Each iteration doubles the number of bits of significance in y. */
3766 unsigned HOST_WIDE_INT mask;
3767 unsigned HOST_WIDE_INT y = x;
3768 int nbit = 3;
3770 mask = (n == HOST_BITS_PER_WIDE_INT
3771 ? HOST_WIDE_INT_M1U
3772 : (HOST_WIDE_INT_1U << n) - 1);
3774 while (nbit < n)
3776 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3777 nbit *= 2;
3779 return y;
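/* Worked example, for illustration only (not from the original sources):
   invert_mod2n (7, 8) yields 183, since 7 * 183 == 1281 == 5 * 256 + 1.
   Starting from y == x == 7, which is already correct mod 2^3, the first
   doubling step computes y = 7 * (2 - 7 * 7) & 0xff == 183, correct mod
   2^6 and in fact mod 2^8, and the second step leaves it unchanged.  */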
3782 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3783 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3784 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3785 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3786 become signed.
3788 The result is put in TARGET if that is convenient.
3790 MODE is the mode of operation. */
3792 rtx
3793 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3794 rtx op1, rtx target, int unsignedp)
3796 rtx tem;
3797 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3799 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3800 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3801 tem = expand_and (mode, tem, op1, NULL_RTX);
3802 adj_operand
3803 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3804 adj_operand);
3806 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3807 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3808 tem = expand_and (mode, tem, op0, NULL_RTX);
3809 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3810 target);
3812 return target;
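/* In C terms, for illustration only (arithmetic shifts assumed, not from
   the original sources): for N-bit x and y,
     high_u (x, y) == high_s (x, y) + ((x >> (N-1)) & y)
                                    + ((y >> (N-1)) & x)   (mod 2^N),
   i.e. the unsigned high part exceeds the signed one by y when x is
   negative and by x when y is negative; UNSIGNEDP selects in which
   direction the two masked terms built above are applied.  */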
3815 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3817 static rtx
3818 extract_high_half (scalar_int_mode mode, rtx op)
3820 if (mode == word_mode)
3821 return gen_highpart (mode, op);
3823 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3825 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3826 GET_MODE_BITSIZE (mode), 0, 1);
3827 return convert_modes (mode, wider_mode, op, 0);
3830 /* Like expmed_mult_highpart, but only consider using a multiplication
3831 optab. OP1 is an rtx for the constant operand. */
3833 static rtx
3834 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3835 rtx target, int unsignedp, int max_cost)
3837 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3838 optab moptab;
3839 rtx tem;
3840 int size;
3841 bool speed = optimize_insn_for_speed_p ();
3843 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3845 size = GET_MODE_BITSIZE (mode);
3847 /* Firstly, try using a multiplication insn that only generates the needed
3848 high part of the product, and in the sign flavor of unsignedp. */
3849 if (mul_highpart_cost (speed, mode) < max_cost)
3851 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3852 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3853 unsignedp, OPTAB_DIRECT);
3854 if (tem)
3855 return tem;
3858 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3859 Need to adjust the result after the multiplication. */
3860 if (size - 1 < BITS_PER_WORD
3861 && (mul_highpart_cost (speed, mode)
3862 + 2 * shift_cost (speed, mode, size-1)
3863 + 4 * add_cost (speed, mode) < max_cost))
3865 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3866 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3867 unsignedp, OPTAB_DIRECT);
3868 if (tem)
3869 /* We used the wrong signedness. Adjust the result. */
3870 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3871 tem, unsignedp);
3874 /* Try widening multiplication. */
3875 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3876 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3877 && mul_widen_cost (speed, wider_mode) < max_cost)
3879 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3880 unsignedp, OPTAB_WIDEN);
3881 if (tem)
3882 return extract_high_half (mode, tem);
3885 /* Try widening the mode and performing a non-widening multiplication. */
3886 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3887 && size - 1 < BITS_PER_WORD
3888 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3889 < max_cost))
3891 rtx_insn *insns;
3892 rtx wop0, wop1;
3894 /* We need to widen the operands, for example to ensure the
3895 constant multiplier is correctly sign or zero extended.
3896 Use a sequence to clean up any instructions emitted by
3897 the conversions if things don't work out. */
3898 start_sequence ();
3899 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3900 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3901 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3902 unsignedp, OPTAB_WIDEN);
3903 insns = get_insns ();
3904 end_sequence ();
3906 if (tem)
3908 emit_insn (insns);
3909 return extract_high_half (mode, tem);
3913 /* Try widening multiplication of opposite signedness, and adjust. */
3914 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3915 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3916 && size - 1 < BITS_PER_WORD
3917 && (mul_widen_cost (speed, wider_mode)
3918 + 2 * shift_cost (speed, mode, size-1)
3919 + 4 * add_cost (speed, mode) < max_cost))
3921 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3922 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3923 if (tem != 0)
3925 tem = extract_high_half (mode, tem);
3926 /* We used the wrong signedness. Adjust the result. */
3927 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3928 target, unsignedp);
3932 return 0;
3935 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3936 putting the high half of the result in TARGET if that is convenient,
3937 and return where the result is. If the operation cannot be performed,
3938 0 is returned.
3940 MODE is the mode of operation and result.
3942 UNSIGNEDP nonzero means unsigned multiply.
3944 MAX_COST is the total allowed cost for the expanded RTL. */
3946 static rtx
3947 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3948 rtx target, int unsignedp, int max_cost)
3950 unsigned HOST_WIDE_INT cnst1;
3951 int extra_cost;
3952 bool sign_adjust = false;
3953 enum mult_variant variant;
3954 struct algorithm alg;
3955 rtx tem;
3956 bool speed = optimize_insn_for_speed_p ();
3958 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3959 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3961 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3963 /* We can't optimize modes wider than BITS_PER_WORD.
3964 ??? We might be able to perform double-word arithmetic if
3965 mode == word_mode, however all the cost calculations in
3966 synth_mult etc. assume single-word operations. */
3967 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3968 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3969 return expmed_mult_highpart_optab (mode, op0, op1, target,
3970 unsignedp, max_cost);
3972 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3974 /* Check whether we try to multiply by a negative constant. */
3975 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3977 sign_adjust = true;
3978 extra_cost += add_cost (speed, mode);
3981 /* See whether shift/add multiplication is cheap enough. */
3982 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3983 max_cost - extra_cost))
3985 /* See whether the specialized multiplication optabs are
3986 cheaper than the shift/add version. */
3987 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3988 alg.cost.cost + extra_cost);
3989 if (tem)
3990 return tem;
3992 tem = convert_to_mode (wider_mode, op0, unsignedp);
3993 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3994 tem = extract_high_half (mode, tem);
3996 /* Adjust result for signedness. */
3997 if (sign_adjust)
3998 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
4000 return tem;
4002 return expmed_mult_highpart_optab (mode, op0, op1, target,
4003 unsignedp, max_cost);
4007 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
4009 static rtx
4010 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4012 rtx result, temp, shift;
4013 rtx_code_label *label;
4014 int logd;
4015 int prec = GET_MODE_PRECISION (mode);
4017 logd = floor_log2 (d);
4018 result = gen_reg_rtx (mode);
4020 /* Avoid conditional branches when they're expensive. */
4021 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4022 && optimize_insn_for_speed_p ())
4024 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4025 mode, 0, -1);
4026 if (signmask)
4028 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4029 signmask = force_reg (mode, signmask);
4030 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4032 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4033 which instruction sequence to use. If logical right shifts
4034 are expensive, use 2 XORs, 2 SUBs and an AND; otherwise
4035 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
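/* In C terms, for illustration only (not from the original sources):
   with s == signmask == (x < 0 ? -1 : 0) and m == masklow, the first
   sequence computes
     ((((x ^ s) - s) & m) ^ s) - s
   (strip the sign, mask, restore the sign), while the second computes
     ((x + k) & m) - k   where k == (unsigned) s >> (bits - logd),
   i.e. k == (x < 0 ? m : 0).  Both equal x rem 2^logd, truncated
   towards zero.  */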
4037 temp = gen_rtx_LSHIFTRT (mode, result, shift);
4038 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4039 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4040 > COSTS_N_INSNS (2)))
4042 temp = expand_binop (mode, xor_optab, op0, signmask,
4043 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4044 temp = expand_binop (mode, sub_optab, temp, signmask,
4045 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4046 temp = expand_binop (mode, and_optab, temp,
4047 gen_int_mode (masklow, mode),
4048 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4049 temp = expand_binop (mode, xor_optab, temp, signmask,
4050 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4051 temp = expand_binop (mode, sub_optab, temp, signmask,
4052 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4054 else
4056 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4057 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4058 signmask = force_reg (mode, signmask);
4060 temp = expand_binop (mode, add_optab, op0, signmask,
4061 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4062 temp = expand_binop (mode, and_optab, temp,
4063 gen_int_mode (masklow, mode),
4064 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4065 temp = expand_binop (mode, sub_optab, temp, signmask,
4066 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4068 return temp;
4072 /* Mask contains the mode's signbit and the significant bits of the
4073 modulus. By including the signbit in the operation, many targets
4074 can avoid an explicit compare operation in the following comparison
4075 against zero. */
4076 wide_int mask = wi::mask (logd, false, prec);
4077 mask = wi::set_bit (mask, prec - 1);
4079 temp = expand_binop (mode, and_optab, op0,
4080 immed_wide_int_const (mask, mode),
4081 result, 1, OPTAB_LIB_WIDEN);
4082 if (temp != result)
4083 emit_move_insn (result, temp);
4085 label = gen_label_rtx ();
4086 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4088 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4089 0, OPTAB_LIB_WIDEN);
4091 mask = wi::mask (logd, true, prec);
4092 temp = expand_binop (mode, ior_optab, temp,
4093 immed_wide_int_const (mask, mode),
4094 result, 1, OPTAB_LIB_WIDEN);
4095 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4096 0, OPTAB_LIB_WIDEN);
4097 if (temp != result)
4098 emit_move_insn (result, temp);
4099 emit_label (label);
4100 return result;
4103 /* Expand signed division of OP0 by a power of two D in mode MODE.
4104 This routine is only called for positive values of D. */
4106 static rtx
4107 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4109 rtx temp;
4110 rtx_code_label *label;
4111 int logd;
4113 logd = floor_log2 (d);
4115 if (d == 2
4116 && BRANCH_COST (optimize_insn_for_speed_p (),
4117 false) >= 1)
4119 temp = gen_reg_rtx (mode);
4120 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4121 if (temp != NULL_RTX)
4123 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4124 0, OPTAB_LIB_WIDEN);
4125 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4129 if (HAVE_conditional_move
4130 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4132 rtx temp2;
4134 start_sequence ();
4135 temp2 = copy_to_mode_reg (mode, op0);
4136 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4137 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4138 temp = force_reg (mode, temp);
4140 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4141 temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
4142 temp, temp2, mode, 0);
4143 if (temp2)
4145 rtx_insn *seq = get_insns ();
4146 end_sequence ();
4147 emit_insn (seq);
4148 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4150 end_sequence ();
4153 if (BRANCH_COST (optimize_insn_for_speed_p (),
4154 false) >= 2)
4156 int ushift = GET_MODE_BITSIZE (mode) - logd;
4158 temp = gen_reg_rtx (mode);
4159 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4160 if (temp != NULL_RTX)
4162 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4163 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4164 > COSTS_N_INSNS (1))
4165 temp = expand_binop (mode, and_optab, temp,
4166 gen_int_mode (d - 1, mode),
4167 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4168 else
4169 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4170 ushift, NULL_RTX, 1);
4171 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4172 0, OPTAB_LIB_WIDEN);
4173 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4177 label = gen_label_rtx ();
4178 temp = copy_to_mode_reg (mode, op0);
4179 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4180 expand_inc (temp, gen_int_mode (d - 1, mode));
4181 emit_label (label);
4182 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
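/* In C terms, for illustration only (not from the original sources),
   every path above computes the truncating quotient
     (x + (x < 0 ? d - 1 : 0)) >> logd
   with an arithmetic final shift: the d == 2 case as
   (x + (x < 0)) >> 1, and the store-flag cases by building
   (x >> (bits - 1)) & (d - 1), or the equivalent unsigned shift, as the
   conditional addend.  */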
4185 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4186 if that is convenient, and returning where the result is.
4187 You may request either the quotient or the remainder as the result;
4188 specify REM_FLAG nonzero to get the remainder.
4190 CODE is the expression code for which kind of division this is;
4191 it controls how rounding is done. MODE is the machine mode to use.
4192 UNSIGNEDP nonzero means do unsigned division. */
4194 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4195 and then correct it by or'ing in missing high bits
4196 if result of ANDI is nonzero.
4197 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4198 This could optimize to a bfexts instruction.
4199 But C doesn't use these operations, so their optimizations are
4200 left for later. */
4201 /* ??? For modulo, we don't actually need the highpart of the first product,
4202 the low part will do nicely. And for small divisors, the second multiply
4203 can also be a low-part only multiply or even be completely left out.
4204 E.g. to calculate the remainder of a division by 3 with a 32 bit
4205 multiply, multiply with 0x55555556 and extract the upper two bits;
4206 the result is exact for inputs up to 0x1fffffff.
4207 The input range can be reduced by using cross-sum rules.
4208 For odd divisors >= 3, the following table gives right shift counts
4209 so that if a number is shifted by an integer multiple of the given
4210 amount, the remainder stays the same:
4211 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4212 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4213 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4214 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4215 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4217 Cross-sum rules for even numbers can be derived by leaving as many bits
4218 to the right alone as the divisor has zeros to the right.
4219 E.g. if x is an unsigned 32 bit number:
4220 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4221 */
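/* A stand-alone sketch of the remainder-by-3 trick described above, for
   illustration only; the main function and sample values are hypothetical
   additions, fenced with `#if 0' in the style this file already uses for
   inactive code.  */
#if 0
#include <cstdint>
#include <cassert>

int
main ()
{
  /* The top two bits of the low 32 bits of x * 0x55555556 give x mod 3,
     exactly, for x up to 0x1fffffff.  */
  const uint32_t samples[] = { 0, 1, 2, 5, 100, 12345678, 0x1fffffff };
  for (uint32_t x : samples)
    {
      uint32_t r = (uint32_t) (x * UINT64_C (0x55555556)) >> 30;
      assert (r == x % 3);
    }
  return 0;
}
#endif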
4223 rtx
4224 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4225 tree treeop0, tree treeop1, rtx op0, rtx op1, rtx target,
4226 int unsignedp, enum optab_methods methods)
4228 machine_mode compute_mode;
4229 rtx tquotient;
4230 rtx quotient = 0, remainder = 0;
4231 rtx_insn *last;
4232 rtx_insn *insn;
4233 optab optab1, optab2;
4234 int op1_is_constant, op1_is_pow2 = 0;
4235 int max_cost, extra_cost;
4236 static HOST_WIDE_INT last_div_const = 0;
4237 bool speed = optimize_insn_for_speed_p ();
4239 op1_is_constant = CONST_INT_P (op1);
4240 if (op1_is_constant)
4242 wide_int ext_op1 = rtx_mode_t (op1, mode);
4243 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4244 || (! unsignedp
4245 && wi::popcount (wi::neg (ext_op1)) == 1));
4248 /*
4249 This is the structure of expand_divmod:
4251 First comes code to fix up the operands so we can perform the operations
4252 correctly and efficiently.
4254 Second comes a switch statement with code specific for each rounding mode.
4255 For some special operands this code emits all RTL for the desired
4256 operation; for other cases, it generates only a quotient and stores it in
4257 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4258 to indicate that it has not done anything.
4260 Last comes code that finishes the operation. If QUOTIENT is set and
4261 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4262 QUOTIENT is not set, it is computed using trunc rounding.
4264 We try to generate special code for division and remainder when OP1 is a
4265 constant. If |OP1| = 2**n we can use shifts and some other fast
4266 operations. For other values of OP1, we compute a carefully selected
4267 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4268 by m.
4270 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4271 half of the product. Different strategies for generating the product are
4272 implemented in expmed_mult_highpart.
4274 If what we actually want is the remainder, we generate that by another
4275 by-constant multiplication and a subtraction. */
4277 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4278 code below will malfunction if we are, so check here and handle
4279 the special case if so. */
4280 if (op1 == const1_rtx)
4281 return rem_flag ? const0_rtx : op0;
4283 /* When dividing by -1, we could get an overflow.
4284 negv_optab can handle overflows. */
4285 if (! unsignedp && op1 == constm1_rtx)
4287 if (rem_flag)
4288 return const0_rtx;
4289 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4290 ? negv_optab : neg_optab, op0, target, 0);
4293 if (target
4294 /* Don't use the function value register as a target
4295 since we have to read it as well as write it,
4296 and function-inlining gets confused by this. */
4297 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4298 /* Don't clobber an operand while doing a multi-step calculation. */
4299 || ((rem_flag || op1_is_constant)
4300 && (reg_mentioned_p (target, op0)
4301 || (MEM_P (op0) && MEM_P (target))))
4302 || reg_mentioned_p (target, op1)
4303 || (MEM_P (op1) && MEM_P (target))))
4304 target = 0;
4306 /* Get the mode in which to perform this computation. Normally it will
4307 be MODE, but sometimes we can't do the desired operation in MODE.
4308 If so, pick a wider mode in which we can do the operation. Convert
4309 to that mode at the start to avoid repeated conversions.
4311 First see what operations we need. These depend on the expression
4312 we are evaluating. (We assume that divxx3 insns exist under the
4313 same conditions that modxx3 insns and that these insns don't normally
4314 fail. If these assumptions are not correct, we may generate less
4315 efficient code in some cases.)
4317 Then see if we find a mode in which we can open-code that operation
4318 (either a division, modulus, or shift). Finally, check for the smallest
4319 mode for which we can do the operation with a library call. */
4321 /* We might want to refine this now that we have division-by-constant
4322 optimization. Since expmed_mult_highpart tries so many variants, it is
4323 not straightforward to generalize this. Maybe we should make an array
4324 of possible modes in init_expmed? Save this for GCC 2.7. */
4326 optab1 = (op1_is_pow2
4327 ? (unsignedp ? lshr_optab : ashr_optab)
4328 : (unsignedp ? udiv_optab : sdiv_optab));
4329 optab2 = (op1_is_pow2 ? optab1
4330 : (unsignedp ? udivmod_optab : sdivmod_optab));
4332 if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4334 FOR_EACH_MODE_FROM (compute_mode, mode)
4335 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4336 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4337 break;
4339 if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4340 FOR_EACH_MODE_FROM (compute_mode, mode)
4341 if (optab_libfunc (optab1, compute_mode)
4342 || optab_libfunc (optab2, compute_mode))
4343 break;
4345 else
4346 compute_mode = mode;
4348 /* If we still couldn't find a mode, use MODE, but expand_binop will
4349 probably die. */
4350 if (compute_mode == VOIDmode)
4351 compute_mode = mode;
4353 if (target && GET_MODE (target) == compute_mode)
4354 tquotient = target;
4355 else
4356 tquotient = gen_reg_rtx (compute_mode);
4358 #if 0
4359 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4360 (mode), and thereby get better code when OP1 is a constant. Do that
4361 later. It will require going over all usages of SIZE below. */
4362 size = GET_MODE_BITSIZE (mode);
4363 #endif
4365 /* Only deduct something for a REM if the last divide done was
4366 for a different constant. Then set the constant of the last
4367 divide. */
4368 max_cost = (unsignedp
4369 ? udiv_cost (speed, compute_mode)
4370 : sdiv_cost (speed, compute_mode));
4371 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4372 && INTVAL (op1) == last_div_const))
4373 max_cost -= (mul_cost (speed, compute_mode)
4374 + add_cost (speed, compute_mode));
4376 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4378 /* Check if the target has specific expansions for the division. */
4379 tree cst;
4380 if (treeop0
4381 && treeop1
4382 && (cst = uniform_integer_cst_p (treeop1))
4383 && targetm.vectorize.can_special_div_by_const (code, TREE_TYPE (treeop0),
4384 wi::to_wide (cst),
4385 &target, op0, op1))
4386 return target;
4389 /* Now convert to the best mode to use. */
4390 if (compute_mode != mode)
4392 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4393 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4395 /* convert_modes may have placed op1 into a register, so we
4396 must recompute the following. */
4397 op1_is_constant = CONST_INT_P (op1);
4398 if (op1_is_constant)
4400 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4401 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4402 || (! unsignedp
4403 && wi::popcount (wi::neg (ext_op1)) == 1));
4405 else
4406 op1_is_pow2 = 0;
4409 /* If one of the operands is a volatile MEM, copy it into a register. */
4411 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4412 op0 = force_reg (compute_mode, op0);
4413 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4414 op1 = force_reg (compute_mode, op1);
4416 /* If we need the remainder or if OP1 is constant, we need to
4417 put OP0 in a register in case it has any queued subexpressions. */
4418 if (rem_flag || op1_is_constant)
4419 op0 = force_reg (compute_mode, op0);
4421 last = get_last_insn ();
4423 /* Promote floor rounding to trunc rounding for unsigned operations. */
4424 if (unsignedp)
4426 if (code == FLOOR_DIV_EXPR)
4427 code = TRUNC_DIV_EXPR;
4428 if (code == FLOOR_MOD_EXPR)
4429 code = TRUNC_MOD_EXPR;
4430 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4431 code = TRUNC_DIV_EXPR;
4434 if (op1 != const0_rtx)
4435 switch (code)
4437 case TRUNC_MOD_EXPR:
4438 case TRUNC_DIV_EXPR:
4439 if (op1_is_constant)
4441 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4442 int size = GET_MODE_BITSIZE (int_mode);
4443 if (unsignedp)
4445 unsigned HOST_WIDE_INT mh, ml;
4446 int pre_shift, post_shift;
4447 int dummy;
4448 wide_int wd = rtx_mode_t (op1, int_mode);
4449 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4451 if (wi::popcount (wd) == 1)
4453 pre_shift = floor_log2 (d);
4454 if (rem_flag)
4456 unsigned HOST_WIDE_INT mask
4457 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4458 remainder
4459 = expand_binop (int_mode, and_optab, op0,
4460 gen_int_mode (mask, int_mode),
4461 remainder, 1, methods);
4462 if (remainder)
4463 return gen_lowpart (mode, remainder);
4465 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4466 pre_shift, tquotient, 1);
4468 else if (size <= HOST_BITS_PER_WIDE_INT)
4470 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4472 /* Most significant bit of divisor is set; emit an scc
4473 insn. */
4474 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4475 int_mode, 1, 1);
4477 else
4479 /* Find a suitable multiplier and right shift count
4480 instead of multiplying with D. */
4482 mh = choose_multiplier (d, size, size,
4483 &ml, &post_shift, &dummy);
4485 /* If the suggested multiplier is more than SIZE bits,
4486 we can do better for even divisors, using an
4487 initial right shift. */
4488 if (mh != 0 && (d & 1) == 0)
4490 pre_shift = ctz_or_zero (d);
4491 mh = choose_multiplier (d >> pre_shift, size,
4492 size - pre_shift,
4493 &ml, &post_shift, &dummy);
4494 gcc_assert (!mh);
4496 else
4497 pre_shift = 0;
4499 if (mh != 0)
4501 rtx t1, t2, t3, t4;
4503 if (post_shift - 1 >= BITS_PER_WORD)
4504 goto fail1;
4506 extra_cost
4507 = (shift_cost (speed, int_mode, post_shift - 1)
4508 + shift_cost (speed, int_mode, 1)
4509 + 2 * add_cost (speed, int_mode));
4510 t1 = expmed_mult_highpart
4511 (int_mode, op0, gen_int_mode (ml, int_mode),
4512 NULL_RTX, 1, max_cost - extra_cost);
4513 if (t1 == 0)
4514 goto fail1;
4515 t2 = force_operand (gen_rtx_MINUS (int_mode,
4516 op0, t1),
4517 NULL_RTX);
4518 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4519 t2, 1, NULL_RTX, 1);
4520 t4 = force_operand (gen_rtx_PLUS (int_mode,
4521 t1, t3),
4522 NULL_RTX);
4523 quotient = expand_shift
4524 (RSHIFT_EXPR, int_mode, t4,
4525 post_shift - 1, tquotient, 1);
4527 else
4529 rtx t1, t2;
4531 if (pre_shift >= BITS_PER_WORD
4532 || post_shift >= BITS_PER_WORD)
4533 goto fail1;
4535 t1 = expand_shift
4536 (RSHIFT_EXPR, int_mode, op0,
4537 pre_shift, NULL_RTX, 1);
4538 extra_cost
4539 = (shift_cost (speed, int_mode, pre_shift)
4540 + shift_cost (speed, int_mode, post_shift));
4541 t2 = expmed_mult_highpart
4542 (int_mode, t1,
4543 gen_int_mode (ml, int_mode),
4544 NULL_RTX, 1, max_cost - extra_cost);
4545 if (t2 == 0)
4546 goto fail1;
4547 quotient = expand_shift
4548 (RSHIFT_EXPR, int_mode, t2,
4549 post_shift, tquotient, 1);
4553 else /* Too wide mode to use tricky code */
4554 break;
4556 insn = get_last_insn ();
4557 if (insn != last)
4558 set_dst_reg_note (insn, REG_EQUAL,
4559 gen_rtx_UDIV (int_mode, op0, op1),
4560 quotient);
4562 else /* TRUNC_DIV, signed */
4564 unsigned HOST_WIDE_INT ml;
4565 int lgup, post_shift;
4566 rtx mlr;
4567 HOST_WIDE_INT d = INTVAL (op1);
4568 unsigned HOST_WIDE_INT abs_d;
4570 /* Not prepared to handle division/remainder by
4571 0xffffffffffffffff8000000000000000 etc. */
4572 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4573 break;
4575 /* Since d might be INT_MIN, we have to cast to
4576 unsigned HOST_WIDE_INT before negating to avoid
4577 undefined signed overflow. */
4578 abs_d = (d >= 0
4579 ? (unsigned HOST_WIDE_INT) d
4580 : - (unsigned HOST_WIDE_INT) d);
4582 /* n rem d = n rem -d */
4583 if (rem_flag && d < 0)
4585 d = abs_d;
4586 op1 = gen_int_mode (abs_d, int_mode);
4589 if (d == 1)
4590 quotient = op0;
4591 else if (d == -1)
4592 quotient = expand_unop (int_mode, neg_optab, op0,
4593 tquotient, 0);
4594 else if (size <= HOST_BITS_PER_WIDE_INT
4595 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4597 /* This case is not handled correctly below. */
4598 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4599 int_mode, 1, 1);
4600 if (quotient == 0)
4601 goto fail1;
4603 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4604 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4605 && (rem_flag
4606 ? smod_pow2_cheap (speed, int_mode)
4607 : sdiv_pow2_cheap (speed, int_mode))
4608 /* We assume that the cheap metric is true if the
4609 optab has an expander for this mode. */
4610 && ((optab_handler ((rem_flag ? smod_optab
4611 : sdiv_optab),
4612 int_mode)
4613 != CODE_FOR_nothing)
4614 || (optab_handler (sdivmod_optab, int_mode)
4615 != CODE_FOR_nothing)))
4617 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4619 if (rem_flag)
4621 remainder = expand_smod_pow2 (int_mode, op0, d);
4622 if (remainder)
4623 return gen_lowpart (mode, remainder);
4626 if (sdiv_pow2_cheap (speed, int_mode)
4627 && ((optab_handler (sdiv_optab, int_mode)
4628 != CODE_FOR_nothing)
4629 || (optab_handler (sdivmod_optab, int_mode)
4630 != CODE_FOR_nothing)))
4631 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4632 int_mode, treeop0, treeop1,
4633 op0, gen_int_mode (abs_d,
4634 int_mode),
4635 NULL_RTX, 0);
4636 else
4637 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4639 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4640 negate the quotient. */
4641 if (d < 0)
4643 insn = get_last_insn ();
4644 if (insn != last
4645 && abs_d < (HOST_WIDE_INT_1U
4646 << (HOST_BITS_PER_WIDE_INT - 1)))
4647 set_dst_reg_note (insn, REG_EQUAL,
4648 gen_rtx_DIV (int_mode, op0,
4649 gen_int_mode
4650 (abs_d,
4651 int_mode)),
4652 quotient);
4654 quotient = expand_unop (int_mode, neg_optab,
4655 quotient, quotient, 0);
4658 else if (size <= HOST_BITS_PER_WIDE_INT)
4660 choose_multiplier (abs_d, size, size - 1,
4661 &ml, &post_shift, &lgup);
4662 if (ml < HOST_WIDE_INT_1U << (size - 1))
4664 rtx t1, t2, t3;
4666 if (post_shift >= BITS_PER_WORD
4667 || size - 1 >= BITS_PER_WORD)
4668 goto fail1;
4670 extra_cost = (shift_cost (speed, int_mode, post_shift)
4671 + shift_cost (speed, int_mode, size - 1)
4672 + add_cost (speed, int_mode));
4673 t1 = expmed_mult_highpart
4674 (int_mode, op0, gen_int_mode (ml, int_mode),
4675 NULL_RTX, 0, max_cost - extra_cost);
4676 if (t1 == 0)
4677 goto fail1;
4678 t2 = expand_shift
4679 (RSHIFT_EXPR, int_mode, t1,
4680 post_shift, NULL_RTX, 0);
4681 t3 = expand_shift
4682 (RSHIFT_EXPR, int_mode, op0,
4683 size - 1, NULL_RTX, 0);
4684 if (d < 0)
4685 quotient
4686 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4687 tquotient);
4688 else
4689 quotient
4690 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4691 tquotient);
4693 else
4695 rtx t1, t2, t3, t4;
4697 if (post_shift >= BITS_PER_WORD
4698 || size - 1 >= BITS_PER_WORD)
4699 goto fail1;
4701 ml |= HOST_WIDE_INT_M1U << (size - 1);
4702 mlr = gen_int_mode (ml, int_mode);
4703 extra_cost = (shift_cost (speed, int_mode, post_shift)
4704 + shift_cost (speed, int_mode, size - 1)
4705 + 2 * add_cost (speed, int_mode));
4706 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4707 NULL_RTX, 0,
4708 max_cost - extra_cost);
4709 if (t1 == 0)
4710 goto fail1;
4711 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4712 NULL_RTX);
4713 t3 = expand_shift
4714 (RSHIFT_EXPR, int_mode, t2,
4715 post_shift, NULL_RTX, 0);
4716 t4 = expand_shift
4717 (RSHIFT_EXPR, int_mode, op0,
4718 size - 1, NULL_RTX, 0);
4719 if (d < 0)
4720 quotient
4721 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4722 tquotient);
4723 else
4724 quotient
4725 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4726 tquotient);
4729 else /* Too wide mode to use tricky code */
4730 break;
4732 insn = get_last_insn ();
4733 if (insn != last)
4734 set_dst_reg_note (insn, REG_EQUAL,
4735 gen_rtx_DIV (int_mode, op0, op1),
4736 quotient);
4738 break;
4740 fail1:
4741 delete_insns_since (last);
4742 break;
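/* Illustration only (not from the original sources) of the signed
   sequences above: when ml fits in size - 1 bits the quotient is built as
     q = (high_s (x, m) >> post_shift) - (x >> (size - 1)),
   subtracting the sign word so the result rounds towards zero; otherwise
   m - 2^size is used as the multiplier and x is added back first:
     q = ((high_s (x, m') + x) >> post_shift) - (x >> (size - 1)).
   For d < 0 the operands of the outer subtraction are swapped.  */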
4744 case FLOOR_DIV_EXPR:
4745 case FLOOR_MOD_EXPR:
4746 /* We will come here only for signed operations. */
4747 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4749 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4750 int size = GET_MODE_BITSIZE (int_mode);
4751 unsigned HOST_WIDE_INT mh, ml;
4752 int pre_shift, lgup, post_shift;
4753 HOST_WIDE_INT d = INTVAL (op1);
4755 if (d > 0)
4757 /* We could just as easily deal with negative constants here,
4758 but it does not seem worth the trouble for GCC 2.6. */
4759 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4761 pre_shift = floor_log2 (d);
4762 if (rem_flag)
4764 unsigned HOST_WIDE_INT mask
4765 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4766 remainder = expand_binop
4767 (int_mode, and_optab, op0,
4768 gen_int_mode (mask, int_mode),
4769 remainder, 0, methods);
4770 if (remainder)
4771 return gen_lowpart (mode, remainder);
4773 quotient = expand_shift
4774 (RSHIFT_EXPR, int_mode, op0,
4775 pre_shift, tquotient, 0);
4777 else
4779 rtx t1, t2, t3, t4;
4781 mh = choose_multiplier (d, size, size - 1,
4782 &ml, &post_shift, &lgup);
4783 gcc_assert (!mh);
4785 if (post_shift < BITS_PER_WORD
4786 && size - 1 < BITS_PER_WORD)
4788 t1 = expand_shift
4789 (RSHIFT_EXPR, int_mode, op0,
4790 size - 1, NULL_RTX, 0);
4791 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4792 NULL_RTX, 0, OPTAB_WIDEN);
4793 extra_cost = (shift_cost (speed, int_mode, post_shift)
4794 + shift_cost (speed, int_mode, size - 1)
4795 + 2 * add_cost (speed, int_mode));
4796 t3 = expmed_mult_highpart
4797 (int_mode, t2, gen_int_mode (ml, int_mode),
4798 NULL_RTX, 1, max_cost - extra_cost);
4799 if (t3 != 0)
4801 t4 = expand_shift
4802 (RSHIFT_EXPR, int_mode, t3,
4803 post_shift, NULL_RTX, 1);
4804 quotient = expand_binop (int_mode, xor_optab,
4805 t4, t1, tquotient, 0,
4806 OPTAB_WIDEN);
4811 else
4813 rtx nsign, t1, t2, t3, t4;
4814 t1 = force_operand (gen_rtx_PLUS (int_mode,
4815 op0, constm1_rtx), NULL_RTX);
4816 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4817 0, OPTAB_WIDEN);
4818 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4819 size - 1, NULL_RTX, 0);
4820 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4821 NULL_RTX);
4822 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, treeop0,
4823 treeop1, t3, op1, NULL_RTX, 0);
4824 if (t4)
4826 rtx t5;
4827 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4828 NULL_RTX, 0);
4829 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4830 tquotient);
4835 if (quotient != 0)
4836 break;
4837 delete_insns_since (last);
4839 /* Try using an instruction that produces both the quotient and
4840 remainder, using truncation. We can easily compensate the quotient
4841 or remainder to get floor rounding, once we have the remainder.
4842 Notice that we also compute the final remainder value here,
4843 and return the result right away. */
4844 if (target == 0 || GET_MODE (target) != compute_mode)
4845 target = gen_reg_rtx (compute_mode);
4847 if (rem_flag)
4849 remainder
4850 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4851 quotient = gen_reg_rtx (compute_mode);
4853 else
4855 quotient
4856 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4857 remainder = gen_reg_rtx (compute_mode);
4860 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4861 quotient, remainder, 0))
4863 /* This could be computed with a branch-less sequence.
4864 Save that for later. */
4865 rtx tem;
4866 rtx_code_label *label = gen_label_rtx ();
4867 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4868 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4869 NULL_RTX, 0, OPTAB_WIDEN);
4870 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4871 expand_dec (quotient, const1_rtx);
4872 expand_inc (remainder, op1);
4873 emit_label (label);
4874 return gen_lowpart (mode, rem_flag ? remainder : quotient);
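/* Illustration only (not from the original sources): the compensation
   above turns the truncating divmod into floor rounding via
     floor_q = trunc_q - (trunc_r != 0 && (x ^ y) < 0 ? 1 : 0)
     floor_r = trunc_r + (trunc_r != 0 && (x ^ y) < 0 ? y : 0).  */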
4877 /* No luck with division elimination or divmod. Have to do it
4878 by conditionally adjusting op0 *and* the result. */
4880 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4881 rtx adjusted_op0;
4882 rtx tem;
4884 quotient = gen_reg_rtx (compute_mode);
4885 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4886 label1 = gen_label_rtx ();
4887 label2 = gen_label_rtx ();
4888 label3 = gen_label_rtx ();
4889 label4 = gen_label_rtx ();
4890 label5 = gen_label_rtx ();
4891 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4892 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4893 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4894 quotient, 0, methods);
4895 if (tem != quotient)
4896 emit_move_insn (quotient, tem);
4897 emit_jump_insn (targetm.gen_jump (label5));
4898 emit_barrier ();
4899 emit_label (label1);
4900 expand_inc (adjusted_op0, const1_rtx);
4901 emit_jump_insn (targetm.gen_jump (label4));
4902 emit_barrier ();
4903 emit_label (label2);
4904 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4905 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4906 quotient, 0, methods);
4907 if (tem != quotient)
4908 emit_move_insn (quotient, tem);
4909 emit_jump_insn (targetm.gen_jump (label5));
4910 emit_barrier ();
4911 emit_label (label3);
4912 expand_dec (adjusted_op0, const1_rtx);
4913 emit_label (label4);
4914 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4915 quotient, 0, methods);
4916 if (tem != quotient)
4917 emit_move_insn (quotient, tem);
4918 expand_dec (quotient, const1_rtx);
4919 emit_label (label5);
4921 break;
4923 case CEIL_DIV_EXPR:
4924 case CEIL_MOD_EXPR:
4925 if (unsignedp)
4927 if (op1_is_constant
4928 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4929 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4930 || INTVAL (op1) >= 0))
4932 scalar_int_mode int_mode
4933 = as_a <scalar_int_mode> (compute_mode);
4934 rtx t1, t2, t3;
4935 unsigned HOST_WIDE_INT d = INTVAL (op1);
4936 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4937 floor_log2 (d), tquotient, 1);
4938 t2 = expand_binop (int_mode, and_optab, op0,
4939 gen_int_mode (d - 1, int_mode),
4940 NULL_RTX, 1, methods);
4941 t3 = gen_reg_rtx (int_mode);
4942 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4943 if (t3 == 0)
4945 rtx_code_label *lab;
4946 lab = gen_label_rtx ();
4947 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4948 expand_inc (t1, const1_rtx);
4949 emit_label (lab);
4950 quotient = t1;
4952 else
4953 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4954 tquotient);
4955 break;
4958 /* Try using an instruction that produces both the quotient and
4959 remainder, using truncation. We can easily compensate the
4960 quotient or remainder to get ceiling rounding, once we have the
4961 remainder. Notice that we also compute the final remainder
4962 value here, and return the result right away. */
4963 if (target == 0 || GET_MODE (target) != compute_mode)
4964 target = gen_reg_rtx (compute_mode);
4966 if (rem_flag)
4968 remainder = (REG_P (target)
4969 ? target : gen_reg_rtx (compute_mode));
4970 quotient = gen_reg_rtx (compute_mode);
4972 else
4974 quotient = (REG_P (target)
4975 ? target : gen_reg_rtx (compute_mode));
4976 remainder = gen_reg_rtx (compute_mode);
4979 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4980 remainder, 1))
4982 /* This could be computed with a branch-less sequence.
4983 Save that for later. */
4984 rtx_code_label *label = gen_label_rtx ();
4985 do_cmp_and_jump (remainder, const0_rtx, EQ,
4986 compute_mode, label);
4987 expand_inc (quotient, const1_rtx);
4988 expand_dec (remainder, op1);
4989 emit_label (label);
4990 return gen_lowpart (mode, rem_flag ? remainder : quotient);
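/* Illustration only (not from the original sources): the unsigned
   compensation above is
     ceil_q = trunc_q + (trunc_r != 0 ? 1 : 0)
     ceil_r = trunc_r - (trunc_r != 0 ? y : 0);
   the signed variant further below additionally gates the adjustment on
   (x ^ y) >= 0.  */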
4993 /* No luck with division elimination or divmod. Have to do it
4994 by conditionally adjusting op0 *and* the result. */
4996 rtx_code_label *label1, *label2;
4997 rtx adjusted_op0, tem;
4999 quotient = gen_reg_rtx (compute_mode);
5000 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5001 label1 = gen_label_rtx ();
5002 label2 = gen_label_rtx ();
5003 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
5004 compute_mode, label1);
5005 emit_move_insn (quotient, const0_rtx);
5006 emit_jump_insn (targetm.gen_jump (label2));
5007 emit_barrier ();
5008 emit_label (label1);
5009 expand_dec (adjusted_op0, const1_rtx);
5010 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
5011 quotient, 1, methods);
5012 if (tem != quotient)
5013 emit_move_insn (quotient, tem);
5014 expand_inc (quotient, const1_rtx);
5015 emit_label (label2);
5018 else /* signed */
5020 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
5021 && INTVAL (op1) >= 0)
5023 /* This is extremely similar to the code for the unsigned case
5024 above. For 2.7 we should merge these variants, but for
5025 2.6.1 I don't want to touch the code for unsigned since that
5026 gets used in C. The signed case will only be used by other
5027 languages (Ada). */
5029 rtx t1, t2, t3;
5030 unsigned HOST_WIDE_INT d = INTVAL (op1);
5031 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5032 floor_log2 (d), tquotient, 0);
5033 t2 = expand_binop (compute_mode, and_optab, op0,
5034 gen_int_mode (d - 1, compute_mode),
5035 NULL_RTX, 1, methods);
5036 t3 = gen_reg_rtx (compute_mode);
5037 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5038 compute_mode, 1, 1);
5039 if (t3 == 0)
5041 rtx_code_label *lab;
5042 lab = gen_label_rtx ();
5043 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5044 expand_inc (t1, const1_rtx);
5045 emit_label (lab);
5046 quotient = t1;
5048 else
5049 quotient = force_operand (gen_rtx_PLUS (compute_mode,
5050 t1, t3),
5051 tquotient);
5052 break;
5055 /* Try using an instruction that produces both the quotient and
5056 remainder, using truncation. We can easily compensate the
5057 quotient or remainder to get ceiling rounding, once we have the
5058 remainder. Notice that we also compute the final remainder
5059 value here, and return the result right away. */
5060 if (target == 0 || GET_MODE (target) != compute_mode)
5061 target = gen_reg_rtx (compute_mode);
5062 if (rem_flag)
5064 remainder = (REG_P (target)
5065 ? target : gen_reg_rtx (compute_mode));
5066 quotient = gen_reg_rtx (compute_mode);
5068 else
5070 quotient = (REG_P (target)
5071 ? target : gen_reg_rtx (compute_mode));
5072 remainder = gen_reg_rtx (compute_mode);
5075 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5076 remainder, 0))
5078 /* This could be computed with a branch-less sequence.
5079 Save that for later. */
5080 rtx tem;
5081 rtx_code_label *label = gen_label_rtx ();
5082 do_cmp_and_jump (remainder, const0_rtx, EQ,
5083 compute_mode, label);
5084 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5085 NULL_RTX, 0, OPTAB_WIDEN);
5086 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5087 expand_inc (quotient, const1_rtx);
5088 expand_dec (remainder, op1);
5089 emit_label (label);
5090 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5093 /* No luck with division elimination or divmod. Have to do it
5094 by conditionally adjusting op0 *and* the result. */
5096 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5097 rtx adjusted_op0;
5098 rtx tem;
5100 quotient = gen_reg_rtx (compute_mode);
5101 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5102 label1 = gen_label_rtx ();
5103 label2 = gen_label_rtx ();
5104 label3 = gen_label_rtx ();
5105 label4 = gen_label_rtx ();
5106 label5 = gen_label_rtx ();
5107 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5108 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5109 compute_mode, label1);
5110 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5111 quotient, 0, methods);
5112 if (tem != quotient)
5113 emit_move_insn (quotient, tem);
5114 emit_jump_insn (targetm.gen_jump (label5));
5115 emit_barrier ();
5116 emit_label (label1);
5117 expand_dec (adjusted_op0, const1_rtx);
5118 emit_jump_insn (targetm.gen_jump (label4));
5119 emit_barrier ();
5120 emit_label (label2);
5121 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5122 compute_mode, label3);
5123 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5124 quotient, 0, methods);
5125 if (tem != quotient)
5126 emit_move_insn (quotient, tem);
5127 emit_jump_insn (targetm.gen_jump (label5));
5128 emit_barrier ();
5129 emit_label (label3);
5130 expand_inc (adjusted_op0, const1_rtx);
5131 emit_label (label4);
5132 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5133 quotient, 0, methods);
5134 if (tem != quotient)
5135 emit_move_insn (quotient, tem);
5136 expand_inc (quotient, const1_rtx);
5137 emit_label (label5);
5140 break;
5142 case EXACT_DIV_EXPR:
5143 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5145 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5146 int size = GET_MODE_BITSIZE (int_mode);
5147 HOST_WIDE_INT d = INTVAL (op1);
5148 unsigned HOST_WIDE_INT ml;
5149 int pre_shift;
5150 rtx t1;
5152 pre_shift = ctz_or_zero (d);
5153 ml = invert_mod2n (d >> pre_shift, size);
5154 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5155 pre_shift, NULL_RTX, unsignedp);
5156 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5157 NULL_RTX, 1);
5159 insn = get_last_insn ();
5160 set_dst_reg_note (insn, REG_EQUAL,
5161 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5162 int_mode, op0, op1),
5163 quotient);
5165 break;
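/* Worked example, for illustration only (not from the original sources):
   for EXACT_DIV_EXPR by 12 in a 32-bit mode, d == 12 == 4 * 3, so the
   code above computes (x >> 2) * invert_mod2n (3, 32); this is exact
   because x is known to be a multiple of 12.  Here
   invert_mod2n (3, 32) == 0xAAAAAAAB, since
   3 * 0xAAAAAAAB == 0x200000001 == 1 (mod 2^32).  */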
5167 case ROUND_DIV_EXPR:
5168 case ROUND_MOD_EXPR:
5169 if (unsignedp)
5171 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5172 rtx tem;
5173 rtx_code_label *label;
5174 label = gen_label_rtx ();
5175 quotient = gen_reg_rtx (int_mode);
5176 remainder = gen_reg_rtx (int_mode);
5177 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5179 rtx tem;
5180 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5181 quotient, 1, methods);
5182 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5183 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5184 remainder, 1, methods);
5186 tem = plus_constant (int_mode, op1, -1);
5187 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5188 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5189 expand_inc (quotient, const1_rtx);
5190 expand_dec (remainder, op1);
5191 emit_label (label);
5193 else
5195 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5196 int size = GET_MODE_BITSIZE (int_mode);
5197 rtx abs_rem, abs_op1, tem, mask;
5198 rtx_code_label *label;
5199 label = gen_label_rtx ();
5200 quotient = gen_reg_rtx (int_mode);
5201 remainder = gen_reg_rtx (int_mode);
5202 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5204 rtx tem;
5205 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5206 quotient, 0, methods);
5207 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5208 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5209 remainder, 0, methods);
5211 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5212 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5213 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5214 1, NULL_RTX, 1);
5215 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
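/* Branchless rounding adjustment: MASK = (OP0 ^ OP1) >> (SIZE-1) is 0
   when the operands have the same sign and -1 when they differ, and
   (X ^ MASK) - MASK negates X exactly when MASK is -1. So the
   quotient is adjusted by (MASK ^ 1) - MASK = +1 or -1, and the
   remainder by (MASK ^ OP1) - MASK = OP1 or -OP1, rounding halves
   away from zero. For example, -15/6 truncates to -2 remainder -3;
   2*|-3| >= 6 and MASK is -1, so the quotient becomes -3 and the
   remainder 3. */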
5216 tem = expand_binop (int_mode, xor_optab, op0, op1,
5217 NULL_RTX, 0, OPTAB_WIDEN);
5218 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5219 size - 1, NULL_RTX, 0);
5220 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5221 NULL_RTX, 0, OPTAB_WIDEN);
5222 tem = expand_binop (int_mode, sub_optab, tem, mask,
5223 NULL_RTX, 0, OPTAB_WIDEN);
5224 expand_inc (quotient, tem);
5225 tem = expand_binop (int_mode, xor_optab, mask, op1,
5226 NULL_RTX, 0, OPTAB_WIDEN);
5227 tem = expand_binop (int_mode, sub_optab, tem, mask,
5228 NULL_RTX, 0, OPTAB_WIDEN);
5229 expand_dec (remainder, tem);
5230 emit_label (label);
5232 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5234 default:
5235 gcc_unreachable ();
5238 if (quotient == 0)
5240 if (target && GET_MODE (target) != compute_mode)
5241 target = 0;
5243 if (rem_flag)
5245 /* Try to produce the remainder without producing the quotient.
5246 If we seem to have a divmod pattern that does not require widening,
5247 don't try widening here. We should really have a WIDEN argument
5248 to expand_twoval_binop, since what we'd really like to do here is
5249 1) try a mod insn in compute_mode
5250 2) try a divmod insn in compute_mode
5251 3) try a div insn in compute_mode and multiply-subtract to get
5252 remainder
5253 4) try the same things with widening allowed. */
5254 remainder
5255 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5256 op0, op1, target,
5257 unsignedp,
5258 ((optab_handler (optab2, compute_mode)
5259 != CODE_FOR_nothing)
5260 ? OPTAB_DIRECT : OPTAB_WIDEN));
5261 if (remainder == 0)
5263 /* No luck there. Can we do remainder and divide at once
5264 without a library call? */
5265 remainder = gen_reg_rtx (compute_mode);
5266 if (! expand_twoval_binop ((unsignedp
5267 ? udivmod_optab
5268 : sdivmod_optab),
5269 op0, op1,
5270 NULL_RTX, remainder, unsignedp))
5271 remainder = 0;
5274 if (remainder)
5275 return gen_lowpart (mode, remainder);
5278 /* Produce the quotient. Try a quotient insn, but not a library call.
5279 If we have a divmod in this mode, use it in preference to widening
5280 the div (for this test we assume it will not fail). Note that optab2
5281 is set to whichever of the two optabs the call below will use. */
5282 quotient
5283 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5284 op0, op1, rem_flag ? NULL_RTX : target,
5285 unsignedp,
5286 ((optab_handler (optab2, compute_mode)
5287 != CODE_FOR_nothing)
5288 ? OPTAB_DIRECT : OPTAB_WIDEN));
5290 if (quotient == 0)
5292 /* No luck there. Try a quotient-and-remainder insn,
5293 keeping the quotient alone. */
5294 quotient = gen_reg_rtx (compute_mode);
5295 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5296 op0, op1,
5297 quotient, NULL_RTX, unsignedp))
5299 quotient = 0;
5300 if (! rem_flag)
5301 /* Still no luck. If we are not computing the remainder,
5302 use a library call for the quotient. */
5303 quotient = sign_expand_binop (compute_mode,
5304 udiv_optab, sdiv_optab,
5305 op0, op1, target,
5306 unsignedp, methods);
5311 if (rem_flag)
5313 if (target && GET_MODE (target) != compute_mode)
5314 target = 0;
5316 if (quotient == 0)
5318 /* No divide instruction either. Use library for remainder. */
5319 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5320 op0, op1, target,
5321 unsignedp, methods);
5322 /* No remainder function. Try a quotient-and-remainder
5323 function, keeping the remainder. */
5324 if (!remainder
5325 && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5327 remainder = gen_reg_rtx (compute_mode);
5328 if (!expand_twoval_binop_libfunc
5329 (unsignedp ? udivmod_optab : sdivmod_optab,
5330 op0, op1,
5331 NULL_RTX, remainder,
5332 unsignedp ? UMOD : MOD))
5333 remainder = NULL_RTX;
5336 else
5338 /* We divided. Now finish doing X - Y * (X / Y). */
5339 remainder = expand_mult (compute_mode, quotient, op1,
5340 NULL_RTX, unsignedp);
5341 remainder = expand_binop (compute_mode, sub_optab, op0,
5342 remainder, target, unsignedp,
5343 methods);
5347 if (methods != OPTAB_LIB_WIDEN
5348 && (rem_flag ? remainder : quotient) == NULL_RTX)
5349 return NULL_RTX;
5351 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5354 /* Return a tree node with data type TYPE, describing the value of X.
5355 Usually this is a VAR_DECL, if there is no obvious better choice.
5356 X may be an expression; however, we only support those expressions
5357 generated by loop.c. */
5359 tree
5360 make_tree (tree type, rtx x)
5362 tree t;
5364 switch (GET_CODE (x))
5366 case CONST_INT:
5367 case CONST_WIDE_INT:
5368 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5369 return t;
5371 case CONST_DOUBLE:
5372 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5373 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5374 t = wide_int_to_tree (type,
5375 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5376 HOST_BITS_PER_WIDE_INT * 2));
5377 else
5378 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5380 return t;
5382 case CONST_VECTOR:
5384 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5385 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5386 tree itype = TREE_TYPE (type);
5388 /* Build a tree with vector elements. */
5389 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5390 unsigned int count = elts.encoded_nelts ();
5391 for (unsigned int i = 0; i < count; ++i)
5393 rtx elt = CONST_VECTOR_ELT (x, i);
5394 elts.quick_push (make_tree (itype, elt));
5397 return elts.build ();
5400 case PLUS:
5401 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5402 make_tree (type, XEXP (x, 1)));
5404 case MINUS:
5405 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5406 make_tree (type, XEXP (x, 1)));
5408 case NEG:
5409 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5411 case MULT:
5412 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5413 make_tree (type, XEXP (x, 1)));
5415 case ASHIFT:
5416 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5417 make_tree (type, XEXP (x, 1)));
5419 case LSHIFTRT:
5420 t = unsigned_type_for (type);
5421 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5422 make_tree (t, XEXP (x, 0)),
5423 make_tree (type, XEXP (x, 1))));
5425 case ASHIFTRT:
5426 t = signed_type_for (type);
5427 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5428 make_tree (t, XEXP (x, 0)),
5429 make_tree (type, XEXP (x, 1))));
5431 case DIV:
5432 if (TREE_CODE (type) != REAL_TYPE)
5433 t = signed_type_for (type);
5434 else
5435 t = type;
5437 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5438 make_tree (t, XEXP (x, 0)),
5439 make_tree (t, XEXP (x, 1))));
5440 case UDIV:
5441 t = unsigned_type_for (type);
5442 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5443 make_tree (t, XEXP (x, 0)),
5444 make_tree (t, XEXP (x, 1))));
5446 case SIGN_EXTEND:
5447 case ZERO_EXTEND:
5448 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5449 GET_CODE (x) == ZERO_EXTEND);
5450 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5452 case CONST:
5453 return make_tree (type, XEXP (x, 0));
5455 case SYMBOL_REF:
5456 t = SYMBOL_REF_DECL (x);
5457 if (t)
5458 return fold_convert (type, build_fold_addr_expr (t));
5459 /* fall through. */
5461 default:
5462 if (CONST_POLY_INT_P (x))
5463 return wide_int_to_tree (type, const_poly_int_value (x));
5465 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5467 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5468 address mode to pointer mode. */
5469 if (POINTER_TYPE_P (type))
5470 x = convert_memory_address_addr_space
5471 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5473 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5474 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5475 t->decl_with_rtl.rtl = x;
5477 return t;
5481 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5482 and returning TARGET.
5484 If TARGET is 0, a pseudo-register or constant is returned. */
5486 rtx
5487 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5489 rtx tem = 0;
5491 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5492 tem = simplify_binary_operation (AND, mode, op0, op1);
5493 if (tem == 0)
5494 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5496 if (target == 0)
5497 target = tem;
5498 else if (tem != target)
5499 emit_move_insn (target, tem);
5500 return target;
5503 /* Helper function for emit_store_flag. */
5504 static rtx
5505 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5506 machine_mode mode, machine_mode compare_mode,
5507 int unsignedp, rtx x, rtx y, int normalizep,
5508 machine_mode target_mode)
5510 class expand_operand ops[4];
5511 rtx op0, comparison, subtarget;
5512 rtx_insn *last;
5513 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5514 scalar_int_mode int_target_mode;
5516 last = get_last_insn ();
5517 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5518 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5519 if (!x || !y)
5521 delete_insns_since (last);
5522 return NULL_RTX;
5525 if (target_mode == VOIDmode)
5526 int_target_mode = result_mode;
5527 else
5528 int_target_mode = as_a <scalar_int_mode> (target_mode);
5529 if (!target)
5530 target = gen_reg_rtx (int_target_mode);
5532 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5534 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5535 create_fixed_operand (&ops[1], comparison);
5536 create_fixed_operand (&ops[2], x);
5537 create_fixed_operand (&ops[3], y);
5538 if (!maybe_expand_insn (icode, 4, ops))
5540 delete_insns_since (last);
5541 return NULL_RTX;
5543 subtarget = ops[0].value;
5545 /* If we are converting to a wider mode, first convert to
5546 INT_TARGET_MODE, then normalize. This produces better combining
5547 opportunities on machines that have a SIGN_EXTRACT when we are
5548 testing a single bit. This mostly benefits the 68k.
5550 If STORE_FLAG_VALUE does not have the sign bit set when
5551 interpreted in MODE, we can do this conversion as unsigned, which
5552 is usually more efficient. */
5553 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5555 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5556 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5558 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5559 convert_move (target, subtarget, unsignedp);
5561 op0 = target;
5562 result_mode = int_target_mode;
5564 else
5565 op0 = subtarget;
5567 /* If we want to keep subexpressions around, don't reuse our last
5568 target. */
5569 if (optimize)
5570 subtarget = 0;
5572 /* Now normalize to the proper value in MODE. Sometimes we don't
5573 have to do anything. */
5574 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5575 ;
5576 /* STORE_FLAG_VALUE might be the most negative number, so write
5577 the comparison this way to avoid a compile-time warning. */
5578 else if (- normalizep == STORE_FLAG_VALUE)
5579 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5581 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5582 it hard to use a value of just the sign bit due to ANSI integer
5583 constant typing rules. */
5584 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5585 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5586 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5587 normalizep == 1);
5588 else
5590 gcc_assert (STORE_FLAG_VALUE & 1);
5592 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5593 if (normalizep == -1)
5594 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5597 /* If we were converting to a smaller mode, do the conversion now. */
5598 if (int_target_mode != result_mode)
5600 convert_move (target, op0, 0);
5601 return target;
5603 else
5604 return op0;
5608 /* A subroutine of emit_store_flag only including "tricks" that do not
5609 need a recursive call. These are kept separate to avoid infinite
5610 loops. */
5612 static rtx
5613 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5614 machine_mode mode, int unsignedp, int normalizep,
5615 machine_mode target_mode)
5617 rtx subtarget;
5618 enum insn_code icode;
5619 machine_mode compare_mode;
5620 enum mode_class mclass;
5621 enum rtx_code scode;
5623 if (unsignedp)
5624 code = unsigned_condition (code);
5625 scode = swap_condition (code);
5627 /* If one operand is constant, make it the second one. Only do this
5628 if the other operand is not constant as well. */
5630 if (swap_commutative_operands_p (op0, op1))
5632 std::swap (op0, op1);
5633 code = swap_condition (code);
5636 if (mode == VOIDmode)
5637 mode = GET_MODE (op0);
5639 if (CONST_SCALAR_INT_P (op1))
5640 canonicalize_comparison (mode, &code, &op1);
5642 /* For some comparisons with 1 and -1, we can convert this to
5643 comparisons with zero. This will often produce more opportunities for
5644 store-flag insns. */
5646 switch (code)
5648 case LT:
5649 if (op1 == const1_rtx)
5650 op1 = const0_rtx, code = LE;
5651 break;
5652 case LE:
5653 if (op1 == constm1_rtx)
5654 op1 = const0_rtx, code = LT;
5655 break;
5656 case GE:
5657 if (op1 == const1_rtx)
5658 op1 = const0_rtx, code = GT;
5659 break;
5660 case GT:
5661 if (op1 == constm1_rtx)
5662 op1 = const0_rtx, code = GE;
5663 break;
5664 case GEU:
5665 if (op1 == const1_rtx)
5666 op1 = const0_rtx, code = NE;
5667 break;
5668 case LTU:
5669 if (op1 == const1_rtx)
5670 op1 = const0_rtx, code = EQ;
5671 break;
5672 default:
5673 break;
5676 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5677 complement of A (for GE) and shifting the sign bit to the low bit. */
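/* For example, with STORE_FLAG_VALUE == 1 and a 32-bit A, "A < 0"
   becomes the logical shift (unsigned) A >> 31 and "A >= 0" becomes
   (unsigned) ~A >> 31; for a -1/0 result the shift is arithmetic
   instead. */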
5678 scalar_int_mode int_mode;
5679 if (op1 == const0_rtx && (code == LT || code == GE)
5680 && is_int_mode (mode, &int_mode)
5681 && (normalizep || STORE_FLAG_VALUE == 1
5682 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5684 scalar_int_mode int_target_mode;
5685 subtarget = target;
5687 if (!target)
5688 int_target_mode = int_mode;
5689 else
5691 /* If the result is to be wider than OP0, it is best to convert it
5692 first. If it is to be narrower, it is *incorrect* to convert it
5693 first. */
5694 int_target_mode = as_a <scalar_int_mode> (target_mode);
5695 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5697 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5698 int_mode = int_target_mode;
5702 if (int_target_mode != int_mode)
5703 subtarget = 0;
5705 if (code == GE)
5706 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5707 ((STORE_FLAG_VALUE == 1 || normalizep)
5708 ? 0 : subtarget), 0);
5710 if (STORE_FLAG_VALUE == 1 || normalizep)
5711 /* If we are supposed to produce a 0/1 value, we want to do
5712 a logical shift from the sign bit to the low-order bit; for
5713 a -1/0 value, we do an arithmetic shift. */
5714 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5715 GET_MODE_BITSIZE (int_mode) - 1,
5716 subtarget, normalizep != -1);
5718 if (int_mode != int_target_mode)
5719 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5721 return op0;
5724 /* Next try expanding this via the backend's cstore<mode>4. */
5725 mclass = GET_MODE_CLASS (mode);
5726 FOR_EACH_WIDER_MODE_FROM (compare_mode, mode)
5728 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5729 icode = optab_handler (cstore_optab, optab_mode);
5730 if (icode != CODE_FOR_nothing)
5732 do_pending_stack_adjust ();
5733 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5734 unsignedp, op0, op1, normalizep, target_mode);
5735 if (tem)
5736 return tem;
5738 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5740 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5741 unsignedp, op1, op0, normalizep, target_mode);
5742 if (tem)
5743 return tem;
5745 break;
5749 /* If we are comparing a double-word integer with zero or -1, we can
5750 convert the comparison into one involving a single word. */
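/* For example, a double-word OP0 made of words HI and LO satisfies
   OP0 == 0 iff (HI | LO) == 0 and OP0 == -1 iff (HI & LO) == -1,
   while OP0 < 0 and OP0 >= 0 depend only on the sign bit of HI. */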
5751 if (is_int_mode (mode, &int_mode)
5752 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5753 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5755 rtx tem;
5756 if ((code == EQ || code == NE)
5757 && (op1 == const0_rtx || op1 == constm1_rtx))
5759 rtx op00, op01;
5761 /* Do a logical OR or AND of the two words and compare the
5762 result. */
5763 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5764 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5765 tem = expand_binop (word_mode,
5766 op1 == const0_rtx ? ior_optab : and_optab,
5767 op00, op01, NULL_RTX, unsignedp,
5768 OPTAB_DIRECT);
5770 if (tem != 0)
5771 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5772 unsignedp, normalizep);
5774 else if ((code == LT || code == GE) && op1 == const0_rtx)
5776 rtx op0h;
5778 /* If testing the sign bit, can just test on high word. */
5779 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5780 subreg_highpart_offset (word_mode,
5781 int_mode));
5782 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5783 unsignedp, normalizep);
5785 else
5786 tem = NULL_RTX;
5788 if (tem)
5790 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5791 return tem;
5792 if (!target)
5793 target = gen_reg_rtx (target_mode);
5795 convert_move (target, tem,
5796 !val_signbit_known_set_p (word_mode,
5797 (normalizep ? normalizep
5798 : STORE_FLAG_VALUE)));
5799 return target;
5803 return 0;
5806 /* Subroutine of emit_store_flag that handles cases in which the operands
5807 are scalar integers. SUBTARGET is the target to use for temporary
5808 operations and TRUEVAL is the value to store when the condition is
5809 true. All other arguments are as for emit_store_flag. */
5811 static rtx
5812 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5813 rtx op1, scalar_int_mode mode, int unsignedp,
5814 int normalizep, rtx trueval)
5816 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5817 rtx_insn *last = get_last_insn ();
5819 /* If this is an equality comparison of integers, we can try to exclusive-or
5820 (or subtract) the two operands and use a recursive call to try the
5821 comparison with zero. Don't do any of these cases if branches are
5822 very cheap. */
5824 if ((code == EQ || code == NE) && op1 != const0_rtx)
5826 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5827 OPTAB_WIDEN);
5829 if (tem == 0)
5830 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5831 OPTAB_WIDEN);
5832 if (tem != 0)
5833 tem = emit_store_flag (target, code, tem, const0_rtx,
5834 mode, unsignedp, normalizep);
5835 if (tem != 0)
5836 return tem;
5838 delete_insns_since (last);
5841 /* For integer comparisons, try the reverse comparison. However, for
5842 small X, if we would have to extend it anyway, implementing "X != 0"
5843 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5844 rtx_code rcode = reverse_condition (code);
5845 if (can_compare_p (rcode, mode, ccp_store_flag)
5846 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5847 && code == NE
5848 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5849 && op1 == const0_rtx))
5851 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5852 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5854 /* Again, for the reverse comparison, use either an addition or a XOR. */
5855 if (want_add
5856 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5857 optimize_insn_for_speed_p ()) == 0)
5859 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5860 STORE_FLAG_VALUE, target_mode);
5861 if (tem != 0)
5862 tem = expand_binop (target_mode, add_optab, tem,
5863 gen_int_mode (normalizep, target_mode),
5864 target, 0, OPTAB_WIDEN);
5865 if (tem != 0)
5866 return tem;
5868 else if (!want_add
5869 && rtx_cost (trueval, mode, XOR, 1,
5870 optimize_insn_for_speed_p ()) == 0)
5872 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5873 normalizep, target_mode);
5874 if (tem != 0)
5875 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5876 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5877 if (tem != 0)
5878 return tem;
5881 delete_insns_since (last);
5884 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5885 the constant zero. Reject all other comparisons at this point. Only
5886 do LE and GT if branches are expensive since they are expensive on
5887 2-operand machines. */
5889 if (op1 != const0_rtx
5890 || (code != EQ && code != NE
5891 && (BRANCH_COST (optimize_insn_for_speed_p (),
5892 false) <= 1 || (code != LE && code != GT))))
5893 return 0;
5895 /* Try to put the result of the comparison in the sign bit. Assume we can't
5896 do the necessary operation below. */
5898 rtx tem = 0;
5900 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5901 the sign bit set. */
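/* For A > 0 both A and A - 1 are nonnegative, so the IOR keeps the
   sign bit clear; for A == 0 the subtraction wraps to -1; for A < 0
   the sign bit of A itself is set. This also covers the most
   negative value, where A - 1 wraps to the most positive one. */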
5903 if (code == LE)
5905 /* This is destructive, so SUBTARGET can't be OP0. */
5906 if (rtx_equal_p (subtarget, op0))
5907 subtarget = 0;
5909 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5910 OPTAB_WIDEN);
5911 if (tem)
5912 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5913 OPTAB_WIDEN);
5916 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5917 number of bits in the mode of OP0, minus one. */
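/* For A > 0 the arithmetic shift yields 0 and 0 - A is negative; for
   A == 0 the result is 0; for A < 0 the shift yields -1 and
   -1 - A == -(A + 1) is nonnegative. So the sign bit of the result
   is set exactly when A > 0, even for the most negative A, where
   -1 - A is the most positive value. */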
5919 if (code == GT)
5921 if (rtx_equal_p (subtarget, op0))
5922 subtarget = 0;
5924 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5925 GET_MODE_BITSIZE (mode) - 1,
5926 subtarget, 0);
5927 if (tem)
5928 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5929 OPTAB_WIDEN);
5932 if (code == EQ || code == NE)
5934 /* For EQ or NE, one way to do the comparison is to apply an operation
5935 that converts the operand into a positive number if it is nonzero
5936 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5937 for NE we negate. This puts the result in the sign bit. Then we
5938 normalize with a shift, if needed.
5940 Two operations that can do the above actions are ABS and FFS, so try
5941 them. If that doesn't work, and MODE is smaller than a full word,
5942 we can use zero-extension to the wider mode (an unsigned conversion)
5943 as the operation. */
5945 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5946 that is compensated by the subsequent overflow when subtracting
5947 one / negating. */
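/* For example, for the most negative 32-bit value, ABS returns the
   value itself (still negative): EQ then computes INT_MIN - 1 ==
   INT_MAX, whose sign bit is clear, correctly signalling "not equal
   to zero"; NE computes -INT_MIN == INT_MIN, whose sign bit is set,
   which is likewise correct. */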
5949 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5950 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5951 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5952 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5953 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5955 tem = convert_modes (word_mode, mode, op0, 1);
5956 mode = word_mode;
5959 if (tem != 0)
5961 if (code == EQ)
5962 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5963 0, OPTAB_WIDEN);
5964 else
5965 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5968 /* If we couldn't do it that way, for NE we can "or" the two's complement
5969 of the value with itself. For EQ, we take the one's complement of
5970 that "or", which is an extra insn, so we only handle EQ if branches
5971 are expensive. */
5973 if (tem == 0
5974 && (code == NE
5975 || BRANCH_COST (optimize_insn_for_speed_p (),
5976 false) > 1))
5978 if (rtx_equal_p (subtarget, op0))
5979 subtarget = 0;
5981 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5982 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5983 OPTAB_WIDEN);
5985 if (tem && code == EQ)
5986 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5990 if (tem && normalizep)
5991 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5992 GET_MODE_BITSIZE (mode) - 1,
5993 subtarget, normalizep == 1);
5995 if (tem)
5997 if (!target)
5998 ;
5999 else if (GET_MODE (tem) != target_mode)
6001 convert_move (target, tem, 0);
6002 tem = target;
6004 else if (!subtarget)
6006 emit_move_insn (target, tem);
6007 tem = target;
6010 else
6011 delete_insns_since (last);
6013 return tem;
6016 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
6017 and storing in TARGET. Normally return TARGET.
6018 Return 0 if that cannot be done.
6020 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
6021 it is VOIDmode, they cannot both be CONST_INT.
6023 UNSIGNEDP is for the case where we have to widen the operands
6024 to perform the operation. It says to use zero-extension.
6026 NORMALIZEP is 1 if we should convert the result to be either zero
6027 or one. NORMALIZEP is -1 if we should convert the result to be
6028 either zero or -1. If NORMALIZEP is zero, the result will be left
6029 "raw" out of the scc insn. */
6031 rtx
6032 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6033 machine_mode mode, int unsignedp, int normalizep)
6035 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6036 enum rtx_code rcode;
6037 rtx subtarget;
6038 rtx tem, trueval;
6039 rtx_insn *last;
6041 /* If we compare constants, we shouldn't use a store-flag operation,
6042 but a constant load. We can get there via the vanilla route that
6043 usually generates a compare-branch sequence, but will in this case
6044 fold the comparison to a constant, and thus elide the branch. */
6045 if (CONSTANT_P (op0) && CONSTANT_P (op1))
6046 return NULL_RTX;
6048 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6049 target_mode);
6050 if (tem)
6051 return tem;
6053 /* If we reached here, we can't do this with a scc insn, however there
6054 are some comparisons that can be done in other ways. Don't do any
6055 of these cases if branches are very cheap. */
6056 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6057 return 0;
6059 /* See what we need to return. We can only return a 1, -1, or the
6060 sign bit. */
6062 if (normalizep == 0)
6064 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6065 normalizep = STORE_FLAG_VALUE;
6067 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6068 ;
6069 else
6070 return 0;
6073 last = get_last_insn ();
6075 /* If optimizing, use different pseudo registers for each insn, instead
6076 of reusing the same pseudo. This leads to better CSE, but slows
6077 down the compiler, since there are more pseudos. */
6078 subtarget = (!optimize
6079 && (target_mode == mode)) ? target : NULL_RTX;
6080 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6082 /* For floating-point comparisons, try the reverse comparison or try
6083 changing the "orderedness" of the comparison. */
6084 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6086 enum rtx_code first_code;
6087 bool and_them;
6089 rcode = reverse_condition_maybe_unordered (code);
6090 if (can_compare_p (rcode, mode, ccp_store_flag)
6091 && (code == ORDERED || code == UNORDERED
6092 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6093 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6095 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6096 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6098 /* For the reverse comparison, use either an addition or a XOR. */
6099 if (want_add
6100 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6101 optimize_insn_for_speed_p ()) == 0)
6103 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6104 STORE_FLAG_VALUE, target_mode);
6105 if (tem)
6106 return expand_binop (target_mode, add_optab, tem,
6107 gen_int_mode (normalizep, target_mode),
6108 target, 0, OPTAB_WIDEN);
6110 else if (!want_add
6111 && rtx_cost (trueval, mode, XOR, 1,
6112 optimize_insn_for_speed_p ()) == 0)
6114 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6115 normalizep, target_mode);
6116 if (tem)
6117 return expand_binop (target_mode, xor_optab, tem, trueval,
6118 target, INTVAL (trueval) >= 0,
6119 OPTAB_WIDEN);
6123 delete_insns_since (last);
6125 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6126 if (code == ORDERED || code == UNORDERED)
6127 return 0;
6129 and_them = split_comparison (code, mode, &first_code, &code);
6131 /* If there are no NaNs, the first comparison should always fall through.
6132 Effectively change the comparison to the other one. */
6133 if (!HONOR_NANS (mode))
6135 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6136 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6137 target_mode);
6140 if (!HAVE_conditional_move)
6141 return 0;
6143 /* Do not turn a trapping comparison into a non-trapping one. */
6144 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6145 && flag_trapping_math)
6146 return 0;
6148 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6149 conditional move. */
6150 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6151 normalizep, target_mode);
6152 if (tem == 0)
6153 return 0;
6155 if (and_them)
6156 tem = emit_conditional_move (target, { code, op0, op1, mode },
6157 tem, const0_rtx, GET_MODE (tem), 0);
6158 else
6159 tem = emit_conditional_move (target, { code, op0, op1, mode },
6160 trueval, tem, GET_MODE (tem), 0);
6162 if (tem == 0)
6163 delete_insns_since (last);
6164 return tem;
6167 /* The remaining tricks only apply to integer comparisons. */
6169 scalar_int_mode int_mode;
6170 if (is_int_mode (mode, &int_mode))
6171 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6172 unsignedp, normalizep, trueval);
6174 return 0;
6177 /* Like emit_store_flag, but always succeeds. */
6179 rtx
6180 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6181 machine_mode mode, int unsignedp, int normalizep)
6183 rtx tem;
6184 rtx_code_label *label;
6185 rtx trueval, falseval;
6187 /* First see if emit_store_flag can do the job. */
6188 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6189 if (tem != 0)
6190 return tem;
6192 /* If one operand is constant, make it the second one. Only do this
6193 if the other operand is not constant as well. */
6194 if (swap_commutative_operands_p (op0, op1))
6196 std::swap (op0, op1);
6197 code = swap_condition (code);
6200 if (mode == VOIDmode)
6201 mode = GET_MODE (op0);
6203 if (!target)
6204 target = gen_reg_rtx (word_mode);
6206 /* If this failed, we have to do this with set/compare/jump/set code.
6207 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6208 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6209 if (code == NE
6210 && GET_MODE_CLASS (mode) == MODE_INT
6211 && REG_P (target)
6212 && op0 == target
6213 && op1 == const0_rtx)
6215 label = gen_label_rtx ();
6216 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6217 NULL_RTX, NULL, label,
6218 profile_probability::uninitialized ());
6219 emit_move_insn (target, trueval);
6220 emit_label (label);
6221 return target;
6224 if (!REG_P (target)
6225 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6226 target = gen_reg_rtx (GET_MODE (target));
6228 /* Jump in the right direction if the target cannot implement CODE
6229 but can jump on its reverse condition. */
6230 falseval = const0_rtx;
6231 if (! can_compare_p (code, mode, ccp_jump)
6232 && (! FLOAT_MODE_P (mode)
6233 || code == ORDERED || code == UNORDERED
6234 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6235 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6237 enum rtx_code rcode;
6238 if (FLOAT_MODE_P (mode))
6239 rcode = reverse_condition_maybe_unordered (code);
6240 else
6241 rcode = reverse_condition (code);
6243 /* Canonicalize to UNORDERED for the libcall. */
6244 if (can_compare_p (rcode, mode, ccp_jump)
6245 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6247 falseval = trueval;
6248 trueval = const0_rtx;
6249 code = rcode;
6253 emit_move_insn (target, trueval);
6254 label = gen_label_rtx ();
6255 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6256 label, profile_probability::uninitialized ());
6258 emit_move_insn (target, falseval);
6259 emit_label (label);
6261 return target;
6264 /* Helper function for canonicalize_comparison. Swap between inclusive
6265 and exclusive ranges in order to create an equivalent comparison. See
6266 canonicalize_comparison for the possible cases. */
6268 static enum rtx_code
6269 equivalent_cmp_code (enum rtx_code code)
6271 switch (code)
6273 case GT:
6274 return GE;
6275 case GE:
6276 return GT;
6277 case LT:
6278 return LE;
6279 case LE:
6280 return LT;
6281 case GTU:
6282 return GEU;
6283 case GEU:
6284 return GTU;
6285 case LTU:
6286 return LEU;
6287 case LEU:
6288 return LTU;
6290 default:
6291 return code;
6295 /* Choose the more appropriate immediate in scalar integer comparisons. The
6296 purpose of this is to end up with an immediate which can be loaded into a
6297 register in fewer moves, if possible.
6299 For each integer comparison there exists an equivalent choice:
6300 i) a > b or a >= b + 1
6301 ii) a <= b or a < b + 1
6302 iii) a >= b or a > b - 1
6303 iv) a < b or a <= b - 1
6305 MODE is the mode of the first operand.
6306 CODE points to the comparison code.
6307 IMM points to the rtx containing the immediate. *IMM must satisfy
6308 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6309 on exit. */
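/* For example, if *CODE is GT and *IMM is 4095, the pair (GE, 4096) is
   an equivalent comparison; canonicalize_comparison keeps whichever
   constant the target can load more cheaply, as measured by insn_cost
   on the two candidate moves. */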
6311 void
6312 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6314 if (!SCALAR_INT_MODE_P (mode))
6315 return;
6317 int to_add = 0;
6318 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6320 /* Extract the immediate value from the rtx. */
6321 wide_int imm_val = rtx_mode_t (*imm, mode);
6323 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6324 to_add = 1;
6325 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6326 to_add = -1;
6327 else
6328 return;
6330 /* Check for overflow/underflow in the case of signed values and
6331 wrapping around in the case of unsigned values. If any occur,
6332 cancel the optimization. */
6333 wi::overflow_type overflow = wi::OVF_NONE;
6334 wide_int imm_modif;
6336 if (to_add == 1)
6337 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6338 else
6339 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6341 if (overflow)
6342 return;
6344 /* The following creates a pseudo; if we cannot do that, bail out. */
6345 if (!can_create_pseudo_p ())
6346 return;
6348 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6349 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6351 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6352 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6354 /* Update the immediate and the code. */
6355 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6357 *code = equivalent_cmp_code (*code);
6358 *imm = new_imm;
6364 /* Perform possibly multi-word comparison and conditional jump to LABEL
6365 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6366 now a thin wrapper around do_compare_rtx_and_jump. */
6368 static void
6369 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6370 rtx_code_label *label)
6372 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6373 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6374 NULL, label, profile_probability::uninitialized ());