gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2021 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* Work around tree-optimization/91825.  */
  22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "backend.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "predict.h"
  32 #include "memmodel.h"
  33 #include "tm_p.h"
  34 #include "optabs.h"
  35 #include "expmed.h"
  36 #include "regs.h"
  37 #include "emit-rtl.h"
  38 #include "diagnostic-core.h"
  39 #include "fold-const.h"
  40 #include "stor-layout.h"
  41 #include "dojump.h"
  42 #include "explow.h"
  43 #include "expr.h"
  44 #include "langhooks.h"
  45 #include "tree-vector-builder.h"
  46
  47 struct target_expmed default_target_expmed;
  48 #if SWITCHABLE_TARGET
  49 struct target_expmed *this_target_expmed = &default_target_expmed;
  50 #endif
  51
  52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
  53                                       unsigned HOST_WIDE_INT,
  54                                       unsigned HOST_WIDE_INT,
  55                                       poly_uint64, poly_uint64,
  56                                       machine_mode, rtx, bool, bool);
  57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    poly_uint64, poly_uint64,
  61                                    rtx, scalar_int_mode, bool);
  62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
  63                                      unsigned HOST_WIDE_INT,
  64                                      unsigned HOST_WIDE_INT,
  65                                      rtx, scalar_int_mode, bool);
  66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
  67                                    unsigned HOST_WIDE_INT,
  68                                    unsigned HOST_WIDE_INT,
  69                                    poly_uint64, poly_uint64,
  70                                    rtx, scalar_int_mode, bool);
  71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
  72                                        unsigned HOST_WIDE_INT,
  73                                        unsigned HOST_WIDE_INT, int, rtx,
  74                                        machine_mode, machine_mode, bool, bool);
  75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
  76                                     unsigned HOST_WIDE_INT,
  77                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
  79                                       unsigned HOST_WIDE_INT,
  80                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
  83                                     unsigned HOST_WIDE_INT,
  84                                     unsigned HOST_WIDE_INT, int, bool);
  85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  88
  89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  90    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  91    The mask is truncated if necessary to the width of mode MODE.  The
  92    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  93
  94 static inline rtx
  95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
  96 {
  97   return immed_wide_int_const
  98     (wi::shifted_mask (bitpos, bitsize, complement,
  99                        GET_MODE_PRECISION (mode)), mode);
 100 }
 101
 102 /* Test whether a value is zero of a power of two.  */
 103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
 104   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
 105
 106 struct init_expmed_rtl
 107 {
 108   rtx reg;
 109   rtx plus;
 110   rtx neg;
 111   rtx mult;
 112   rtx sdiv;
 113   rtx udiv;
 114   rtx sdiv_32;
 115   rtx smod_32;
 116   rtx wide_mult;
 117   rtx wide_lshr;
 118   rtx wide_trunc;
 119   rtx shift;
 120   rtx shift_mult;
 121   rtx shift_add;
 122   rtx shift_sub0;
 123   rtx shift_sub1;
 124   rtx zext;
 125   rtx trunc;
 126
 127   rtx pow2[MAX_BITS_PER_WORD];
 128   rtx cint[MAX_BITS_PER_WORD];
 129 };
 130
 131 static void
 132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
 133                       scalar_int_mode from_mode, bool speed)
 134 {
 135   int to_size, from_size;
 136   rtx which;
 137
 138   to_size = GET_MODE_PRECISION (to_mode);
 139   from_size = GET_MODE_PRECISION (from_mode);
 140
 141   /* Most partial integers have a precision less than the "full"
 142      integer it requires for storage.  In case one doesn't, for
 143      comparison purposes here, reduce the bit size by one in that
 144      case.  */
 145   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 146       && pow2p_hwi (to_size))
 147     to_size --;
 148   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 149       && pow2p_hwi (from_size))
 150     from_size --;
 151
 152   /* Assume cost of zero-extend and sign-extend is the same.  */
 153   which = (to_size < from_size ? all->trunc : all->zext);
 154
 155   PUT_MODE (all->reg, from_mode);
 156   set_convert_cost (to_mode, from_mode, speed,
 157                     set_src_cost (which, to_mode, speed));
 158   /* Restore all->reg's mode.  */
 159   PUT_MODE (all->reg, to_mode);
 160 }
 161
 162 static void
 163 init_expmed_one_mode (struct init_expmed_rtl *all,
 164                       machine_mode mode, int speed)
 165 {
 166   int m, n, mode_bitsize;
 167   machine_mode mode_from;
 168
 169   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 170
 171   PUT_MODE (all->reg, mode);
 172   PUT_MODE (all->plus, mode);
 173   PUT_MODE (all->neg, mode);
 174   PUT_MODE (all->mult, mode);
 175   PUT_MODE (all->sdiv, mode);
 176   PUT_MODE (all->udiv, mode);
 177   PUT_MODE (all->sdiv_32, mode);
 178   PUT_MODE (all->smod_32, mode);
 179   PUT_MODE (all->wide_trunc, mode);
 180   PUT_MODE (all->shift, mode);
 181   PUT_MODE (all->shift_mult, mode);
 182   PUT_MODE (all->shift_add, mode);
 183   PUT_MODE (all->shift_sub0, mode);
 184   PUT_MODE (all->shift_sub1, mode);
 185   PUT_MODE (all->zext, mode);
 186   PUT_MODE (all->trunc, mode);
 187
 188   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 189   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 190   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 191   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 192   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 193
 194   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 195                                      <= 2 * add_cost (speed, mode)));
 196   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 197                                      <= 4 * add_cost (speed, mode)));
 198
 199   set_shift_cost (speed, mode, 0, 0);
 200   {
 201     int cost = add_cost (speed, mode);
 202     set_shiftadd_cost (speed, mode, 0, cost);
 203     set_shiftsub0_cost (speed, mode, 0, cost);
 204     set_shiftsub1_cost (speed, mode, 0, cost);
 205   }
 206
 207   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 208   for (m = 1; m < n; m++)
 209     {
 210       XEXP (all->shift, 1) = all->cint[m];
 211       XEXP (all->shift_mult, 1) = all->pow2[m];
 212
 213       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 214       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 215                                                        speed));
 216       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 217                                                         speed));
 218       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 219                                                         speed));
 220     }
 221
 222   scalar_int_mode int_mode_to;
 223   if (is_a <scalar_int_mode> (mode, &int_mode_to))
 224     {
 225       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 226            mode_from = (machine_mode)(mode_from + 1))
 227         init_expmed_one_conv (all, int_mode_to,
 228                               as_a <scalar_int_mode> (mode_from), speed);
 229
 230       scalar_int_mode wider_mode;
 231       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
 232           && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
 233         {
 234           PUT_MODE (all->reg, mode);
 235           PUT_MODE (all->zext, wider_mode);
 236           PUT_MODE (all->wide_mult, wider_mode);
 237           PUT_MODE (all->wide_lshr, wider_mode);
 238           XEXP (all->wide_lshr, 1)
 239             = gen_int_shift_amount (wider_mode, mode_bitsize);
 240
 241           set_mul_widen_cost (speed, wider_mode,
 242                               set_src_cost (all->wide_mult, wider_mode, speed));
 243           set_mul_highpart_cost (speed, int_mode_to,
 244                                  set_src_cost (all->wide_trunc,
 245                                                int_mode_to, speed));
 246         }
 247     }
 248 }
 249
 250 void
 251 init_expmed (void)
 252 {
 253   struct init_expmed_rtl all;
 254   machine_mode mode = QImode;
 255   int m, speed;
 256
 257   memset (&all, 0, sizeof all);
 258   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 259     {
 260       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 261       all.cint[m] = GEN_INT (m);
 262     }
 263
 264   /* Avoid using hard regs in ways which may be unsupported.  */
 265   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 266   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 267   all.neg = gen_rtx_NEG (mode, all.reg);
 268   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 269   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 270   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 271   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 272   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 273   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 274   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 275   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 276   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 277   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 278   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 279   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 280   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 281   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 282   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 283
 284   for (speed = 0; speed < 2; speed++)
 285     {
 286       crtl->maybe_hot_insn_p = speed;
 287       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 288
 289       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 290            mode = (machine_mode)(mode + 1))
 291         init_expmed_one_mode (&all, mode, speed);
 292
 293       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 294         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 295              mode = (machine_mode)(mode + 1))
 296           init_expmed_one_mode (&all, mode, speed);
 297
 298       if (MIN_MODE_VECTOR_INT != VOIDmode)
 299         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 300              mode = (machine_mode)(mode + 1))
 301           init_expmed_one_mode (&all, mode, speed);
 302     }
 303
 304   if (alg_hash_used_p ())
 305     {
 306       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 307       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 308     }
 309   else
 310     set_alg_hash_used_p (true);
 311   default_rtl_profile ();
 312
 313   ggc_free (all.trunc);
 314   ggc_free (all.shift_sub1);
 315   ggc_free (all.shift_sub0);
 316   ggc_free (all.shift_add);
 317   ggc_free (all.shift_mult);
 318   ggc_free (all.shift);
 319   ggc_free (all.wide_trunc);
 320   ggc_free (all.wide_lshr);
 321   ggc_free (all.wide_mult);
 322   ggc_free (all.zext);
 323   ggc_free (all.smod_32);
 324   ggc_free (all.sdiv_32);
 325   ggc_free (all.udiv);
 326   ggc_free (all.sdiv);
 327   ggc_free (all.mult);
 328   ggc_free (all.neg);
 329   ggc_free (all.plus);
 330   ggc_free (all.reg);
 331 }
 332
 333 /* Return an rtx representing minus the value of X.
 334    MODE is the intended mode of the result,
 335    useful if X is a CONST_INT.  */
 336
 337 rtx
 338 negate_rtx (machine_mode mode, rtx x)
 339 {
 340   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 341
 342   if (result == 0)
 343     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 344
 345   return result;
 346 }
 347
 348 /* Whether reverse storage order is supported on the target.  */
 349 static int reverse_storage_order_supported = -1;
 350
 351 /* Check whether reverse storage order is supported on the target.  */
 352
 353 static void
 354 check_reverse_storage_order_support (void)
 355 {
 356   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 357     {
 358       reverse_storage_order_supported = 0;
 359       sorry ("reverse scalar storage order");
 360     }
 361   else
 362     reverse_storage_order_supported = 1;
 363 }
 364
 365 /* Whether reverse FP storage order is supported on the target.  */
 366 static int reverse_float_storage_order_supported = -1;
 367
 368 /* Check whether reverse FP storage order is supported on the target.  */
 369
 370 static void
 371 check_reverse_float_storage_order_support (void)
 372 {
 373   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 374     {
 375       reverse_float_storage_order_supported = 0;
 376       sorry ("reverse floating-point scalar storage order");
 377     }
 378   else
 379     reverse_float_storage_order_supported = 1;
 380 }
 381
 382 /* Return an rtx representing value of X with reverse storage order.
 383    MODE is the intended mode of the result,
 384    useful if X is a CONST_INT.  */
 385
 386 rtx
 387 flip_storage_order (machine_mode mode, rtx x)
 388 {
 389   scalar_int_mode int_mode;
 390   rtx result;
 391
 392   if (mode == QImode)
 393     return x;
 394
 395   if (COMPLEX_MODE_P (mode))
 396     {
 397       rtx real = read_complex_part (x, false);
 398       rtx imag = read_complex_part (x, true);
 399
 400       real = flip_storage_order (GET_MODE_INNER (mode), real);
 401       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 402
 403       return gen_rtx_CONCAT (mode, real, imag);
 404     }
 405
 406   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 407     check_reverse_storage_order_support ();
 408
 409   if (!is_a <scalar_int_mode> (mode, &int_mode))
 410     {
 411       if (FLOAT_MODE_P (mode)
 412           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 413         check_reverse_float_storage_order_support ();
 414
 415       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
 416           || !targetm.scalar_mode_supported_p (int_mode))
 417         {
 418           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 419           return x;
 420         }
 421       x = gen_lowpart (int_mode, x);
 422     }
 423
 424   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 425   if (result == 0)
 426     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 427
 428   if (int_mode != mode)
 429     result = gen_lowpart (mode, result);
 430
 431   return result;
 432 }
 433
 434 /* If MODE is set, adjust bitfield memory MEM so that it points to the
 435    first unit of mode MODE that contains a bitfield of size BITSIZE at
 436    bit position BITNUM.  If MODE is not set, return a BLKmode reference
 437    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
 438    of the field within the new memory.  */
 439
 440 static rtx
 441 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
 442                       unsigned HOST_WIDE_INT bitsize,
 443                       unsigned HOST_WIDE_INT bitnum,
 444                       unsigned HOST_WIDE_INT *new_bitnum)
 445 {
 446   scalar_int_mode imode;
 447   if (mode.exists (&imode))
 448     {
 449       unsigned int unit = GET_MODE_BITSIZE (imode);
 450       *new_bitnum = bitnum % unit;
 451       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 452       return adjust_bitfield_address (mem, imode, offset);
 453     }
 454   else
 455     {
 456       *new_bitnum = bitnum % BITS_PER_UNIT;
 457       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 458       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 459                             / BITS_PER_UNIT);
 460       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
 461     }
 462 }
 463
 464 /* The caller wants to perform insertion or extraction PATTERN on a
 465    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 466    BITREGION_START and BITREGION_END are as for store_bit_field
 467    and FIELDMODE is the natural mode of the field.
 468
 469    Search for a mode that is compatible with the memory access
 470    restrictions and (where applicable) with a register insertion or
 471    extraction.  Return the new memory on success, storing the adjusted
 472    bit position in *NEW_BITNUM.  Return null otherwise.  */
 473
 474 static rtx
 475 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 476                               rtx op0, HOST_WIDE_INT bitsize,
 477                               HOST_WIDE_INT bitnum,
 478                               poly_uint64 bitregion_start,
 479                               poly_uint64 bitregion_end,
 480                               machine_mode fieldmode,
 481                               unsigned HOST_WIDE_INT *new_bitnum)
 482 {
 483   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 484                                 bitregion_end, MEM_ALIGN (op0),
 485                                 MEM_VOLATILE_P (op0));
 486   scalar_int_mode best_mode;
 487   if (iter.next_mode (&best_mode))
 488     {
 489       /* We can use a memory in BEST_MODE.  See whether this is true for
 490          any wider modes.  All other things being equal, we prefer to
 491          use the widest mode possible because it tends to expose more
 492          CSE opportunities.  */
 493       if (!iter.prefer_smaller_modes ())
 494         {
 495           /* Limit the search to the mode required by the corresponding
 496              register insertion or extraction instruction, if any.  */
 497           scalar_int_mode limit_mode = word_mode;
 498           extraction_insn insn;
 499           if (get_best_reg_extraction_insn (&insn, pattern,
 500                                             GET_MODE_BITSIZE (best_mode),
 501                                             fieldmode))
 502             limit_mode = insn.field_mode;
 503
 504           scalar_int_mode wider_mode;
 505           while (iter.next_mode (&wider_mode)
 506                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 507             best_mode = wider_mode;
 508         }
 509       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 510                                    new_bitnum);
 511     }
 512   return NULL_RTX;
 513 }
 514
 515 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 516    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 517    offset is then BITNUM / BITS_PER_UNIT.  */
 518
 519 static bool
 520 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
 521                      machine_mode struct_mode)
 522 {
 523   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
 524   if (BYTES_BIG_ENDIAN)
 525     return (multiple_p (bitnum, BITS_PER_UNIT)
 526             && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
 527                 || multiple_p (bitnum + bitsize,
 528                                regsize * BITS_PER_UNIT)));
 529   else
 530     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
 531 }
 532
 533 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 534    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 535    Return false if the access would touch memory outside the range
 536    BITREGION_START to BITREGION_END for conformance to the C++ memory
 537    model.  */
 538
 539 static bool
 540 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 541                             unsigned HOST_WIDE_INT bitnum,
 542                             scalar_int_mode fieldmode,
 543                             poly_uint64 bitregion_start,
 544                             poly_uint64 bitregion_end)
 545 {
 546   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 547
 548   /* -fstrict-volatile-bitfields must be enabled and we must have a
 549      volatile MEM.  */
 550   if (!MEM_P (op0)
 551       || !MEM_VOLATILE_P (op0)
 552       || flag_strict_volatile_bitfields <= 0)
 553     return false;
 554
 555   /* The bit size must not be larger than the field mode, and
 556      the field mode must not be larger than a word.  */
 557   if (bitsize > modesize || modesize > BITS_PER_WORD)
 558     return false;
 559
 560   /* Check for cases of unaligned fields that must be split.  */
 561   if (bitnum % modesize + bitsize > modesize)
 562     return false;
 563
 564   /* The memory must be sufficiently aligned for a MODESIZE access.
 565      This condition guarantees, that the memory access will not
 566      touch anything after the end of the structure.  */
 567   if (MEM_ALIGN (op0) < modesize)
 568     return false;
 569
 570   /* Check for cases where the C++ memory model applies.  */
 571   if (maybe_ne (bitregion_end, 0U)
 572       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
 573           || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
 574                        bitregion_end)))
 575     return false;
 576
 577   return true;
 578 }
 579
 580 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 581    bit number BITNUM can be treated as a simple value of mode MODE.
 582    Store the byte offset in *BYTENUM if so.  */
 583
 584 static bool
 585 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
 586                        machine_mode mode, poly_uint64 *bytenum)
 587 {
 588   return (MEM_P (op0)
 589           && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
 590           && known_eq (bitsize, GET_MODE_BITSIZE (mode))
 591           && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
 592               || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
 593                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 594 }
 595 \f
 596 /* Try to use instruction INSV to store VALUE into a field of OP0.
 597    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
 598    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
 599    are as for store_bit_field.  */
 600
 601 static bool
 602 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 603                             opt_scalar_int_mode op0_mode,
 604                             unsigned HOST_WIDE_INT bitsize,
 605                             unsigned HOST_WIDE_INT bitnum,
 606                             rtx value, scalar_int_mode value_mode)
 607 {
 608   class expand_operand ops[4];
 609   rtx value1;
 610   rtx xop0 = op0;
 611   rtx_insn *last = get_last_insn ();
 612   bool copy_back = false;
 613
 614   scalar_int_mode op_mode = insv->field_mode;
 615   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 616   if (bitsize == 0 || bitsize > unit)
 617     return false;
 618
 619   if (MEM_P (xop0))
 620     /* Get a reference to the first byte of the field.  */
 621     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 622                                  &bitnum);
 623   else
 624     {
 625       /* Convert from counting within OP0 to counting in OP_MODE.  */
 626       if (BYTES_BIG_ENDIAN)
 627         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
 628
 629       /* If xop0 is a register, we need it in OP_MODE
 630          to make it acceptable to the format of insv.  */
 631       if (GET_CODE (xop0) == SUBREG)
 632         {
 633           /* If such a SUBREG can't be created, give up.  */
 634           if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
 635                                 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
 636             return false;
 637           /* We can't just change the mode, because this might clobber op0,
 638              and we will need the original value of op0 if insv fails.  */
 639           xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
 640                                  SUBREG_BYTE (xop0));
 641         }
 642       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 643         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 644     }
 645
 646   /* If the destination is a paradoxical subreg such that we need a
 647      truncate to the inner mode, perform the insertion on a temporary and
 648      truncate the result to the original destination.  Note that we can't
 649      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 650      X) 0)) is (reg:N X).  */
 651   if (GET_CODE (xop0) == SUBREG
 652       && REG_P (SUBREG_REG (xop0))
 653       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 654                                          op_mode))
 655     {
 656       rtx tem = gen_reg_rtx (op_mode);
 657       emit_move_insn (tem, xop0);
 658       xop0 = tem;
 659       copy_back = true;
 660     }
 661
 662   /* There are similar overflow check at the start of store_bit_field_1,
 663      but that only check the situation where the field lies completely
 664      outside the register, while there do have situation where the field
 665      lies partialy in the register, we need to adjust bitsize for this
 666      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 667      will broken on those arch support bit insert instruction, like arm, aarch64
 668      etc.  */
 669   if (bitsize + bitnum > unit && bitnum < unit)
 670     {
 671       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 672                "destination object, data truncated into %wu-bit",
 673                bitsize, unit - bitnum);
 674       bitsize = unit - bitnum;
 675     }
 676
 677   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 678      "backwards" from the size of the unit we are inserting into.
 679      Otherwise, we count bits from the most significant on a
 680      BYTES/BITS_BIG_ENDIAN machine.  */
 681
 682   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 683     bitnum = unit - bitsize - bitnum;
 684
 685   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 686   value1 = value;
 687   if (value_mode != op_mode)
 688     {
 689       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
 690         {
 691           rtx tmp;
 692           /* Optimization: Don't bother really extending VALUE
 693              if it has all the bits we will actually use.  However,
 694              if we must narrow it, be sure we do it correctly.  */
 695
 696           if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
 697             {
 698               tmp = simplify_subreg (op_mode, value1, value_mode, 0);
 699               if (! tmp)
 700                 tmp = simplify_gen_subreg (op_mode,
 701                                            force_reg (value_mode, value1),
 702                                            value_mode, 0);
 703             }
 704           else
 705             {
 706               tmp = gen_lowpart_if_possible (op_mode, value1);
 707               if (! tmp)
 708                 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
 709             }
 710           value1 = tmp;
 711         }
 712       else if (CONST_INT_P (value))
 713         value1 = gen_int_mode (INTVAL (value), op_mode);
 714       else
 715         /* Parse phase is supposed to make VALUE's data type
 716            match that of the component reference, which is a type
 717            at least as wide as the field; so VALUE should have
 718            a mode that corresponds to that type.  */
 719         gcc_assert (CONSTANT_P (value));
 720     }
 721
 722   create_fixed_operand (&ops[0], xop0);
 723   create_integer_operand (&ops[1], bitsize);
 724   create_integer_operand (&ops[2], bitnum);
 725   create_input_operand (&ops[3], value1, op_mode);
 726   if (maybe_expand_insn (insv->icode, 4, ops))
 727     {
 728       if (copy_back)
 729         convert_move (op0, xop0, true);
 730       return true;
 731     }
 732   delete_insns_since (last);
 733   return false;
 734 }
 735
 736 /* A subroutine of store_bit_field, with the same arguments.  Return true
 737    if the operation could be implemented.
 738
 739    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 740    no other way of implementing the operation.  If FALLBACK_P is false,
 741    return false instead.  */
 742
 743 static bool
 744 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
 745                    poly_uint64 bitregion_start, poly_uint64 bitregion_end,
 746                    machine_mode fieldmode,
 747                    rtx value, bool reverse, bool fallback_p)
 748 {
 749   rtx op0 = str_rtx;
 750
 751   while (GET_CODE (op0) == SUBREG)
 752     {
 753       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
 754       op0 = SUBREG_REG (op0);
 755     }
 756
 757   /* No action is needed if the target is a register and if the field
 758      lies completely outside that register.  This can occur if the source
 759      code contains an out-of-bounds access to a small array.  */
 760   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
 761     return true;
 762
 763   /* Use vec_set patterns for inserting parts of vectors whenever
 764      available.  */
 765   machine_mode outermode = GET_MODE (op0);
 766   scalar_mode innermode = GET_MODE_INNER (outermode);
 767   poly_uint64 pos;
 768   if (VECTOR_MODE_P (outermode)
 769       && !MEM_P (op0)
 770       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
 771       && fieldmode == innermode
 772       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
 773       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
 774     {
 775       class expand_operand ops[3];
 776       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 777
 778       create_fixed_operand (&ops[0], op0);
 779       create_input_operand (&ops[1], value, innermode);
 780       create_integer_operand (&ops[2], pos);
 781       if (maybe_expand_insn (icode, 3, ops))
 782         return true;
 783     }
 784
 785   /* If the target is a register, overwriting the entire object, or storing
 786      a full-word or multi-word field can be done with just a SUBREG.  */
 787   if (!MEM_P (op0)
 788       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
 789     {
 790       /* Use the subreg machinery either to narrow OP0 to the required
 791          words or to cope with mode punning between equal-sized modes.
 792          In the latter case, use subreg on the rhs side, not lhs.  */
 793       rtx sub;
 794       HOST_WIDE_INT regnum;
 795       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
 796       if (known_eq (bitnum, 0U)
 797           && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
 798         {
 799           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 800           if (sub)
 801             {
 802               if (reverse)
 803                 sub = flip_storage_order (GET_MODE (op0), sub);
 804               emit_move_insn (op0, sub);
 805               return true;
 806             }
 807         }
 808       else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
 809                && multiple_p (bitsize, regsize * BITS_PER_UNIT))
 810         {
 811           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 812                                      regnum * regsize);
 813           if (sub)
 814             {
 815               if (reverse)
 816                 value = flip_storage_order (fieldmode, value);
 817               emit_move_insn (sub, value);
 818               return true;
 819             }
 820         }
 821     }
 822
 823   /* If the target is memory, storing any naturally aligned field can be
 824      done with a simple store.  For targets that support fast unaligned
 825      memory, any naturally sized, unit aligned field can be done directly.  */
 826   poly_uint64 bytenum;
 827   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
 828     {
 829       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
 830       if (reverse)
 831         value = flip_storage_order (fieldmode, value);
 832       emit_move_insn (op0, value);
 833       return true;
 834     }
 835
 836   /* It's possible we'll need to handle other cases here for
 837      polynomial bitnum and bitsize.  */
 838
 839   /* From here on we need to be looking at a fixed-size insertion.  */
 840   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
 841   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
 842
 843   /* Make sure we are playing with integral modes.  Pun with subregs
 844      if we aren't.  This must come after the entire register case above,
 845      since that case is valid for any mode.  The following cases are only
 846      valid for integral modes.  */
 847   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
 848   scalar_int_mode imode;
 849   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
 850     {
 851       if (MEM_P (op0))
 852         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
 853                                             0, MEM_SIZE (op0));
 854       else if (!op0_mode.exists ())
 855         {
 856           if (ibitnum == 0
 857               && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
 858               && MEM_P (value)
 859               && !reverse)
 860             {
 861               value = adjust_address (value, GET_MODE (op0), 0);
 862               emit_move_insn (op0, value);
 863               return true;
 864             }
 865           if (!fallback_p)
 866             return false;
 867           rtx temp = assign_stack_temp (GET_MODE (op0),
 868                                         GET_MODE_SIZE (GET_MODE (op0)));
 869           emit_move_insn (temp, op0);
 870           store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
 871                              reverse, fallback_p);
 872           emit_move_insn (op0, temp);
 873           return true;
 874         }
 875       else
 876         op0 = gen_lowpart (op0_mode.require (), op0);
 877     }
 878
 879   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
 880                                    bitregion_start, bitregion_end,
 881                                    fieldmode, value, reverse, fallback_p);
 882 }
 883
 884 /* Subroutine of store_bit_field_1, with the same arguments, except
 885    that BITSIZE and BITNUM are constant.  Handle cases specific to
 886    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
 887    otherwise OP0 is a BLKmode MEM.  */
 888
 889 static bool
 890 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
 891                           unsigned HOST_WIDE_INT bitsize,
 892                           unsigned HOST_WIDE_INT bitnum,
 893                           poly_uint64 bitregion_start,
 894                           poly_uint64 bitregion_end,
 895                           machine_mode fieldmode,
 896                           rtx value, bool reverse, bool fallback_p)
 897 {
 898   /* Storing an lsb-aligned field in a register
 899      can be done with a movstrict instruction.  */
 900
 901   if (!MEM_P (op0)
 902       && !reverse
 903       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
 904       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
 905       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 906     {
 907       class expand_operand ops[2];
 908       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 909       rtx arg0 = op0;
 910       unsigned HOST_WIDE_INT subreg_off;
 911
 912       if (GET_CODE (arg0) == SUBREG)
 913         {
 914           /* Else we've got some float mode source being extracted into
 915              a different float mode destination -- this combination of
 916              subregs results in Severe Tire Damage.  */
 917           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 918                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 919                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 920           arg0 = SUBREG_REG (arg0);
 921         }
 922
 923       subreg_off = bitnum / BITS_PER_UNIT;
 924       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 925         {
 926           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 927
 928           create_fixed_operand (&ops[0], arg0);
 929           /* Shrink the source operand to FIELDMODE.  */
 930           create_convert_operand_to (&ops[1], value, fieldmode, false);
 931           if (maybe_expand_insn (icode, 2, ops))
 932             return true;
 933         }
 934     }
 935
 936   /* Handle fields bigger than a word.  */
 937
 938   if (bitsize > BITS_PER_WORD)
 939     {
 940       /* Here we transfer the words of the field
 941          in the order least significant first.
 942          This is because the most significant word is the one which may
 943          be less than full.
 944          However, only do that if the value is not BLKmode.  */
 945
 946       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 947       const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 948       rtx_insn *last;
 949
 950       /* This is the mode we must force value to, so that there will be enough
 951          subwords to extract.  Note that fieldmode will often (always?) be
 952          VOIDmode, because that is what store_field uses to indicate that this
 953          is a bit field, but passing VOIDmode to operand_subword_force
 954          is not allowed.
 955
 956          The mode must be fixed-size, since insertions into variable-sized
 957          objects are meant to be handled before calling this function.  */
 958       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
 959       if (value_mode == VOIDmode)
 960         value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
 961
 962       last = get_last_insn ();
 963       for (int i = 0; i < nwords; i++)
 964         {
 965           /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
 966              except maybe for the last iteration.  */
 967           const unsigned HOST_WIDE_INT new_bitsize
 968             = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 969           /* Bit offset from the starting bit number in the target.  */
 970           const unsigned int bit_offset
 971             = backwards ^ reverse
 972               ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
 973               : i * BITS_PER_WORD;
 974           /* Starting word number in the value.  */
 975           const unsigned int wordnum
 976             = backwards
 977               ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
 978               : i;
 979           /* The chunk of the value in word_mode.  We use bit-field extraction
 980               in BLKmode to handle unaligned memory references and to shift the
 981               last chunk right on big-endian machines if need be.  */
 982           rtx value_word
 983             = fieldmode == BLKmode
 984               ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
 985                                    1, NULL_RTX, word_mode, word_mode, false,
 986                                    NULL)
 987               : operand_subword_force (value, wordnum, value_mode);
 988
 989           if (!store_bit_field_1 (op0, new_bitsize,
 990                                   bitnum + bit_offset,
 991                                   bitregion_start, bitregion_end,
 992                                   word_mode,
 993                                   value_word, reverse, fallback_p))
 994             {
 995               delete_insns_since (last);
 996               return false;
 997             }
 998         }
 999       return true;
1000     }
1001
1002   /* If VALUE has a floating-point or complex mode, access it as an
1003      integer of the corresponding size.  This can occur on a machine
1004      with 64 bit registers that uses SFmode for float.  It can also
1005      occur for unaligned float or complex fields.  */
1006   rtx orig_value = value;
1007   scalar_int_mode value_mode;
1008   if (GET_MODE (value) == VOIDmode)
1009     /* By this point we've dealt with values that are bigger than a word,
1010        so word_mode is a conservatively correct choice.  */
1011     value_mode = word_mode;
1012   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1013     {
1014       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1015       value = gen_reg_rtx (value_mode);
1016       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1017     }
1018
1019   /* If OP0 is a multi-word register, narrow it to the affected word.
1020      If the region spans two words, defer to store_split_bit_field.
1021      Don't do this if op0 is a single hard register wider than word
1022      such as a float or vector register.  */
1023   if (!MEM_P (op0)
1024       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1025       && (!REG_P (op0)
1026           || !HARD_REGISTER_P (op0)
1027           || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1028     {
1029       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1030         {
1031           if (!fallback_p)
1032             return false;
1033
1034           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1035                                  bitregion_start, bitregion_end,
1036                                  value, value_mode, reverse);
1037           return true;
1038         }
1039       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1040                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1041       gcc_assert (op0);
1042       op0_mode = word_mode;
1043       bitnum %= BITS_PER_WORD;
1044     }
1045
1046   /* From here on we can assume that the field to be stored in fits
1047      within a word.  If the destination is a register, it too fits
1048      in a word.  */
1049
1050   extraction_insn insv;
1051   if (!MEM_P (op0)
1052       && !reverse
1053       && get_best_reg_extraction_insn (&insv, EP_insv,
1054                                        GET_MODE_BITSIZE (op0_mode.require ()),
1055                                        fieldmode)
1056       && store_bit_field_using_insv (&insv, op0, op0_mode,
1057                                      bitsize, bitnum, value, value_mode))
1058     return true;
1059
1060   /* If OP0 is a memory, try copying it to a register and seeing if a
1061      cheap register alternative is available.  */
1062   if (MEM_P (op0) && !reverse)
1063     {
1064       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1065                                         fieldmode)
1066           && store_bit_field_using_insv (&insv, op0, op0_mode,
1067                                          bitsize, bitnum, value, value_mode))
1068         return true;
1069
1070       rtx_insn *last = get_last_insn ();
1071
1072       /* Try loading part of OP0 into a register, inserting the bitfield
1073          into that, and then copying the result back to OP0.  */
1074       unsigned HOST_WIDE_INT bitpos;
1075       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1076                                                bitregion_start, bitregion_end,
1077                                                fieldmode, &bitpos);
1078       if (xop0)
1079         {
1080           rtx tempreg = copy_to_reg (xop0);
1081           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1082                                  bitregion_start, bitregion_end,
1083                                  fieldmode, orig_value, reverse, false))
1084             {
1085               emit_move_insn (xop0, tempreg);
1086               return true;
1087             }
1088           delete_insns_since (last);
1089         }
1090     }
1091
1092   if (!fallback_p)
1093     return false;
1094
1095   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1096                          bitregion_end, value, value_mode, reverse);
1097   return true;
1098 }
1099
1100 /* Generate code to store value from rtx VALUE
1101    into a bit-field within structure STR_RTX
1102    containing BITSIZE bits starting at bit BITNUM.
1103
1104    BITREGION_START is bitpos of the first bitfield in this region.
1105    BITREGION_END is the bitpos of the ending bitfield in this region.
1106    These two fields are 0, if the C++ memory model does not apply,
1107    or we are not interested in keeping track of bitfield regions.
1108
1109    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1110
1111    If REVERSE is true, the store is to be done in reverse order.  */
1112
1113 void
1114 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1115                  poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1116                  machine_mode fieldmode,
1117                  rtx value, bool reverse)
1118 {
1119   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1120   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1121   scalar_int_mode int_mode;
1122   if (bitsize.is_constant (&ibitsize)
1123       && bitnum.is_constant (&ibitnum)
1124       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1125       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1126                                      bitregion_start, bitregion_end))
1127     {
1128       /* Storing of a full word can be done with a simple store.
1129          We know here that the field can be accessed with one single
1130          instruction.  For targets that support unaligned memory,
1131          an unaligned access may be necessary.  */
1132       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1133         {
1134           str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1135                                              ibitnum / BITS_PER_UNIT);
1136           if (reverse)
1137             value = flip_storage_order (int_mode, value);
1138           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1139           emit_move_insn (str_rtx, value);
1140         }
1141       else
1142         {
1143           rtx temp;
1144
1145           str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1146                                           ibitnum, &ibitnum);
1147           gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1148           temp = copy_to_reg (str_rtx);
1149           if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1150                                   int_mode, value, reverse, true))
1151             gcc_unreachable ();
1152
1153           emit_move_insn (str_rtx, temp);
1154         }
1155
1156       return;
1157     }
1158
1159   /* Under the C++0x memory model, we must not touch bits outside the
1160      bit region.  Adjust the address to start at the beginning of the
1161      bit region.  */
1162   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1163     {
1164       scalar_int_mode best_mode;
1165       machine_mode addr_mode = VOIDmode;
1166
1167       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1168       bitnum -= bitregion_start;
1169       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1170       bitregion_end -= bitregion_start;
1171       bitregion_start = 0;
1172       if (bitsize.is_constant (&ibitsize)
1173           && bitnum.is_constant (&ibitnum)
1174           && get_best_mode (ibitsize, ibitnum,
1175                             bitregion_start, bitregion_end,
1176                             MEM_ALIGN (str_rtx), INT_MAX,
1177                             MEM_VOLATILE_P (str_rtx), &best_mode))
1178         addr_mode = best_mode;
1179       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1180                                               offset, size);
1181     }
1182
1183   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1184                           bitregion_start, bitregion_end,
1185                           fieldmode, value, reverse, true))
1186     gcc_unreachable ();
1187 }
1188 \f
1189 /* Use shifts and boolean operations to store VALUE into a bit field of
1190    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1191    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1192    the mode of VALUE.
1193
1194    If REVERSE is true, the store is to be done in reverse order.  */
1195
1196 static void
1197 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1198                        unsigned HOST_WIDE_INT bitsize,
1199                        unsigned HOST_WIDE_INT bitnum,
1200                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1201                        rtx value, scalar_int_mode value_mode, bool reverse)
1202 {
1203   /* There is a case not handled here:
1204      a structure with a known alignment of just a halfword
1205      and a field split across two aligned halfwords within the structure.
1206      Or likewise a structure with a known alignment of just a byte
1207      and a field split across two bytes.
1208      Such cases are not supposed to be able to occur.  */
1209
1210   scalar_int_mode best_mode;
1211   if (MEM_P (op0))
1212     {
1213       unsigned int max_bitsize = BITS_PER_WORD;
1214       scalar_int_mode imode;
1215       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1216         max_bitsize = GET_MODE_BITSIZE (imode);
1217
1218       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1219                           MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1220                           &best_mode))
1221         {
1222           /* The only way this should occur is if the field spans word
1223              boundaries.  */
1224           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1225                                  bitregion_start, bitregion_end,
1226                                  value, value_mode, reverse);
1227           return;
1228         }
1229
1230       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1231     }
1232   else
1233     best_mode = op0_mode.require ();
1234
1235   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1236                            value, value_mode, reverse);
1237 }
1238
1239 /* Helper function for store_fixed_bit_field, stores
1240    the bit field always using MODE, which is the mode of OP0.  The other
1241    arguments are as for store_fixed_bit_field.  */
1242
1243 static void
1244 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1245                          unsigned HOST_WIDE_INT bitsize,
1246                          unsigned HOST_WIDE_INT bitnum,
1247                          rtx value, scalar_int_mode value_mode, bool reverse)
1248 {
1249   rtx temp;
1250   int all_zero = 0;
1251   int all_one = 0;
1252
1253   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1254      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1255
1256   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1257     /* BITNUM is the distance between our msb
1258        and that of the containing datum.
1259        Convert it to the distance from the lsb.  */
1260     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1261
1262   /* Now BITNUM is always the distance between our lsb
1263      and that of OP0.  */
1264
1265   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1266      we must first convert its mode to MODE.  */
1267
1268   if (CONST_INT_P (value))
1269     {
1270       unsigned HOST_WIDE_INT v = UINTVAL (value);
1271
1272       if (bitsize < HOST_BITS_PER_WIDE_INT)
1273         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1274
1275       if (v == 0)
1276         all_zero = 1;
1277       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1278                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1279                || (bitsize == HOST_BITS_PER_WIDE_INT
1280                    && v == HOST_WIDE_INT_M1U))
1281         all_one = 1;
1282
1283       value = lshift_value (mode, v, bitnum);
1284     }
1285   else
1286     {
1287       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1288                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1289
1290       if (value_mode != mode)
1291         value = convert_to_mode (mode, value, 1);
1292
1293       if (must_and)
1294         value = expand_binop (mode, and_optab, value,
1295                               mask_rtx (mode, 0, bitsize, 0),
1296                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1297       if (bitnum > 0)
1298         value = expand_shift (LSHIFT_EXPR, mode, value,
1299                               bitnum, NULL_RTX, 1);
1300     }
1301
1302   if (reverse)
1303     value = flip_storage_order (mode, value);
1304
1305   /* Now clear the chosen bits in OP0,
1306      except that if VALUE is -1 we need not bother.  */
1307   /* We keep the intermediates in registers to allow CSE to combine
1308      consecutive bitfield assignments.  */
1309
1310   temp = force_reg (mode, op0);
1311
1312   if (! all_one)
1313     {
1314       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1315       if (reverse)
1316         mask = flip_storage_order (mode, mask);
1317       temp = expand_binop (mode, and_optab, temp, mask,
1318                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1319       temp = force_reg (mode, temp);
1320     }
1321
1322   /* Now logical-or VALUE into OP0, unless it is zero.  */
1323
1324   if (! all_zero)
1325     {
1326       temp = expand_binop (mode, ior_optab, temp, value,
1327                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1328       temp = force_reg (mode, temp);
1329     }
1330
1331   if (op0 != temp)
1332     {
1333       op0 = copy_rtx (op0);
1334       emit_move_insn (op0, temp);
1335     }
1336 }
1337 \f
1338 /* Store a bit field that is split across multiple accessible memory objects.
1339
1340    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1341    BITSIZE is the field width; BITPOS the position of its first bit
1342    (within the word).
1343    VALUE is the value to store, which has mode VALUE_MODE.
1344    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1345    a BLKmode MEM.
1346
1347    If REVERSE is true, the store is to be done in reverse order.
1348
1349    This does not yet handle fields wider than BITS_PER_WORD.  */
1350
1351 static void
1352 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1353                        unsigned HOST_WIDE_INT bitsize,
1354                        unsigned HOST_WIDE_INT bitpos,
1355                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1356                        rtx value, scalar_int_mode value_mode, bool reverse)
1357 {
1358   unsigned int unit, total_bits, bitsdone = 0;
1359
1360   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1361      much at a time.  */
1362   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1363     unit = BITS_PER_WORD;
1364   else
1365     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1366
1367   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1368      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1369      again, and we will mutually recurse forever.  */
1370   if (MEM_P (op0) && op0_mode.exists ())
1371     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1372
1373   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1374      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1375      that VALUE might be a floating-point constant.  */
1376   if (CONSTANT_P (value) && !CONST_INT_P (value))
1377     {
1378       rtx word = gen_lowpart_common (word_mode, value);
1379
1380       if (word && (value != word))
1381         value = word;
1382       else
1383         value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1384       value_mode = word_mode;
1385     }
1386
1387   total_bits = GET_MODE_BITSIZE (value_mode);
1388
1389   while (bitsdone < bitsize)
1390     {
1391       unsigned HOST_WIDE_INT thissize;
1392       unsigned HOST_WIDE_INT thispos;
1393       unsigned HOST_WIDE_INT offset;
1394       rtx part;
1395
1396       offset = (bitpos + bitsdone) / unit;
1397       thispos = (bitpos + bitsdone) % unit;
1398
1399       /* When region of bytes we can touch is restricted, decrease
1400          UNIT close to the end of the region as needed.  If op0 is a REG
1401          or SUBREG of REG, don't do this, as there can't be data races
1402          on a register and we can expand shorter code in some cases.  */
1403       if (maybe_ne (bitregion_end, 0U)
1404           && unit > BITS_PER_UNIT
1405           && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1406           && !REG_P (op0)
1407           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1408         {
1409           unit = unit / 2;
1410           continue;
1411         }
1412
1413       /* THISSIZE must not overrun a word boundary.  Otherwise,
1414          store_fixed_bit_field will call us again, and we will mutually
1415          recurse forever.  */
1416       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1417       thissize = MIN (thissize, unit - thispos);
1418
1419       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1420         {
1421           /* Fetch successively less significant portions.  */
1422           if (CONST_INT_P (value))
1423             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1424                              >> (bitsize - bitsdone - thissize))
1425                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1426           /* Likewise, but the source is little-endian.  */
1427           else if (reverse)
1428             part = extract_fixed_bit_field (word_mode, value, value_mode,
1429                                             thissize,
1430                                             bitsize - bitsdone - thissize,
1431                                             NULL_RTX, 1, false);
1432           else
1433             /* The args are chosen so that the last part includes the
1434                lsb.  Give extract_bit_field the value it needs (with
1435                endianness compensation) to fetch the piece we want.  */
1436             part = extract_fixed_bit_field (word_mode, value, value_mode,
1437                                             thissize,
1438                                             total_bits - bitsize + bitsdone,
1439                                             NULL_RTX, 1, false);
1440         }
1441       else
1442         {
1443           /* Fetch successively more significant portions.  */
1444           if (CONST_INT_P (value))
1445             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1446                              >> bitsdone)
1447                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1448           /* Likewise, but the source is big-endian.  */
1449           else if (reverse)
1450             part = extract_fixed_bit_field (word_mode, value, value_mode,
1451                                             thissize,
1452                                             total_bits - bitsdone - thissize,
1453                                             NULL_RTX, 1, false);
1454           else
1455             part = extract_fixed_bit_field (word_mode, value, value_mode,
1456                                             thissize, bitsdone, NULL_RTX,
1457                                             1, false);
1458         }
1459
1460       /* If OP0 is a register, then handle OFFSET here.  */
1461       rtx op0_piece = op0;
1462       opt_scalar_int_mode op0_piece_mode = op0_mode;
1463       if (SUBREG_P (op0) || REG_P (op0))
1464         {
1465           scalar_int_mode imode;
1466           if (op0_mode.exists (&imode)
1467               && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1468             {
1469               if (offset)
1470                 op0_piece = const0_rtx;
1471             }
1472           else
1473             {
1474               op0_piece = operand_subword_force (op0,
1475                                                  offset * unit / BITS_PER_WORD,
1476                                                  GET_MODE (op0));
1477               op0_piece_mode = word_mode;
1478             }
1479           offset &= BITS_PER_WORD / unit - 1;
1480         }
1481
1482       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1483          it is just an out-of-bounds access.  Ignore it.  */
1484       if (op0_piece != const0_rtx)
1485         store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1486                                offset * unit + thispos, bitregion_start,
1487                                bitregion_end, part, word_mode, reverse);
1488       bitsdone += thissize;
1489     }
1490 }
1491 \f
1492 /* A subroutine of extract_bit_field_1 that converts return value X
1493    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1494    to extract_bit_field.  */
1495
1496 static rtx
1497 convert_extracted_bit_field (rtx x, machine_mode mode,
1498                              machine_mode tmode, bool unsignedp)
1499 {
1500   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1501     return x;
1502
1503   /* If the x mode is not a scalar integral, first convert to the
1504      integer mode of that size and then access it as a floating-point
1505      value via a SUBREG.  */
1506   if (!SCALAR_INT_MODE_P (tmode))
1507     {
1508       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1509       x = convert_to_mode (int_mode, x, unsignedp);
1510       x = force_reg (int_mode, x);
1511       return gen_lowpart (tmode, x);
1512     }
1513
1514   return convert_to_mode (tmode, x, unsignedp);
1515 }
1516
1517 /* Try to use an ext(z)v pattern to extract a field from OP0.
1518    Return the extracted value on success, otherwise return null.
1519    EXTV describes the extraction instruction to use.  If OP0_MODE
1520    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1521    The other arguments are as for extract_bit_field.  */
1522
1523 static rtx
1524 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1525                               opt_scalar_int_mode op0_mode,
1526                               unsigned HOST_WIDE_INT bitsize,
1527                               unsigned HOST_WIDE_INT bitnum,
1528                               int unsignedp, rtx target,
1529                               machine_mode mode, machine_mode tmode)
1530 {
1531   class expand_operand ops[4];
1532   rtx spec_target = target;
1533   rtx spec_target_subreg = 0;
1534   scalar_int_mode ext_mode = extv->field_mode;
1535   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1536
1537   if (bitsize == 0 || unit < bitsize)
1538     return NULL_RTX;
1539
1540   if (MEM_P (op0))
1541     /* Get a reference to the first byte of the field.  */
1542     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1543                                 &bitnum);
1544   else
1545     {
1546       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1547       if (BYTES_BIG_ENDIAN)
1548         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1549
1550       /* If op0 is a register, we need it in EXT_MODE to make it
1551          acceptable to the format of ext(z)v.  */
1552       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1553         return NULL_RTX;
1554       if (REG_P (op0) && op0_mode.require () != ext_mode)
1555         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1556     }
1557
1558   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1559      "backwards" from the size of the unit we are extracting from.
1560      Otherwise, we count bits from the most significant on a
1561      BYTES/BITS_BIG_ENDIAN machine.  */
1562
1563   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1564     bitnum = unit - bitsize - bitnum;
1565
1566   if (target == 0)
1567     target = spec_target = gen_reg_rtx (tmode);
1568
1569   if (GET_MODE (target) != ext_mode)
1570     {
1571       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1572          between the mode of the extraction (word_mode) and the target
1573          mode.  Instead, create a temporary and use convert_move to set
1574          the target.  */
1575       if (REG_P (target)
1576           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1577         {
1578           target = gen_lowpart (ext_mode, target);
1579           if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1580             spec_target_subreg = target;
1581         }
1582       else
1583         target = gen_reg_rtx (ext_mode);
1584     }
1585
1586   create_output_operand (&ops[0], target, ext_mode);
1587   create_fixed_operand (&ops[1], op0);
1588   create_integer_operand (&ops[2], bitsize);
1589   create_integer_operand (&ops[3], bitnum);
1590   if (maybe_expand_insn (extv->icode, 4, ops))
1591     {
1592       target = ops[0].value;
1593       if (target == spec_target)
1594         return target;
1595       if (target == spec_target_subreg)
1596         return spec_target;
1597       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1598     }
1599   return NULL_RTX;
1600 }
1601
1602 /* See whether it would be valid to extract the part of OP0 described
1603    by BITNUM and BITSIZE into a value of mode MODE using a subreg
1604    operation.  Return the subreg if so, otherwise return null.  */
1605
1606 static rtx
1607 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1608                              poly_uint64 bitsize, poly_uint64 bitnum)
1609 {
1610   poly_uint64 bytenum;
1611   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1612       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1613       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1614       && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1615     return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1616   return NULL_RTX;
1617 }
1618
1619 /* A subroutine of extract_bit_field, with the same arguments.
1620    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1621    if we can find no other means of implementing the operation.
1622    if FALLBACK_P is false, return NULL instead.  */
1623
1624 static rtx
1625 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1626                      int unsignedp, rtx target, machine_mode mode,
1627                      machine_mode tmode, bool reverse, bool fallback_p,
1628                      rtx *alt_rtl)
1629 {
1630   rtx op0 = str_rtx;
1631   machine_mode mode1;
1632
1633   if (tmode == VOIDmode)
1634     tmode = mode;
1635
1636   while (GET_CODE (op0) == SUBREG)
1637     {
1638       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1639       op0 = SUBREG_REG (op0);
1640     }
1641
1642   /* If we have an out-of-bounds access to a register, just return an
1643      uninitialized register of the required mode.  This can occur if the
1644      source code contains an out-of-bounds access to a small array.  */
1645   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1646     return gen_reg_rtx (tmode);
1647
1648   if (REG_P (op0)
1649       && mode == GET_MODE (op0)
1650       && known_eq (bitnum, 0U)
1651       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1652     {
1653       if (reverse)
1654         op0 = flip_storage_order (mode, op0);
1655       /* We're trying to extract a full register from itself.  */
1656       return op0;
1657     }
1658
1659   /* First try to check for vector from vector extractions.  */
1660   if (VECTOR_MODE_P (GET_MODE (op0))
1661       && !MEM_P (op0)
1662       && VECTOR_MODE_P (tmode)
1663       && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1664       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1665     {
1666       machine_mode new_mode = GET_MODE (op0);
1667       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1668         {
1669           scalar_mode inner_mode = GET_MODE_INNER (tmode);
1670           poly_uint64 nunits;
1671           if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1672                            GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1673               || !related_vector_mode (tmode, inner_mode,
1674                                        nunits).exists (&new_mode)
1675               || maybe_ne (GET_MODE_SIZE (new_mode),
1676                            GET_MODE_SIZE (GET_MODE (op0))))
1677             new_mode = VOIDmode;
1678         }
1679       poly_uint64 pos;
1680       if (new_mode != VOIDmode
1681           && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1682               != CODE_FOR_nothing)
1683           && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1684         {
1685           class expand_operand ops[3];
1686           machine_mode outermode = new_mode;
1687           machine_mode innermode = tmode;
1688           enum insn_code icode
1689             = convert_optab_handler (vec_extract_optab, outermode, innermode);
1690
1691           if (new_mode != GET_MODE (op0))
1692             op0 = gen_lowpart (new_mode, op0);
1693           create_output_operand (&ops[0], target, innermode);
1694           ops[0].target = 1;
1695           create_input_operand (&ops[1], op0, outermode);
1696           create_integer_operand (&ops[2], pos);
1697           if (maybe_expand_insn (icode, 3, ops))
1698             {
1699               if (alt_rtl && ops[0].target)
1700                 *alt_rtl = target;
1701               target = ops[0].value;
1702               if (GET_MODE (target) != mode)
1703                 return gen_lowpart (tmode, target);
1704               return target;
1705             }
1706         }
1707     }
1708
1709   /* See if we can get a better vector mode before extracting.  */
1710   if (VECTOR_MODE_P (GET_MODE (op0))
1711       && !MEM_P (op0)
1712       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1713     {
1714       machine_mode new_mode;
1715
1716       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1717         new_mode = MIN_MODE_VECTOR_FLOAT;
1718       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1719         new_mode = MIN_MODE_VECTOR_FRACT;
1720       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1721         new_mode = MIN_MODE_VECTOR_UFRACT;
1722       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1723         new_mode = MIN_MODE_VECTOR_ACCUM;
1724       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1725         new_mode = MIN_MODE_VECTOR_UACCUM;
1726       else
1727         new_mode = MIN_MODE_VECTOR_INT;
1728
1729       FOR_EACH_MODE_FROM (new_mode, new_mode)
1730         if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1731             && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1732             && targetm.vector_mode_supported_p (new_mode))
1733           break;
1734       if (new_mode != VOIDmode)
1735         op0 = gen_lowpart (new_mode, op0);
1736     }
1737
1738   /* Use vec_extract patterns for extracting parts of vectors whenever
1739      available.  If that fails, see whether the current modes and bitregion
1740      give a natural subreg.  */
1741   machine_mode outermode = GET_MODE (op0);
1742   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1743     {
1744       scalar_mode innermode = GET_MODE_INNER (outermode);
1745       enum insn_code icode
1746         = convert_optab_handler (vec_extract_optab, outermode, innermode);
1747       poly_uint64 pos;
1748       if (icode != CODE_FOR_nothing
1749           && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1750           && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1751         {
1752           class expand_operand ops[3];
1753
1754           create_output_operand (&ops[0], target, innermode);
1755           ops[0].target = 1;
1756           create_input_operand (&ops[1], op0, outermode);
1757           create_integer_operand (&ops[2], pos);
1758           if (maybe_expand_insn (icode, 3, ops))
1759             {
1760               if (alt_rtl && ops[0].target)
1761                 *alt_rtl = target;
1762               target = ops[0].value;
1763               if (GET_MODE (target) != mode)
1764                 return gen_lowpart (tmode, target);
1765               return target;
1766             }
1767         }
1768       /* Using subregs is useful if we're extracting one register vector
1769          from a multi-register vector.  extract_bit_field_as_subreg checks
1770          for valid bitsize and bitnum, so we don't need to do that here.  */
1771       if (VECTOR_MODE_P (mode))
1772         {
1773           rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1774           if (sub)
1775             return sub;
1776         }
1777     }
1778
1779   /* Make sure we are playing with integral modes.  Pun with subregs
1780      if we aren't.  */
1781   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1782   scalar_int_mode imode;
1783   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1784     {
1785       if (MEM_P (op0))
1786         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1787                                             0, MEM_SIZE (op0));
1788       else if (op0_mode.exists (&imode))
1789         {
1790           op0 = gen_lowpart (imode, op0);
1791
1792           /* If we got a SUBREG, force it into a register since we
1793              aren't going to be able to do another SUBREG on it.  */
1794           if (GET_CODE (op0) == SUBREG)
1795             op0 = force_reg (imode, op0);
1796         }
1797       else
1798         {
1799           poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1800           rtx mem = assign_stack_temp (GET_MODE (op0), size);
1801           emit_move_insn (mem, op0);
1802           op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1803         }
1804     }
1805
1806   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1807      If that's wrong, the solution is to test for it and set TARGET to 0
1808      if needed.  */
1809
1810   /* Get the mode of the field to use for atomic access or subreg
1811      conversion.  */
1812   if (!SCALAR_INT_MODE_P (tmode)
1813       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1814     mode1 = mode;
1815   gcc_assert (mode1 != BLKmode);
1816
1817   /* Extraction of a full MODE1 value can be done with a subreg as long
1818      as the least significant bit of the value is the least significant
1819      bit of either OP0 or a word of OP0.  */
1820   if (!MEM_P (op0) && !reverse)
1821     {
1822       rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1823       if (sub)
1824         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1825     }
1826
1827   /* Extraction of a full MODE1 value can be done with a load as long as
1828      the field is on a byte boundary and is sufficiently aligned.  */
1829   poly_uint64 bytenum;
1830   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1831     {
1832       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1833       if (reverse)
1834         op0 = flip_storage_order (mode1, op0);
1835       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1836     }
1837
1838   /* If we have a memory source and a non-constant bit offset, restrict
1839      the memory to the referenced bytes.  This is a worst-case fallback
1840      but is useful for things like vector booleans.  */
1841   if (MEM_P (op0) && !bitnum.is_constant ())
1842     {
1843       bytenum = bits_to_bytes_round_down (bitnum);
1844       bitnum = num_trailing_bits (bitnum);
1845       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1846       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1847       op0_mode = opt_scalar_int_mode ();
1848     }
1849
1850   /* It's possible we'll need to handle other cases here for
1851      polynomial bitnum and bitsize.  */
1852
1853   /* From here on we need to be looking at a fixed-size insertion.  */
1854   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1855                                      bitnum.to_constant (), unsignedp,
1856                                      target, mode, tmode, reverse, fallback_p);
1857 }
1858
1859 /* Subroutine of extract_bit_field_1, with the same arguments, except
1860    that BITSIZE and BITNUM are constant.  Handle cases specific to
1861    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1862    otherwise OP0 is a BLKmode MEM.  */
1863
1864 static rtx
1865 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1866                             unsigned HOST_WIDE_INT bitsize,
1867                             unsigned HOST_WIDE_INT bitnum, int unsignedp,
1868                             rtx target, machine_mode mode, machine_mode tmode,
1869                             bool reverse, bool fallback_p)
1870 {
1871   /* Handle fields bigger than a word.  */
1872
1873   if (bitsize > BITS_PER_WORD)
1874     {
1875       /* Here we transfer the words of the field
1876          in the order least significant first.
1877          This is because the most significant word is the one which may
1878          be less than full.  */
1879
1880       const bool backwards = WORDS_BIG_ENDIAN;
1881       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1882       unsigned int i;
1883       rtx_insn *last;
1884
1885       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1886         target = gen_reg_rtx (mode);
1887
1888       /* In case we're about to clobber a base register or something
1889          (see gcc.c-torture/execute/20040625-1.c).   */
1890       if (reg_mentioned_p (target, op0))
1891         target = gen_reg_rtx (mode);
1892
1893       /* Indicate for flow that the entire target reg is being set.  */
1894       emit_clobber (target);
1895
1896       /* The mode must be fixed-size, since extract_bit_field_1 handles
1897          extractions from variable-sized objects before calling this
1898          function.  */
1899       unsigned int target_size
1900         = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1901       last = get_last_insn ();
1902       for (i = 0; i < nwords; i++)
1903         {
1904           /* If I is 0, use the low-order word in both field and target;
1905              if I is 1, use the next to lowest word; and so on.  */
1906           /* Word number in TARGET to use.  */
1907           unsigned int wordnum
1908             = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1909           /* Offset from start of field in OP0.  */
1910           unsigned int bit_offset = (backwards ^ reverse
1911                                      ? MAX ((int) bitsize - ((int) i + 1)
1912                                             * BITS_PER_WORD,
1913                                             0)
1914                                      : (int) i * BITS_PER_WORD);
1915           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1916           rtx result_part
1917             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1918                                              bitsize - i * BITS_PER_WORD),
1919                                    bitnum + bit_offset, 1, target_part,
1920                                    mode, word_mode, reverse, fallback_p, NULL);
1921
1922           gcc_assert (target_part);
1923           if (!result_part)
1924             {
1925               delete_insns_since (last);
1926               return NULL;
1927             }
1928
1929           if (result_part != target_part)
1930             emit_move_insn (target_part, result_part);
1931         }
1932
1933       if (unsignedp)
1934         {
1935           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1936              need to be zero'd out.  */
1937           if (target_size > nwords * UNITS_PER_WORD)
1938             {
1939               unsigned int i, total_words;
1940
1941               total_words = target_size / UNITS_PER_WORD;
1942               for (i = nwords; i < total_words; i++)
1943                 emit_move_insn
1944                   (operand_subword (target,
1945                                     backwards ? total_words - i - 1 : i,
1946                                     1, VOIDmode),
1947                    const0_rtx);
1948             }
1949           return target;
1950         }
1951
1952       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1953       target = expand_shift (LSHIFT_EXPR, mode, target,
1954                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1955       return expand_shift (RSHIFT_EXPR, mode, target,
1956                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1957     }
1958
1959   /* If OP0 is a multi-word register, narrow it to the affected word.
1960      If the region spans two words, defer to extract_split_bit_field.  */
1961   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1962     {
1963       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1964         {
1965           if (!fallback_p)
1966             return NULL_RTX;
1967           target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1968                                             unsignedp, reverse);
1969           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1970         }
1971       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1972                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1973       op0_mode = word_mode;
1974       bitnum %= BITS_PER_WORD;
1975     }
1976
1977   /* From here on we know the desired field is smaller than a word.
1978      If OP0 is a register, it too fits within a word.  */
1979   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1980   extraction_insn extv;
1981   if (!MEM_P (op0)
1982       && !reverse
1983       /* ??? We could limit the structure size to the part of OP0 that
1984          contains the field, with appropriate checks for endianness
1985          and TARGET_TRULY_NOOP_TRUNCATION.  */
1986       && get_best_reg_extraction_insn (&extv, pattern,
1987                                        GET_MODE_BITSIZE (op0_mode.require ()),
1988                                        tmode))
1989     {
1990       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1991                                                  bitsize, bitnum,
1992                                                  unsignedp, target, mode,
1993                                                  tmode);
1994       if (result)
1995         return result;
1996     }
1997
1998   /* If OP0 is a memory, try copying it to a register and seeing if a
1999      cheap register alternative is available.  */
2000   if (MEM_P (op0) & !reverse)
2001     {
2002       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2003                                         tmode))
2004         {
2005           rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2006                                                      bitsize, bitnum,
2007                                                      unsignedp, target, mode,
2008                                                      tmode);
2009           if (result)
2010             return result;
2011         }
2012
2013       rtx_insn *last = get_last_insn ();
2014
2015       /* Try loading part of OP0 into a register and extracting the
2016          bitfield from that.  */
2017       unsigned HOST_WIDE_INT bitpos;
2018       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2019                                                0, 0, tmode, &bitpos);
2020       if (xop0)
2021         {
2022           xop0 = copy_to_reg (xop0);
2023           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2024                                             unsignedp, target,
2025                                             mode, tmode, reverse, false, NULL);
2026           if (result)
2027             return result;
2028           delete_insns_since (last);
2029         }
2030     }
2031
2032   if (!fallback_p)
2033     return NULL;
2034
2035   /* Find a correspondingly-sized integer field, so we can apply
2036      shifts and masks to it.  */
2037   scalar_int_mode int_mode;
2038   if (!int_mode_for_mode (tmode).exists (&int_mode))
2039     /* If this fails, we should probably push op0 out to memory and then
2040        do a load.  */
2041     int_mode = int_mode_for_mode (mode).require ();
2042
2043   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2044                                     bitnum, target, unsignedp, reverse);
2045
2046   /* Complex values must be reversed piecewise, so we need to undo the global
2047      reversal, convert to the complex mode and reverse again.  */
2048   if (reverse && COMPLEX_MODE_P (tmode))
2049     {
2050       target = flip_storage_order (int_mode, target);
2051       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2052       target = flip_storage_order (tmode, target);
2053     }
2054   else
2055     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2056
2057   return target;
2058 }
2059
2060 /* Generate code to extract a byte-field from STR_RTX
2061    containing BITSIZE bits, starting at BITNUM,
2062    and put it in TARGET if possible (if TARGET is nonzero).
2063    Regardless of TARGET, we return the rtx for where the value is placed.
2064
2065    STR_RTX is the structure containing the byte (a REG or MEM).
2066    UNSIGNEDP is nonzero if this is an unsigned bit field.
2067    MODE is the natural mode of the field value once extracted.
2068    TMODE is the mode the caller would like the value to have;
2069    but the value may be returned with type MODE instead.
2070
2071    If REVERSE is true, the extraction is to be done in reverse order.
2072
2073    If a TARGET is specified and we can store in it at no extra cost,
2074    we do so, and return TARGET.
2075    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2076    if they are equally easy.
2077
2078    If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2079    then *ALT_RTL is set to TARGET (before legitimziation).  */
2080
2081 rtx
2082 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2083                    int unsignedp, rtx target, machine_mode mode,
2084                    machine_mode tmode, bool reverse, rtx *alt_rtl)
2085 {
2086   machine_mode mode1;
2087
2088   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2089   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2090     mode1 = GET_MODE (str_rtx);
2091   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2092     mode1 = GET_MODE (target);
2093   else
2094     mode1 = tmode;
2095
2096   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2097   scalar_int_mode int_mode;
2098   if (bitsize.is_constant (&ibitsize)
2099       && bitnum.is_constant (&ibitnum)
2100       && is_a <scalar_int_mode> (mode1, &int_mode)
2101       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2102                                      int_mode, 0, 0))
2103     {
2104       /* Extraction of a full INT_MODE value can be done with a simple load.
2105          We know here that the field can be accessed with one single
2106          instruction.  For targets that support unaligned memory,
2107          an unaligned access may be necessary.  */
2108       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2109         {
2110           rtx result = adjust_bitfield_address (str_rtx, int_mode,
2111                                                 ibitnum / BITS_PER_UNIT);
2112           if (reverse)
2113             result = flip_storage_order (int_mode, result);
2114           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2115           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2116         }
2117
2118       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2119                                       &ibitnum);
2120       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2121       str_rtx = copy_to_reg (str_rtx);
2122       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2123                                   target, mode, tmode, reverse, true, alt_rtl);
2124     }
2125
2126   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2127                               target, mode, tmode, reverse, true, alt_rtl);
2128 }
2129 \f
2130 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2131    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2132    otherwise OP0 is a BLKmode MEM.
2133
2134    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2135    If REVERSE is true, the extraction is to be done in reverse order.
2136
2137    If TARGET is nonzero, attempts to store the value there
2138    and return TARGET, but this is not guaranteed.
2139    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2140
2141 static rtx
2142 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2143                          opt_scalar_int_mode op0_mode,
2144                          unsigned HOST_WIDE_INT bitsize,
2145                          unsigned HOST_WIDE_INT bitnum, rtx target,
2146                          int unsignedp, bool reverse)
2147 {
2148   scalar_int_mode mode;
2149   if (MEM_P (op0))
2150     {
2151       if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2152                           BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2153         /* The only way this should occur is if the field spans word
2154            boundaries.  */
2155         return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2156                                         unsignedp, reverse);
2157
2158       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2159     }
2160   else
2161     mode = op0_mode.require ();
2162
2163   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2164                                     target, unsignedp, reverse);
2165 }
2166
2167 /* Helper function for extract_fixed_bit_field, extracts
2168    the bit field always using MODE, which is the mode of OP0.
2169    The other arguments are as for extract_fixed_bit_field.  */
2170
2171 static rtx
2172 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2173                            unsigned HOST_WIDE_INT bitsize,
2174                            unsigned HOST_WIDE_INT bitnum, rtx target,
2175                            int unsignedp, bool reverse)
2176 {
2177   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2178      for invalid input, such as extract equivalent of f5 from
2179      gcc.dg/pr48335-2.c.  */
2180
2181   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2182     /* BITNUM is the distance between our msb and that of OP0.
2183        Convert it to the distance from the lsb.  */
2184     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2185
2186   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2187      We have reduced the big-endian case to the little-endian case.  */
2188   if (reverse)
2189     op0 = flip_storage_order (mode, op0);
2190
2191   if (unsignedp)
2192     {
2193       if (bitnum)
2194         {
2195           /* If the field does not already start at the lsb,
2196              shift it so it does.  */
2197           /* Maybe propagate the target for the shift.  */
2198           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2199           if (tmode != mode)
2200             subtarget = 0;
2201           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2202         }
2203       /* Convert the value to the desired mode.  TMODE must also be a
2204          scalar integer for this conversion to make sense, since we
2205          shouldn't reinterpret the bits.  */
2206       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2207       if (mode != new_mode)
2208         op0 = convert_to_mode (new_mode, op0, 1);
2209
2210       /* Unless the msb of the field used to be the msb when we shifted,
2211          mask out the upper bits.  */
2212
2213       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2214         return expand_binop (new_mode, and_optab, op0,
2215                              mask_rtx (new_mode, 0, bitsize, 0),
2216                              target, 1, OPTAB_LIB_WIDEN);
2217       return op0;
2218     }
2219
2220   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2221      then arithmetic-shift its lsb to the lsb of the word.  */
2222   op0 = force_reg (mode, op0);
2223
2224   /* Find the narrowest integer mode that contains the field.  */
2225
2226   opt_scalar_int_mode mode_iter;
2227   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2228     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2229       break;
2230
2231   mode = mode_iter.require ();
2232   op0 = convert_to_mode (mode, op0, 0);
2233
2234   if (mode != tmode)
2235     target = 0;
2236
2237   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2238     {
2239       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2240       /* Maybe propagate the target for the shift.  */
2241       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2242       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2243     }
2244
2245   return expand_shift (RSHIFT_EXPR, mode, op0,
2246                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2247 }
2248
2249 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2250    VALUE << BITPOS.  */
2251
2252 static rtx
2253 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2254               int bitpos)
2255 {
2256   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2257 }
2258 \f
2259 /* Extract a bit field that is split across two words
2260    and return an RTX for the result.
2261
2262    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2263    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2264    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2265    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2266    a BLKmode MEM.
2267
2268    If REVERSE is true, the extraction is to be done in reverse order.  */
2269
2270 static rtx
2271 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2272                          unsigned HOST_WIDE_INT bitsize,
2273                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2274                          bool reverse)
2275 {
2276   unsigned int unit;
2277   unsigned int bitsdone = 0;
2278   rtx result = NULL_RTX;
2279   int first = 1;
2280
2281   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2282      much at a time.  */
2283   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2284     unit = BITS_PER_WORD;
2285   else
2286     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2287
2288   while (bitsdone < bitsize)
2289     {
2290       unsigned HOST_WIDE_INT thissize;
2291       rtx part;
2292       unsigned HOST_WIDE_INT thispos;
2293       unsigned HOST_WIDE_INT offset;
2294
2295       offset = (bitpos + bitsdone) / unit;
2296       thispos = (bitpos + bitsdone) % unit;
2297
2298       /* THISSIZE must not overrun a word boundary.  Otherwise,
2299          extract_fixed_bit_field will call us again, and we will mutually
2300          recurse forever.  */
2301       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2302       thissize = MIN (thissize, unit - thispos);
2303
2304       /* If OP0 is a register, then handle OFFSET here.  */
2305       rtx op0_piece = op0;
2306       opt_scalar_int_mode op0_piece_mode = op0_mode;
2307       if (SUBREG_P (op0) || REG_P (op0))
2308         {
2309           op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2310           op0_piece_mode = word_mode;
2311           offset = 0;
2312         }
2313
2314       /* Extract the parts in bit-counting order,
2315          whose meaning is determined by BYTES_PER_UNIT.
2316          OFFSET is in UNITs, and UNIT is in bits.  */
2317       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2318                                       thissize, offset * unit + thispos,
2319                                       0, 1, reverse);
2320       bitsdone += thissize;
2321
2322       /* Shift this part into place for the result.  */
2323       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2324         {
2325           if (bitsize != bitsdone)
2326             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2327                                  bitsize - bitsdone, 0, 1);
2328         }
2329       else
2330         {
2331           if (bitsdone != thissize)
2332             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2333                                  bitsdone - thissize, 0, 1);
2334         }
2335
2336       if (first)
2337         result = part;
2338       else
2339         /* Combine the parts with bitwise or.  This works
2340            because we extracted each part as an unsigned bit field.  */
2341         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2342                                OPTAB_LIB_WIDEN);
2343
2344       first = 0;
2345     }
2346
2347   /* Unsigned bit field: we are done.  */
2348   if (unsignedp)
2349     return result;
2350   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2351   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2352                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2353   return expand_shift (RSHIFT_EXPR, word_mode, result,
2354                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2355 }
2356 \f
2357 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2358    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2359    MODE, fill the upper bits with zeros.  Fail if the layout of either
2360    mode is unknown (as for CC modes) or if the extraction would involve
2361    unprofitable mode punning.  Return the value on success, otherwise
2362    return null.
2363
2364    This is different from gen_lowpart* in these respects:
2365
2366      - the returned value must always be considered an rvalue
2367
2368      - when MODE is wider than SRC_MODE, the extraction involves
2369        a zero extension
2370
2371      - when MODE is smaller than SRC_MODE, the extraction involves
2372        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2373
2374    In other words, this routine performs a computation, whereas the
2375    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2376    operations.  */
2377
2378 rtx
2379 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2380 {
2381   scalar_int_mode int_mode, src_int_mode;
2382
2383   if (mode == src_mode)
2384     return src;
2385
2386   if (CONSTANT_P (src))
2387     {
2388       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2389          fails, it will happily create (subreg (symbol_ref)) or similar
2390          invalid SUBREGs.  */
2391       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2392       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2393       if (ret)
2394         return ret;
2395
2396       if (GET_MODE (src) == VOIDmode
2397           || !validate_subreg (mode, src_mode, src, byte))
2398         return NULL_RTX;
2399
2400       src = force_reg (GET_MODE (src), src);
2401       return gen_rtx_SUBREG (mode, src, byte);
2402     }
2403
2404   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2405     return NULL_RTX;
2406
2407   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2408       && targetm.modes_tieable_p (mode, src_mode))
2409     {
2410       rtx x = gen_lowpart_common (mode, src);
2411       if (x)
2412         return x;
2413     }
2414
2415   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2416       || !int_mode_for_mode (mode).exists (&int_mode))
2417     return NULL_RTX;
2418
2419   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2420     return NULL_RTX;
2421   if (!targetm.modes_tieable_p (int_mode, mode))
2422     return NULL_RTX;
2423
2424   src = gen_lowpart (src_int_mode, src);
2425   if (!validate_subreg (int_mode, src_int_mode, src,
2426                         subreg_lowpart_offset (int_mode, src_int_mode)))
2427     return NULL_RTX;
2428
2429   src = convert_modes (int_mode, src_int_mode, src, true);
2430   src = gen_lowpart (mode, src);
2431   return src;
2432 }
2433 \f
2434 /* Add INC into TARGET.  */
2435
2436 void
2437 expand_inc (rtx target, rtx inc)
2438 {
2439   rtx value = expand_binop (GET_MODE (target), add_optab,
2440                             target, inc,
2441                             target, 0, OPTAB_LIB_WIDEN);
2442   if (value != target)
2443     emit_move_insn (target, value);
2444 }
2445
2446 /* Subtract DEC from TARGET.  */
2447
2448 void
2449 expand_dec (rtx target, rtx dec)
2450 {
2451   rtx value = expand_binop (GET_MODE (target), sub_optab,
2452                             target, dec,
2453                             target, 0, OPTAB_LIB_WIDEN);
2454   if (value != target)
2455     emit_move_insn (target, value);
2456 }
2457 \f
2458 /* Output a shift instruction for expression code CODE,
2459    with SHIFTED being the rtx for the value to shift,
2460    and AMOUNT the rtx for the amount to shift by.
2461    Store the result in the rtx TARGET, if that is convenient.
2462    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2463    Return the rtx for where the value is.
2464    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2465    in which case 0 is returned.  */
2466
2467 static rtx
2468 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2469                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2470 {
2471   rtx op1, temp = 0;
2472   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2473   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2474   optab lshift_optab = ashl_optab;
2475   optab rshift_arith_optab = ashr_optab;
2476   optab rshift_uns_optab = lshr_optab;
2477   optab lrotate_optab = rotl_optab;
2478   optab rrotate_optab = rotr_optab;
2479   machine_mode op1_mode;
2480   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2481   int attempt;
2482   bool speed = optimize_insn_for_speed_p ();
2483
2484   op1 = amount;
2485   op1_mode = GET_MODE (op1);
2486
2487   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2488      shift amount is a vector, use the vector/vector shift patterns.  */
2489   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2490     {
2491       lshift_optab = vashl_optab;
2492       rshift_arith_optab = vashr_optab;
2493       rshift_uns_optab = vlshr_optab;
2494       lrotate_optab = vrotl_optab;
2495       rrotate_optab = vrotr_optab;
2496     }
2497
2498   /* Previously detected shift-counts computed by NEGATE_EXPR
2499      and shifted in the other direction; but that does not work
2500      on all machines.  */
2501
2502   if (SHIFT_COUNT_TRUNCATED)
2503     {
2504       if (CONST_INT_P (op1)
2505           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2506               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2507         op1 = gen_int_shift_amount (mode,
2508                                     (unsigned HOST_WIDE_INT) INTVAL (op1)
2509                                     % GET_MODE_BITSIZE (scalar_mode));
2510       else if (GET_CODE (op1) == SUBREG
2511                && subreg_lowpart_p (op1)
2512                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2513                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2514         op1 = SUBREG_REG (op1);
2515     }
2516
2517   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2518      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2519      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2520      amount instead.  */
2521   if (rotate
2522       && CONST_INT_P (op1)
2523       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2524                    GET_MODE_BITSIZE (scalar_mode) - 1))
2525     {
2526       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2527                                          - INTVAL (op1)));
2528       left = !left;
2529       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2530     }
2531
2532   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2533      Note that this is not the case for bigger values.  For instance a rotation
2534      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2535      0x04030201 (bswapsi).  */
2536   if (rotate
2537       && CONST_INT_P (op1)
2538       && INTVAL (op1) == BITS_PER_UNIT
2539       && GET_MODE_SIZE (scalar_mode) == 2
2540       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2541     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2542
2543   if (op1 == const0_rtx)
2544     return shifted;
2545
2546   /* Check whether its cheaper to implement a left shift by a constant
2547      bit count by a sequence of additions.  */
2548   if (code == LSHIFT_EXPR
2549       && CONST_INT_P (op1)
2550       && INTVAL (op1) > 0
2551       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2552       && INTVAL (op1) < MAX_BITS_PER_WORD
2553       && (shift_cost (speed, mode, INTVAL (op1))
2554           > INTVAL (op1) * add_cost (speed, mode))
2555       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2556     {
2557       int i;
2558       for (i = 0; i < INTVAL (op1); i++)
2559         {
2560           temp = force_reg (mode, shifted);
2561           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2562                                   unsignedp, OPTAB_LIB_WIDEN);
2563         }
2564       return shifted;
2565     }
2566
2567   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2568     {
2569       enum optab_methods methods;
2570
2571       if (attempt == 0)
2572         methods = OPTAB_DIRECT;
2573       else if (attempt == 1)
2574         methods = OPTAB_WIDEN;
2575       else
2576         methods = OPTAB_LIB_WIDEN;
2577
2578       if (rotate)
2579         {
2580           /* Widening does not work for rotation.  */
2581           if (methods == OPTAB_WIDEN)
2582             continue;
2583           else if (methods == OPTAB_LIB_WIDEN)
2584             {
2585               /* If we have been unable to open-code this by a rotation,
2586                  do it as the IOR of two shifts.  I.e., to rotate A
2587                  by N bits, compute
2588                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2589                  where C is the bitsize of A.
2590
2591                  It is theoretically possible that the target machine might
2592                  not be able to perform either shift and hence we would
2593                  be making two libcalls rather than just the one for the
2594                  shift (similarly if IOR could not be done).  We will allow
2595                  this extremely unlikely lossage to avoid complicating the
2596                  code below.  */
2597
2598               rtx subtarget = target == shifted ? 0 : target;
2599               rtx new_amount, other_amount;
2600               rtx temp1;
2601
2602               new_amount = op1;
2603               if (op1 == const0_rtx)
2604                 return shifted;
2605               else if (CONST_INT_P (op1))
2606                 other_amount = gen_int_shift_amount
2607                   (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2608               else
2609                 {
2610                   other_amount
2611                     = simplify_gen_unary (NEG, GET_MODE (op1),
2612                                           op1, GET_MODE (op1));
2613                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2614                   other_amount
2615                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2616                                            gen_int_mode (mask, GET_MODE (op1)));
2617                 }
2618
2619               shifted = force_reg (mode, shifted);
2620
2621               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2622                                      mode, shifted, new_amount, 0, 1);
2623               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2624                                       mode, shifted, other_amount,
2625                                       subtarget, 1);
2626               return expand_binop (mode, ior_optab, temp, temp1, target,
2627                                    unsignedp, methods);
2628             }
2629
2630           temp = expand_binop (mode,
2631                                left ? lrotate_optab : rrotate_optab,
2632                                shifted, op1, target, unsignedp, methods);
2633         }
2634       else if (unsignedp)
2635         temp = expand_binop (mode,
2636                              left ? lshift_optab : rshift_uns_optab,
2637                              shifted, op1, target, unsignedp, methods);
2638
2639       /* Do arithmetic shifts.
2640          Also, if we are going to widen the operand, we can just as well
2641          use an arithmetic right-shift instead of a logical one.  */
2642       if (temp == 0 && ! rotate
2643           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2644         {
2645           enum optab_methods methods1 = methods;
2646
2647           /* If trying to widen a log shift to an arithmetic shift,
2648              don't accept an arithmetic shift of the same size.  */
2649           if (unsignedp)
2650             methods1 = OPTAB_MUST_WIDEN;
2651
2652           /* Arithmetic shift */
2653
2654           temp = expand_binop (mode,
2655                                left ? lshift_optab : rshift_arith_optab,
2656                                shifted, op1, target, unsignedp, methods1);
2657         }
2658
2659       /* We used to try extzv here for logical right shifts, but that was
2660          only useful for one machine, the VAX, and caused poor code
2661          generation there for lshrdi3, so the code was deleted and a
2662          define_expand for lshrsi3 was added to vax.md.  */
2663     }
2664
2665   gcc_assert (temp != NULL_RTX || may_fail);
2666   return temp;
2667 }
2668
2669 /* Output a shift instruction for expression code CODE,
2670    with SHIFTED being the rtx for the value to shift,
2671    and AMOUNT the amount to shift by.
2672    Store the result in the rtx TARGET, if that is convenient.
2673    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2674    Return the rtx for where the value is.  */
2675
2676 rtx
2677 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2678               poly_int64 amount, rtx target, int unsignedp)
2679 {
2680   return expand_shift_1 (code, mode, shifted,
2681                          gen_int_shift_amount (mode, amount),
2682                          target, unsignedp);
2683 }
2684
2685 /* Likewise, but return 0 if that cannot be done.  */
2686
2687 static rtx
2688 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2689                     int amount, rtx target, int unsignedp)
2690 {
2691   return expand_shift_1 (code, mode,
2692                          shifted, GEN_INT (amount), target, unsignedp, true);
2693 }
2694
2695 /* Output a shift instruction for expression code CODE,
2696    with SHIFTED being the rtx for the value to shift,
2697    and AMOUNT the tree for the amount to shift by.
2698    Store the result in the rtx TARGET, if that is convenient.
2699    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2700    Return the rtx for where the value is.  */
2701
2702 rtx
2703 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2704                        tree amount, rtx target, int unsignedp)
2705 {
2706   return expand_shift_1 (code, mode,
2707                          shifted, expand_normal (amount), target, unsignedp);
2708 }
2709
2710 \f
2711 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2712                         const struct mult_cost *, machine_mode mode);
2713 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2714                               const struct algorithm *, enum mult_variant);
2715 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2716 static rtx extract_high_half (scalar_int_mode, rtx);
2717 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2718 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2719                                        int, int);
2720 /* Compute and return the best algorithm for multiplying by T.
2721    The algorithm must cost less than cost_limit
2722    If retval.cost >= COST_LIMIT, no algorithm was found and all
2723    other field of the returned struct are undefined.
2724    MODE is the machine mode of the multiplication.  */
2725
2726 static void
2727 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2728             const struct mult_cost *cost_limit, machine_mode mode)
2729 {
2730   int m;
2731   struct algorithm *alg_in, *best_alg;
2732   struct mult_cost best_cost;
2733   struct mult_cost new_limit;
2734   int op_cost, op_latency;
2735   unsigned HOST_WIDE_INT orig_t = t;
2736   unsigned HOST_WIDE_INT q;
2737   int maxm, hash_index;
2738   bool cache_hit = false;
2739   enum alg_code cache_alg = alg_zero;
2740   bool speed = optimize_insn_for_speed_p ();
2741   scalar_int_mode imode;
2742   struct alg_hash_entry *entry_ptr;
2743
2744   /* Indicate that no algorithm is yet found.  If no algorithm
2745      is found, this value will be returned and indicate failure.  */
2746   alg_out->cost.cost = cost_limit->cost + 1;
2747   alg_out->cost.latency = cost_limit->latency + 1;
2748
2749   if (cost_limit->cost < 0
2750       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2751     return;
2752
2753   /* Be prepared for vector modes.  */
2754   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2755
2756   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2757
2758   /* Restrict the bits of "t" to the multiplication's mode.  */
2759   t &= GET_MODE_MASK (imode);
2760
2761   /* t == 1 can be done in zero cost.  */
2762   if (t == 1)
2763     {
2764       alg_out->ops = 1;
2765       alg_out->cost.cost = 0;
2766       alg_out->cost.latency = 0;
2767       alg_out->op[0] = alg_m;
2768       return;
2769     }
2770
2771   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2772      fail now.  */
2773   if (t == 0)
2774     {
2775       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2776         return;
2777       else
2778         {
2779           alg_out->ops = 1;
2780           alg_out->cost.cost = zero_cost (speed);
2781           alg_out->cost.latency = zero_cost (speed);
2782           alg_out->op[0] = alg_zero;
2783           return;
2784         }
2785     }
2786
2787   /* We'll be needing a couple extra algorithm structures now.  */
2788
2789   alg_in = XALLOCA (struct algorithm);
2790   best_alg = XALLOCA (struct algorithm);
2791   best_cost = *cost_limit;
2792
2793   /* Compute the hash index.  */
2794   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2795
2796   /* See if we already know what to do for T.  */
2797   entry_ptr = alg_hash_entry_ptr (hash_index);
2798   if (entry_ptr->t == t
2799       && entry_ptr->mode == mode
2800       && entry_ptr->speed == speed
2801       && entry_ptr->alg != alg_unknown)
2802     {
2803       cache_alg = entry_ptr->alg;
2804
2805       if (cache_alg == alg_impossible)
2806         {
2807           /* The cache tells us that it's impossible to synthesize
2808              multiplication by T within entry_ptr->cost.  */
2809           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2810             /* COST_LIMIT is at least as restrictive as the one
2811                recorded in the hash table, in which case we have no
2812                hope of synthesizing a multiplication.  Just
2813                return.  */
2814             return;
2815
2816           /* If we get here, COST_LIMIT is less restrictive than the
2817              one recorded in the hash table, so we may be able to
2818              synthesize a multiplication.  Proceed as if we didn't
2819              have the cache entry.  */
2820         }
2821       else
2822         {
2823           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2824             /* The cached algorithm shows that this multiplication
2825                requires more cost than COST_LIMIT.  Just return.  This
2826                way, we don't clobber this cache entry with
2827                alg_impossible but retain useful information.  */
2828             return;
2829
2830           cache_hit = true;
2831
2832           switch (cache_alg)
2833             {
2834             case alg_shift:
2835               goto do_alg_shift;
2836
2837             case alg_add_t_m2:
2838             case alg_sub_t_m2:
2839               goto do_alg_addsub_t_m2;
2840
2841             case alg_add_factor:
2842             case alg_sub_factor:
2843               goto do_alg_addsub_factor;
2844
2845             case alg_add_t2_m:
2846               goto do_alg_add_t2_m;
2847
2848             case alg_sub_t2_m:
2849               goto do_alg_sub_t2_m;
2850
2851             default:
2852               gcc_unreachable ();
2853             }
2854         }
2855     }
2856
2857   /* If we have a group of zero bits at the low-order part of T, try
2858      multiplying by the remaining bits and then doing a shift.  */
2859
2860   if ((t & 1) == 0)
2861     {
2862     do_alg_shift:
2863       m = ctz_or_zero (t); /* m = number of low zero bits */
2864       if (m < maxm)
2865         {
2866           q = t >> m;
2867           /* The function expand_shift will choose between a shift and
2868              a sequence of additions, so the observed cost is given as
2869              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2870           op_cost = m * add_cost (speed, mode);
2871           if (shift_cost (speed, mode, m) < op_cost)
2872             op_cost = shift_cost (speed, mode, m);
2873           new_limit.cost = best_cost.cost - op_cost;
2874           new_limit.latency = best_cost.latency - op_cost;
2875           synth_mult (alg_in, q, &new_limit, mode);
2876
2877           alg_in->cost.cost += op_cost;
2878           alg_in->cost.latency += op_cost;
2879           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2880             {
2881               best_cost = alg_in->cost;
2882               std::swap (alg_in, best_alg);
2883               best_alg->log[best_alg->ops] = m;
2884               best_alg->op[best_alg->ops] = alg_shift;
2885             }
2886
2887           /* See if treating ORIG_T as a signed number yields a better
2888              sequence.  Try this sequence only for a negative ORIG_T
2889              as it would be useless for a non-negative ORIG_T.  */
2890           if ((HOST_WIDE_INT) orig_t < 0)
2891             {
2892               /* Shift ORIG_T as follows because a right shift of a
2893                  negative-valued signed type is implementation
2894                  defined.  */
2895               q = ~(~orig_t >> m);
2896               /* The function expand_shift will choose between a shift
2897                  and a sequence of additions, so the observed cost is
2898                  given as MIN (m * add_cost(speed, mode),
2899                  shift_cost(speed, mode, m)).  */
2900               op_cost = m * add_cost (speed, mode);
2901               if (shift_cost (speed, mode, m) < op_cost)
2902                 op_cost = shift_cost (speed, mode, m);
2903               new_limit.cost = best_cost.cost - op_cost;
2904               new_limit.latency = best_cost.latency - op_cost;
2905               synth_mult (alg_in, q, &new_limit, mode);
2906
2907               alg_in->cost.cost += op_cost;
2908               alg_in->cost.latency += op_cost;
2909               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2910                 {
2911                   best_cost = alg_in->cost;
2912                   std::swap (alg_in, best_alg);
2913                   best_alg->log[best_alg->ops] = m;
2914                   best_alg->op[best_alg->ops] = alg_shift;
2915                 }
2916             }
2917         }
2918       if (cache_hit)
2919         goto done;
2920     }
2921
2922   /* If we have an odd number, add or subtract one.  */
2923   if ((t & 1) != 0)
2924     {
2925       unsigned HOST_WIDE_INT w;
2926
2927     do_alg_addsub_t_m2:
2928       for (w = 1; (w & t) != 0; w <<= 1)
2929         ;
2930       /* If T was -1, then W will be zero after the loop.  This is another
2931          case where T ends with ...111.  Handling this with (T + 1) and
2932          subtract 1 produces slightly better code and results in algorithm
2933          selection much faster than treating it like the ...0111 case
2934          below.  */
2935       if (w == 0
2936           || (w > 2
2937               /* Reject the case where t is 3.
2938                  Thus we prefer addition in that case.  */
2939               && t != 3))
2940         {
2941           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2942
2943           op_cost = add_cost (speed, mode);
2944           new_limit.cost = best_cost.cost - op_cost;
2945           new_limit.latency = best_cost.latency - op_cost;
2946           synth_mult (alg_in, t + 1, &new_limit, mode);
2947
2948           alg_in->cost.cost += op_cost;
2949           alg_in->cost.latency += op_cost;
2950           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2951             {
2952               best_cost = alg_in->cost;
2953               std::swap (alg_in, best_alg);
2954               best_alg->log[best_alg->ops] = 0;
2955               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2956             }
2957         }
2958       else
2959         {
2960           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2961
2962           op_cost = add_cost (speed, mode);
2963           new_limit.cost = best_cost.cost - op_cost;
2964           new_limit.latency = best_cost.latency - op_cost;
2965           synth_mult (alg_in, t - 1, &new_limit, mode);
2966
2967           alg_in->cost.cost += op_cost;
2968           alg_in->cost.latency += op_cost;
2969           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2970             {
2971               best_cost = alg_in->cost;
2972               std::swap (alg_in, best_alg);
2973               best_alg->log[best_alg->ops] = 0;
2974               best_alg->op[best_alg->ops] = alg_add_t_m2;
2975             }
2976         }
2977
2978       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2979          quickly with a - a * n for some appropriate constant n.  */
2980       m = exact_log2 (-orig_t + 1);
2981       if (m >= 0 && m < maxm)
2982         {
2983           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2984           /* If the target has a cheap shift-and-subtract insn use
2985              that in preference to a shift insn followed by a sub insn.
2986              Assume that the shift-and-sub is "atomic" with a latency
2987              equal to it's cost, otherwise assume that on superscalar
2988              hardware the shift may be executed concurrently with the
2989              earlier steps in the algorithm.  */
2990           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2991             {
2992               op_cost = shiftsub1_cost (speed, mode, m);
2993               op_latency = op_cost;
2994             }
2995           else
2996             op_latency = add_cost (speed, mode);
2997
2998           new_limit.cost = best_cost.cost - op_cost;
2999           new_limit.latency = best_cost.latency - op_latency;
3000           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3001                       &new_limit, mode);
3002
3003           alg_in->cost.cost += op_cost;
3004           alg_in->cost.latency += op_latency;
3005           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3006             {
3007               best_cost = alg_in->cost;
3008               std::swap (alg_in, best_alg);
3009               best_alg->log[best_alg->ops] = m;
3010               best_alg->op[best_alg->ops] = alg_sub_t_m2;
3011             }
3012         }
3013
3014       if (cache_hit)
3015         goto done;
3016     }
3017
3018   /* Look for factors of t of the form
3019      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3020      If we find such a factor, we can multiply by t using an algorithm that
3021      multiplies by q, shift the result by m and add/subtract it to itself.
3022
3023      We search for large factors first and loop down, even if large factors
3024      are less probable than small; if we find a large factor we will find a
3025      good sequence quickly, and therefore be able to prune (by decreasing
3026      COST_LIMIT) the search.  */
3027
3028  do_alg_addsub_factor:
3029   for (m = floor_log2 (t - 1); m >= 2; m--)
3030     {
3031       unsigned HOST_WIDE_INT d;
3032
3033       d = (HOST_WIDE_INT_1U << m) + 1;
3034       if (t % d == 0 && t > d && m < maxm
3035           && (!cache_hit || cache_alg == alg_add_factor))
3036         {
3037           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3038           if (shiftadd_cost (speed, mode, m) <= op_cost)
3039             op_cost = shiftadd_cost (speed, mode, m);
3040
3041           op_latency = op_cost;
3042
3043
3044           new_limit.cost = best_cost.cost - op_cost;
3045           new_limit.latency = best_cost.latency - op_latency;
3046           synth_mult (alg_in, t / d, &new_limit, mode);
3047
3048           alg_in->cost.cost += op_cost;
3049           alg_in->cost.latency += op_latency;
3050           if (alg_in->cost.latency < op_cost)
3051             alg_in->cost.latency = op_cost;
3052           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3053             {
3054               best_cost = alg_in->cost;
3055               std::swap (alg_in, best_alg);
3056               best_alg->log[best_alg->ops] = m;
3057               best_alg->op[best_alg->ops] = alg_add_factor;
3058             }
3059           /* Other factors will have been taken care of in the recursion.  */
3060           break;
3061         }
3062
3063       d = (HOST_WIDE_INT_1U << m) - 1;
3064       if (t % d == 0 && t > d && m < maxm
3065           && (!cache_hit || cache_alg == alg_sub_factor))
3066         {
3067           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3068           if (shiftsub0_cost (speed, mode, m) <= op_cost)
3069             op_cost = shiftsub0_cost (speed, mode, m);
3070
3071           op_latency = op_cost;
3072
3073           new_limit.cost = best_cost.cost - op_cost;
3074           new_limit.latency = best_cost.latency - op_latency;
3075           synth_mult (alg_in, t / d, &new_limit, mode);
3076
3077           alg_in->cost.cost += op_cost;
3078           alg_in->cost.latency += op_latency;
3079           if (alg_in->cost.latency < op_cost)
3080             alg_in->cost.latency = op_cost;
3081           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3082             {
3083               best_cost = alg_in->cost;
3084               std::swap (alg_in, best_alg);
3085               best_alg->log[best_alg->ops] = m;
3086               best_alg->op[best_alg->ops] = alg_sub_factor;
3087             }
3088           break;
3089         }
3090     }
3091   if (cache_hit)
3092     goto done;
3093
3094   /* Try shift-and-add (load effective address) instructions,
3095      i.e. do a*3, a*5, a*9.  */
3096   if ((t & 1) != 0)
3097     {
3098     do_alg_add_t2_m:
3099       q = t - 1;
3100       m = ctz_hwi (q);
3101       if (q && m < maxm)
3102         {
3103           op_cost = shiftadd_cost (speed, mode, m);
3104           new_limit.cost = best_cost.cost - op_cost;
3105           new_limit.latency = best_cost.latency - op_cost;
3106           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3107
3108           alg_in->cost.cost += op_cost;
3109           alg_in->cost.latency += op_cost;
3110           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3111             {
3112               best_cost = alg_in->cost;
3113               std::swap (alg_in, best_alg);
3114               best_alg->log[best_alg->ops] = m;
3115               best_alg->op[best_alg->ops] = alg_add_t2_m;
3116             }
3117         }
3118       if (cache_hit)
3119         goto done;
3120
3121     do_alg_sub_t2_m:
3122       q = t + 1;
3123       m = ctz_hwi (q);
3124       if (q && m < maxm)
3125         {
3126           op_cost = shiftsub0_cost (speed, mode, m);
3127           new_limit.cost = best_cost.cost - op_cost;
3128           new_limit.latency = best_cost.latency - op_cost;
3129           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3130
3131           alg_in->cost.cost += op_cost;
3132           alg_in->cost.latency += op_cost;
3133           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3134             {
3135               best_cost = alg_in->cost;
3136               std::swap (alg_in, best_alg);
3137               best_alg->log[best_alg->ops] = m;
3138               best_alg->op[best_alg->ops] = alg_sub_t2_m;
3139             }
3140         }
3141       if (cache_hit)
3142         goto done;
3143     }
3144
3145  done:
3146   /* If best_cost has not decreased, we have not found any algorithm.  */
3147   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3148     {
3149       /* We failed to find an algorithm.  Record alg_impossible for
3150          this case (that is, <T, MODE, COST_LIMIT>) so that next time
3151          we are asked to find an algorithm for T within the same or
3152          lower COST_LIMIT, we can immediately return to the
3153          caller.  */
3154       entry_ptr->t = t;
3155       entry_ptr->mode = mode;
3156       entry_ptr->speed = speed;
3157       entry_ptr->alg = alg_impossible;
3158       entry_ptr->cost = *cost_limit;
3159       return;
3160     }
3161
3162   /* Cache the result.  */
3163   if (!cache_hit)
3164     {
3165       entry_ptr->t = t;
3166       entry_ptr->mode = mode;
3167       entry_ptr->speed = speed;
3168       entry_ptr->alg = best_alg->op[best_alg->ops];
3169       entry_ptr->cost.cost = best_cost.cost;
3170       entry_ptr->cost.latency = best_cost.latency;
3171     }
3172
3173   /* If we are getting a too long sequence for `struct algorithm'
3174      to record, make this search fail.  */
3175   if (best_alg->ops == MAX_BITS_PER_WORD)
3176     return;
3177
3178   /* Copy the algorithm from temporary space to the space at alg_out.
3179      We avoid using structure assignment because the majority of
3180      best_alg is normally undefined, and this is a critical function.  */
3181   alg_out->ops = best_alg->ops + 1;
3182   alg_out->cost = best_cost;
3183   memcpy (alg_out->op, best_alg->op,
3184           alg_out->ops * sizeof *alg_out->op);
3185   memcpy (alg_out->log, best_alg->log,
3186           alg_out->ops * sizeof *alg_out->log);
3187 }
3188 \f
3189 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3190    Try three variations:
3191
3192        - a shift/add sequence based on VAL itself
3193        - a shift/add sequence based on -VAL, followed by a negation
3194        - a shift/add sequence based on VAL - 1, followed by an addition.
3195
3196    Return true if the cheapest of these cost less than MULT_COST,
3197    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3198
3199 bool
3200 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3201                      struct algorithm *alg, enum mult_variant *variant,
3202                      int mult_cost)
3203 {
3204   struct algorithm alg2;
3205   struct mult_cost limit;
3206   int op_cost;
3207   bool speed = optimize_insn_for_speed_p ();
3208
3209   /* Fail quickly for impossible bounds.  */
3210   if (mult_cost < 0)
3211     return false;
3212
3213   /* Ensure that mult_cost provides a reasonable upper bound.
3214      Any constant multiplication can be performed with less
3215      than 2 * bits additions.  */
3216   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3217   if (mult_cost > op_cost)
3218     mult_cost = op_cost;
3219
3220   *variant = basic_variant;
3221   limit.cost = mult_cost;
3222   limit.latency = mult_cost;
3223   synth_mult (alg, val, &limit, mode);
3224
3225   /* This works only if the inverted value actually fits in an
3226      `unsigned int' */
3227   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3228     {
3229       op_cost = neg_cost (speed, mode);
3230       if (MULT_COST_LESS (&alg->cost, mult_cost))
3231         {
3232           limit.cost = alg->cost.cost - op_cost;
3233           limit.latency = alg->cost.latency - op_cost;
3234         }
3235       else
3236         {
3237           limit.cost = mult_cost - op_cost;
3238           limit.latency = mult_cost - op_cost;
3239         }
3240
3241       synth_mult (&alg2, -val, &limit, mode);
3242       alg2.cost.cost += op_cost;
3243       alg2.cost.latency += op_cost;
3244       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3245         *alg = alg2, *variant = negate_variant;
3246     }
3247
3248   /* This proves very useful for division-by-constant.  */
3249   op_cost = add_cost (speed, mode);
3250   if (MULT_COST_LESS (&alg->cost, mult_cost))
3251     {
3252       limit.cost = alg->cost.cost - op_cost;
3253       limit.latency = alg->cost.latency - op_cost;
3254     }
3255   else
3256     {
3257       limit.cost = mult_cost - op_cost;
3258       limit.latency = mult_cost - op_cost;
3259     }
3260
3261   synth_mult (&alg2, val - 1, &limit, mode);
3262   alg2.cost.cost += op_cost;
3263   alg2.cost.latency += op_cost;
3264   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3265     *alg = alg2, *variant = add_variant;
3266
3267   return MULT_COST_LESS (&alg->cost, mult_cost);
3268 }
3269
3270 /* A subroutine of expand_mult, used for constant multiplications.
3271    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3272    convenient.  Use the shift/add sequence described by ALG and apply
3273    the final fixup specified by VARIANT.  */
3274
3275 static rtx
3276 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3277                    rtx target, const struct algorithm *alg,
3278                    enum mult_variant variant)
3279 {
3280   unsigned HOST_WIDE_INT val_so_far;
3281   rtx_insn *insn;
3282   rtx accum, tem;
3283   int opno;
3284   machine_mode nmode;
3285
3286   /* Avoid referencing memory over and over and invalid sharing
3287      on SUBREGs.  */
3288   op0 = force_reg (mode, op0);
3289
3290   /* ACCUM starts out either as OP0 or as a zero, depending on
3291      the first operation.  */
3292
3293   if (alg->op[0] == alg_zero)
3294     {
3295       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3296       val_so_far = 0;
3297     }
3298   else if (alg->op[0] == alg_m)
3299     {
3300       accum = copy_to_mode_reg (mode, op0);
3301       val_so_far = 1;
3302     }
3303   else
3304     gcc_unreachable ();
3305
3306   for (opno = 1; opno < alg->ops; opno++)
3307     {
3308       int log = alg->log[opno];
3309       rtx shift_subtarget = optimize ? 0 : accum;
3310       rtx add_target
3311         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3312            && !optimize)
3313           ? target : 0;
3314       rtx accum_target = optimize ? 0 : accum;
3315       rtx accum_inner;
3316
3317       switch (alg->op[opno])
3318         {
3319         case alg_shift:
3320           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3321           /* REG_EQUAL note will be attached to the following insn.  */
3322           emit_move_insn (accum, tem);
3323           val_so_far <<= log;
3324           break;
3325
3326         case alg_add_t_m2:
3327           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3328           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3329                                  add_target ? add_target : accum_target);
3330           val_so_far += HOST_WIDE_INT_1U << log;
3331           break;
3332
3333         case alg_sub_t_m2:
3334           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3335           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3336                                  add_target ? add_target : accum_target);
3337           val_so_far -= HOST_WIDE_INT_1U << log;
3338           break;
3339
3340         case alg_add_t2_m:
3341           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3342                                 log, shift_subtarget, 0);
3343           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3344                                  add_target ? add_target : accum_target);
3345           val_so_far = (val_so_far << log) + 1;
3346           break;
3347
3348         case alg_sub_t2_m:
3349           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3350                                 log, shift_subtarget, 0);
3351           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3352                                  add_target ? add_target : accum_target);
3353           val_so_far = (val_so_far << log) - 1;
3354           break;
3355
3356         case alg_add_factor:
3357           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3358           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3359                                  add_target ? add_target : accum_target);
3360           val_so_far += val_so_far << log;
3361           break;
3362
3363         case alg_sub_factor:
3364           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3365           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3366                                  (add_target
3367                                   ? add_target : (optimize ? 0 : tem)));
3368           val_so_far = (val_so_far << log) - val_so_far;
3369           break;
3370
3371         default:
3372           gcc_unreachable ();
3373         }
3374
3375       if (SCALAR_INT_MODE_P (mode))
3376         {
3377           /* Write a REG_EQUAL note on the last insn so that we can cse
3378              multiplication sequences.  Note that if ACCUM is a SUBREG,
3379              we've set the inner register and must properly indicate that.  */
3380           tem = op0, nmode = mode;
3381           accum_inner = accum;
3382           if (GET_CODE (accum) == SUBREG)
3383             {
3384               accum_inner = SUBREG_REG (accum);
3385               nmode = GET_MODE (accum_inner);
3386               tem = gen_lowpart (nmode, op0);
3387             }
3388
3389           /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3390              In that case, only the low bits of accum would be guaranteed to
3391              be equal to the content of the REG_EQUAL note, the upper bits
3392              can be anything.  */
3393           if (!paradoxical_subreg_p (tem))
3394             {
3395               insn = get_last_insn ();
3396               wide_int wval_so_far
3397                 = wi::uhwi (val_so_far,
3398                             GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3399               rtx c = immed_wide_int_const (wval_so_far, nmode);
3400               set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3401                                 accum_inner);
3402             }
3403         }
3404     }
3405
3406   if (variant == negate_variant)
3407     {
3408       val_so_far = -val_so_far;
3409       accum = expand_unop (mode, neg_optab, accum, target, 0);
3410     }
3411   else if (variant == add_variant)
3412     {
3413       val_so_far = val_so_far + 1;
3414       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3415     }
3416
3417   /* Compare only the bits of val and val_so_far that are significant
3418      in the result mode, to avoid sign-/zero-extension confusion.  */
3419   nmode = GET_MODE_INNER (mode);
3420   val &= GET_MODE_MASK (nmode);
3421   val_so_far &= GET_MODE_MASK (nmode);
3422   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3423
3424   return accum;
3425 }
3426
3427 /* Perform a multiplication and return an rtx for the result.
3428    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3429    TARGET is a suggestion for where to store the result (an rtx).
3430
3431    We check specially for a constant integer as OP1.
3432    If you want this check for OP0 as well, then before calling
3433    you should swap the two operands if OP0 would be constant.  */
3434
3435 rtx
3436 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3437              int unsignedp, bool no_libcall)
3438 {
3439   enum mult_variant variant;
3440   struct algorithm algorithm;
3441   rtx scalar_op1;
3442   int max_cost;
3443   bool speed = optimize_insn_for_speed_p ();
3444   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3445
3446   if (CONSTANT_P (op0))
3447     std::swap (op0, op1);
3448
3449   /* For vectors, there are several simplifications that can be made if
3450      all elements of the vector constant are identical.  */
3451   scalar_op1 = unwrap_const_vec_duplicate (op1);
3452
3453   if (INTEGRAL_MODE_P (mode))
3454     {
3455       rtx fake_reg;
3456       HOST_WIDE_INT coeff;
3457       bool is_neg;
3458       int mode_bitsize;
3459
3460       if (op1 == CONST0_RTX (mode))
3461         return op1;
3462       if (op1 == CONST1_RTX (mode))
3463         return op0;
3464       if (op1 == CONSTM1_RTX (mode))
3465         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3466                             op0, target, 0);
3467
3468       if (do_trapv)
3469         goto skip_synth;
3470
3471       /* If mode is integer vector mode, check if the backend supports
3472          vector lshift (by scalar or vector) at all.  If not, we can't use
3473          synthetized multiply.  */
3474       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3475           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3476           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3477         goto skip_synth;
3478
3479       /* These are the operations that are potentially turned into
3480          a sequence of shifts and additions.  */
3481       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3482
3483       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3484          less than or equal in size to `unsigned int' this doesn't matter.
3485          If the mode is larger than `unsigned int', then synth_mult works
3486          only if the constant value exactly fits in an `unsigned int' without
3487          any truncation.  This means that multiplying by negative values does
3488          not work; results are off by 2^32 on a 32 bit machine.  */
3489       if (CONST_INT_P (scalar_op1))
3490         {
3491           coeff = INTVAL (scalar_op1);
3492           is_neg = coeff < 0;
3493         }
3494 #if TARGET_SUPPORTS_WIDE_INT
3495       else if (CONST_WIDE_INT_P (scalar_op1))
3496 #else
3497       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3498 #endif
3499         {
3500           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3501           /* Perfect power of 2 (other than 1, which is handled above).  */
3502           if (shift > 0)
3503             return expand_shift (LSHIFT_EXPR, mode, op0,
3504                                  shift, target, unsignedp);
3505           else
3506             goto skip_synth;
3507         }
3508       else
3509         goto skip_synth;
3510
3511       /* We used to test optimize here, on the grounds that it's better to
3512          produce a smaller program when -O is not used.  But this causes
3513          such a terrible slowdown sometimes that it seems better to always
3514          use synth_mult.  */
3515
3516       /* Special case powers of two.  */
3517       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3518           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3519         return expand_shift (LSHIFT_EXPR, mode, op0,
3520                              floor_log2 (coeff), target, unsignedp);
3521
3522       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3523
3524       /* Attempt to handle multiplication of DImode values by negative
3525          coefficients, by performing the multiplication by a positive
3526          multiplier and then inverting the result.  */
3527       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3528         {
3529           /* Its safe to use -coeff even for INT_MIN, as the
3530              result is interpreted as an unsigned coefficient.
3531              Exclude cost of op0 from max_cost to match the cost
3532              calculation of the synth_mult.  */
3533           coeff = -(unsigned HOST_WIDE_INT) coeff;
3534           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3535                                     mode, speed)
3536                       - neg_cost (speed, mode));
3537           if (max_cost <= 0)
3538             goto skip_synth;
3539
3540           /* Special case powers of two.  */
3541           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3542             {
3543               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3544                                        floor_log2 (coeff), target, unsignedp);
3545               return expand_unop (mode, neg_optab, temp, target, 0);
3546             }
3547
3548           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3549                                    max_cost))
3550             {
3551               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3552                                             &algorithm, variant);
3553               return expand_unop (mode, neg_optab, temp, target, 0);
3554             }
3555           goto skip_synth;
3556         }
3557
3558       /* Exclude cost of op0 from max_cost to match the cost
3559          calculation of the synth_mult.  */
3560       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3561       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3562         return expand_mult_const (mode, op0, coeff, target,
3563                                   &algorithm, variant);
3564     }
3565  skip_synth:
3566
3567   /* Expand x*2.0 as x+x.  */
3568   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3569       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3570     {
3571       op0 = force_reg (GET_MODE (op0), op0);
3572       return expand_binop (mode, add_optab, op0, op0,
3573                            target, unsignedp,
3574                            no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3575     }
3576
3577   /* This used to use umul_optab if unsigned, but for non-widening multiply
3578      there is no difference between signed and unsigned.  */
3579   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3580                       op0, op1, target, unsignedp,
3581                       no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3582   gcc_assert (op0 || no_libcall);
3583   return op0;
3584 }
3585
3586 /* Return a cost estimate for multiplying a register by the given
3587    COEFFicient in the given MODE and SPEED.  */
3588
3589 int
3590 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3591 {
3592   int max_cost;
3593   struct algorithm algorithm;
3594   enum mult_variant variant;
3595
3596   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3597   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3598                            mode, speed);
3599   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3600     return algorithm.cost.cost;
3601   else
3602     return max_cost;
3603 }
3604
3605 /* Perform a widening multiplication and return an rtx for the result.
3606    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3607    TARGET is a suggestion for where to store the result (an rtx).
3608    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3609    or smul_widen_optab.
3610
3611    We check specially for a constant integer as OP1, comparing the
3612    cost of a widening multiply against the cost of a sequence of shifts
3613    and adds.  */
3614
3615 rtx
3616 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3617                       int unsignedp, optab this_optab)
3618 {
3619   bool speed = optimize_insn_for_speed_p ();
3620   rtx cop1;
3621
3622   if (CONST_INT_P (op1)
3623       && GET_MODE (op0) != VOIDmode
3624       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3625                                 this_optab == umul_widen_optab))
3626       && CONST_INT_P (cop1)
3627       && (INTVAL (cop1) >= 0
3628           || HWI_COMPUTABLE_MODE_P (mode)))
3629     {
3630       HOST_WIDE_INT coeff = INTVAL (cop1);
3631       int max_cost;
3632       enum mult_variant variant;
3633       struct algorithm algorithm;
3634
3635       if (coeff == 0)
3636         return CONST0_RTX (mode);
3637
3638       /* Special case powers of two.  */
3639       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3640         {
3641           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3642           return expand_shift (LSHIFT_EXPR, mode, op0,
3643                                floor_log2 (coeff), target, unsignedp);
3644         }
3645
3646       /* Exclude cost of op0 from max_cost to match the cost
3647          calculation of the synth_mult.  */
3648       max_cost = mul_widen_cost (speed, mode);
3649       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3650                                max_cost))
3651         {
3652           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3653           return expand_mult_const (mode, op0, coeff, target,
3654                                     &algorithm, variant);
3655         }
3656     }
3657   return expand_binop (mode, this_optab, op0, op1, target,
3658                        unsignedp, OPTAB_LIB_WIDEN);
3659 }
3660 \f
3661 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3662    replace division by D, and put the least significant N bits of the result
3663    in *MULTIPLIER_PTR and return the most significant bit.
3664
3665    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3666    needed precision is in PRECISION (should be <= N).
3667
3668    PRECISION should be as small as possible so this function can choose
3669    multiplier more freely.
3670
3671    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3672    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3673
3674    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3675    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3676
3677 unsigned HOST_WIDE_INT
3678 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3679                    unsigned HOST_WIDE_INT *multiplier_ptr,
3680                    int *post_shift_ptr, int *lgup_ptr)
3681 {
3682   int lgup, post_shift;
3683   int pow, pow2;
3684
3685   /* lgup = ceil(log2(divisor)); */
3686   lgup = ceil_log2 (d);
3687
3688   gcc_assert (lgup <= n);
3689
3690   pow = n + lgup;
3691   pow2 = n + lgup - precision;
3692
3693   /* mlow = 2^(N + lgup)/d */
3694   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3695   wide_int mlow = wi::udiv_trunc (val, d);
3696
3697   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3698   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3699   wide_int mhigh = wi::udiv_trunc (val, d);
3700
3701   /* If precision == N, then mlow, mhigh exceed 2^N
3702      (but they do not exceed 2^(N+1)).  */
3703
3704   /* Reduce to lowest terms.  */
3705   for (post_shift = lgup; post_shift > 0; post_shift--)
3706     {
3707       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3708                                                        HOST_BITS_PER_WIDE_INT);
3709       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3710                                                        HOST_BITS_PER_WIDE_INT);
3711       if (ml_lo >= mh_lo)
3712         break;
3713
3714       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3715       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3716     }
3717
3718   *post_shift_ptr = post_shift;
3719   *lgup_ptr = lgup;
3720   if (n < HOST_BITS_PER_WIDE_INT)
3721     {
3722       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3723       *multiplier_ptr = mhigh.to_uhwi () & mask;
3724       return mhigh.to_uhwi () > mask;
3725     }
3726   else
3727     {
3728       *multiplier_ptr = mhigh.to_uhwi ();
3729       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3730     }
3731 }
3732
3733 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3734    congruent to 1 (mod 2**N).  */
3735
3736 static unsigned HOST_WIDE_INT
3737 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3738 {
3739   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3740
3741   /* The algorithm notes that the choice y = x satisfies
3742      x*y == 1 mod 2^3, since x is assumed odd.
3743      Each iteration doubles the number of bits of significance in y.  */
3744
3745   unsigned HOST_WIDE_INT mask;
3746   unsigned HOST_WIDE_INT y = x;
3747   int nbit = 3;
3748
3749   mask = (n == HOST_BITS_PER_WIDE_INT
3750           ? HOST_WIDE_INT_M1U
3751           : (HOST_WIDE_INT_1U << n) - 1);
3752
3753   while (nbit < n)
3754     {
3755       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3756       nbit *= 2;
3757     }
3758   return y;
3759 }
3760
3761 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3762    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3763    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3764    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3765    become signed.
3766
3767    The result is put in TARGET if that is convenient.
3768
3769    MODE is the mode of operation.  */
3770
3771 rtx
3772 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3773                              rtx op1, rtx target, int unsignedp)
3774 {
3775   rtx tem;
3776   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3777
3778   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3779                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3780   tem = expand_and (mode, tem, op1, NULL_RTX);
3781   adj_operand
3782     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3783                      adj_operand);
3784
3785   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3786                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3787   tem = expand_and (mode, tem, op0, NULL_RTX);
3788   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3789                           target);
3790
3791   return target;
3792 }
3793
3794 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3795
3796 static rtx
3797 extract_high_half (scalar_int_mode mode, rtx op)
3798 {
3799   if (mode == word_mode)
3800     return gen_highpart (mode, op);
3801
3802   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3803
3804   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3805                      GET_MODE_BITSIZE (mode), 0, 1);
3806   return convert_modes (mode, wider_mode, op, 0);
3807 }
3808
3809 /* Like expmed_mult_highpart, but only consider using a multiplication
3810    optab.  OP1 is an rtx for the constant operand.  */
3811
3812 static rtx
3813 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3814                             rtx target, int unsignedp, int max_cost)
3815 {
3816   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3817   optab moptab;
3818   rtx tem;
3819   int size;
3820   bool speed = optimize_insn_for_speed_p ();
3821
3822   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3823
3824   size = GET_MODE_BITSIZE (mode);
3825
3826   /* Firstly, try using a multiplication insn that only generates the needed
3827      high part of the product, and in the sign flavor of unsignedp.  */
3828   if (mul_highpart_cost (speed, mode) < max_cost)
3829     {
3830       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3831       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3832                           unsignedp, OPTAB_DIRECT);
3833       if (tem)
3834         return tem;
3835     }
3836
3837   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3838      Need to adjust the result after the multiplication.  */
3839   if (size - 1 < BITS_PER_WORD
3840       && (mul_highpart_cost (speed, mode)
3841           + 2 * shift_cost (speed, mode, size-1)
3842           + 4 * add_cost (speed, mode) < max_cost))
3843     {
3844       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3845       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3846                           unsignedp, OPTAB_DIRECT);
3847       if (tem)
3848         /* We used the wrong signedness.  Adjust the result.  */
3849         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3850                                             tem, unsignedp);
3851     }
3852
3853   /* Try widening multiplication.  */
3854   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3855   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3856       && mul_widen_cost (speed, wider_mode) < max_cost)
3857     {
3858       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3859                           unsignedp, OPTAB_WIDEN);
3860       if (tem)
3861         return extract_high_half (mode, tem);
3862     }
3863
3864   /* Try widening the mode and perform a non-widening multiplication.  */
3865   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3866       && size - 1 < BITS_PER_WORD
3867       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3868           < max_cost))
3869     {
3870       rtx_insn *insns;
3871       rtx wop0, wop1;
3872
3873       /* We need to widen the operands, for example to ensure the
3874          constant multiplier is correctly sign or zero extended.
3875          Use a sequence to clean-up any instructions emitted by
3876          the conversions if things don't work out.  */
3877       start_sequence ();
3878       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3879       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3880       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3881                           unsignedp, OPTAB_WIDEN);
3882       insns = get_insns ();
3883       end_sequence ();
3884
3885       if (tem)
3886         {
3887           emit_insn (insns);
3888           return extract_high_half (mode, tem);
3889         }
3890     }
3891
3892   /* Try widening multiplication of opposite signedness, and adjust.  */
3893   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3894   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3895       && size - 1 < BITS_PER_WORD
3896       && (mul_widen_cost (speed, wider_mode)
3897           + 2 * shift_cost (speed, mode, size-1)
3898           + 4 * add_cost (speed, mode) < max_cost))
3899     {
3900       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3901                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3902       if (tem != 0)
3903         {
3904           tem = extract_high_half (mode, tem);
3905           /* We used the wrong signedness.  Adjust the result.  */
3906           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3907                                               target, unsignedp);
3908         }
3909     }
3910
3911   return 0;
3912 }
3913
3914 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3915    putting the high half of the result in TARGET if that is convenient,
3916    and return where the result is.  If the operation cannot be performed,
3917    0 is returned.
3918
3919    MODE is the mode of operation and result.
3920
3921    UNSIGNEDP nonzero means unsigned multiply.
3922
3923    MAX_COST is the total allowed cost for the expanded RTL.  */
3924
3925 static rtx
3926 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3927                       rtx target, int unsignedp, int max_cost)
3928 {
3929   unsigned HOST_WIDE_INT cnst1;
3930   int extra_cost;
3931   bool sign_adjust = false;
3932   enum mult_variant variant;
3933   struct algorithm alg;
3934   rtx tem;
3935   bool speed = optimize_insn_for_speed_p ();
3936
3937   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3938   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3939
3940   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3941
3942   /* We can't optimize modes wider than BITS_PER_WORD.
3943      ??? We might be able to perform double-word arithmetic if
3944      mode == word_mode, however all the cost calculations in
3945      synth_mult etc. assume single-word operations.  */
3946   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3947   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3948     return expmed_mult_highpart_optab (mode, op0, op1, target,
3949                                        unsignedp, max_cost);
3950
3951   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3952
3953   /* Check whether we try to multiply by a negative constant.  */
3954   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3955     {
3956       sign_adjust = true;
3957       extra_cost += add_cost (speed, mode);
3958     }
3959
3960   /* See whether shift/add multiplication is cheap enough.  */
3961   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3962                            max_cost - extra_cost))
3963     {
3964       /* See whether the specialized multiplication optabs are
3965          cheaper than the shift/add version.  */
3966       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3967                                         alg.cost.cost + extra_cost);
3968       if (tem)
3969         return tem;
3970
3971       tem = convert_to_mode (wider_mode, op0, unsignedp);
3972       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3973       tem = extract_high_half (mode, tem);
3974
3975       /* Adjust result for signedness.  */
3976       if (sign_adjust)
3977         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3978
3979       return tem;
3980     }
3981   return expmed_mult_highpart_optab (mode, op0, op1, target,
3982                                      unsignedp, max_cost);
3983 }
3984
3985
3986 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3987
3988 static rtx
3989 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3990 {
3991   rtx result, temp, shift;
3992   rtx_code_label *label;
3993   int logd;
3994   int prec = GET_MODE_PRECISION (mode);
3995
3996   logd = floor_log2 (d);
3997   result = gen_reg_rtx (mode);
3998
3999   /* Avoid conditional branches when they're expensive.  */
4000   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4001       && optimize_insn_for_speed_p ())
4002     {
4003       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4004                                       mode, 0, -1);
4005       if (signmask)
4006         {
4007           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4008           signmask = force_reg (mode, signmask);
4009           shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4010
4011           /* Use the rtx_cost of a LSHIFTRT instruction to determine
4012              which instruction sequence to use.  If logical right shifts
4013              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
4014              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
4015
4016           temp = gen_rtx_LSHIFTRT (mode, result, shift);
4017           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4018               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4019                   > COSTS_N_INSNS (2)))
4020             {
4021               temp = expand_binop (mode, xor_optab, op0, signmask,
4022                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4023               temp = expand_binop (mode, sub_optab, temp, signmask,
4024                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4025               temp = expand_binop (mode, and_optab, temp,
4026                                    gen_int_mode (masklow, mode),
4027                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4028               temp = expand_binop (mode, xor_optab, temp, signmask,
4029                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4030               temp = expand_binop (mode, sub_optab, temp, signmask,
4031                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4032             }
4033           else
4034             {
4035               signmask = expand_binop (mode, lshr_optab, signmask, shift,
4036                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
4037               signmask = force_reg (mode, signmask);
4038
4039               temp = expand_binop (mode, add_optab, op0, signmask,
4040                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4041               temp = expand_binop (mode, and_optab, temp,
4042                                    gen_int_mode (masklow, mode),
4043                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4044               temp = expand_binop (mode, sub_optab, temp, signmask,
4045                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4046             }
4047           return temp;
4048         }
4049     }
4050
4051   /* Mask contains the mode's signbit and the significant bits of the
4052      modulus.  By including the signbit in the operation, many targets
4053      can avoid an explicit compare operation in the following comparison
4054      against zero.  */
4055   wide_int mask = wi::mask (logd, false, prec);
4056   mask = wi::set_bit (mask, prec - 1);
4057
4058   temp = expand_binop (mode, and_optab, op0,
4059                        immed_wide_int_const (mask, mode),
4060                        result, 1, OPTAB_LIB_WIDEN);
4061   if (temp != result)
4062     emit_move_insn (result, temp);
4063
4064   label = gen_label_rtx ();
4065   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4066
4067   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4068                        0, OPTAB_LIB_WIDEN);
4069
4070   mask = wi::mask (logd, true, prec);
4071   temp = expand_binop (mode, ior_optab, temp,
4072                        immed_wide_int_const (mask, mode),
4073                        result, 1, OPTAB_LIB_WIDEN);
4074   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4075                        0, OPTAB_LIB_WIDEN);
4076   if (temp != result)
4077     emit_move_insn (result, temp);
4078   emit_label (label);
4079   return result;
4080 }
4081
4082 /* Expand signed division of OP0 by a power of two D in mode MODE.
4083    This routine is only called for positive values of D.  */
4084
4085 static rtx
4086 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4087 {
4088   rtx temp;
4089   rtx_code_label *label;
4090   int logd;
4091
4092   logd = floor_log2 (d);
4093
4094   if (d == 2
4095       && BRANCH_COST (optimize_insn_for_speed_p (),
4096                       false) >= 1)
4097     {
4098       temp = gen_reg_rtx (mode);
4099       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4100       if (temp != NULL_RTX)
4101         {
4102           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4103                                0, OPTAB_LIB_WIDEN);
4104           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4105         }
4106     }
4107
4108   if (HAVE_conditional_move
4109       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4110     {
4111       rtx temp2;
4112
4113       start_sequence ();
4114       temp2 = copy_to_mode_reg (mode, op0);
4115       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4116                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
4117       temp = force_reg (mode, temp);
4118
4119       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4120       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4121                                      mode, temp, temp2, mode, 0);
4122       if (temp2)
4123         {
4124           rtx_insn *seq = get_insns ();
4125           end_sequence ();
4126           emit_insn (seq);
4127           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4128         }
4129       end_sequence ();
4130     }
4131
4132   if (BRANCH_COST (optimize_insn_for_speed_p (),
4133                    false) >= 2)
4134     {
4135       int ushift = GET_MODE_BITSIZE (mode) - logd;
4136
4137       temp = gen_reg_rtx (mode);
4138       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4139       if (temp != NULL_RTX)
4140         {
4141           if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4142               || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4143               > COSTS_N_INSNS (1))
4144             temp = expand_binop (mode, and_optab, temp,
4145                                  gen_int_mode (d - 1, mode),
4146                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
4147           else
4148             temp = expand_shift (RSHIFT_EXPR, mode, temp,
4149                                  ushift, NULL_RTX, 1);
4150           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4151                                0, OPTAB_LIB_WIDEN);
4152           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4153         }
4154     }
4155
4156   label = gen_label_rtx ();
4157   temp = copy_to_mode_reg (mode, op0);
4158   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4159   expand_inc (temp, gen_int_mode (d - 1, mode));
4160   emit_label (label);
4161   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4162 }
4163 \f
4164 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4165    if that is convenient, and returning where the result is.
4166    You may request either the quotient or the remainder as the result;
4167    specify REM_FLAG nonzero to get the remainder.
4168
4169    CODE is the expression code for which kind of division this is;
4170    it controls how rounding is done.  MODE is the machine mode to use.
4171    UNSIGNEDP nonzero means do unsigned division.  */
4172
4173 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4174    and then correct it by or'ing in missing high bits
4175    if result of ANDI is nonzero.
4176    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4177    This could optimize to a bfexts instruction.
4178    But C doesn't use these operations, so their optimizations are
4179    left for later.  */
4180 /* ??? For modulo, we don't actually need the highpart of the first product,
4181    the low part will do nicely.  And for small divisors, the second multiply
4182    can also be a low-part only multiply or even be completely left out.
4183    E.g. to calculate the remainder of a division by 3 with a 32 bit
4184    multiply, multiply with 0x55555556 and extract the upper two bits;
4185    the result is exact for inputs up to 0x1fffffff.
4186    The input range can be reduced by using cross-sum rules.
4187    For odd divisors >= 3, the following table gives right shift counts
4188    so that if a number is shifted by an integer multiple of the given
4189    amount, the remainder stays the same:
4190    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4191    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4192    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4193    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4194    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4195
4196    Cross-sum rules for even numbers can be derived by leaving as many bits
4197    to the right alone as the divisor has zeros to the right.
4198    E.g. if x is an unsigned 32 bit number:
4199    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4200    */
4201
4202 rtx
4203 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4204                rtx op0, rtx op1, rtx target, int unsignedp,
4205                enum optab_methods methods)
4206 {
4207   machine_mode compute_mode;
4208   rtx tquotient;
4209   rtx quotient = 0, remainder = 0;
4210   rtx_insn *last;
4211   rtx_insn *insn;
4212   optab optab1, optab2;
4213   int op1_is_constant, op1_is_pow2 = 0;
4214   int max_cost, extra_cost;
4215   static HOST_WIDE_INT last_div_const = 0;
4216   bool speed = optimize_insn_for_speed_p ();
4217
4218   op1_is_constant = CONST_INT_P (op1);
4219   if (op1_is_constant)
4220     {
4221       wide_int ext_op1 = rtx_mode_t (op1, mode);
4222       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4223                      || (! unsignedp
4224                          && wi::popcount (wi::neg (ext_op1)) == 1));
4225     }
4226
4227   /*
4228      This is the structure of expand_divmod:
4229
4230      First comes code to fix up the operands so we can perform the operations
4231      correctly and efficiently.
4232
4233      Second comes a switch statement with code specific for each rounding mode.
4234      For some special operands this code emits all RTL for the desired
4235      operation, for other cases, it generates only a quotient and stores it in
4236      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4237      to indicate that it has not done anything.
4238
4239      Last comes code that finishes the operation.  If QUOTIENT is set and
4240      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4241      QUOTIENT is not set, it is computed using trunc rounding.
4242
4243      We try to generate special code for division and remainder when OP1 is a
4244      constant.  If |OP1| = 2**n we can use shifts and some other fast
4245      operations.  For other values of OP1, we compute a carefully selected
4246      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4247      by m.
4248
4249      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4250      half of the product.  Different strategies for generating the product are
4251      implemented in expmed_mult_highpart.
4252
4253      If what we actually want is the remainder, we generate that by another
4254      by-constant multiplication and a subtraction.  */
4255
4256   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4257      code below will malfunction if we are, so check here and handle
4258      the special case if so.  */
4259   if (op1 == const1_rtx)
4260     return rem_flag ? const0_rtx : op0;
4261
4262     /* When dividing by -1, we could get an overflow.
4263      negv_optab can handle overflows.  */
4264   if (! unsignedp && op1 == constm1_rtx)
4265     {
4266       if (rem_flag)
4267         return const0_rtx;
4268       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4269                           ? negv_optab : neg_optab, op0, target, 0);
4270     }
4271
4272   if (target
4273       /* Don't use the function value register as a target
4274          since we have to read it as well as write it,
4275          and function-inlining gets confused by this.  */
4276       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4277           /* Don't clobber an operand while doing a multi-step calculation.  */
4278           || ((rem_flag || op1_is_constant)
4279               && (reg_mentioned_p (target, op0)
4280                   || (MEM_P (op0) && MEM_P (target))))
4281           || reg_mentioned_p (target, op1)
4282           || (MEM_P (op1) && MEM_P (target))))
4283     target = 0;
4284
4285   /* Get the mode in which to perform this computation.  Normally it will
4286      be MODE, but sometimes we can't do the desired operation in MODE.
4287      If so, pick a wider mode in which we can do the operation.  Convert
4288      to that mode at the start to avoid repeated conversions.
4289
4290      First see what operations we need.  These depend on the expression
4291      we are evaluating.  (We assume that divxx3 insns exist under the
4292      same conditions that modxx3 insns and that these insns don't normally
4293      fail.  If these assumptions are not correct, we may generate less
4294      efficient code in some cases.)
4295
4296      Then see if we find a mode in which we can open-code that operation
4297      (either a division, modulus, or shift).  Finally, check for the smallest
4298      mode for which we can do the operation with a library call.  */
4299
4300   /* We might want to refine this now that we have division-by-constant
4301      optimization.  Since expmed_mult_highpart tries so many variants, it is
4302      not straightforward to generalize this.  Maybe we should make an array
4303      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4304
4305   optab1 = (op1_is_pow2
4306             ? (unsignedp ? lshr_optab : ashr_optab)
4307             : (unsignedp ? udiv_optab : sdiv_optab));
4308   optab2 = (op1_is_pow2 ? optab1
4309             : (unsignedp ? udivmod_optab : sdivmod_optab));
4310
4311   if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4312     {
4313       FOR_EACH_MODE_FROM (compute_mode, mode)
4314       if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4315           || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4316         break;
4317
4318       if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4319         FOR_EACH_MODE_FROM (compute_mode, mode)
4320           if (optab_libfunc (optab1, compute_mode)
4321               || optab_libfunc (optab2, compute_mode))
4322             break;
4323     }
4324   else
4325     compute_mode = mode;
4326
4327   /* If we still couldn't find a mode, use MODE, but expand_binop will
4328      probably die.  */
4329   if (compute_mode == VOIDmode)
4330     compute_mode = mode;
4331
4332   if (target && GET_MODE (target) == compute_mode)
4333     tquotient = target;
4334   else
4335     tquotient = gen_reg_rtx (compute_mode);
4336
4337 #if 0
4338   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4339      (mode), and thereby get better code when OP1 is a constant.  Do that
4340      later.  It will require going over all usages of SIZE below.  */
4341   size = GET_MODE_BITSIZE (mode);
4342 #endif
4343
4344   /* Only deduct something for a REM if the last divide done was
4345      for a different constant.   Then set the constant of the last
4346      divide.  */
4347   max_cost = (unsignedp
4348               ? udiv_cost (speed, compute_mode)
4349               : sdiv_cost (speed, compute_mode));
4350   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4351                      && INTVAL (op1) == last_div_const))
4352     max_cost -= (mul_cost (speed, compute_mode)
4353                  + add_cost (speed, compute_mode));
4354
4355   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4356
4357   /* Now convert to the best mode to use.  */
4358   if (compute_mode != mode)
4359     {
4360       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4361       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4362
4363       /* convert_modes may have placed op1 into a register, so we
4364          must recompute the following.  */
4365       op1_is_constant = CONST_INT_P (op1);
4366       if (op1_is_constant)
4367         {
4368           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4369           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4370                          || (! unsignedp
4371                              && wi::popcount (wi::neg (ext_op1)) == 1));
4372         }
4373       else
4374         op1_is_pow2 = 0;
4375     }
4376
4377   /* If one of the operands is a volatile MEM, copy it into a register.  */
4378
4379   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4380     op0 = force_reg (compute_mode, op0);
4381   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4382     op1 = force_reg (compute_mode, op1);
4383
4384   /* If we need the remainder or if OP1 is constant, we need to
4385      put OP0 in a register in case it has any queued subexpressions.  */
4386   if (rem_flag || op1_is_constant)
4387     op0 = force_reg (compute_mode, op0);
4388
4389   last = get_last_insn ();
4390
4391   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4392   if (unsignedp)
4393     {
4394       if (code == FLOOR_DIV_EXPR)
4395         code = TRUNC_DIV_EXPR;
4396       if (code == FLOOR_MOD_EXPR)
4397         code = TRUNC_MOD_EXPR;
4398       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4399         code = TRUNC_DIV_EXPR;
4400     }
4401
4402   if (op1 != const0_rtx)
4403     switch (code)
4404       {
4405       case TRUNC_MOD_EXPR:
4406       case TRUNC_DIV_EXPR:
4407         if (op1_is_constant)
4408           {
4409             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4410             int size = GET_MODE_BITSIZE (int_mode);
4411             if (unsignedp)
4412               {
4413                 unsigned HOST_WIDE_INT mh, ml;
4414                 int pre_shift, post_shift;
4415                 int dummy;
4416                 wide_int wd = rtx_mode_t (op1, int_mode);
4417                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4418
4419                 if (wi::popcount (wd) == 1)
4420                   {
4421                     pre_shift = floor_log2 (d);
4422                     if (rem_flag)
4423                       {
4424                         unsigned HOST_WIDE_INT mask
4425                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4426                         remainder
4427                           = expand_binop (int_mode, and_optab, op0,
4428                                           gen_int_mode (mask, int_mode),
4429                                           remainder, 1, methods);
4430                         if (remainder)
4431                           return gen_lowpart (mode, remainder);
4432                       }
4433                     quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4434                                              pre_shift, tquotient, 1);
4435                   }
4436                 else if (size <= HOST_BITS_PER_WIDE_INT)
4437                   {
4438                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4439                       {
4440                         /* Most significant bit of divisor is set; emit an scc
4441                            insn.  */
4442                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4443                                                           int_mode, 1, 1);
4444                       }
4445                     else
4446                       {
4447                         /* Find a suitable multiplier and right shift count
4448                            instead of multiplying with D.  */
4449
4450                         mh = choose_multiplier (d, size, size,
4451                                                 &ml, &post_shift, &dummy);
4452
4453                         /* If the suggested multiplier is more than SIZE bits,
4454                            we can do better for even divisors, using an
4455                            initial right shift.  */
4456                         if (mh != 0 && (d & 1) == 0)
4457                           {
4458                             pre_shift = ctz_or_zero (d);
4459                             mh = choose_multiplier (d >> pre_shift, size,
4460                                                     size - pre_shift,
4461                                                     &ml, &post_shift, &dummy);
4462                             gcc_assert (!mh);
4463                           }
4464                         else
4465                           pre_shift = 0;
4466
4467                         if (mh != 0)
4468                           {
4469                             rtx t1, t2, t3, t4;
4470
4471                             if (post_shift - 1 >= BITS_PER_WORD)
4472                               goto fail1;
4473
4474                             extra_cost
4475                               = (shift_cost (speed, int_mode, post_shift - 1)
4476                                  + shift_cost (speed, int_mode, 1)
4477                                  + 2 * add_cost (speed, int_mode));
4478                             t1 = expmed_mult_highpart
4479                               (int_mode, op0, gen_int_mode (ml, int_mode),
4480                                NULL_RTX, 1, max_cost - extra_cost);
4481                             if (t1 == 0)
4482                               goto fail1;
4483                             t2 = force_operand (gen_rtx_MINUS (int_mode,
4484                                                                op0, t1),
4485                                                 NULL_RTX);
4486                             t3 = expand_shift (RSHIFT_EXPR, int_mode,
4487                                                t2, 1, NULL_RTX, 1);
4488                             t4 = force_operand (gen_rtx_PLUS (int_mode,
4489                                                               t1, t3),
4490                                                 NULL_RTX);
4491                             quotient = expand_shift
4492                               (RSHIFT_EXPR, int_mode, t4,
4493                                post_shift - 1, tquotient, 1);
4494                           }
4495                         else
4496                           {
4497                             rtx t1, t2;
4498
4499                             if (pre_shift >= BITS_PER_WORD
4500                                 || post_shift >= BITS_PER_WORD)
4501                               goto fail1;
4502
4503                             t1 = expand_shift
4504                               (RSHIFT_EXPR, int_mode, op0,
4505                                pre_shift, NULL_RTX, 1);
4506                             extra_cost
4507                               = (shift_cost (speed, int_mode, pre_shift)
4508                                  + shift_cost (speed, int_mode, post_shift));
4509                             t2 = expmed_mult_highpart
4510                               (int_mode, t1,
4511                                gen_int_mode (ml, int_mode),
4512                                NULL_RTX, 1, max_cost - extra_cost);
4513                             if (t2 == 0)
4514                               goto fail1;
4515                             quotient = expand_shift
4516                               (RSHIFT_EXPR, int_mode, t2,
4517                                post_shift, tquotient, 1);
4518                           }
4519                       }
4520                   }
4521                 else            /* Too wide mode to use tricky code */
4522                   break;
4523
4524                 insn = get_last_insn ();
4525                 if (insn != last)
4526                   set_dst_reg_note (insn, REG_EQUAL,
4527                                     gen_rtx_UDIV (int_mode, op0, op1),
4528                                     quotient);
4529               }
4530             else                /* TRUNC_DIV, signed */
4531               {
4532                 unsigned HOST_WIDE_INT ml;
4533                 int lgup, post_shift;
4534                 rtx mlr;
4535                 HOST_WIDE_INT d = INTVAL (op1);
4536                 unsigned HOST_WIDE_INT abs_d;
4537
4538                 /* Not prepared to handle division/remainder by
4539                    0xffffffffffffffff8000000000000000 etc.  */
4540                 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4541                   break;
4542
4543                 /* Since d might be INT_MIN, we have to cast to
4544                    unsigned HOST_WIDE_INT before negating to avoid
4545                    undefined signed overflow.  */
4546                 abs_d = (d >= 0
4547                          ? (unsigned HOST_WIDE_INT) d
4548                          : - (unsigned HOST_WIDE_INT) d);
4549
4550                 /* n rem d = n rem -d */
4551                 if (rem_flag && d < 0)
4552                   {
4553                     d = abs_d;
4554                     op1 = gen_int_mode (abs_d, int_mode);
4555                   }
4556
4557                 if (d == 1)
4558                   quotient = op0;
4559                 else if (d == -1)
4560                   quotient = expand_unop (int_mode, neg_optab, op0,
4561                                           tquotient, 0);
4562                 else if (size <= HOST_BITS_PER_WIDE_INT
4563                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4564                   {
4565                     /* This case is not handled correctly below.  */
4566                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4567                                                 int_mode, 1, 1);
4568                     if (quotient == 0)
4569                       goto fail1;
4570                   }
4571                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4572                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4573                          && (rem_flag
4574                              ? smod_pow2_cheap (speed, int_mode)
4575                              : sdiv_pow2_cheap (speed, int_mode))
4576                          /* We assume that cheap metric is true if the
4577                             optab has an expander for this mode.  */
4578                          && ((optab_handler ((rem_flag ? smod_optab
4579                                               : sdiv_optab),
4580                                              int_mode)
4581                               != CODE_FOR_nothing)
4582                              || (optab_handler (sdivmod_optab, int_mode)
4583                                  != CODE_FOR_nothing)))
4584                   ;
4585                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4586                   {
4587                     if (rem_flag)
4588                       {
4589                         remainder = expand_smod_pow2 (int_mode, op0, d);
4590                         if (remainder)
4591                           return gen_lowpart (mode, remainder);
4592                       }
4593
4594                     if (sdiv_pow2_cheap (speed, int_mode)
4595                         && ((optab_handler (sdiv_optab, int_mode)
4596                              != CODE_FOR_nothing)
4597                             || (optab_handler (sdivmod_optab, int_mode)
4598                                 != CODE_FOR_nothing)))
4599                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4600                                                 int_mode, op0,
4601                                                 gen_int_mode (abs_d,
4602                                                               int_mode),
4603                                                 NULL_RTX, 0);
4604                     else
4605                       quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4606
4607                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4608                        negate the quotient.  */
4609                     if (d < 0)
4610                       {
4611                         insn = get_last_insn ();
4612                         if (insn != last
4613                             && abs_d < (HOST_WIDE_INT_1U
4614                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4615                           set_dst_reg_note (insn, REG_EQUAL,
4616                                             gen_rtx_DIV (int_mode, op0,
4617                                                          gen_int_mode
4618                                                            (abs_d,
4619                                                             int_mode)),
4620                                             quotient);
4621
4622                         quotient = expand_unop (int_mode, neg_optab,
4623                                                 quotient, quotient, 0);
4624                       }
4625                   }
4626                 else if (size <= HOST_BITS_PER_WIDE_INT)
4627                   {
4628                     choose_multiplier (abs_d, size, size - 1,
4629                                        &ml, &post_shift, &lgup);
4630                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4631                       {
4632                         rtx t1, t2, t3;
4633
4634                         if (post_shift >= BITS_PER_WORD
4635                             || size - 1 >= BITS_PER_WORD)
4636                           goto fail1;
4637
4638                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4639                                       + shift_cost (speed, int_mode, size - 1)
4640                                       + add_cost (speed, int_mode));
4641                         t1 = expmed_mult_highpart
4642                           (int_mode, op0, gen_int_mode (ml, int_mode),
4643                            NULL_RTX, 0, max_cost - extra_cost);
4644                         if (t1 == 0)
4645                           goto fail1;
4646                         t2 = expand_shift
4647                           (RSHIFT_EXPR, int_mode, t1,
4648                            post_shift, NULL_RTX, 0);
4649                         t3 = expand_shift
4650                           (RSHIFT_EXPR, int_mode, op0,
4651                            size - 1, NULL_RTX, 0);
4652                         if (d < 0)
4653                           quotient
4654                             = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4655                                              tquotient);
4656                         else
4657                           quotient
4658                             = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4659                                              tquotient);
4660                       }
4661                     else
4662                       {
4663                         rtx t1, t2, t3, t4;
4664
4665                         if (post_shift >= BITS_PER_WORD
4666                             || size - 1 >= BITS_PER_WORD)
4667                           goto fail1;
4668
4669                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4670                         mlr = gen_int_mode (ml, int_mode);
4671                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4672                                       + shift_cost (speed, int_mode, size - 1)
4673                                       + 2 * add_cost (speed, int_mode));
4674                         t1 = expmed_mult_highpart (int_mode, op0, mlr,
4675                                                    NULL_RTX, 0,
4676                                                    max_cost - extra_cost);
4677                         if (t1 == 0)
4678                           goto fail1;
4679                         t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4680                                             NULL_RTX);
4681                         t3 = expand_shift
4682                           (RSHIFT_EXPR, int_mode, t2,
4683                            post_shift, NULL_RTX, 0);
4684                         t4 = expand_shift
4685                           (RSHIFT_EXPR, int_mode, op0,
4686                            size - 1, NULL_RTX, 0);
4687                         if (d < 0)
4688                           quotient
4689                             = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4690                                              tquotient);
4691                         else
4692                           quotient
4693                             = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4694                                              tquotient);
4695                       }
4696                   }
4697                 else            /* Too wide mode to use tricky code */
4698                   break;
4699
4700                 insn = get_last_insn ();
4701                 if (insn != last)
4702                   set_dst_reg_note (insn, REG_EQUAL,
4703                                     gen_rtx_DIV (int_mode, op0, op1),
4704                                     quotient);
4705               }
4706             break;
4707           }
4708       fail1:
4709         delete_insns_since (last);
4710         break;
4711
4712       case FLOOR_DIV_EXPR:
4713       case FLOOR_MOD_EXPR:
4714       /* We will come here only for signed operations.  */
4715         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4716           {
4717             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4718             int size = GET_MODE_BITSIZE (int_mode);
4719             unsigned HOST_WIDE_INT mh, ml;
4720             int pre_shift, lgup, post_shift;
4721             HOST_WIDE_INT d = INTVAL (op1);
4722
4723             if (d > 0)
4724               {
4725                 /* We could just as easily deal with negative constants here,
4726                    but it does not seem worth the trouble for GCC 2.6.  */
4727                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4728                   {
4729                     pre_shift = floor_log2 (d);
4730                     if (rem_flag)
4731                       {
4732                         unsigned HOST_WIDE_INT mask
4733                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4734                         remainder = expand_binop
4735                           (int_mode, and_optab, op0,
4736                            gen_int_mode (mask, int_mode),
4737                            remainder, 0, methods);
4738                         if (remainder)
4739                           return gen_lowpart (mode, remainder);
4740                       }
4741                     quotient = expand_shift
4742                       (RSHIFT_EXPR, int_mode, op0,
4743                        pre_shift, tquotient, 0);
4744                   }
4745                 else
4746                   {
4747                     rtx t1, t2, t3, t4;
4748
4749                     mh = choose_multiplier (d, size, size - 1,
4750                                             &ml, &post_shift, &lgup);
4751                     gcc_assert (!mh);
4752
4753                     if (post_shift < BITS_PER_WORD
4754                         && size - 1 < BITS_PER_WORD)
4755                       {
4756                         t1 = expand_shift
4757                           (RSHIFT_EXPR, int_mode, op0,
4758                            size - 1, NULL_RTX, 0);
4759                         t2 = expand_binop (int_mode, xor_optab, op0, t1,
4760                                            NULL_RTX, 0, OPTAB_WIDEN);
4761                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4762                                       + shift_cost (speed, int_mode, size - 1)
4763                                       + 2 * add_cost (speed, int_mode));
4764                         t3 = expmed_mult_highpart
4765                           (int_mode, t2, gen_int_mode (ml, int_mode),
4766                            NULL_RTX, 1, max_cost - extra_cost);
4767                         if (t3 != 0)
4768                           {
4769                             t4 = expand_shift
4770                               (RSHIFT_EXPR, int_mode, t3,
4771                                post_shift, NULL_RTX, 1);
4772                             quotient = expand_binop (int_mode, xor_optab,
4773                                                      t4, t1, tquotient, 0,
4774                                                      OPTAB_WIDEN);
4775                           }
4776                       }
4777                   }
4778               }
4779             else
4780               {
4781                 rtx nsign, t1, t2, t3, t4;
4782                 t1 = force_operand (gen_rtx_PLUS (int_mode,
4783                                                   op0, constm1_rtx), NULL_RTX);
4784                 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4785                                    0, OPTAB_WIDEN);
4786                 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4787                                       size - 1, NULL_RTX, 0);
4788                 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4789                                     NULL_RTX);
4790                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4791                                     NULL_RTX, 0);
4792                 if (t4)
4793                   {
4794                     rtx t5;
4795                     t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4796                                       NULL_RTX, 0);
4797                     quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4798                                               tquotient);
4799                   }
4800               }
4801           }
4802
4803         if (quotient != 0)
4804           break;
4805         delete_insns_since (last);
4806
4807         /* Try using an instruction that produces both the quotient and
4808            remainder, using truncation.  We can easily compensate the quotient
4809            or remainder to get floor rounding, once we have the remainder.
4810            Notice that we compute also the final remainder value here,
4811            and return the result right away.  */
4812         if (target == 0 || GET_MODE (target) != compute_mode)
4813           target = gen_reg_rtx (compute_mode);
4814
4815         if (rem_flag)
4816           {
4817             remainder
4818               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4819             quotient = gen_reg_rtx (compute_mode);
4820           }
4821         else
4822           {
4823             quotient
4824               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4825             remainder = gen_reg_rtx (compute_mode);
4826           }
4827
4828         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4829                                  quotient, remainder, 0))
4830           {
4831             /* This could be computed with a branch-less sequence.
4832                Save that for later.  */
4833             rtx tem;
4834             rtx_code_label *label = gen_label_rtx ();
4835             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4836             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4837                                 NULL_RTX, 0, OPTAB_WIDEN);
4838             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4839             expand_dec (quotient, const1_rtx);
4840             expand_inc (remainder, op1);
4841             emit_label (label);
4842             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4843           }
4844
4845         /* No luck with division elimination or divmod.  Have to do it
4846            by conditionally adjusting op0 *and* the result.  */
4847         {
4848           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4849           rtx adjusted_op0;
4850           rtx tem;
4851
4852           quotient = gen_reg_rtx (compute_mode);
4853           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4854           label1 = gen_label_rtx ();
4855           label2 = gen_label_rtx ();
4856           label3 = gen_label_rtx ();
4857           label4 = gen_label_rtx ();
4858           label5 = gen_label_rtx ();
4859           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4860           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4861           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4862                               quotient, 0, methods);
4863           if (tem != quotient)
4864             emit_move_insn (quotient, tem);
4865           emit_jump_insn (targetm.gen_jump (label5));
4866           emit_barrier ();
4867           emit_label (label1);
4868           expand_inc (adjusted_op0, const1_rtx);
4869           emit_jump_insn (targetm.gen_jump (label4));
4870           emit_barrier ();
4871           emit_label (label2);
4872           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4873           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4874                               quotient, 0, methods);
4875           if (tem != quotient)
4876             emit_move_insn (quotient, tem);
4877           emit_jump_insn (targetm.gen_jump (label5));
4878           emit_barrier ();
4879           emit_label (label3);
4880           expand_dec (adjusted_op0, const1_rtx);
4881           emit_label (label4);
4882           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4883                               quotient, 0, methods);
4884           if (tem != quotient)
4885             emit_move_insn (quotient, tem);
4886           expand_dec (quotient, const1_rtx);
4887           emit_label (label5);
4888         }
4889         break;
4890
4891       case CEIL_DIV_EXPR:
4892       case CEIL_MOD_EXPR:
4893         if (unsignedp)
4894           {
4895             if (op1_is_constant
4896                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4897                 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4898                     || INTVAL (op1) >= 0))
4899               {
4900                 scalar_int_mode int_mode
4901                   = as_a <scalar_int_mode> (compute_mode);
4902                 rtx t1, t2, t3;
4903                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4904                 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4905                                    floor_log2 (d), tquotient, 1);
4906                 t2 = expand_binop (int_mode, and_optab, op0,
4907                                    gen_int_mode (d - 1, int_mode),
4908                                    NULL_RTX, 1, methods);
4909                 t3 = gen_reg_rtx (int_mode);
4910                 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4911                 if (t3 == 0)
4912                   {
4913                     rtx_code_label *lab;
4914                     lab = gen_label_rtx ();
4915                     do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4916                     expand_inc (t1, const1_rtx);
4917                     emit_label (lab);
4918                     quotient = t1;
4919                   }
4920                 else
4921                   quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4922                                             tquotient);
4923                 break;
4924               }
4925
4926             /* Try using an instruction that produces both the quotient and
4927                remainder, using truncation.  We can easily compensate the
4928                quotient or remainder to get ceiling rounding, once we have the
4929                remainder.  Notice that we compute also the final remainder
4930                value here, and return the result right away.  */
4931             if (target == 0 || GET_MODE (target) != compute_mode)
4932               target = gen_reg_rtx (compute_mode);
4933
4934             if (rem_flag)
4935               {
4936                 remainder = (REG_P (target)
4937                              ? target : gen_reg_rtx (compute_mode));
4938                 quotient = gen_reg_rtx (compute_mode);
4939               }
4940             else
4941               {
4942                 quotient = (REG_P (target)
4943                             ? target : gen_reg_rtx (compute_mode));
4944                 remainder = gen_reg_rtx (compute_mode);
4945               }
4946
4947             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4948                                      remainder, 1))
4949               {
4950                 /* This could be computed with a branch-less sequence.
4951                    Save that for later.  */
4952                 rtx_code_label *label = gen_label_rtx ();
4953                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4954                                  compute_mode, label);
4955                 expand_inc (quotient, const1_rtx);
4956                 expand_dec (remainder, op1);
4957                 emit_label (label);
4958                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4959               }
4960
4961             /* No luck with division elimination or divmod.  Have to do it
4962                by conditionally adjusting op0 *and* the result.  */
4963             {
4964               rtx_code_label *label1, *label2;
4965               rtx adjusted_op0, tem;
4966
4967               quotient = gen_reg_rtx (compute_mode);
4968               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4969               label1 = gen_label_rtx ();
4970               label2 = gen_label_rtx ();
4971               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4972                                compute_mode, label1);
4973               emit_move_insn  (quotient, const0_rtx);
4974               emit_jump_insn (targetm.gen_jump (label2));
4975               emit_barrier ();
4976               emit_label (label1);
4977               expand_dec (adjusted_op0, const1_rtx);
4978               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4979                                   quotient, 1, methods);
4980               if (tem != quotient)
4981                 emit_move_insn (quotient, tem);
4982               expand_inc (quotient, const1_rtx);
4983               emit_label (label2);
4984             }
4985           }
4986         else /* signed */
4987           {
4988             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4989                 && INTVAL (op1) >= 0)
4990               {
4991                 /* This is extremely similar to the code for the unsigned case
4992                    above.  For 2.7 we should merge these variants, but for
4993                    2.6.1 I don't want to touch the code for unsigned since that
4994                    get used in C.  The signed case will only be used by other
4995                    languages (Ada).  */
4996
4997                 rtx t1, t2, t3;
4998                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4999                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5000                                    floor_log2 (d), tquotient, 0);
5001                 t2 = expand_binop (compute_mode, and_optab, op0,
5002                                    gen_int_mode (d - 1, compute_mode),
5003                                    NULL_RTX, 1, methods);
5004                 t3 = gen_reg_rtx (compute_mode);
5005                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5006                                       compute_mode, 1, 1);
5007                 if (t3 == 0)
5008                   {
5009                     rtx_code_label *lab;
5010                     lab = gen_label_rtx ();
5011                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5012                     expand_inc (t1, const1_rtx);
5013                     emit_label (lab);
5014                     quotient = t1;
5015                   }
5016                 else
5017                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
5018                                                           t1, t3),
5019                                             tquotient);
5020                 break;
5021               }
5022
5023             /* Try using an instruction that produces both the quotient and
5024                remainder, using truncation.  We can easily compensate the
5025                quotient or remainder to get ceiling rounding, once we have the
5026                remainder.  Notice that we compute also the final remainder
5027                value here, and return the result right away.  */
5028             if (target == 0 || GET_MODE (target) != compute_mode)
5029               target = gen_reg_rtx (compute_mode);
5030             if (rem_flag)
5031               {
5032                 remainder= (REG_P (target)
5033                             ? target : gen_reg_rtx (compute_mode));
5034                 quotient = gen_reg_rtx (compute_mode);
5035               }
5036             else
5037               {
5038                 quotient = (REG_P (target)
5039                             ? target : gen_reg_rtx (compute_mode));
5040                 remainder = gen_reg_rtx (compute_mode);
5041               }
5042
5043             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5044                                      remainder, 0))
5045               {
5046                 /* This could be computed with a branch-less sequence.
5047                    Save that for later.  */
5048                 rtx tem;
5049                 rtx_code_label *label = gen_label_rtx ();
5050                 do_cmp_and_jump (remainder, const0_rtx, EQ,
5051                                  compute_mode, label);
5052                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5053                                     NULL_RTX, 0, OPTAB_WIDEN);
5054                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5055                 expand_inc (quotient, const1_rtx);
5056                 expand_dec (remainder, op1);
5057                 emit_label (label);
5058                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5059               }
5060
5061             /* No luck with division elimination or divmod.  Have to do it
5062                by conditionally adjusting op0 *and* the result.  */
5063             {
5064               rtx_code_label *label1, *label2, *label3, *label4, *label5;
5065               rtx adjusted_op0;
5066               rtx tem;
5067
5068               quotient = gen_reg_rtx (compute_mode);
5069               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5070               label1 = gen_label_rtx ();
5071               label2 = gen_label_rtx ();
5072               label3 = gen_label_rtx ();
5073               label4 = gen_label_rtx ();
5074               label5 = gen_label_rtx ();
5075               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5076               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5077                                compute_mode, label1);
5078               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5079                                   quotient, 0, methods);
5080               if (tem != quotient)
5081                 emit_move_insn (quotient, tem);
5082               emit_jump_insn (targetm.gen_jump (label5));
5083               emit_barrier ();
5084               emit_label (label1);
5085               expand_dec (adjusted_op0, const1_rtx);
5086               emit_jump_insn (targetm.gen_jump (label4));
5087               emit_barrier ();
5088               emit_label (label2);
5089               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5090                                compute_mode, label3);
5091               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5092                                   quotient, 0, methods);
5093               if (tem != quotient)
5094                 emit_move_insn (quotient, tem);
5095               emit_jump_insn (targetm.gen_jump (label5));
5096               emit_barrier ();
5097               emit_label (label3);
5098               expand_inc (adjusted_op0, const1_rtx);
5099               emit_label (label4);
5100               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5101                                   quotient, 0, methods);
5102               if (tem != quotient)
5103                 emit_move_insn (quotient, tem);
5104               expand_inc (quotient, const1_rtx);
5105               emit_label (label5);
5106             }
5107           }
5108         break;
5109
5110       case EXACT_DIV_EXPR:
5111         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5112           {
5113             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5114             int size = GET_MODE_BITSIZE (int_mode);
5115             HOST_WIDE_INT d = INTVAL (op1);
5116             unsigned HOST_WIDE_INT ml;
5117             int pre_shift;
5118             rtx t1;
5119
5120             pre_shift = ctz_or_zero (d);
5121             ml = invert_mod2n (d >> pre_shift, size);
5122             t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5123                                pre_shift, NULL_RTX, unsignedp);
5124             quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5125                                     NULL_RTX, 1);
5126
5127             insn = get_last_insn ();
5128             set_dst_reg_note (insn, REG_EQUAL,
5129                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5130                                               int_mode, op0, op1),
5131                               quotient);
5132           }
5133         break;
5134
5135       case ROUND_DIV_EXPR:
5136       case ROUND_MOD_EXPR:
5137         if (unsignedp)
5138           {
5139             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5140             rtx tem;
5141             rtx_code_label *label;
5142             label = gen_label_rtx ();
5143             quotient = gen_reg_rtx (int_mode);
5144             remainder = gen_reg_rtx (int_mode);
5145             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5146               {
5147                 rtx tem;
5148                 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5149                                          quotient, 1, methods);
5150                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5151                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5152                                           remainder, 1, methods);
5153               }
5154             tem = plus_constant (int_mode, op1, -1);
5155             tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5156             do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5157             expand_inc (quotient, const1_rtx);
5158             expand_dec (remainder, op1);
5159             emit_label (label);
5160           }
5161         else
5162           {
5163             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5164             int size = GET_MODE_BITSIZE (int_mode);
5165             rtx abs_rem, abs_op1, tem, mask;
5166             rtx_code_label *label;
5167             label = gen_label_rtx ();
5168             quotient = gen_reg_rtx (int_mode);
5169             remainder = gen_reg_rtx (int_mode);
5170             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5171               {
5172                 rtx tem;
5173                 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5174                                          quotient, 0, methods);
5175                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5176                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5177                                           remainder, 0, methods);
5178               }
5179             abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5180             abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5181             tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5182                                 1, NULL_RTX, 1);
5183             do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5184             tem = expand_binop (int_mode, xor_optab, op0, op1,
5185                                 NULL_RTX, 0, OPTAB_WIDEN);
5186             mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5187                                  size - 1, NULL_RTX, 0);
5188             tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5189                                 NULL_RTX, 0, OPTAB_WIDEN);
5190             tem = expand_binop (int_mode, sub_optab, tem, mask,
5191                                 NULL_RTX, 0, OPTAB_WIDEN);
5192             expand_inc (quotient, tem);
5193             tem = expand_binop (int_mode, xor_optab, mask, op1,
5194                                 NULL_RTX, 0, OPTAB_WIDEN);
5195             tem = expand_binop (int_mode, sub_optab, tem, mask,
5196                                 NULL_RTX, 0, OPTAB_WIDEN);
5197             expand_dec (remainder, tem);
5198             emit_label (label);
5199           }
5200         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5201
5202       default:
5203         gcc_unreachable ();
5204       }
5205
5206   if (quotient == 0)
5207     {
5208       if (target && GET_MODE (target) != compute_mode)
5209         target = 0;
5210
5211       if (rem_flag)
5212         {
5213           /* Try to produce the remainder without producing the quotient.
5214              If we seem to have a divmod pattern that does not require widening,
5215              don't try widening here.  We should really have a WIDEN argument
5216              to expand_twoval_binop, since what we'd really like to do here is
5217              1) try a mod insn in compute_mode
5218              2) try a divmod insn in compute_mode
5219              3) try a div insn in compute_mode and multiply-subtract to get
5220                 remainder
5221              4) try the same things with widening allowed.  */
5222           remainder
5223             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5224                                  op0, op1, target,
5225                                  unsignedp,
5226                                  ((optab_handler (optab2, compute_mode)
5227                                    != CODE_FOR_nothing)
5228                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5229           if (remainder == 0)
5230             {
5231               /* No luck there.  Can we do remainder and divide at once
5232                  without a library call?  */
5233               remainder = gen_reg_rtx (compute_mode);
5234               if (! expand_twoval_binop ((unsignedp
5235                                           ? udivmod_optab
5236                                           : sdivmod_optab),
5237                                          op0, op1,
5238                                          NULL_RTX, remainder, unsignedp))
5239                 remainder = 0;
5240             }
5241
5242           if (remainder)
5243             return gen_lowpart (mode, remainder);
5244         }
5245
5246       /* Produce the quotient.  Try a quotient insn, but not a library call.
5247          If we have a divmod in this mode, use it in preference to widening
5248          the div (for this test we assume it will not fail). Note that optab2
5249          is set to the one of the two optabs that the call below will use.  */
5250       quotient
5251         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5252                              op0, op1, rem_flag ? NULL_RTX : target,
5253                              unsignedp,
5254                              ((optab_handler (optab2, compute_mode)
5255                                != CODE_FOR_nothing)
5256                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5257
5258       if (quotient == 0)
5259         {
5260           /* No luck there.  Try a quotient-and-remainder insn,
5261              keeping the quotient alone.  */
5262           quotient = gen_reg_rtx (compute_mode);
5263           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5264                                      op0, op1,
5265                                      quotient, NULL_RTX, unsignedp))
5266             {
5267               quotient = 0;
5268               if (! rem_flag)
5269                 /* Still no luck.  If we are not computing the remainder,
5270                    use a library call for the quotient.  */
5271                 quotient = sign_expand_binop (compute_mode,
5272                                               udiv_optab, sdiv_optab,
5273                                               op0, op1, target,
5274                                               unsignedp, methods);
5275             }
5276         }
5277     }
5278
5279   if (rem_flag)
5280     {
5281       if (target && GET_MODE (target) != compute_mode)
5282         target = 0;
5283
5284       if (quotient == 0)
5285         {
5286           /* No divide instruction either.  Use library for remainder.  */
5287           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5288                                          op0, op1, target,
5289                                          unsignedp, methods);
5290           /* No remainder function.  Try a quotient-and-remainder
5291              function, keeping the remainder.  */
5292           if (!remainder
5293               && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5294             {
5295               remainder = gen_reg_rtx (compute_mode);
5296               if (!expand_twoval_binop_libfunc
5297                   (unsignedp ? udivmod_optab : sdivmod_optab,
5298                    op0, op1,
5299                    NULL_RTX, remainder,
5300                    unsignedp ? UMOD : MOD))
5301                 remainder = NULL_RTX;
5302             }
5303         }
5304       else
5305         {
5306           /* We divided.  Now finish doing X - Y * (X / Y).  */
5307           remainder = expand_mult (compute_mode, quotient, op1,
5308                                    NULL_RTX, unsignedp);
5309           remainder = expand_binop (compute_mode, sub_optab, op0,
5310                                     remainder, target, unsignedp,
5311                                     methods);
5312         }
5313     }
5314
5315   if (methods != OPTAB_LIB_WIDEN
5316       && (rem_flag ? remainder : quotient) == NULL_RTX)
5317     return NULL_RTX;
5318
5319   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5320 }
5321 \f
5322 /* Return a tree node with data type TYPE, describing the value of X.
5323    Usually this is an VAR_DECL, if there is no obvious better choice.
5324    X may be an expression, however we only support those expressions
5325    generated by loop.c.  */
5326
5327 tree
5328 make_tree (tree type, rtx x)
5329 {
5330   tree t;
5331
5332   switch (GET_CODE (x))
5333     {
5334     case CONST_INT:
5335     case CONST_WIDE_INT:
5336       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5337       return t;
5338
5339     case CONST_DOUBLE:
5340       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5341       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5342         t = wide_int_to_tree (type,
5343                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5344                                                     HOST_BITS_PER_WIDE_INT * 2));
5345       else
5346         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5347
5348       return t;
5349
5350     case CONST_VECTOR:
5351       {
5352         unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5353         unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5354         tree itype = TREE_TYPE (type);
5355
5356         /* Build a tree with vector elements.  */
5357         tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5358         unsigned int count = elts.encoded_nelts ();
5359         for (unsigned int i = 0; i < count; ++i)
5360           {
5361             rtx elt = CONST_VECTOR_ELT (x, i);
5362             elts.quick_push (make_tree (itype, elt));
5363           }
5364
5365         return elts.build ();
5366       }
5367
5368     case PLUS:
5369       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5370                           make_tree (type, XEXP (x, 1)));
5371
5372     case MINUS:
5373       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5374                           make_tree (type, XEXP (x, 1)));
5375
5376     case NEG:
5377       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5378
5379     case MULT:
5380       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5381                           make_tree (type, XEXP (x, 1)));
5382
5383     case ASHIFT:
5384       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5385                           make_tree (type, XEXP (x, 1)));
5386
5387     case LSHIFTRT:
5388       t = unsigned_type_for (type);
5389       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5390                                          make_tree (t, XEXP (x, 0)),
5391                                          make_tree (type, XEXP (x, 1))));
5392
5393     case ASHIFTRT:
5394       t = signed_type_for (type);
5395       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5396                                          make_tree (t, XEXP (x, 0)),
5397                                          make_tree (type, XEXP (x, 1))));
5398
5399     case DIV:
5400       if (TREE_CODE (type) != REAL_TYPE)
5401         t = signed_type_for (type);
5402       else
5403         t = type;
5404
5405       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5406                                          make_tree (t, XEXP (x, 0)),
5407                                          make_tree (t, XEXP (x, 1))));
5408     case UDIV:
5409       t = unsigned_type_for (type);
5410       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5411                                          make_tree (t, XEXP (x, 0)),
5412                                          make_tree (t, XEXP (x, 1))));
5413
5414     case SIGN_EXTEND:
5415     case ZERO_EXTEND:
5416       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5417                                           GET_CODE (x) == ZERO_EXTEND);
5418       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5419
5420     case CONST:
5421       return make_tree (type, XEXP (x, 0));
5422
5423     case SYMBOL_REF:
5424       t = SYMBOL_REF_DECL (x);
5425       if (t)
5426         return fold_convert (type, build_fold_addr_expr (t));
5427       /* fall through.  */
5428
5429     default:
5430       if (CONST_POLY_INT_P (x))
5431         return wide_int_to_tree (t, const_poly_int_value (x));
5432
5433       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5434
5435       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5436          address mode to pointer mode.  */
5437       if (POINTER_TYPE_P (type))
5438         x = convert_memory_address_addr_space
5439           (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5440
5441       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5442          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5443       t->decl_with_rtl.rtl = x;
5444
5445       return t;
5446     }
5447 }
5448 \f
5449 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5450    and returning TARGET.
5451
5452    If TARGET is 0, a pseudo-register or constant is returned.  */
5453
5454 rtx
5455 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5456 {
5457   rtx tem = 0;
5458
5459   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5460     tem = simplify_binary_operation (AND, mode, op0, op1);
5461   if (tem == 0)
5462     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5463
5464   if (target == 0)
5465     target = tem;
5466   else if (tem != target)
5467     emit_move_insn (target, tem);
5468   return target;
5469 }
5470
5471 /* Helper function for emit_store_flag.  */
5472 rtx
5473 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5474              machine_mode mode, machine_mode compare_mode,
5475              int unsignedp, rtx x, rtx y, int normalizep,
5476              machine_mode target_mode)
5477 {
5478   class expand_operand ops[4];
5479   rtx op0, comparison, subtarget;
5480   rtx_insn *last;
5481   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5482   scalar_int_mode int_target_mode;
5483
5484   last = get_last_insn ();
5485   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5486   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5487   if (!x || !y)
5488     {
5489       delete_insns_since (last);
5490       return NULL_RTX;
5491     }
5492
5493   if (target_mode == VOIDmode)
5494     int_target_mode = result_mode;
5495   else
5496     int_target_mode = as_a <scalar_int_mode> (target_mode);
5497   if (!target)
5498     target = gen_reg_rtx (int_target_mode);
5499
5500   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5501
5502   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5503   create_fixed_operand (&ops[1], comparison);
5504   create_fixed_operand (&ops[2], x);
5505   create_fixed_operand (&ops[3], y);
5506   if (!maybe_expand_insn (icode, 4, ops))
5507     {
5508       delete_insns_since (last);
5509       return NULL_RTX;
5510     }
5511   subtarget = ops[0].value;
5512
5513   /* If we are converting to a wider mode, first convert to
5514      INT_TARGET_MODE, then normalize.  This produces better combining
5515      opportunities on machines that have a SIGN_EXTRACT when we are
5516      testing a single bit.  This mostly benefits the 68k.
5517
5518      If STORE_FLAG_VALUE does not have the sign bit set when
5519      interpreted in MODE, we can do this conversion as unsigned, which
5520      is usually more efficient.  */
5521   if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5522     {
5523       gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5524                   || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5525
5526       bool unsignedp = (STORE_FLAG_VALUE >= 0);
5527       convert_move (target, subtarget, unsignedp);
5528
5529       op0 = target;
5530       result_mode = int_target_mode;
5531     }
5532   else
5533     op0 = subtarget;
5534
5535   /* If we want to keep subexpressions around, don't reuse our last
5536      target.  */
5537   if (optimize)
5538     subtarget = 0;
5539
5540   /* Now normalize to the proper value in MODE.  Sometimes we don't
5541      have to do anything.  */
5542   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5543     ;
5544   /* STORE_FLAG_VALUE might be the most negative number, so write
5545      the comparison this way to avoid a compiler-time warning.  */
5546   else if (- normalizep == STORE_FLAG_VALUE)
5547     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5548
5549   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5550      it hard to use a value of just the sign bit due to ANSI integer
5551      constant typing rules.  */
5552   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5553     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5554                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5555                         normalizep == 1);
5556   else
5557     {
5558       gcc_assert (STORE_FLAG_VALUE & 1);
5559
5560       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5561       if (normalizep == -1)
5562         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5563     }
5564
5565   /* If we were converting to a smaller mode, do the conversion now.  */
5566   if (int_target_mode != result_mode)
5567     {
5568       convert_move (target, op0, 0);
5569       return target;
5570     }
5571   else
5572     return op0;
5573 }
5574
5575
5576 /* A subroutine of emit_store_flag only including "tricks" that do not
5577    need a recursive call.  These are kept separate to avoid infinite
5578    loops.  */
5579
5580 static rtx
5581 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5582                    machine_mode mode, int unsignedp, int normalizep,
5583                    machine_mode target_mode)
5584 {
5585   rtx subtarget;
5586   enum insn_code icode;
5587   machine_mode compare_mode;
5588   enum mode_class mclass;
5589   enum rtx_code scode;
5590
5591   if (unsignedp)
5592     code = unsigned_condition (code);
5593   scode = swap_condition (code);
5594
5595   /* If one operand is constant, make it the second one.  Only do this
5596      if the other operand is not constant as well.  */
5597
5598   if (swap_commutative_operands_p (op0, op1))
5599     {
5600       std::swap (op0, op1);
5601       code = swap_condition (code);
5602     }
5603
5604   if (mode == VOIDmode)
5605     mode = GET_MODE (op0);
5606
5607   if (CONST_SCALAR_INT_P (op1))
5608     canonicalize_comparison (mode, &code, &op1);
5609
5610   /* For some comparisons with 1 and -1, we can convert this to
5611      comparisons with zero.  This will often produce more opportunities for
5612      store-flag insns.  */
5613
5614   switch (code)
5615     {
5616     case LT:
5617       if (op1 == const1_rtx)
5618         op1 = const0_rtx, code = LE;
5619       break;
5620     case LE:
5621       if (op1 == constm1_rtx)
5622         op1 = const0_rtx, code = LT;
5623       break;
5624     case GE:
5625       if (op1 == const1_rtx)
5626         op1 = const0_rtx, code = GT;
5627       break;
5628     case GT:
5629       if (op1 == constm1_rtx)
5630         op1 = const0_rtx, code = GE;
5631       break;
5632     case GEU:
5633       if (op1 == const1_rtx)
5634         op1 = const0_rtx, code = NE;
5635       break;
5636     case LTU:
5637       if (op1 == const1_rtx)
5638         op1 = const0_rtx, code = EQ;
5639       break;
5640     default:
5641       break;
5642     }
5643
5644   /* If we are comparing a double-word integer with zero or -1, we can
5645      convert the comparison into one involving a single word.  */
5646   scalar_int_mode int_mode;
5647   if (is_int_mode (mode, &int_mode)
5648       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5649       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5650     {
5651       rtx tem;
5652       if ((code == EQ || code == NE)
5653           && (op1 == const0_rtx || op1 == constm1_rtx))
5654         {
5655           rtx op00, op01;
5656
5657           /* Do a logical OR or AND of the two words and compare the
5658              result.  */
5659           op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5660           op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5661           tem = expand_binop (word_mode,
5662                               op1 == const0_rtx ? ior_optab : and_optab,
5663                               op00, op01, NULL_RTX, unsignedp,
5664                               OPTAB_DIRECT);
5665
5666           if (tem != 0)
5667             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5668                                    unsignedp, normalizep);
5669         }
5670       else if ((code == LT || code == GE) && op1 == const0_rtx)
5671         {
5672           rtx op0h;
5673
5674           /* If testing the sign bit, can just test on high word.  */
5675           op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5676                                       subreg_highpart_offset (word_mode,
5677                                                               int_mode));
5678           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5679                                  unsignedp, normalizep);
5680         }
5681       else
5682         tem = NULL_RTX;
5683
5684       if (tem)
5685         {
5686           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5687             return tem;
5688           if (!target)
5689             target = gen_reg_rtx (target_mode);
5690
5691           convert_move (target, tem,
5692                         !val_signbit_known_set_p (word_mode,
5693                                                   (normalizep ? normalizep
5694                                                    : STORE_FLAG_VALUE)));
5695           return target;
5696         }
5697     }
5698
5699   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5700      complement of A (for GE) and shifting the sign bit to the low bit.  */
5701   if (op1 == const0_rtx && (code == LT || code == GE)
5702       && is_int_mode (mode, &int_mode)
5703       && (normalizep || STORE_FLAG_VALUE == 1
5704           || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5705     {
5706       scalar_int_mode int_target_mode;
5707       subtarget = target;
5708
5709       if (!target)
5710         int_target_mode = int_mode;
5711       else
5712         {
5713           /* If the result is to be wider than OP0, it is best to convert it
5714              first.  If it is to be narrower, it is *incorrect* to convert it
5715              first.  */
5716           int_target_mode = as_a <scalar_int_mode> (target_mode);
5717           if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5718             {
5719               op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5720               int_mode = int_target_mode;
5721             }
5722         }
5723
5724       if (int_target_mode != int_mode)
5725         subtarget = 0;
5726
5727       if (code == GE)
5728         op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5729                            ((STORE_FLAG_VALUE == 1 || normalizep)
5730                             ? 0 : subtarget), 0);
5731
5732       if (STORE_FLAG_VALUE == 1 || normalizep)
5733         /* If we are supposed to produce a 0/1 value, we want to do
5734            a logical shift from the sign bit to the low-order bit; for
5735            a -1/0 value, we do an arithmetic shift.  */
5736         op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5737                             GET_MODE_BITSIZE (int_mode) - 1,
5738                             subtarget, normalizep != -1);
5739
5740       if (int_mode != int_target_mode)
5741         op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5742
5743       return op0;
5744     }
5745
5746   mclass = GET_MODE_CLASS (mode);
5747   FOR_EACH_MODE_FROM (compare_mode, mode)
5748     {
5749      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5750      icode = optab_handler (cstore_optab, optab_mode);
5751      if (icode != CODE_FOR_nothing)
5752         {
5753           do_pending_stack_adjust ();
5754           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5755                                  unsignedp, op0, op1, normalizep, target_mode);
5756           if (tem)
5757             return tem;
5758
5759           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5760             {
5761               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5762                                  unsignedp, op1, op0, normalizep, target_mode);
5763               if (tem)
5764                 return tem;
5765             }
5766           break;
5767         }
5768     }
5769
5770   return 0;
5771 }
5772
5773 /* Subroutine of emit_store_flag that handles cases in which the operands
5774    are scalar integers.  SUBTARGET is the target to use for temporary
5775    operations and TRUEVAL is the value to store when the condition is
5776    true.  All other arguments are as for emit_store_flag.  */
5777
5778 rtx
5779 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5780                      rtx op1, scalar_int_mode mode, int unsignedp,
5781                      int normalizep, rtx trueval)
5782 {
5783   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5784   rtx_insn *last = get_last_insn ();
5785
5786   /* If this is an equality comparison of integers, we can try to exclusive-or
5787      (or subtract) the two operands and use a recursive call to try the
5788      comparison with zero.  Don't do any of these cases if branches are
5789      very cheap.  */
5790
5791   if ((code == EQ || code == NE) && op1 != const0_rtx)
5792     {
5793       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5794                               OPTAB_WIDEN);
5795
5796       if (tem == 0)
5797         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5798                             OPTAB_WIDEN);
5799       if (tem != 0)
5800         tem = emit_store_flag (target, code, tem, const0_rtx,
5801                                mode, unsignedp, normalizep);
5802       if (tem != 0)
5803         return tem;
5804
5805       delete_insns_since (last);
5806     }
5807
5808   /* For integer comparisons, try the reverse comparison.  However, for
5809      small X and if we'd have anyway to extend, implementing "X != 0"
5810      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5811   rtx_code rcode = reverse_condition (code);
5812   if (can_compare_p (rcode, mode, ccp_store_flag)
5813       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5814             && code == NE
5815             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5816             && op1 == const0_rtx))
5817     {
5818       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5819                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5820
5821       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5822       if (want_add
5823           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5824                        optimize_insn_for_speed_p ()) == 0)
5825         {
5826           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5827                                        STORE_FLAG_VALUE, target_mode);
5828           if (tem != 0)
5829             tem = expand_binop (target_mode, add_optab, tem,
5830                                 gen_int_mode (normalizep, target_mode),
5831                                 target, 0, OPTAB_WIDEN);
5832           if (tem != 0)
5833             return tem;
5834         }
5835       else if (!want_add
5836                && rtx_cost (trueval, mode, XOR, 1,
5837                             optimize_insn_for_speed_p ()) == 0)
5838         {
5839           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5840                                        normalizep, target_mode);
5841           if (tem != 0)
5842             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5843                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5844           if (tem != 0)
5845             return tem;
5846         }
5847
5848       delete_insns_since (last);
5849     }
5850
5851   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5852      the constant zero.  Reject all other comparisons at this point.  Only
5853      do LE and GT if branches are expensive since they are expensive on
5854      2-operand machines.  */
5855
5856   if (op1 != const0_rtx
5857       || (code != EQ && code != NE
5858           && (BRANCH_COST (optimize_insn_for_speed_p (),
5859                            false) <= 1 || (code != LE && code != GT))))
5860     return 0;
5861
5862   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5863      do the necessary operation below.  */
5864
5865   rtx tem = 0;
5866
5867   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5868      the sign bit set.  */
5869
5870   if (code == LE)
5871     {
5872       /* This is destructive, so SUBTARGET can't be OP0.  */
5873       if (rtx_equal_p (subtarget, op0))
5874         subtarget = 0;
5875
5876       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5877                           OPTAB_WIDEN);
5878       if (tem)
5879         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5880                             OPTAB_WIDEN);
5881     }
5882
5883   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5884      number of bits in the mode of OP0, minus one.  */
5885
5886   if (code == GT)
5887     {
5888       if (rtx_equal_p (subtarget, op0))
5889         subtarget = 0;
5890
5891       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5892                                 GET_MODE_BITSIZE (mode) - 1,
5893                                 subtarget, 0);
5894       if (tem)
5895         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5896                             OPTAB_WIDEN);
5897     }
5898
5899   if (code == EQ || code == NE)
5900     {
5901       /* For EQ or NE, one way to do the comparison is to apply an operation
5902          that converts the operand into a positive number if it is nonzero
5903          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5904          for NE we negate.  This puts the result in the sign bit.  Then we
5905          normalize with a shift, if needed.
5906
5907          Two operations that can do the above actions are ABS and FFS, so try
5908          them.  If that doesn't work, and MODE is smaller than a full word,
5909          we can use zero-extension to the wider mode (an unsigned conversion)
5910          as the operation.  */
5911
5912       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5913          that is compensated by the subsequent overflow when subtracting
5914          one / negating.  */
5915
5916       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5917         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5918       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5919         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5920       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5921         {
5922           tem = convert_modes (word_mode, mode, op0, 1);
5923           mode = word_mode;
5924         }
5925
5926       if (tem != 0)
5927         {
5928           if (code == EQ)
5929             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5930                                 0, OPTAB_WIDEN);
5931           else
5932             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5933         }
5934
5935       /* If we couldn't do it that way, for NE we can "or" the two's complement
5936          of the value with itself.  For EQ, we take the one's complement of
5937          that "or", which is an extra insn, so we only handle EQ if branches
5938          are expensive.  */
5939
5940       if (tem == 0
5941           && (code == NE
5942               || BRANCH_COST (optimize_insn_for_speed_p (),
5943                               false) > 1))
5944         {
5945           if (rtx_equal_p (subtarget, op0))
5946             subtarget = 0;
5947
5948           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5949           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5950                               OPTAB_WIDEN);
5951
5952           if (tem && code == EQ)
5953             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5954         }
5955     }
5956
5957   if (tem && normalizep)
5958     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5959                               GET_MODE_BITSIZE (mode) - 1,
5960                               subtarget, normalizep == 1);
5961
5962   if (tem)
5963     {
5964       if (!target)
5965         ;
5966       else if (GET_MODE (tem) != target_mode)
5967         {
5968           convert_move (target, tem, 0);
5969           tem = target;
5970         }
5971       else if (!subtarget)
5972         {
5973           emit_move_insn (target, tem);
5974           tem = target;
5975         }
5976     }
5977   else
5978     delete_insns_since (last);
5979
5980   return tem;
5981 }
5982
5983 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5984    and storing in TARGET.  Normally return TARGET.
5985    Return 0 if that cannot be done.
5986
5987    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5988    it is VOIDmode, they cannot both be CONST_INT.
5989
5990    UNSIGNEDP is for the case where we have to widen the operands
5991    to perform the operation.  It says to use zero-extension.
5992
5993    NORMALIZEP is 1 if we should convert the result to be either zero
5994    or one.  Normalize is -1 if we should convert the result to be
5995    either zero or -1.  If NORMALIZEP is zero, the result will be left
5996    "raw" out of the scc insn.  */
5997
5998 rtx
5999 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6000                  machine_mode mode, int unsignedp, int normalizep)
6001 {
6002   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6003   enum rtx_code rcode;
6004   rtx subtarget;
6005   rtx tem, trueval;
6006   rtx_insn *last;
6007
6008   /* If we compare constants, we shouldn't use a store-flag operation,
6009      but a constant load.  We can get there via the vanilla route that
6010      usually generates a compare-branch sequence, but will in this case
6011      fold the comparison to a constant, and thus elide the branch.  */
6012   if (CONSTANT_P (op0) && CONSTANT_P (op1))
6013     return NULL_RTX;
6014
6015   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6016                            target_mode);
6017   if (tem)
6018     return tem;
6019
6020   /* If we reached here, we can't do this with a scc insn, however there
6021      are some comparisons that can be done in other ways.  Don't do any
6022      of these cases if branches are very cheap.  */
6023   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6024     return 0;
6025
6026   /* See what we need to return.  We can only return a 1, -1, or the
6027      sign bit.  */
6028
6029   if (normalizep == 0)
6030     {
6031       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6032         normalizep = STORE_FLAG_VALUE;
6033
6034       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6035         ;
6036       else
6037         return 0;
6038     }
6039
6040   last = get_last_insn ();
6041
6042   /* If optimizing, use different pseudo registers for each insn, instead
6043      of reusing the same pseudo.  This leads to better CSE, but slows
6044      down the compiler, since there are more pseudos.  */
6045   subtarget = (!optimize
6046                && (target_mode == mode)) ? target : NULL_RTX;
6047   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6048
6049   /* For floating-point comparisons, try the reverse comparison or try
6050      changing the "orderedness" of the comparison.  */
6051   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6052     {
6053       enum rtx_code first_code;
6054       bool and_them;
6055
6056       rcode = reverse_condition_maybe_unordered (code);
6057       if (can_compare_p (rcode, mode, ccp_store_flag)
6058           && (code == ORDERED || code == UNORDERED
6059               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6060               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6061         {
6062           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6063                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6064
6065           /* For the reverse comparison, use either an addition or a XOR.  */
6066           if (want_add
6067               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6068                            optimize_insn_for_speed_p ()) == 0)
6069             {
6070               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6071                                        STORE_FLAG_VALUE, target_mode);
6072               if (tem)
6073                 return expand_binop (target_mode, add_optab, tem,
6074                                      gen_int_mode (normalizep, target_mode),
6075                                      target, 0, OPTAB_WIDEN);
6076             }
6077           else if (!want_add
6078                    && rtx_cost (trueval, mode, XOR, 1,
6079                                 optimize_insn_for_speed_p ()) == 0)
6080             {
6081               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6082                                        normalizep, target_mode);
6083               if (tem)
6084                 return expand_binop (target_mode, xor_optab, tem, trueval,
6085                                      target, INTVAL (trueval) >= 0,
6086                                      OPTAB_WIDEN);
6087             }
6088         }
6089
6090       delete_insns_since (last);
6091
6092       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6093       if (code == ORDERED || code == UNORDERED)
6094         return 0;
6095
6096       and_them = split_comparison (code, mode, &first_code, &code);
6097
6098       /* If there are no NaNs, the first comparison should always fall through.
6099          Effectively change the comparison to the other one.  */
6100       if (!HONOR_NANS (mode))
6101         {
6102           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6103           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6104                                     target_mode);
6105         }
6106
6107       if (!HAVE_conditional_move)
6108         return 0;
6109
6110       /* Do not turn a trapping comparison into a non-trapping one.  */
6111       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6112           && flag_trapping_math)
6113         return 0;
6114
6115       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6116          conditional move.  */
6117       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6118                                normalizep, target_mode);
6119       if (tem == 0)
6120         return 0;
6121
6122       if (and_them)
6123         tem = emit_conditional_move (target, code, op0, op1, mode,
6124                                      tem, const0_rtx, GET_MODE (tem), 0);
6125       else
6126         tem = emit_conditional_move (target, code, op0, op1, mode,
6127                                      trueval, tem, GET_MODE (tem), 0);
6128
6129       if (tem == 0)
6130         delete_insns_since (last);
6131       return tem;
6132     }
6133
6134   /* The remaining tricks only apply to integer comparisons.  */
6135
6136   scalar_int_mode int_mode;
6137   if (is_int_mode (mode, &int_mode))
6138     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6139                                 unsignedp, normalizep, trueval);
6140
6141   return 0;
6142 }
6143
6144 /* Like emit_store_flag, but always succeeds.  */
6145
6146 rtx
6147 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6148                        machine_mode mode, int unsignedp, int normalizep)
6149 {
6150   rtx tem;
6151   rtx_code_label *label;
6152   rtx trueval, falseval;
6153
6154   /* First see if emit_store_flag can do the job.  */
6155   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6156   if (tem != 0)
6157     return tem;
6158
6159   /* If one operand is constant, make it the second one.  Only do this
6160      if the other operand is not constant as well.  */
6161   if (swap_commutative_operands_p (op0, op1))
6162     {
6163       std::swap (op0, op1);
6164       code = swap_condition (code);
6165     }
6166
6167   if (mode == VOIDmode)
6168     mode = GET_MODE (op0);
6169
6170   if (!target)
6171     target = gen_reg_rtx (word_mode);
6172
6173   /* If this failed, we have to do this with set/compare/jump/set code.
6174      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6175   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6176   if (code == NE
6177       && GET_MODE_CLASS (mode) == MODE_INT
6178       && REG_P (target)
6179       && op0 == target
6180       && op1 == const0_rtx)
6181     {
6182       label = gen_label_rtx ();
6183       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6184                                NULL_RTX, NULL, label,
6185                                profile_probability::uninitialized ());
6186       emit_move_insn (target, trueval);
6187       emit_label (label);
6188       return target;
6189     }
6190
6191   if (!REG_P (target)
6192       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6193     target = gen_reg_rtx (GET_MODE (target));
6194
6195   /* Jump in the right direction if the target cannot implement CODE
6196      but can jump on its reverse condition.  */
6197   falseval = const0_rtx;
6198   if (! can_compare_p (code, mode, ccp_jump)
6199       && (! FLOAT_MODE_P (mode)
6200           || code == ORDERED || code == UNORDERED
6201           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6202           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6203     {
6204       enum rtx_code rcode;
6205       if (FLOAT_MODE_P (mode))
6206         rcode = reverse_condition_maybe_unordered (code);
6207       else
6208         rcode = reverse_condition (code);
6209
6210       /* Canonicalize to UNORDERED for the libcall.  */
6211       if (can_compare_p (rcode, mode, ccp_jump)
6212           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6213         {
6214           falseval = trueval;
6215           trueval = const0_rtx;
6216           code = rcode;
6217         }
6218     }
6219
6220   emit_move_insn (target, trueval);
6221   label = gen_label_rtx ();
6222   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6223                            label, profile_probability::uninitialized ());
6224
6225   emit_move_insn (target, falseval);
6226   emit_label (label);
6227
6228   return target;
6229 }
6230
6231 /* Helper function for canonicalize_cmp_for_target.  Swap between inclusive
6232    and exclusive ranges in order to create an equivalent comparison.  See
6233    canonicalize_cmp_for_target for the possible cases.  */
6234
6235 static enum rtx_code
6236 equivalent_cmp_code (enum rtx_code code)
6237 {
6238   switch (code)
6239     {
6240     case GT:
6241       return GE;
6242     case GE:
6243       return GT;
6244     case LT:
6245       return LE;
6246     case LE:
6247       return LT;
6248     case GTU:
6249       return GEU;
6250     case GEU:
6251       return GTU;
6252     case LTU:
6253       return LEU;
6254     case LEU:
6255       return LTU;
6256
6257     default:
6258       return code;
6259     }
6260 }
6261
6262 /* Choose the more appropiate immediate in scalar integer comparisons.  The
6263    purpose of this is to end up with an immediate which can be loaded into a
6264    register in fewer moves, if possible.
6265
6266    For each integer comparison there exists an equivalent choice:
6267      i)   a >  b or a >= b + 1
6268      ii)  a <= b or a <  b + 1
6269      iii) a >= b or a >  b - 1
6270      iv)  a <  b or a <= b - 1
6271
6272    MODE is the mode of the first operand.
6273    CODE points to the comparison code.
6274    IMM points to the rtx containing the immediate.  *IMM must satisfy
6275    CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6276    on exit.  */
6277
6278 void
6279 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6280 {
6281   if (!SCALAR_INT_MODE_P (mode))
6282     return;
6283
6284   int to_add = 0;
6285   enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6286
6287   /* Extract the immediate value from the rtx.  */
6288   wide_int imm_val = rtx_mode_t (*imm, mode);
6289
6290   if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6291     to_add = 1;
6292   else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6293     to_add = -1;
6294   else
6295     return;
6296
6297   /* Check for overflow/underflow in the case of signed values and
6298      wrapping around in the case of unsigned values.  If any occur
6299      cancel the optimization.  */
6300   wi::overflow_type overflow = wi::OVF_NONE;
6301   wide_int imm_modif;
6302
6303   if (to_add == 1)
6304     imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6305   else
6306     imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6307
6308   if (overflow)
6309     return;
6310
6311   /* The following creates a pseudo; if we cannot do that, bail out.  */
6312   if (!can_create_pseudo_p ())
6313     return;
6314
6315   rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6316   rtx new_imm = immed_wide_int_const (imm_modif, mode);
6317
6318   rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6319   rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6320
6321   /* Update the immediate and the code.  */
6322   if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6323     {
6324       *code = equivalent_cmp_code (*code);
6325       *imm = new_imm;
6326     }
6327 }
6328
6329
6330 \f
6331 /* Perform possibly multi-word comparison and conditional jump to LABEL
6332    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6333    now a thin wrapper around do_compare_rtx_and_jump.  */
6334
6335 static void
6336 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6337                  rtx_code_label *label)
6338 {
6339   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6340   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6341                            NULL, label, profile_probability::uninitialized ());
6342 }