gcc/expmed.c (blob 0461027620941ffb9c00a9ede440521f165b69cd)
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2020 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 poly_uint64, poly_uint64,
56 machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 poly_uint64, poly_uint64,
61 rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67 unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT,
69 poly_uint64, poly_uint64,
70 rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, rtx,
74 machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76 unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79 unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83 unsigned HOST_WIDE_INT,
84 unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91 The mask is truncated if necessary to the width of mode MODE. The
92 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
94 static inline rtx
95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
97 return immed_wide_int_const
98 (wi::shifted_mask (bitpos, bitsize, complement,
99 GET_MODE_PRECISION (mode)), mode);
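/* As an illustrative sketch, assuming a 32-bit SImode:
   mask_rtx (SImode, 8, 4, false) builds the constant

     ((HOST_WIDE_INT_1U << 4) - 1) << 8 == 0x00000f00,

   i.e. four ones starting at bit 8, while the complemented form
   mask_rtx (SImode, 8, 4, true) yields 0xfffff0ff.  */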
102 /* Test whether a value is zero or a power of two. */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
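/* As an illustrative sketch of the macro above, which uses the usual
   x & (x - 1) trick:

     EXACT_POWER_OF_2_OR_ZERO_P (8) is (8 & 7) == 0, so true;
     EXACT_POWER_OF_2_OR_ZERO_P (6) is (6 & 5) == 4, so false;
     EXACT_POWER_OF_2_OR_ZERO_P (0) is (0 & ~0) == 0, so true.  */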
106 struct init_expmed_rtl
108 rtx reg;
109 rtx plus;
110 rtx neg;
111 rtx mult;
112 rtx sdiv;
113 rtx udiv;
114 rtx sdiv_32;
115 rtx smod_32;
116 rtx wide_mult;
117 rtx wide_lshr;
118 rtx wide_trunc;
119 rtx shift;
120 rtx shift_mult;
121 rtx shift_add;
122 rtx shift_sub0;
123 rtx shift_sub1;
124 rtx zext;
125 rtx trunc;
127 rtx pow2[MAX_BITS_PER_WORD];
128 rtx cint[MAX_BITS_PER_WORD];
131 static void
132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133 scalar_int_mode from_mode, bool speed)
135 int to_size, from_size;
136 rtx which;
138 to_size = GET_MODE_PRECISION (to_mode);
139 from_size = GET_MODE_PRECISION (from_mode);
141 /* Most partial integers have a precision less than the "full"
142 integer they require for storage. If one does not, reduce
143 its bit size by one for the purposes of the comparison
144 here. */
145 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146 && pow2p_hwi (to_size))
147 to_size --;
148 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149 && pow2p_hwi (from_size))
150 from_size --;
152 /* Assume cost of zero-extend and sign-extend is the same. */
153 which = (to_size < from_size ? all->trunc : all->zext);
155 PUT_MODE (all->reg, from_mode);
156 set_convert_cost (to_mode, from_mode, speed,
157 set_src_cost (which, to_mode, speed));
160 static void
161 init_expmed_one_mode (struct init_expmed_rtl *all,
162 machine_mode mode, int speed)
164 int m, n, mode_bitsize;
165 machine_mode mode_from;
167 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
169 PUT_MODE (all->reg, mode);
170 PUT_MODE (all->plus, mode);
171 PUT_MODE (all->neg, mode);
172 PUT_MODE (all->mult, mode);
173 PUT_MODE (all->sdiv, mode);
174 PUT_MODE (all->udiv, mode);
175 PUT_MODE (all->sdiv_32, mode);
176 PUT_MODE (all->smod_32, mode);
177 PUT_MODE (all->wide_trunc, mode);
178 PUT_MODE (all->shift, mode);
179 PUT_MODE (all->shift_mult, mode);
180 PUT_MODE (all->shift_add, mode);
181 PUT_MODE (all->shift_sub0, mode);
182 PUT_MODE (all->shift_sub1, mode);
183 PUT_MODE (all->zext, mode);
184 PUT_MODE (all->trunc, mode);
186 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
187 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
188 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
189 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
190 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
192 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
193 <= 2 * add_cost (speed, mode)));
194 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
195 <= 4 * add_cost (speed, mode)));
197 set_shift_cost (speed, mode, 0, 0);
199 int cost = add_cost (speed, mode);
200 set_shiftadd_cost (speed, mode, 0, cost);
201 set_shiftsub0_cost (speed, mode, 0, cost);
202 set_shiftsub1_cost (speed, mode, 0, cost);
205 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
206 for (m = 1; m < n; m++)
208 XEXP (all->shift, 1) = all->cint[m];
209 XEXP (all->shift_mult, 1) = all->pow2[m];
211 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
212 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
213 speed));
214 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
215 speed));
216 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
217 speed));
220 scalar_int_mode int_mode_to;
221 if (is_a <scalar_int_mode> (mode, &int_mode_to))
223 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
224 mode_from = (machine_mode)(mode_from + 1))
225 init_expmed_one_conv (all, int_mode_to,
226 as_a <scalar_int_mode> (mode_from), speed);
228 scalar_int_mode wider_mode;
229 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
230 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
232 PUT_MODE (all->zext, wider_mode);
233 PUT_MODE (all->wide_mult, wider_mode);
234 PUT_MODE (all->wide_lshr, wider_mode);
235 XEXP (all->wide_lshr, 1)
236 = gen_int_shift_amount (wider_mode, mode_bitsize);
238 set_mul_widen_cost (speed, wider_mode,
239 set_src_cost (all->wide_mult, wider_mode, speed));
240 set_mul_highpart_cost (speed, int_mode_to,
241 set_src_cost (all->wide_trunc,
242 int_mode_to, speed));
247 void
248 init_expmed (void)
250 struct init_expmed_rtl all;
251 machine_mode mode = QImode;
252 int m, speed;
254 memset (&all, 0, sizeof all);
255 for (m = 1; m < MAX_BITS_PER_WORD; m++)
257 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
258 all.cint[m] = GEN_INT (m);
261 /* Avoid using hard regs in ways which may be unsupported. */
262 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
263 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
264 all.neg = gen_rtx_NEG (mode, all.reg);
265 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
266 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
267 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
268 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
269 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
270 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
271 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
272 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
273 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
274 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
275 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
276 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
277 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
278 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
279 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
281 for (speed = 0; speed < 2; speed++)
283 crtl->maybe_hot_insn_p = speed;
284 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
286 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
287 mode = (machine_mode)(mode + 1))
288 init_expmed_one_mode (&all, mode, speed);
290 if (MIN_MODE_PARTIAL_INT != VOIDmode)
291 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
292 mode = (machine_mode)(mode + 1))
293 init_expmed_one_mode (&all, mode, speed);
295 if (MIN_MODE_VECTOR_INT != VOIDmode)
296 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
297 mode = (machine_mode)(mode + 1))
298 init_expmed_one_mode (&all, mode, speed);
301 if (alg_hash_used_p ())
303 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
304 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
306 else
307 set_alg_hash_used_p (true);
308 default_rtl_profile ();
310 ggc_free (all.trunc);
311 ggc_free (all.shift_sub1);
312 ggc_free (all.shift_sub0);
313 ggc_free (all.shift_add);
314 ggc_free (all.shift_mult);
315 ggc_free (all.shift);
316 ggc_free (all.wide_trunc);
317 ggc_free (all.wide_lshr);
318 ggc_free (all.wide_mult);
319 ggc_free (all.zext);
320 ggc_free (all.smod_32);
321 ggc_free (all.sdiv_32);
322 ggc_free (all.udiv);
323 ggc_free (all.sdiv);
324 ggc_free (all.mult);
325 ggc_free (all.neg);
326 ggc_free (all.plus);
327 ggc_free (all.reg);
330 /* Return an rtx representing minus the value of X.
331 MODE is the intended mode of the result,
332 useful if X is a CONST_INT. */
334 rtx
335 negate_rtx (machine_mode mode, rtx x)
337 rtx result = simplify_unary_operation (NEG, mode, x, mode);
339 if (result == 0)
340 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
342 return result;
345 /* Whether reverse storage order is supported on the target. */
346 static int reverse_storage_order_supported = -1;
348 /* Check whether reverse storage order is supported on the target. */
350 static void
351 check_reverse_storage_order_support (void)
353 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
355 reverse_storage_order_supported = 0;
356 sorry ("reverse scalar storage order");
358 else
359 reverse_storage_order_supported = 1;
362 /* Whether reverse FP storage order is supported on the target. */
363 static int reverse_float_storage_order_supported = -1;
365 /* Check whether reverse FP storage order is supported on the target. */
367 static void
368 check_reverse_float_storage_order_support (void)
370 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
372 reverse_float_storage_order_supported = 0;
373 sorry ("reverse floating-point scalar storage order");
375 else
376 reverse_float_storage_order_supported = 1;
379 /* Return an rtx representing value of X with reverse storage order.
380 MODE is the intended mode of the result,
381 useful if X is a CONST_INT. */
383 rtx
384 flip_storage_order (machine_mode mode, rtx x)
386 scalar_int_mode int_mode;
387 rtx result;
389 if (mode == QImode)
390 return x;
392 if (COMPLEX_MODE_P (mode))
394 rtx real = read_complex_part (x, false);
395 rtx imag = read_complex_part (x, true);
397 real = flip_storage_order (GET_MODE_INNER (mode), real);
398 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
400 return gen_rtx_CONCAT (mode, real, imag);
403 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
404 check_reverse_storage_order_support ();
406 if (!is_a <scalar_int_mode> (mode, &int_mode))
408 if (FLOAT_MODE_P (mode)
409 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
410 check_reverse_float_storage_order_support ();
412 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
414 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
415 return x;
417 x = gen_lowpart (int_mode, x);
420 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
421 if (result == 0)
422 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
424 if (int_mode != mode)
425 result = gen_lowpart (mode, result);
427 return result;
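/* As an illustrative sketch, assuming a 32-bit SImode: the reversal above
   reduces to a byte swap for scalar integers, e.g.

     flip_storage_order (SImode, GEN_INT (0x12345678)) --> 0x78563412.

   QImode values are returned unchanged, and complex modes are handled by
   flipping the real and imaginary parts separately.  */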
430 /* If MODE is set, adjust bitfield memory MEM so that it points to the
431 first unit of mode MODE that contains a bitfield of size BITSIZE at
432 bit position BITNUM. If MODE is not set, return a BLKmode reference
433 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
434 of the field within the new memory. */
436 static rtx
437 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
438 unsigned HOST_WIDE_INT bitsize,
439 unsigned HOST_WIDE_INT bitnum,
440 unsigned HOST_WIDE_INT *new_bitnum)
442 scalar_int_mode imode;
443 if (mode.exists (&imode))
445 unsigned int unit = GET_MODE_BITSIZE (imode);
446 *new_bitnum = bitnum % unit;
447 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
448 return adjust_bitfield_address (mem, imode, offset);
450 else
452 *new_bitnum = bitnum % BITS_PER_UNIT;
453 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
454 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
455 / BITS_PER_UNIT);
456 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
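/* As a worked example, assuming BITSIZE == 5 and BITNUM == 37 with a
   16-bit HImode access: the code above computes

     *new_bitnum = 37 % 16 == 5,  offset = (37 - 5) / 8 == 4 bytes,

   so the returned MEM points 4 bytes into the object and the field starts
   at bit 5 of that unit.  In the BLKmode case the same field gives
   *new_bitnum == 5, offset == 4 and size == (5 + 5 + 7) / 8 == 2 bytes.  */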
460 /* The caller wants to perform insertion or extraction PATTERN on a
461 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
462 BITREGION_START and BITREGION_END are as for store_bit_field
463 and FIELDMODE is the natural mode of the field.
465 Search for a mode that is compatible with the memory access
466 restrictions and (where applicable) with a register insertion or
467 extraction. Return the new memory on success, storing the adjusted
468 bit position in *NEW_BITNUM. Return null otherwise. */
470 static rtx
471 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
472 rtx op0, HOST_WIDE_INT bitsize,
473 HOST_WIDE_INT bitnum,
474 poly_uint64 bitregion_start,
475 poly_uint64 bitregion_end,
476 machine_mode fieldmode,
477 unsigned HOST_WIDE_INT *new_bitnum)
479 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
480 bitregion_end, MEM_ALIGN (op0),
481 MEM_VOLATILE_P (op0));
482 scalar_int_mode best_mode;
483 if (iter.next_mode (&best_mode))
485 /* We can use a memory in BEST_MODE. See whether this is true for
486 any wider modes. All other things being equal, we prefer to
487 use the widest mode possible because it tends to expose more
488 CSE opportunities. */
489 if (!iter.prefer_smaller_modes ())
491 /* Limit the search to the mode required by the corresponding
492 register insertion or extraction instruction, if any. */
493 scalar_int_mode limit_mode = word_mode;
494 extraction_insn insn;
495 if (get_best_reg_extraction_insn (&insn, pattern,
496 GET_MODE_BITSIZE (best_mode),
497 fieldmode))
498 limit_mode = insn.field_mode;
500 scalar_int_mode wider_mode;
501 while (iter.next_mode (&wider_mode)
502 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
503 best_mode = wider_mode;
505 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
506 new_bitnum);
508 return NULL_RTX;
511 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
512 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
513 offset is then BITNUM / BITS_PER_UNIT. */
515 static bool
516 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
517 machine_mode struct_mode)
519 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
520 if (BYTES_BIG_ENDIAN)
521 return (multiple_p (bitnum, BITS_PER_UNIT)
522 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
523 || multiple_p (bitnum + bitsize,
524 regsize * BITS_PER_UNIT)));
525 else
526 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
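/* As an illustrative sketch, assuming 4-byte registers (so
   REGMODE_NATURAL_SIZE is 4): on a little-endian target a field starting
   at BITNUM 0, 32 or 64 is a lowpart, while one starting at BITNUM 8 is
   not.  On a big-endian target the field must instead be byte-aligned and
   end either at the last bit of STRUCT_MODE or on a 32-bit register
   boundary.  */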
529 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
530 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
531 Return false if the access would touch memory outside the range
532 BITREGION_START to BITREGION_END for conformance to the C++ memory
533 model. */
535 static bool
536 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
537 unsigned HOST_WIDE_INT bitnum,
538 scalar_int_mode fieldmode,
539 poly_uint64 bitregion_start,
540 poly_uint64 bitregion_end)
542 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
544 /* -fstrict-volatile-bitfields must be enabled and we must have a
545 volatile MEM. */
546 if (!MEM_P (op0)
547 || !MEM_VOLATILE_P (op0)
548 || flag_strict_volatile_bitfields <= 0)
549 return false;
551 /* The bit size must not be larger than the field mode, and
552 the field mode must not be larger than a word. */
553 if (bitsize > modesize || modesize > BITS_PER_WORD)
554 return false;
556 /* Check for cases of unaligned fields that must be split. */
557 if (bitnum % modesize + bitsize > modesize)
558 return false;
560 /* The memory must be sufficiently aligned for a MODESIZE access.
561 This condition guarantees that the memory access will not
562 touch anything after the end of the structure. */
563 if (MEM_ALIGN (op0) < modesize)
564 return false;
566 /* Check for cases where the C++ memory model applies. */
567 if (maybe_ne (bitregion_end, 0U)
568 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
569 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
570 bitregion_end)))
571 return false;
573 return true;
576 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
577 bit number BITNUM can be treated as a simple value of mode MODE.
578 Store the byte offset in *BYTENUM if so. */
580 static bool
581 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
582 machine_mode mode, poly_uint64 *bytenum)
584 return (MEM_P (op0)
585 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
586 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
587 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
588 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
589 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
592 /* Try to use instruction INSV to store VALUE into a field of OP0.
593 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
594 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
595 are as for store_bit_field. */
597 static bool
598 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
599 opt_scalar_int_mode op0_mode,
600 unsigned HOST_WIDE_INT bitsize,
601 unsigned HOST_WIDE_INT bitnum,
602 rtx value, scalar_int_mode value_mode)
604 class expand_operand ops[4];
605 rtx value1;
606 rtx xop0 = op0;
607 rtx_insn *last = get_last_insn ();
608 bool copy_back = false;
610 scalar_int_mode op_mode = insv->field_mode;
611 unsigned int unit = GET_MODE_BITSIZE (op_mode);
612 if (bitsize == 0 || bitsize > unit)
613 return false;
615 if (MEM_P (xop0))
616 /* Get a reference to the first byte of the field. */
617 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
618 &bitnum);
619 else
621 /* Convert from counting within OP0 to counting in OP_MODE. */
622 if (BYTES_BIG_ENDIAN)
623 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
625 /* If xop0 is a register, we need it in OP_MODE
626 to make it acceptable to the format of insv. */
627 if (GET_CODE (xop0) == SUBREG)
628 /* We can't just change the mode, because this might clobber op0,
629 and we will need the original value of op0 if insv fails. */
630 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
631 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
632 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
635 /* If the destination is a paradoxical subreg such that we need a
636 truncate to the inner mode, perform the insertion on a temporary and
637 truncate the result to the original destination. Note that we can't
638 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
639 X) 0)) is (reg:N X). */
640 if (GET_CODE (xop0) == SUBREG
641 && REG_P (SUBREG_REG (xop0))
642 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
643 op_mode))
645 rtx tem = gen_reg_rtx (op_mode);
646 emit_move_insn (tem, xop0);
647 xop0 = tem;
648 copy_back = true;
651 /* There is a similar overflow check at the start of store_bit_field_1,
652 but it only handles the case where the field lies completely outside
653 the register. There are also cases where the field lies partially in
654 the register, and we need to adjust BITSIZE for that partial overflow.
655 Without this fix, pr48335-2.c would be broken on big-endian targets
656 that support bit-insert instructions, such as arm, aarch64,
657 etc. */
658 if (bitsize + bitnum > unit && bitnum < unit)
660 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
661 "destination object, data truncated into %wu-bit",
662 bitsize, unit - bitnum);
663 bitsize = unit - bitnum;
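/* As a worked example, assuming UNIT == 32: a request to insert
   BITSIZE == 16 bits at BITNUM == 24 overlaps the end of the unit, so
   the code above warns and clamps BITSIZE to 32 - 24 == 8 bits before
   attempting the insertion.  */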
666 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
667 "backwards" from the size of the unit we are inserting into.
668 Otherwise, we count bits from the most significant on a
669 BYTES/BITS_BIG_ENDIAN machine. */
671 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
672 bitnum = unit - bitsize - bitnum;
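/* As a worked example, assuming UNIT == 32, BITSIZE == 8 and BITNUM == 4:
   the renumbering above gives 32 - 8 - 4 == 20, i.e. the same field
   position counted from the opposite end of the unit.  */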
674 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
675 value1 = value;
676 if (value_mode != op_mode)
678 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
680 rtx tmp;
681 /* Optimization: Don't bother really extending VALUE
682 if it has all the bits we will actually use. However,
683 if we must narrow it, be sure we do it correctly. */
685 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
687 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
688 if (! tmp)
689 tmp = simplify_gen_subreg (op_mode,
690 force_reg (value_mode, value1),
691 value_mode, 0);
693 else
695 tmp = gen_lowpart_if_possible (op_mode, value1);
696 if (! tmp)
697 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
699 value1 = tmp;
701 else if (CONST_INT_P (value))
702 value1 = gen_int_mode (INTVAL (value), op_mode);
703 else
704 /* Parse phase is supposed to make VALUE's data type
705 match that of the component reference, which is a type
706 at least as wide as the field; so VALUE should have
707 a mode that corresponds to that type. */
708 gcc_assert (CONSTANT_P (value));
711 create_fixed_operand (&ops[0], xop0);
712 create_integer_operand (&ops[1], bitsize);
713 create_integer_operand (&ops[2], bitnum);
714 create_input_operand (&ops[3], value1, op_mode);
715 if (maybe_expand_insn (insv->icode, 4, ops))
717 if (copy_back)
718 convert_move (op0, xop0, true);
719 return true;
721 delete_insns_since (last);
722 return false;
725 /* A subroutine of store_bit_field, with the same arguments. Return true
726 if the operation could be implemented.
728 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
729 no other way of implementing the operation. If FALLBACK_P is false,
730 return false instead. */
732 static bool
733 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
734 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
735 machine_mode fieldmode,
736 rtx value, bool reverse, bool fallback_p)
738 rtx op0 = str_rtx;
740 while (GET_CODE (op0) == SUBREG)
742 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
743 op0 = SUBREG_REG (op0);
746 /* No action is needed if the target is a register and if the field
747 lies completely outside that register. This can occur if the source
748 code contains an out-of-bounds access to a small array. */
749 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
750 return true;
752 /* Use vec_set patterns for inserting parts of vectors whenever
753 available. */
754 machine_mode outermode = GET_MODE (op0);
755 scalar_mode innermode = GET_MODE_INNER (outermode);
756 poly_uint64 pos;
757 if (VECTOR_MODE_P (outermode)
758 && !MEM_P (op0)
759 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
760 && fieldmode == innermode
761 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
762 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
764 class expand_operand ops[3];
765 enum insn_code icode = optab_handler (vec_set_optab, outermode);
767 create_fixed_operand (&ops[0], op0);
768 create_input_operand (&ops[1], value, innermode);
769 create_integer_operand (&ops[2], pos);
770 if (maybe_expand_insn (icode, 3, ops))
771 return true;
774 /* If the target is a register, overwriting the entire object, or storing
775 a full-word or multi-word field can be done with just a SUBREG. */
776 if (!MEM_P (op0)
777 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
779 /* Use the subreg machinery either to narrow OP0 to the required
780 words or to cope with mode punning between equal-sized modes.
781 In the latter case, use subreg on the rhs side, not lhs. */
782 rtx sub;
783 HOST_WIDE_INT regnum;
784 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
785 if (known_eq (bitnum, 0U)
786 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
788 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
789 if (sub)
791 if (reverse)
792 sub = flip_storage_order (GET_MODE (op0), sub);
793 emit_move_insn (op0, sub);
794 return true;
797 else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
798 && multiple_p (bitsize, regsize * BITS_PER_UNIT))
800 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
801 regnum * regsize);
802 if (sub)
804 if (reverse)
805 value = flip_storage_order (fieldmode, value);
806 emit_move_insn (sub, value);
807 return true;
812 /* If the target is memory, storing any naturally aligned field can be
813 done with a simple store. For targets that support fast unaligned
814 memory, any naturally sized, unit aligned field can be done directly. */
815 poly_uint64 bytenum;
816 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
818 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
819 if (reverse)
820 value = flip_storage_order (fieldmode, value);
821 emit_move_insn (op0, value);
822 return true;
825 /* It's possible we'll need to handle other cases here for
826 polynomial bitnum and bitsize. */
828 /* From here on we need to be looking at a fixed-size insertion. */
829 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
830 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
832 /* Make sure we are playing with integral modes. Pun with subregs
833 if we aren't. This must come after the entire register case above,
834 since that case is valid for any mode. The following cases are only
835 valid for integral modes. */
836 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
837 scalar_int_mode imode;
838 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
840 if (MEM_P (op0))
841 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
842 0, MEM_SIZE (op0));
843 else if (!op0_mode.exists ())
845 if (ibitnum == 0
846 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
847 && MEM_P (value)
848 && !reverse)
850 value = adjust_address (value, GET_MODE (op0), 0);
851 emit_move_insn (op0, value);
852 return true;
854 if (!fallback_p)
855 return false;
856 rtx temp = assign_stack_temp (GET_MODE (op0),
857 GET_MODE_SIZE (GET_MODE (op0)));
858 emit_move_insn (temp, op0);
859 store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
860 reverse, fallback_p);
861 emit_move_insn (op0, temp);
862 return true;
864 else
865 op0 = gen_lowpart (op0_mode.require (), op0);
868 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
869 bitregion_start, bitregion_end,
870 fieldmode, value, reverse, fallback_p);
873 /* Subroutine of store_bit_field_1, with the same arguments, except
874 that BITSIZE and BITNUM are constant. Handle cases specific to
875 integral modes. If OP0_MODE is defined, it is the mode of OP0,
876 otherwise OP0 is a BLKmode MEM. */
878 static bool
879 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
880 unsigned HOST_WIDE_INT bitsize,
881 unsigned HOST_WIDE_INT bitnum,
882 poly_uint64 bitregion_start,
883 poly_uint64 bitregion_end,
884 machine_mode fieldmode,
885 rtx value, bool reverse, bool fallback_p)
887 /* Storing an lsb-aligned field in a register
888 can be done with a movstrict instruction. */
890 if (!MEM_P (op0)
891 && !reverse
892 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
893 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
894 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
896 class expand_operand ops[2];
897 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
898 rtx arg0 = op0;
899 unsigned HOST_WIDE_INT subreg_off;
901 if (GET_CODE (arg0) == SUBREG)
903 /* Else we've got some float mode source being extracted into
904 a different float mode destination -- this combination of
905 subregs results in Severe Tire Damage. */
906 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
907 || GET_MODE_CLASS (fieldmode) == MODE_INT
908 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
909 arg0 = SUBREG_REG (arg0);
912 subreg_off = bitnum / BITS_PER_UNIT;
913 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
915 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
917 create_fixed_operand (&ops[0], arg0);
918 /* Shrink the source operand to FIELDMODE. */
919 create_convert_operand_to (&ops[1], value, fieldmode, false);
920 if (maybe_expand_insn (icode, 2, ops))
921 return true;
925 /* Handle fields bigger than a word. */
927 if (bitsize > BITS_PER_WORD)
929 /* Here we transfer the words of the field
930 in the order least significant first.
931 This is because the most significant word is the one which may
932 be less than full.
933 However, only do that if the value is not BLKmode. */
935 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
936 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
937 unsigned int i;
938 rtx_insn *last;
940 /* This is the mode we must force value to, so that there will be enough
941 subwords to extract. Note that fieldmode will often (always?) be
942 VOIDmode, because that is what store_field uses to indicate that this
943 is a bit field, but passing VOIDmode to operand_subword_force
944 is not allowed.
946 The mode must be fixed-size, since insertions into variable-sized
947 objects are meant to be handled before calling this function. */
948 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
949 if (value_mode == VOIDmode)
950 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
952 last = get_last_insn ();
953 for (i = 0; i < nwords; i++)
955 /* If I is 0, use the low-order word in both field and target;
956 if I is 1, use the next to lowest word; and so on. */
957 unsigned int wordnum = (backwards
958 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD
959 - i - 1
960 : i);
961 unsigned int bit_offset = (backwards ^ reverse
962 ? MAX ((int) bitsize - ((int) i + 1)
963 * BITS_PER_WORD,
964 0)
965 : (int) i * BITS_PER_WORD);
966 rtx value_word = operand_subword_force (value, wordnum, value_mode);
967 unsigned HOST_WIDE_INT new_bitsize =
968 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
970 /* If the remaining chunk doesn't have full wordsize we have
971 to make sure that for big-endian machines the higher order
972 bits are used. */
973 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
975 int shift = BITS_PER_WORD - new_bitsize;
976 rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
977 value_word = simplify_expand_binop (word_mode, lshr_optab,
978 value_word, shift_rtx,
979 NULL_RTX, true,
980 OPTAB_LIB_WIDEN);
983 if (!store_bit_field_1 (op0, new_bitsize,
984 bitnum + bit_offset,
985 bitregion_start, bitregion_end,
986 word_mode,
987 value_word, reverse, fallback_p))
989 delete_insns_since (last);
990 return false;
993 return true;
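/* As a worked example, assuming a little-endian target with 32-bit words:
   storing a 48-bit field runs the loop above with nwords == 2.  Pass
   i == 0 stores word 0 of VALUE into bits BITNUM..BITNUM+31, and pass
   i == 1 stores the remaining MIN (32, 48 - 32) == 16 bits of word 1 at
   BITNUM + 32.  */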
996 /* If VALUE has a floating-point or complex mode, access it as an
997 integer of the corresponding size. This can occur on a machine
998 with 64 bit registers that uses SFmode for float. It can also
999 occur for unaligned float or complex fields. */
1000 rtx orig_value = value;
1001 scalar_int_mode value_mode;
1002 if (GET_MODE (value) == VOIDmode)
1003 /* By this point we've dealt with values that are bigger than a word,
1004 so word_mode is a conservatively correct choice. */
1005 value_mode = word_mode;
1006 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1008 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1009 value = gen_reg_rtx (value_mode);
1010 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1013 /* If OP0 is a multi-word register, narrow it to the affected word.
1014 If the region spans two words, defer to store_split_bit_field.
1015 Don't do this if op0 is a single hard register wider than word
1016 such as a float or vector register. */
1017 if (!MEM_P (op0)
1018 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1019 && (!REG_P (op0)
1020 || !HARD_REGISTER_P (op0)
1021 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1023 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1025 if (!fallback_p)
1026 return false;
1028 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1029 bitregion_start, bitregion_end,
1030 value, value_mode, reverse);
1031 return true;
1033 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1034 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1035 gcc_assert (op0);
1036 op0_mode = word_mode;
1037 bitnum %= BITS_PER_WORD;
1040 /* From here on we can assume that the field to be stored in fits
1041 within a word. If the destination is a register, it too fits
1042 in a word. */
1044 extraction_insn insv;
1045 if (!MEM_P (op0)
1046 && !reverse
1047 && get_best_reg_extraction_insn (&insv, EP_insv,
1048 GET_MODE_BITSIZE (op0_mode.require ()),
1049 fieldmode)
1050 && store_bit_field_using_insv (&insv, op0, op0_mode,
1051 bitsize, bitnum, value, value_mode))
1052 return true;
1054 /* If OP0 is a memory, try copying it to a register and seeing if a
1055 cheap register alternative is available. */
1056 if (MEM_P (op0) && !reverse)
1058 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1059 fieldmode)
1060 && store_bit_field_using_insv (&insv, op0, op0_mode,
1061 bitsize, bitnum, value, value_mode))
1062 return true;
1064 rtx_insn *last = get_last_insn ();
1066 /* Try loading part of OP0 into a register, inserting the bitfield
1067 into that, and then copying the result back to OP0. */
1068 unsigned HOST_WIDE_INT bitpos;
1069 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1070 bitregion_start, bitregion_end,
1071 fieldmode, &bitpos);
1072 if (xop0)
1074 rtx tempreg = copy_to_reg (xop0);
1075 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1076 bitregion_start, bitregion_end,
1077 fieldmode, orig_value, reverse, false))
1079 emit_move_insn (xop0, tempreg);
1080 return true;
1082 delete_insns_since (last);
1086 if (!fallback_p)
1087 return false;
1089 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1090 bitregion_end, value, value_mode, reverse);
1091 return true;
1094 /* Generate code to store value from rtx VALUE
1095 into a bit-field within structure STR_RTX
1096 containing BITSIZE bits starting at bit BITNUM.
1098 BITREGION_START is bitpos of the first bitfield in this region.
1099 BITREGION_END is the bitpos of the ending bitfield in this region.
1100 These two fields are 0 if the C++ memory model does not apply,
1101 or we are not interested in keeping track of bitfield regions.
1103 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1105 If REVERSE is true, the store is to be done in reverse order. */
1107 void
1108 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1109 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1110 machine_mode fieldmode,
1111 rtx value, bool reverse)
1113 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1114 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1115 scalar_int_mode int_mode;
1116 if (bitsize.is_constant (&ibitsize)
1117 && bitnum.is_constant (&ibitnum)
1118 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1119 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1120 bitregion_start, bitregion_end))
1122 /* Storing of a full word can be done with a simple store.
1123 We know here that the field can be accessed with one single
1124 instruction. For targets that support unaligned memory,
1125 an unaligned access may be necessary. */
1126 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1128 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1129 ibitnum / BITS_PER_UNIT);
1130 if (reverse)
1131 value = flip_storage_order (int_mode, value);
1132 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1133 emit_move_insn (str_rtx, value);
1135 else
1137 rtx temp;
1139 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1140 ibitnum, &ibitnum);
1141 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1142 temp = copy_to_reg (str_rtx);
1143 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1144 int_mode, value, reverse, true))
1145 gcc_unreachable ();
1147 emit_move_insn (str_rtx, temp);
1150 return;
1153 /* Under the C++0x memory model, we must not touch bits outside the
1154 bit region. Adjust the address to start at the beginning of the
1155 bit region. */
1156 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1158 scalar_int_mode best_mode;
1159 machine_mode addr_mode = VOIDmode;
1161 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1162 bitnum -= bitregion_start;
1163 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1164 bitregion_end -= bitregion_start;
1165 bitregion_start = 0;
1166 if (bitsize.is_constant (&ibitsize)
1167 && bitnum.is_constant (&ibitnum)
1168 && get_best_mode (ibitsize, ibitnum,
1169 bitregion_start, bitregion_end,
1170 MEM_ALIGN (str_rtx), INT_MAX,
1171 MEM_VOLATILE_P (str_rtx), &best_mode))
1172 addr_mode = best_mode;
1173 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1174 offset, size);
1177 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1178 bitregion_start, bitregion_end,
1179 fieldmode, value, reverse, true))
1180 gcc_unreachable ();
1183 /* Use shifts and boolean operations to store VALUE into a bit field of
1184 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1185 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1186 the mode of VALUE.
1188 If REVERSE is true, the store is to be done in reverse order. */
1190 static void
1191 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1192 unsigned HOST_WIDE_INT bitsize,
1193 unsigned HOST_WIDE_INT bitnum,
1194 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1195 rtx value, scalar_int_mode value_mode, bool reverse)
1197 /* There is a case not handled here:
1198 a structure with a known alignment of just a halfword
1199 and a field split across two aligned halfwords within the structure.
1200 Or likewise a structure with a known alignment of just a byte
1201 and a field split across two bytes.
1202 Such cases are not supposed to be able to occur. */
1204 scalar_int_mode best_mode;
1205 if (MEM_P (op0))
1207 unsigned int max_bitsize = BITS_PER_WORD;
1208 scalar_int_mode imode;
1209 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1210 max_bitsize = GET_MODE_BITSIZE (imode);
1212 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1213 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1214 &best_mode))
1216 /* The only way this should occur is if the field spans word
1217 boundaries. */
1218 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1219 bitregion_start, bitregion_end,
1220 value, value_mode, reverse);
1221 return;
1224 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1226 else
1227 best_mode = op0_mode.require ();
1229 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1230 value, value_mode, reverse);
1233 /* Helper function for store_fixed_bit_field, stores
1234 the bit field always using MODE, which is the mode of OP0. The other
1235 arguments are as for store_fixed_bit_field. */
1237 static void
1238 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1239 unsigned HOST_WIDE_INT bitsize,
1240 unsigned HOST_WIDE_INT bitnum,
1241 rtx value, scalar_int_mode value_mode, bool reverse)
1243 rtx temp;
1244 int all_zero = 0;
1245 int all_one = 0;
1247 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1248 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1250 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1251 /* BITNUM is the distance between our msb
1252 and that of the containing datum.
1253 Convert it to the distance from the lsb. */
1254 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1256 /* Now BITNUM is always the distance between our lsb
1257 and that of OP0. */
1259 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1260 we must first convert its mode to MODE. */
1262 if (CONST_INT_P (value))
1264 unsigned HOST_WIDE_INT v = UINTVAL (value);
1266 if (bitsize < HOST_BITS_PER_WIDE_INT)
1267 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1269 if (v == 0)
1270 all_zero = 1;
1271 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1272 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1273 || (bitsize == HOST_BITS_PER_WIDE_INT
1274 && v == HOST_WIDE_INT_M1U))
1275 all_one = 1;
1277 value = lshift_value (mode, v, bitnum);
1279 else
1281 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1282 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1284 if (value_mode != mode)
1285 value = convert_to_mode (mode, value, 1);
1287 if (must_and)
1288 value = expand_binop (mode, and_optab, value,
1289 mask_rtx (mode, 0, bitsize, 0),
1290 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1291 if (bitnum > 0)
1292 value = expand_shift (LSHIFT_EXPR, mode, value,
1293 bitnum, NULL_RTX, 1);
1296 if (reverse)
1297 value = flip_storage_order (mode, value);
1299 /* Now clear the chosen bits in OP0,
1300 except that if VALUE is -1 we need not bother. */
1301 /* We keep the intermediates in registers to allow CSE to combine
1302 consecutive bitfield assignments. */
1304 temp = force_reg (mode, op0);
1306 if (! all_one)
1308 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1309 if (reverse)
1310 mask = flip_storage_order (mode, mask);
1311 temp = expand_binop (mode, and_optab, temp, mask,
1312 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1313 temp = force_reg (mode, temp);
1316 /* Now logical-or VALUE into OP0, unless it is zero. */
1318 if (! all_zero)
1320 temp = expand_binop (mode, ior_optab, temp, value,
1321 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1322 temp = force_reg (mode, temp);
1325 if (op0 != temp)
1327 op0 = copy_rtx (op0);
1328 emit_move_insn (op0, temp);
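/* As a worked example, assuming a 32-bit MODE: storing the constant 5
   into a 4-bit field at BITNUM 8 reduces, after the shifts and masks
   above, to

     temp = (op0 & ~0x00000f00) | 0x00000500;

   i.e. the field is first cleared with the complemented mask and the
   shifted value is then ORed in; the ALL_ZERO and ALL_ONE checks let
   either step be skipped when it would be a no-op.  */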
1332 /* Store a bit field that is split across multiple accessible memory objects.
1334 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1335 BITSIZE is the field width; BITPOS the position of its first bit
1336 (within the word).
1337 VALUE is the value to store, which has mode VALUE_MODE.
1338 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1339 a BLKmode MEM.
1341 If REVERSE is true, the store is to be done in reverse order.
1343 This does not yet handle fields wider than BITS_PER_WORD. */
1345 static void
1346 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1347 unsigned HOST_WIDE_INT bitsize,
1348 unsigned HOST_WIDE_INT bitpos,
1349 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1350 rtx value, scalar_int_mode value_mode, bool reverse)
1352 unsigned int unit, total_bits, bitsdone = 0;
1354 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1355 much at a time. */
1356 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1357 unit = BITS_PER_WORD;
1358 else
1359 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1361 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1362 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1363 again, and we will mutually recurse forever. */
1364 if (MEM_P (op0) && op0_mode.exists ())
1365 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1367 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1368 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1369 that VALUE might be a floating-point constant. */
1370 if (CONSTANT_P (value) && !CONST_INT_P (value))
1372 rtx word = gen_lowpart_common (word_mode, value);
1374 if (word && (value != word))
1375 value = word;
1376 else
1377 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1378 value_mode = word_mode;
1381 total_bits = GET_MODE_BITSIZE (value_mode);
1383 while (bitsdone < bitsize)
1385 unsigned HOST_WIDE_INT thissize;
1386 unsigned HOST_WIDE_INT thispos;
1387 unsigned HOST_WIDE_INT offset;
1388 rtx part;
1390 offset = (bitpos + bitsdone) / unit;
1391 thispos = (bitpos + bitsdone) % unit;
1393 /* When region of bytes we can touch is restricted, decrease
1394 UNIT close to the end of the region as needed. If op0 is a REG
1395 or SUBREG of REG, don't do this, as there can't be data races
1396 on a register and we can expand shorter code in some cases. */
1397 if (maybe_ne (bitregion_end, 0U)
1398 && unit > BITS_PER_UNIT
1399 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1400 && !REG_P (op0)
1401 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1403 unit = unit / 2;
1404 continue;
1407 /* THISSIZE must not overrun a word boundary. Otherwise,
1408 store_fixed_bit_field will call us again, and we will mutually
1409 recurse forever. */
1410 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1411 thissize = MIN (thissize, unit - thispos);
1413 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1415 /* Fetch successively less significant portions. */
1416 if (CONST_INT_P (value))
1417 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1418 >> (bitsize - bitsdone - thissize))
1419 & ((HOST_WIDE_INT_1 << thissize) - 1));
1420 /* Likewise, but the source is little-endian. */
1421 else if (reverse)
1422 part = extract_fixed_bit_field (word_mode, value, value_mode,
1423 thissize,
1424 bitsize - bitsdone - thissize,
1425 NULL_RTX, 1, false);
1426 else
1427 /* The args are chosen so that the last part includes the
1428 lsb. Give extract_bit_field the value it needs (with
1429 endianness compensation) to fetch the piece we want. */
1430 part = extract_fixed_bit_field (word_mode, value, value_mode,
1431 thissize,
1432 total_bits - bitsize + bitsdone,
1433 NULL_RTX, 1, false);
1435 else
1437 /* Fetch successively more significant portions. */
1438 if (CONST_INT_P (value))
1439 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1440 >> bitsdone)
1441 & ((HOST_WIDE_INT_1 << thissize) - 1));
1442 /* Likewise, but the source is big-endian. */
1443 else if (reverse)
1444 part = extract_fixed_bit_field (word_mode, value, value_mode,
1445 thissize,
1446 total_bits - bitsdone - thissize,
1447 NULL_RTX, 1, false);
1448 else
1449 part = extract_fixed_bit_field (word_mode, value, value_mode,
1450 thissize, bitsdone, NULL_RTX,
1451 1, false);
1454 /* If OP0 is a register, then handle OFFSET here. */
1455 rtx op0_piece = op0;
1456 opt_scalar_int_mode op0_piece_mode = op0_mode;
1457 if (SUBREG_P (op0) || REG_P (op0))
1459 scalar_int_mode imode;
1460 if (op0_mode.exists (&imode)
1461 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1463 if (offset)
1464 op0_piece = const0_rtx;
1466 else
1468 op0_piece = operand_subword_force (op0,
1469 offset * unit / BITS_PER_WORD,
1470 GET_MODE (op0));
1471 op0_piece_mode = word_mode;
1473 offset &= BITS_PER_WORD / unit - 1;
1476 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1477 it is just an out-of-bounds access. Ignore it. */
1478 if (op0_piece != const0_rtx)
1479 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1480 offset * unit + thispos, bitregion_start,
1481 bitregion_end, part, word_mode, reverse);
1482 bitsdone += thissize;
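/* As a worked example, assuming UNIT == 32, BITPOS == 24 and
   BITSIZE == 16: the loop above performs two iterations.  The first has
   offset == 0, thispos == 24 and thissize == MIN (16, 32 - 24) == 8; the
   second has bitsdone == 8, offset == 1, thispos == 0 and thissize == 8.
   The field is thus stored as an 8-bit piece at bit 24 of the first unit
   and an 8-bit piece at bit 0 of the second.  */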
1486 /* A subroutine of extract_bit_field_1 that converts return value X
1487 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1488 to extract_bit_field. */
1490 static rtx
1491 convert_extracted_bit_field (rtx x, machine_mode mode,
1492 machine_mode tmode, bool unsignedp)
1494 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1495 return x;
1497 /* If the x mode is not a scalar integral, first convert to the
1498 integer mode of that size and then access it as a floating-point
1499 value via a SUBREG. */
1500 if (!SCALAR_INT_MODE_P (tmode))
1502 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1503 x = convert_to_mode (int_mode, x, unsignedp);
1504 x = force_reg (int_mode, x);
1505 return gen_lowpart (tmode, x);
1508 return convert_to_mode (tmode, x, unsignedp);
1511 /* Try to use an ext(z)v pattern to extract a field from OP0.
1512 Return the extracted value on success, otherwise return null.
1513 EXTV describes the extraction instruction to use. If OP0_MODE
1514 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1515 The other arguments are as for extract_bit_field. */
1517 static rtx
1518 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1519 opt_scalar_int_mode op0_mode,
1520 unsigned HOST_WIDE_INT bitsize,
1521 unsigned HOST_WIDE_INT bitnum,
1522 int unsignedp, rtx target,
1523 machine_mode mode, machine_mode tmode)
1525 class expand_operand ops[4];
1526 rtx spec_target = target;
1527 rtx spec_target_subreg = 0;
1528 scalar_int_mode ext_mode = extv->field_mode;
1529 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1531 if (bitsize == 0 || unit < bitsize)
1532 return NULL_RTX;
1534 if (MEM_P (op0))
1535 /* Get a reference to the first byte of the field. */
1536 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1537 &bitnum);
1538 else
1540 /* Convert from counting within OP0 to counting in EXT_MODE. */
1541 if (BYTES_BIG_ENDIAN)
1542 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1544 /* If op0 is a register, we need it in EXT_MODE to make it
1545 acceptable to the format of ext(z)v. */
1546 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1547 return NULL_RTX;
1548 if (REG_P (op0) && op0_mode.require () != ext_mode)
1549 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1552 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1553 "backwards" from the size of the unit we are extracting from.
1554 Otherwise, we count bits from the most significant on a
1555 BYTES/BITS_BIG_ENDIAN machine. */
1557 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1558 bitnum = unit - bitsize - bitnum;
1560 if (target == 0)
1561 target = spec_target = gen_reg_rtx (tmode);
1563 if (GET_MODE (target) != ext_mode)
1565 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1566 between the mode of the extraction (word_mode) and the target
1567 mode. Instead, create a temporary and use convert_move to set
1568 the target. */
1569 if (REG_P (target)
1570 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1572 target = gen_lowpart (ext_mode, target);
1573 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1574 spec_target_subreg = target;
1576 else
1577 target = gen_reg_rtx (ext_mode);
1580 create_output_operand (&ops[0], target, ext_mode);
1581 create_fixed_operand (&ops[1], op0);
1582 create_integer_operand (&ops[2], bitsize);
1583 create_integer_operand (&ops[3], bitnum);
1584 if (maybe_expand_insn (extv->icode, 4, ops))
1586 target = ops[0].value;
1587 if (target == spec_target)
1588 return target;
1589 if (target == spec_target_subreg)
1590 return spec_target;
1591 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1593 return NULL_RTX;
1596 /* See whether it would be valid to extract the part of OP0 described
1597 by BITNUM and BITSIZE into a value of mode MODE using a subreg
1598 operation. Return the subreg if so, otherwise return null. */
1600 static rtx
1601 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1602 poly_uint64 bitsize, poly_uint64 bitnum)
1604 poly_uint64 bytenum;
1605 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1606 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1607 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1608 && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1609 return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1610 return NULL_RTX;
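/* As an illustrative sketch, assuming a little-endian target: extracting
   bits 0..31 of a DImode pseudo as SImode passes all of the checks above,
   so the result is simply the lowpart subreg

     (subreg:SI (reg:DI x) 0)

   while a field that is not byte-aligned, or is not a lowpart of OP0,
   makes the function return NULL_RTX instead.  */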
1613 /* A subroutine of extract_bit_field, with the same arguments.
1614 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1615 if we can find no other means of implementing the operation.
1616 If FALLBACK_P is false, return NULL instead. */
1618 static rtx
1619 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1620 int unsignedp, rtx target, machine_mode mode,
1621 machine_mode tmode, bool reverse, bool fallback_p,
1622 rtx *alt_rtl)
1624 rtx op0 = str_rtx;
1625 machine_mode mode1;
1627 if (tmode == VOIDmode)
1628 tmode = mode;
1630 while (GET_CODE (op0) == SUBREG)
1632 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1633 op0 = SUBREG_REG (op0);
1636 /* If we have an out-of-bounds access to a register, just return an
1637 uninitialized register of the required mode. This can occur if the
1638 source code contains an out-of-bounds access to a small array. */
1639 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1640 return gen_reg_rtx (tmode);
1642 if (REG_P (op0)
1643 && mode == GET_MODE (op0)
1644 && known_eq (bitnum, 0U)
1645 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1647 if (reverse)
1648 op0 = flip_storage_order (mode, op0);
1649 /* We're trying to extract a full register from itself. */
1650 return op0;
1653 /* First try to check for vector from vector extractions. */
1654 if (VECTOR_MODE_P (GET_MODE (op0))
1655 && !MEM_P (op0)
1656 && VECTOR_MODE_P (tmode)
1657 && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1658 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1660 machine_mode new_mode = GET_MODE (op0);
1661 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1663 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1664 poly_uint64 nunits;
1665 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1666 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1667 || !related_vector_mode (tmode, inner_mode,
1668 nunits).exists (&new_mode)
1669 || maybe_ne (GET_MODE_SIZE (new_mode),
1670 GET_MODE_SIZE (GET_MODE (op0))))
1671 new_mode = VOIDmode;
1673 poly_uint64 pos;
1674 if (new_mode != VOIDmode
1675 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1676 != CODE_FOR_nothing)
1677 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1679 class expand_operand ops[3];
1680 machine_mode outermode = new_mode;
1681 machine_mode innermode = tmode;
1682 enum insn_code icode
1683 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1685 if (new_mode != GET_MODE (op0))
1686 op0 = gen_lowpart (new_mode, op0);
1687 create_output_operand (&ops[0], target, innermode);
1688 ops[0].target = 1;
1689 create_input_operand (&ops[1], op0, outermode);
1690 create_integer_operand (&ops[2], pos);
1691 if (maybe_expand_insn (icode, 3, ops))
1693 if (alt_rtl && ops[0].target)
1694 *alt_rtl = target;
1695 target = ops[0].value;
1696 if (GET_MODE (target) != mode)
1697 return gen_lowpart (tmode, target);
1698 return target;
1703 /* See if we can get a better vector mode before extracting. */
1704 if (VECTOR_MODE_P (GET_MODE (op0))
1705 && !MEM_P (op0)
1706 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1708 machine_mode new_mode;
1710 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1711 new_mode = MIN_MODE_VECTOR_FLOAT;
1712 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1713 new_mode = MIN_MODE_VECTOR_FRACT;
1714 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1715 new_mode = MIN_MODE_VECTOR_UFRACT;
1716 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1717 new_mode = MIN_MODE_VECTOR_ACCUM;
1718 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1719 new_mode = MIN_MODE_VECTOR_UACCUM;
1720 else
1721 new_mode = MIN_MODE_VECTOR_INT;
1723 FOR_EACH_MODE_FROM (new_mode, new_mode)
1724 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1725 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1726 && targetm.vector_mode_supported_p (new_mode))
1727 break;
1728 if (new_mode != VOIDmode)
1729 op0 = gen_lowpart (new_mode, op0);
1732 /* Use vec_extract patterns for extracting parts of vectors whenever
1733 available. If that fails, see whether the current modes and bitregion
1734 give a natural subreg. */
1735 machine_mode outermode = GET_MODE (op0);
1736 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1738 scalar_mode innermode = GET_MODE_INNER (outermode);
1739 enum insn_code icode
1740 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1741 poly_uint64 pos;
1742 if (icode != CODE_FOR_nothing
1743 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1744 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1746 class expand_operand ops[3];
1748 create_output_operand (&ops[0], target, innermode);
1749 ops[0].target = 1;
1750 create_input_operand (&ops[1], op0, outermode);
1751 create_integer_operand (&ops[2], pos);
1752 if (maybe_expand_insn (icode, 3, ops))
1754 if (alt_rtl && ops[0].target)
1755 *alt_rtl = target;
1756 target = ops[0].value;
1757 if (GET_MODE (target) != mode)
1758 return gen_lowpart (tmode, target);
1759 return target;
1762 /* Using subregs is useful if we're extracting one register vector
1763 from a multi-register vector. extract_bit_field_as_subreg checks
1764 for valid bitsize and bitnum, so we don't need to do that here. */
1765 if (VECTOR_MODE_P (mode))
1767 rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1768 if (sub)
1769 return sub;
1773 /* Make sure we are playing with integral modes. Pun with subregs
1774 if we aren't. */
1775 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1776 scalar_int_mode imode;
1777 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1779 if (MEM_P (op0))
1780 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1781 0, MEM_SIZE (op0));
1782 else if (op0_mode.exists (&imode))
1784 op0 = gen_lowpart (imode, op0);
1786 /* If we got a SUBREG, force it into a register since we
1787 aren't going to be able to do another SUBREG on it. */
1788 if (GET_CODE (op0) == SUBREG)
1789 op0 = force_reg (imode, op0);
1791 else
1793 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1794 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1795 emit_move_insn (mem, op0);
1796 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1800 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1801 If that's wrong, the solution is to test for it and set TARGET to 0
1802 if needed. */
1804 /* Get the mode of the field to use for atomic access or subreg
1805 conversion. */
1806 if (!SCALAR_INT_MODE_P (tmode)
1807 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1808 mode1 = mode;
1809 gcc_assert (mode1 != BLKmode);
1811 /* Extraction of a full MODE1 value can be done with a subreg as long
1812 as the least significant bit of the value is the least significant
1813 bit of either OP0 or a word of OP0. */
1814 if (!MEM_P (op0) && !reverse)
1816 rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1817 if (sub)
1818 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1821 /* Extraction of a full MODE1 value can be done with a load as long as
1822 the field is on a byte boundary and is sufficiently aligned. */
1823 poly_uint64 bytenum;
1824 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1826 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1827 if (reverse)
1828 op0 = flip_storage_order (mode1, op0);
1829 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1832 /* If we have a memory source and a non-constant bit offset, restrict
1833 the memory to the referenced bytes. This is a worst-case fallback
1834 but is useful for things like vector booleans. */
1835 if (MEM_P (op0) && !bitnum.is_constant ())
1837 bytenum = bits_to_bytes_round_down (bitnum);
1838 bitnum = num_trailing_bits (bitnum);
1839 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1840 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1841 op0_mode = opt_scalar_int_mode ();
1844 /* It's possible we'll need to handle other cases here for
1845 polynomial bitnum and bitsize. */
1847 /* From here on we need to be looking at a fixed-size extraction. */
1848 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1849 bitnum.to_constant (), unsignedp,
1850 target, mode, tmode, reverse, fallback_p);
1853 /* Subroutine of extract_bit_field_1, with the same arguments, except
1854 that BITSIZE and BITNUM are constant. Handle cases specific to
1855 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1856 otherwise OP0 is a BLKmode MEM. */
1858 static rtx
1859 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1860 unsigned HOST_WIDE_INT bitsize,
1861 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1862 rtx target, machine_mode mode, machine_mode tmode,
1863 bool reverse, bool fallback_p)
1865 /* Handle fields bigger than a word. */
1867 if (bitsize > BITS_PER_WORD)
1869 /* Here we transfer the words of the field
1870 in the order least significant first.
1871 This is because the most significant word is the one which may
1872 be less than full. */
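/* For example, extracting a 72-bit field on a target with 32-bit words
   takes three word-sized moves; only 8 of the 32 bits in the most
   significant word belong to the field, and that word is handled last.  */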
1874 const bool backwards = WORDS_BIG_ENDIAN;
1875 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1876 unsigned int i;
1877 rtx_insn *last;
1879 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1880 target = gen_reg_rtx (mode);
1882 /* In case we're about to clobber a base register or something
1883 (see gcc.c-torture/execute/20040625-1.c). */
1884 if (reg_mentioned_p (target, op0))
1885 target = gen_reg_rtx (mode);
1887 /* Indicate for flow that the entire target reg is being set. */
1888 emit_clobber (target);
1890 /* The mode must be fixed-size, since extract_bit_field_1 handles
1891 extractions from variable-sized objects before calling this
1892 function. */
1893 unsigned int target_size
1894 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1895 last = get_last_insn ();
1896 for (i = 0; i < nwords; i++)
1898 /* If I is 0, use the low-order word in both field and target;
1899 if I is 1, use the next to lowest word; and so on. */
1900 /* Word number in TARGET to use. */
1901 unsigned int wordnum
1902 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1903 /* Offset from start of field in OP0. */
1904 unsigned int bit_offset = (backwards ^ reverse
1905 ? MAX ((int) bitsize - ((int) i + 1)
1906 * BITS_PER_WORD,
1907 0)
1908 : (int) i * BITS_PER_WORD);
1909 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1910 rtx result_part
1911 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1912 bitsize - i * BITS_PER_WORD),
1913 bitnum + bit_offset, 1, target_part,
1914 mode, word_mode, reverse, fallback_p, NULL);
1916 gcc_assert (target_part);
1917 if (!result_part)
1919 delete_insns_since (last);
1920 return NULL;
1923 if (result_part != target_part)
1924 emit_move_insn (target_part, result_part);
1927 if (unsignedp)
1929 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1930 need to be zero'd out. */
1931 if (target_size > nwords * UNITS_PER_WORD)
1933 unsigned int i, total_words;
1935 total_words = target_size / UNITS_PER_WORD;
1936 for (i = nwords; i < total_words; i++)
1937 emit_move_insn
1938 (operand_subword (target,
1939 backwards ? total_words - i - 1 : i,
1940 1, VOIDmode),
1941 const0_rtx);
1943 return target;
1946 /* Signed bit field: sign-extend with two arithmetic shifts. */
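/* E.g. a signed 40-bit field assembled into a 64-bit target register is
   shifted left by 24 and then arithmetically shifted right by 24, which
   replicates bit 39 of the field into the upper 24 bits.  */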
1947 target = expand_shift (LSHIFT_EXPR, mode, target,
1948 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1949 return expand_shift (RSHIFT_EXPR, mode, target,
1950 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1953 /* If OP0 is a multi-word register, narrow it to the affected word.
1954 If the region spans two words, defer to extract_split_bit_field. */
1955 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1957 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1959 if (!fallback_p)
1960 return NULL_RTX;
1961 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1962 unsignedp, reverse);
1963 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1965 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1966 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1967 op0_mode = word_mode;
1968 bitnum %= BITS_PER_WORD;
1971 /* From here on we know the desired field is smaller than a word.
1972 If OP0 is a register, it too fits within a word. */
1973 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1974 extraction_insn extv;
1975 if (!MEM_P (op0)
1976 && !reverse
1977 /* ??? We could limit the structure size to the part of OP0 that
1978 contains the field, with appropriate checks for endianness
1979 and TARGET_TRULY_NOOP_TRUNCATION. */
1980 && get_best_reg_extraction_insn (&extv, pattern,
1981 GET_MODE_BITSIZE (op0_mode.require ()),
1982 tmode))
1984 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1985 bitsize, bitnum,
1986 unsignedp, target, mode,
1987 tmode);
1988 if (result)
1989 return result;
1992 /* If OP0 is a memory, try copying it to a register and seeing if a
1993 cheap register alternative is available. */
1994 if (MEM_P (op0) && !reverse)
1996 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1997 tmode))
1999 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2000 bitsize, bitnum,
2001 unsignedp, target, mode,
2002 tmode);
2003 if (result)
2004 return result;
2007 rtx_insn *last = get_last_insn ();
2009 /* Try loading part of OP0 into a register and extracting the
2010 bitfield from that. */
2011 unsigned HOST_WIDE_INT bitpos;
2012 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2013 0, 0, tmode, &bitpos);
2014 if (xop0)
2016 xop0 = copy_to_reg (xop0);
2017 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2018 unsignedp, target,
2019 mode, tmode, reverse, false, NULL);
2020 if (result)
2021 return result;
2022 delete_insns_since (last);
2026 if (!fallback_p)
2027 return NULL;
2029 /* Find a correspondingly-sized integer field, so we can apply
2030 shifts and masks to it. */
2031 scalar_int_mode int_mode;
2032 if (!int_mode_for_mode (tmode).exists (&int_mode))
2033 /* If this fails, we should probably push op0 out to memory and then
2034 do a load. */
2035 int_mode = int_mode_for_mode (mode).require ();
2037 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2038 bitnum, target, unsignedp, reverse);
2040 /* Complex values must be reversed piecewise, so we need to undo the global
2041 reversal, convert to the complex mode and reverse again. */
2042 if (reverse && COMPLEX_MODE_P (tmode))
2044 target = flip_storage_order (int_mode, target);
2045 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2046 target = flip_storage_order (tmode, target);
2048 else
2049 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2051 return target;
2054 /* Generate code to extract a bit-field from STR_RTX
2055 containing BITSIZE bits, starting at BITNUM,
2056 and put it in TARGET if possible (if TARGET is nonzero).
2057 Regardless of TARGET, we return the rtx for where the value is placed.
2059 STR_RTX is the structure containing the byte (a REG or MEM).
2060 UNSIGNEDP is nonzero if this is an unsigned bit field.
2061 MODE is the natural mode of the field value once extracted.
2062 TMODE is the mode the caller would like the value to have;
2063 but the value may be returned with type MODE instead.
2065 If REVERSE is true, the extraction is to be done in reverse order.
2067 If a TARGET is specified and we can store in it at no extra cost,
2068 we do so, and return TARGET.
2069 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2070 if they are equally easy.
2072 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2073 then *ALT_RTL is set to TARGET (before legitimization). */
2076 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2077 int unsignedp, rtx target, machine_mode mode,
2078 machine_mode tmode, bool reverse, rtx *alt_rtl)
2080 machine_mode mode1;
2082 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2083 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2084 mode1 = GET_MODE (str_rtx);
2085 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2086 mode1 = GET_MODE (target);
2087 else
2088 mode1 = tmode;
2090 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2091 scalar_int_mode int_mode;
2092 if (bitsize.is_constant (&ibitsize)
2093 && bitnum.is_constant (&ibitnum)
2094 && is_a <scalar_int_mode> (mode1, &int_mode)
2095 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2096 int_mode, 0, 0))
2098 /* Extraction of a full INT_MODE value can be done with a simple load.
2099 We know here that the field can be accessed with one single
2100 instruction. For targets that support unaligned memory,
2101 an unaligned access may be necessary. */
2102 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2104 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2105 ibitnum / BITS_PER_UNIT);
2106 if (reverse)
2107 result = flip_storage_order (int_mode, result);
2108 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2109 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2112 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2113 &ibitnum);
2114 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2115 str_rtx = copy_to_reg (str_rtx);
2116 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2117 target, mode, tmode, reverse, true, alt_rtl);
2120 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2121 target, mode, tmode, reverse, true, alt_rtl);
2124 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2125 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2126 otherwise OP0 is a BLKmode MEM.
2128 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2129 If REVERSE is true, the extraction is to be done in reverse order.
2131 If TARGET is nonzero, attempts to store the value there
2132 and return TARGET, but this is not guaranteed.
2133 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2135 static rtx
2136 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2137 opt_scalar_int_mode op0_mode,
2138 unsigned HOST_WIDE_INT bitsize,
2139 unsigned HOST_WIDE_INT bitnum, rtx target,
2140 int unsignedp, bool reverse)
2142 scalar_int_mode mode;
2143 if (MEM_P (op0))
2145 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2146 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2147 /* The only way this should occur is if the field spans word
2148 boundaries. */
2149 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2150 unsignedp, reverse);
2152 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2154 else
2155 mode = op0_mode.require ();
2157 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2158 target, unsignedp, reverse);
2161 /* Helper function for extract_fixed_bit_field, extracts
2162 the bit field always using MODE, which is the mode of OP0.
2163 The other arguments are as for extract_fixed_bit_field. */
2165 static rtx
2166 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2167 unsigned HOST_WIDE_INT bitsize,
2168 unsigned HOST_WIDE_INT bitnum, rtx target,
2169 int unsignedp, bool reverse)
2171 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2172 for invalid input, such as extract equivalent of f5 from
2173 gcc.dg/pr48335-2.c. */
2175 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2176 /* BITNUM is the distance between our msb and that of OP0.
2177 Convert it to the distance from the lsb. */
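/* For instance, a 5-bit field at big-endian BITNUM 3 in a 32-bit mode
   ends up at little-endian bit position 32 - 5 - 3 = 24.  */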
2178 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2180 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2181 We have reduced the big-endian case to the little-endian case. */
2182 if (reverse)
2183 op0 = flip_storage_order (mode, op0);
2185 if (unsignedp)
2187 if (bitnum)
2189 /* If the field does not already start at the lsb,
2190 shift it so it does. */
2191 /* Maybe propagate the target for the shift. */
2192 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2193 if (tmode != mode)
2194 subtarget = 0;
2195 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2197 /* Convert the value to the desired mode. TMODE must also be a
2198 scalar integer for this conversion to make sense, since we
2199 shouldn't reinterpret the bits. */
2200 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2201 if (mode != new_mode)
2202 op0 = convert_to_mode (new_mode, op0, 1);
2204 /* Unless the msb of the field used to be the msb when we shifted,
2205 mask out the upper bits. */
2207 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2208 return expand_binop (new_mode, and_optab, op0,
2209 mask_rtx (new_mode, 0, bitsize, 0),
2210 target, 1, OPTAB_LIB_WIDEN);
2211 return op0;
2214 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2215 then arithmetic-shift its lsb to the lsb of the word. */
2216 op0 = force_reg (mode, op0);
2218 /* Find the narrowest integer mode that contains the field. */
2220 opt_scalar_int_mode mode_iter;
2221 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2222 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2223 break;
2225 mode = mode_iter.require ();
2226 op0 = convert_to_mode (mode, op0, 0);
2228 if (mode != tmode)
2229 target = 0;
2231 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2233 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2234 /* Maybe propagate the target for the shift. */
2235 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2236 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2239 return expand_shift (RSHIFT_EXPR, mode, op0,
2240 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2243 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2244 VALUE << BITPOS. */
2246 static rtx
2247 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2248 int bitpos)
2250 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2253 /* Extract a bit field that is split across two words
2254 and return an RTX for the result.
2256 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2257 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2258 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2259 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2260 a BLKmode MEM.
2262 If REVERSE is true, the extraction is to be done in reverse order. */
2264 static rtx
2265 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2266 unsigned HOST_WIDE_INT bitsize,
2267 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2268 bool reverse)
2270 unsigned int unit;
2271 unsigned int bitsdone = 0;
2272 rtx result = NULL_RTX;
2273 int first = 1;
2275 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2276 much at a time. */
2277 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2278 unit = BITS_PER_WORD;
2279 else
2280 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2282 while (bitsdone < bitsize)
2284 unsigned HOST_WIDE_INT thissize;
2285 rtx part;
2286 unsigned HOST_WIDE_INT thispos;
2287 unsigned HOST_WIDE_INT offset;
2289 offset = (bitpos + bitsdone) / unit;
2290 thispos = (bitpos + bitsdone) % unit;
2292 /* THISSIZE must not overrun a word boundary. Otherwise,
2293 extract_fixed_bit_field will call us again, and we will mutually
2294 recurse forever. */
2295 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2296 thissize = MIN (thissize, unit - thispos);
2298 /* If OP0 is a register, then handle OFFSET here. */
2299 rtx op0_piece = op0;
2300 opt_scalar_int_mode op0_piece_mode = op0_mode;
2301 if (SUBREG_P (op0) || REG_P (op0))
2303 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2304 op0_piece_mode = word_mode;
2305 offset = 0;
2308 /* Extract the parts in bit-counting order,
2309 whose meaning is determined by BYTES_PER_UNIT.
2310 OFFSET is in UNITs, and UNIT is in bits. */
2311 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2312 thissize, offset * unit + thispos,
2313 0, 1, reverse);
2314 bitsdone += thissize;
2316 /* Shift this part into place for the result. */
2317 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2319 if (bitsize != bitsdone)
2320 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2321 bitsize - bitsdone, 0, 1);
2323 else
2325 if (bitsdone != thissize)
2326 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2327 bitsdone - thissize, 0, 1);
2330 if (first)
2331 result = part;
2332 else
2333 /* Combine the parts with bitwise or. This works
2334 because we extracted each part as an unsigned bit field. */
2335 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2336 OPTAB_LIB_WIDEN);
2338 first = 0;
2341 /* Unsigned bit field: we are done. */
2342 if (unsignedp)
2343 return result;
2344 /* Signed bit field: sign-extend with two arithmetic shifts. */
2345 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2346 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2347 return expand_shift (RSHIFT_EXPR, word_mode, result,
2348 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2351 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2352 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2353 MODE, fill the upper bits with zeros. Fail if the layout of either
2354 mode is unknown (as for CC modes) or if the extraction would involve
2355 unprofitable mode punning. Return the value on success, otherwise
2356 return null.
2358 This is different from gen_lowpart* in these respects:
2360 - the returned value must always be considered an rvalue
2362 - when MODE is wider than SRC_MODE, the extraction involves
2363 a zero extension
2365 - when MODE is smaller than SRC_MODE, the extraction involves
2366 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2368 In other words, this routine performs a computation, whereas the
2369 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2370 operations. */
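/* As an illustration, extracting the SImode low bits of a DImode SRC keeps
   only the low 32 bits (a truncation), while extracting DImode from an
   SImode SRC zero-extends the 32-bit value to 64 bits.  */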
2373 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2375 scalar_int_mode int_mode, src_int_mode;
2377 if (mode == src_mode)
2378 return src;
2380 if (CONSTANT_P (src))
2382 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2383 fails, it will happily create (subreg (symbol_ref)) or similar
2384 invalid SUBREGs. */
2385 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2386 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2387 if (ret)
2388 return ret;
2390 if (GET_MODE (src) == VOIDmode
2391 || !validate_subreg (mode, src_mode, src, byte))
2392 return NULL_RTX;
2394 src = force_reg (GET_MODE (src), src);
2395 return gen_rtx_SUBREG (mode, src, byte);
2398 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2399 return NULL_RTX;
2401 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2402 && targetm.modes_tieable_p (mode, src_mode))
2404 rtx x = gen_lowpart_common (mode, src);
2405 if (x)
2406 return x;
2409 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2410 || !int_mode_for_mode (mode).exists (&int_mode))
2411 return NULL_RTX;
2413 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2414 return NULL_RTX;
2415 if (!targetm.modes_tieable_p (int_mode, mode))
2416 return NULL_RTX;
2418 src = gen_lowpart (src_int_mode, src);
2419 if (!validate_subreg (int_mode, src_int_mode, src,
2420 subreg_lowpart_offset (int_mode, src_int_mode)))
2421 return NULL_RTX;
2423 src = convert_modes (int_mode, src_int_mode, src, true);
2424 src = gen_lowpart (mode, src);
2425 return src;
2428 /* Add INC into TARGET. */
2430 void
2431 expand_inc (rtx target, rtx inc)
2433 rtx value = expand_binop (GET_MODE (target), add_optab,
2434 target, inc,
2435 target, 0, OPTAB_LIB_WIDEN);
2436 if (value != target)
2437 emit_move_insn (target, value);
2440 /* Subtract DEC from TARGET. */
2442 void
2443 expand_dec (rtx target, rtx dec)
2445 rtx value = expand_binop (GET_MODE (target), sub_optab,
2446 target, dec,
2447 target, 0, OPTAB_LIB_WIDEN);
2448 if (value != target)
2449 emit_move_insn (target, value);
2452 /* Output a shift instruction for expression code CODE,
2453 with SHIFTED being the rtx for the value to shift,
2454 and AMOUNT the rtx for the amount to shift by.
2455 Store the result in the rtx TARGET, if that is convenient.
2456 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2457 Return the rtx for where the value is.
2458 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2459 in which case 0 is returned. */
2461 static rtx
2462 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2463 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2465 rtx op1, temp = 0;
2466 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2467 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2468 optab lshift_optab = ashl_optab;
2469 optab rshift_arith_optab = ashr_optab;
2470 optab rshift_uns_optab = lshr_optab;
2471 optab lrotate_optab = rotl_optab;
2472 optab rrotate_optab = rotr_optab;
2473 machine_mode op1_mode;
2474 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2475 int attempt;
2476 bool speed = optimize_insn_for_speed_p ();
2478 op1 = amount;
2479 op1_mode = GET_MODE (op1);
2481 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2482 shift amount is a vector, use the vector/vector shift patterns. */
2483 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2485 lshift_optab = vashl_optab;
2486 rshift_arith_optab = vashr_optab;
2487 rshift_uns_optab = vlshr_optab;
2488 lrotate_optab = vrotl_optab;
2489 rrotate_optab = vrotr_optab;
2492 /* Previously detected shift-counts computed by NEGATE_EXPR
2493 and shifted in the other direction; but that does not work
2494 on all machines. */
2496 if (SHIFT_COUNT_TRUNCATED)
2498 if (CONST_INT_P (op1)
2499 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2500 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2501 op1 = gen_int_shift_amount (mode,
2502 (unsigned HOST_WIDE_INT) INTVAL (op1)
2503 % GET_MODE_BITSIZE (scalar_mode));
2504 else if (GET_CODE (op1) == SUBREG
2505 && subreg_lowpart_p (op1)
2506 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2507 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2508 op1 = SUBREG_REG (op1);
2511 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2512 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2513 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2514 amount instead. */
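/* For example, in a 32-bit mode a rotate left by 24 is rewritten as a
   rotate right by 8, and vice versa.  */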
2515 if (rotate
2516 && CONST_INT_P (op1)
2517 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2518 GET_MODE_BITSIZE (scalar_mode) - 1))
2520 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2521 - INTVAL (op1)));
2522 left = !left;
2523 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2526 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2527 Note that this is not the case for bigger values. For instance a rotation
2528 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2529 0x04030201 (bswapsi). */
2530 if (rotate
2531 && CONST_INT_P (op1)
2532 && INTVAL (op1) == BITS_PER_UNIT
2533 && GET_MODE_SIZE (scalar_mode) == 2
2534 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2535 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2537 if (op1 == const0_rtx)
2538 return shifted;
2540 /* Check whether it's cheaper to implement a left shift by a constant
2541 bit count by a sequence of additions. */
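/* E.g. X << 2 can be computed as two self-additions,
     t = x + x;  t = t + t;
   which wins when two adds cost less than one shift by 2.  */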
2542 if (code == LSHIFT_EXPR
2543 && CONST_INT_P (op1)
2544 && INTVAL (op1) > 0
2545 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2546 && INTVAL (op1) < MAX_BITS_PER_WORD
2547 && (shift_cost (speed, mode, INTVAL (op1))
2548 > INTVAL (op1) * add_cost (speed, mode))
2549 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2551 int i;
2552 for (i = 0; i < INTVAL (op1); i++)
2554 temp = force_reg (mode, shifted);
2555 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2556 unsignedp, OPTAB_LIB_WIDEN);
2558 return shifted;
2561 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2563 enum optab_methods methods;
2565 if (attempt == 0)
2566 methods = OPTAB_DIRECT;
2567 else if (attempt == 1)
2568 methods = OPTAB_WIDEN;
2569 else
2570 methods = OPTAB_LIB_WIDEN;
2572 if (rotate)
2574 /* Widening does not work for rotation. */
2575 if (methods == OPTAB_WIDEN)
2576 continue;
2577 else if (methods == OPTAB_LIB_WIDEN)
2579 /* If we have been unable to open-code this by a rotation,
2580 do it as the IOR of two shifts. I.e., to rotate A
2581 by N bits, compute
2582 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2583 where C is the bitsize of A.
2585 It is theoretically possible that the target machine might
2586 not be able to perform either shift and hence we would
2587 be making two libcalls rather than just the one for the
2588 shift (similarly if IOR could not be done). We will allow
2589 this extremely unlikely lossage to avoid complicating the
2590 code below. */
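/* For instance, rotating a 32-bit A left by 8 becomes
     (A << 8) | ((unsigned) A >> 24)
   since ((-8) & (32 - 1)) == 24.  */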
2592 rtx subtarget = target == shifted ? 0 : target;
2593 rtx new_amount, other_amount;
2594 rtx temp1;
2596 new_amount = op1;
2597 if (op1 == const0_rtx)
2598 return shifted;
2599 else if (CONST_INT_P (op1))
2600 other_amount = gen_int_shift_amount
2601 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2602 else
2604 other_amount
2605 = simplify_gen_unary (NEG, GET_MODE (op1),
2606 op1, GET_MODE (op1));
2607 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2608 other_amount
2609 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2610 gen_int_mode (mask, GET_MODE (op1)));
2613 shifted = force_reg (mode, shifted);
2615 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2616 mode, shifted, new_amount, 0, 1);
2617 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2618 mode, shifted, other_amount,
2619 subtarget, 1);
2620 return expand_binop (mode, ior_optab, temp, temp1, target,
2621 unsignedp, methods);
2624 temp = expand_binop (mode,
2625 left ? lrotate_optab : rrotate_optab,
2626 shifted, op1, target, unsignedp, methods);
2628 else if (unsignedp)
2629 temp = expand_binop (mode,
2630 left ? lshift_optab : rshift_uns_optab,
2631 shifted, op1, target, unsignedp, methods);
2633 /* Do arithmetic shifts.
2634 Also, if we are going to widen the operand, we can just as well
2635 use an arithmetic right-shift instead of a logical one. */
2636 if (temp == 0 && ! rotate
2637 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2639 enum optab_methods methods1 = methods;
2641 /* If trying to widen a log shift to an arithmetic shift,
2642 don't accept an arithmetic shift of the same size. */
2643 if (unsignedp)
2644 methods1 = OPTAB_MUST_WIDEN;
2646 /* Arithmetic shift */
2648 temp = expand_binop (mode,
2649 left ? lshift_optab : rshift_arith_optab,
2650 shifted, op1, target, unsignedp, methods1);
2653 /* We used to try extzv here for logical right shifts, but that was
2654 only useful for one machine, the VAX, and caused poor code
2655 generation there for lshrdi3, so the code was deleted and a
2656 define_expand for lshrsi3 was added to vax.md. */
2659 gcc_assert (temp != NULL_RTX || may_fail);
2660 return temp;
2663 /* Output a shift instruction for expression code CODE,
2664 with SHIFTED being the rtx for the value to shift,
2665 and AMOUNT the amount to shift by.
2666 Store the result in the rtx TARGET, if that is convenient.
2667 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2668 Return the rtx for where the value is. */
2671 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2672 poly_int64 amount, rtx target, int unsignedp)
2674 return expand_shift_1 (code, mode, shifted,
2675 gen_int_shift_amount (mode, amount),
2676 target, unsignedp);
2679 /* Likewise, but return 0 if that cannot be done. */
2681 static rtx
2682 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2683 int amount, rtx target, int unsignedp)
2685 return expand_shift_1 (code, mode,
2686 shifted, GEN_INT (amount), target, unsignedp, true);
2689 /* Output a shift instruction for expression code CODE,
2690 with SHIFTED being the rtx for the value to shift,
2691 and AMOUNT the tree for the amount to shift by.
2692 Store the result in the rtx TARGET, if that is convenient.
2693 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2694 Return the rtx for where the value is. */
2697 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2698 tree amount, rtx target, int unsignedp)
2700 return expand_shift_1 (code, mode,
2701 shifted, expand_normal (amount), target, unsignedp);
2705 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2706 const struct mult_cost *, machine_mode mode);
2707 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2708 const struct algorithm *, enum mult_variant);
2709 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2710 static rtx extract_high_half (scalar_int_mode, rtx);
2711 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2712 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2713 int, int);
2714 /* Compute and return the best algorithm for multiplying by T.
2715 The algorithm must cost less than cost_limit.
2716 If retval.cost >= COST_LIMIT, no algorithm was found and all
2717 other fields of the returned struct are undefined.
2718 MODE is the machine mode of the multiplication. */
2720 static void
2721 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2722 const struct mult_cost *cost_limit, machine_mode mode)
2724 int m;
2725 struct algorithm *alg_in, *best_alg;
2726 struct mult_cost best_cost;
2727 struct mult_cost new_limit;
2728 int op_cost, op_latency;
2729 unsigned HOST_WIDE_INT orig_t = t;
2730 unsigned HOST_WIDE_INT q;
2731 int maxm, hash_index;
2732 bool cache_hit = false;
2733 enum alg_code cache_alg = alg_zero;
2734 bool speed = optimize_insn_for_speed_p ();
2735 scalar_int_mode imode;
2736 struct alg_hash_entry *entry_ptr;
2738 /* Indicate that no algorithm is yet found. If no algorithm
2739 is found, this value will be returned and indicate failure. */
2740 alg_out->cost.cost = cost_limit->cost + 1;
2741 alg_out->cost.latency = cost_limit->latency + 1;
2743 if (cost_limit->cost < 0
2744 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2745 return;
2747 /* Be prepared for vector modes. */
2748 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2750 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2752 /* Restrict the bits of "t" to the multiplication's mode. */
2753 t &= GET_MODE_MASK (imode);
2755 /* t == 1 can be done in zero cost. */
2756 if (t == 1)
2758 alg_out->ops = 1;
2759 alg_out->cost.cost = 0;
2760 alg_out->cost.latency = 0;
2761 alg_out->op[0] = alg_m;
2762 return;
2765 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2766 fail now. */
2767 if (t == 0)
2769 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2770 return;
2771 else
2773 alg_out->ops = 1;
2774 alg_out->cost.cost = zero_cost (speed);
2775 alg_out->cost.latency = zero_cost (speed);
2776 alg_out->op[0] = alg_zero;
2777 return;
2781 /* We'll be needing a couple extra algorithm structures now. */
2783 alg_in = XALLOCA (struct algorithm);
2784 best_alg = XALLOCA (struct algorithm);
2785 best_cost = *cost_limit;
2787 /* Compute the hash index. */
2788 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2790 /* See if we already know what to do for T. */
2791 entry_ptr = alg_hash_entry_ptr (hash_index);
2792 if (entry_ptr->t == t
2793 && entry_ptr->mode == mode
2794 && entry_ptr->speed == speed
2795 && entry_ptr->alg != alg_unknown)
2797 cache_alg = entry_ptr->alg;
2799 if (cache_alg == alg_impossible)
2801 /* The cache tells us that it's impossible to synthesize
2802 multiplication by T within entry_ptr->cost. */
2803 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2804 /* COST_LIMIT is at least as restrictive as the one
2805 recorded in the hash table, in which case we have no
2806 hope of synthesizing a multiplication. Just
2807 return. */
2808 return;
2810 /* If we get here, COST_LIMIT is less restrictive than the
2811 one recorded in the hash table, so we may be able to
2812 synthesize a multiplication. Proceed as if we didn't
2813 have the cache entry. */
2815 else
2817 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2818 /* The cached algorithm shows that this multiplication
2819 requires more cost than COST_LIMIT. Just return. This
2820 way, we don't clobber this cache entry with
2821 alg_impossible but retain useful information. */
2822 return;
2824 cache_hit = true;
2826 switch (cache_alg)
2828 case alg_shift:
2829 goto do_alg_shift;
2831 case alg_add_t_m2:
2832 case alg_sub_t_m2:
2833 goto do_alg_addsub_t_m2;
2835 case alg_add_factor:
2836 case alg_sub_factor:
2837 goto do_alg_addsub_factor;
2839 case alg_add_t2_m:
2840 goto do_alg_add_t2_m;
2842 case alg_sub_t2_m:
2843 goto do_alg_sub_t2_m;
2845 default:
2846 gcc_unreachable ();
2851 /* If we have a group of zero bits at the low-order part of T, try
2852 multiplying by the remaining bits and then doing a shift. */
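/* E.g. T == 40 (binary 101000) is handled as a multiplication by 5
   (say X + (X << 2)) followed by a left shift by 3.  */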
2854 if ((t & 1) == 0)
2856 do_alg_shift:
2857 m = ctz_or_zero (t); /* m = number of low zero bits */
2858 if (m < maxm)
2860 q = t >> m;
2861 /* The function expand_shift will choose between a shift and
2862 a sequence of additions, so the observed cost is given as
2863 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2864 op_cost = m * add_cost (speed, mode);
2865 if (shift_cost (speed, mode, m) < op_cost)
2866 op_cost = shift_cost (speed, mode, m);
2867 new_limit.cost = best_cost.cost - op_cost;
2868 new_limit.latency = best_cost.latency - op_cost;
2869 synth_mult (alg_in, q, &new_limit, mode);
2871 alg_in->cost.cost += op_cost;
2872 alg_in->cost.latency += op_cost;
2873 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2875 best_cost = alg_in->cost;
2876 std::swap (alg_in, best_alg);
2877 best_alg->log[best_alg->ops] = m;
2878 best_alg->op[best_alg->ops] = alg_shift;
2881 /* See if treating ORIG_T as a signed number yields a better
2882 sequence. Try this sequence only for a negative ORIG_T
2883 as it would be useless for a non-negative ORIG_T. */
2884 if ((HOST_WIDE_INT) orig_t < 0)
2886 /* Shift ORIG_T as follows because a right shift of a
2887 negative-valued signed type is implementation
2888 defined. */
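/* E.g. for ORIG_T == -24 and M == 3 this gives Q == -3, the value of
   ORIG_T >> 3, without ever right-shifting a negative value;
   and indeed (-3) << 3 == -24.  */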
2889 q = ~(~orig_t >> m);
2890 /* The function expand_shift will choose between a shift
2891 and a sequence of additions, so the observed cost is
2892 given as MIN (m * add_cost(speed, mode),
2893 shift_cost(speed, mode, m)). */
2894 op_cost = m * add_cost (speed, mode);
2895 if (shift_cost (speed, mode, m) < op_cost)
2896 op_cost = shift_cost (speed, mode, m);
2897 new_limit.cost = best_cost.cost - op_cost;
2898 new_limit.latency = best_cost.latency - op_cost;
2899 synth_mult (alg_in, q, &new_limit, mode);
2901 alg_in->cost.cost += op_cost;
2902 alg_in->cost.latency += op_cost;
2903 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2905 best_cost = alg_in->cost;
2906 std::swap (alg_in, best_alg);
2907 best_alg->log[best_alg->ops] = m;
2908 best_alg->op[best_alg->ops] = alg_shift;
2912 if (cache_hit)
2913 goto done;
2916 /* If we have an odd number, add or subtract one. */
2917 if ((t & 1) != 0)
2919 unsigned HOST_WIDE_INT w;
2921 do_alg_addsub_t_m2:
2922 for (w = 1; (w & t) != 0; w <<= 1)
2924 /* If T was -1, then W will be zero after the loop. This is another
2925 case where T ends with ...111. Handling this with (T + 1) and
2926 subtract 1 produces slightly better code and results in algorithm
2927 selection much faster than treating it like the ...0111 case
2928 below. */
2929 if (w == 0
2930 || (w > 2
2931 /* Reject the case where t is 3.
2932 Thus we prefer addition in that case. */
2933 && t != 3))
2935 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
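/* E.g. 7*X is computed as the sequence for 8*X, a left shift by 3,
   followed by one subtraction of X: (X << 3) - X.  */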
2937 op_cost = add_cost (speed, mode);
2938 new_limit.cost = best_cost.cost - op_cost;
2939 new_limit.latency = best_cost.latency - op_cost;
2940 synth_mult (alg_in, t + 1, &new_limit, mode);
2942 alg_in->cost.cost += op_cost;
2943 alg_in->cost.latency += op_cost;
2944 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2946 best_cost = alg_in->cost;
2947 std::swap (alg_in, best_alg);
2948 best_alg->log[best_alg->ops] = 0;
2949 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2952 else
2954 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2956 op_cost = add_cost (speed, mode);
2957 new_limit.cost = best_cost.cost - op_cost;
2958 new_limit.latency = best_cost.latency - op_cost;
2959 synth_mult (alg_in, t - 1, &new_limit, mode);
2961 alg_in->cost.cost += op_cost;
2962 alg_in->cost.latency += op_cost;
2963 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2965 best_cost = alg_in->cost;
2966 std::swap (alg_in, best_alg);
2967 best_alg->log[best_alg->ops] = 0;
2968 best_alg->op[best_alg->ops] = alg_add_t_m2;
2972 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2973 quickly with a - a * n for some appropriate constant n. */
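/* E.g. a * -7 == a - (a << 3); here -ORIG_T + 1 == 8, so M == 3.  */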
2974 m = exact_log2 (-orig_t + 1);
2975 if (m >= 0 && m < maxm)
2977 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2978 /* If the target has a cheap shift-and-subtract insn use
2979 that in preference to a shift insn followed by a sub insn.
2980 Assume that the shift-and-sub is "atomic" with a latency
2981 equal to its cost, otherwise assume that on superscalar
2982 hardware the shift may be executed concurrently with the
2983 earlier steps in the algorithm. */
2984 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2986 op_cost = shiftsub1_cost (speed, mode, m);
2987 op_latency = op_cost;
2989 else
2990 op_latency = add_cost (speed, mode);
2992 new_limit.cost = best_cost.cost - op_cost;
2993 new_limit.latency = best_cost.latency - op_latency;
2994 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2995 &new_limit, mode);
2997 alg_in->cost.cost += op_cost;
2998 alg_in->cost.latency += op_latency;
2999 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3001 best_cost = alg_in->cost;
3002 std::swap (alg_in, best_alg);
3003 best_alg->log[best_alg->ops] = m;
3004 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3008 if (cache_hit)
3009 goto done;
3012 /* Look for factors of t of the form
3013 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3014 If we find such a factor, we can multiply by t using an algorithm that
3015 multiplies by q, shift the result by m and add/subtract it to itself.
3017 We search for large factors first and loop down, even if large factors
3018 are less probable than small; if we find a large factor we will find a
3019 good sequence quickly, and therefore be able to prune (by decreasing
3020 COST_LIMIT) the search. */
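/* E.g. T == 45 factors as 5 * (2**3 + 1): first synthesize Y = X * 5
   (say (X << 2) + X), then compute (Y << 3) + Y.  */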
3022 do_alg_addsub_factor:
3023 for (m = floor_log2 (t - 1); m >= 2; m--)
3025 unsigned HOST_WIDE_INT d;
3027 d = (HOST_WIDE_INT_1U << m) + 1;
3028 if (t % d == 0 && t > d && m < maxm
3029 && (!cache_hit || cache_alg == alg_add_factor))
3031 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3032 if (shiftadd_cost (speed, mode, m) <= op_cost)
3033 op_cost = shiftadd_cost (speed, mode, m);
3035 op_latency = op_cost;
3038 new_limit.cost = best_cost.cost - op_cost;
3039 new_limit.latency = best_cost.latency - op_latency;
3040 synth_mult (alg_in, t / d, &new_limit, mode);
3042 alg_in->cost.cost += op_cost;
3043 alg_in->cost.latency += op_latency;
3044 if (alg_in->cost.latency < op_cost)
3045 alg_in->cost.latency = op_cost;
3046 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3048 best_cost = alg_in->cost;
3049 std::swap (alg_in, best_alg);
3050 best_alg->log[best_alg->ops] = m;
3051 best_alg->op[best_alg->ops] = alg_add_factor;
3053 /* Other factors will have been taken care of in the recursion. */
3054 break;
3057 d = (HOST_WIDE_INT_1U << m) - 1;
3058 if (t % d == 0 && t > d && m < maxm
3059 && (!cache_hit || cache_alg == alg_sub_factor))
3061 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3062 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3063 op_cost = shiftsub0_cost (speed, mode, m);
3065 op_latency = op_cost;
3067 new_limit.cost = best_cost.cost - op_cost;
3068 new_limit.latency = best_cost.latency - op_latency;
3069 synth_mult (alg_in, t / d, &new_limit, mode);
3071 alg_in->cost.cost += op_cost;
3072 alg_in->cost.latency += op_latency;
3073 if (alg_in->cost.latency < op_cost)
3074 alg_in->cost.latency = op_cost;
3075 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3077 best_cost = alg_in->cost;
3078 std::swap (alg_in, best_alg);
3079 best_alg->log[best_alg->ops] = m;
3080 best_alg->op[best_alg->ops] = alg_sub_factor;
3082 break;
3085 if (cache_hit)
3086 goto done;
3088 /* Try shift-and-add (load effective address) instructions,
3089 i.e. do a*3, a*5, a*9. */
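/* E.g. a*9 is a single shift-and-add, (a << 3) + a, which many targets
   can do in one instruction (an x86 lea, for instance).  */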
3090 if ((t & 1) != 0)
3092 do_alg_add_t2_m:
3093 q = t - 1;
3094 m = ctz_hwi (q);
3095 if (q && m < maxm)
3097 op_cost = shiftadd_cost (speed, mode, m);
3098 new_limit.cost = best_cost.cost - op_cost;
3099 new_limit.latency = best_cost.latency - op_cost;
3100 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3102 alg_in->cost.cost += op_cost;
3103 alg_in->cost.latency += op_cost;
3104 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3106 best_cost = alg_in->cost;
3107 std::swap (alg_in, best_alg);
3108 best_alg->log[best_alg->ops] = m;
3109 best_alg->op[best_alg->ops] = alg_add_t2_m;
3112 if (cache_hit)
3113 goto done;
3115 do_alg_sub_t2_m:
3116 q = t + 1;
3117 m = ctz_hwi (q);
3118 if (q && m < maxm)
3120 op_cost = shiftsub0_cost (speed, mode, m);
3121 new_limit.cost = best_cost.cost - op_cost;
3122 new_limit.latency = best_cost.latency - op_cost;
3123 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3125 alg_in->cost.cost += op_cost;
3126 alg_in->cost.latency += op_cost;
3127 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3129 best_cost = alg_in->cost;
3130 std::swap (alg_in, best_alg);
3131 best_alg->log[best_alg->ops] = m;
3132 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3135 if (cache_hit)
3136 goto done;
3139 done:
3140 /* If best_cost has not decreased, we have not found any algorithm. */
3141 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3143 /* We failed to find an algorithm. Record alg_impossible for
3144 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3145 we are asked to find an algorithm for T within the same or
3146 lower COST_LIMIT, we can immediately return to the
3147 caller. */
3148 entry_ptr->t = t;
3149 entry_ptr->mode = mode;
3150 entry_ptr->speed = speed;
3151 entry_ptr->alg = alg_impossible;
3152 entry_ptr->cost = *cost_limit;
3153 return;
3156 /* Cache the result. */
3157 if (!cache_hit)
3159 entry_ptr->t = t;
3160 entry_ptr->mode = mode;
3161 entry_ptr->speed = speed;
3162 entry_ptr->alg = best_alg->op[best_alg->ops];
3163 entry_ptr->cost.cost = best_cost.cost;
3164 entry_ptr->cost.latency = best_cost.latency;
3167 /* If we are getting a too long sequence for `struct algorithm'
3168 to record, make this search fail. */
3169 if (best_alg->ops == MAX_BITS_PER_WORD)
3170 return;
3172 /* Copy the algorithm from temporary space to the space at alg_out.
3173 We avoid using structure assignment because the majority of
3174 best_alg is normally undefined, and this is a critical function. */
3175 alg_out->ops = best_alg->ops + 1;
3176 alg_out->cost = best_cost;
3177 memcpy (alg_out->op, best_alg->op,
3178 alg_out->ops * sizeof *alg_out->op);
3179 memcpy (alg_out->log, best_alg->log,
3180 alg_out->ops * sizeof *alg_out->log);
3183 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3184 Try three variations:
3186 - a shift/add sequence based on VAL itself
3187 - a shift/add sequence based on -VAL, followed by a negation
3188 - a shift/add sequence based on VAL - 1, followed by an addition.
3190 Return true if the cheapest of these cost less than MULT_COST,
3191 describing the algorithm in *ALG and final fixup in *VARIANT. */
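/* For example, multiplying by -5 is typically cheapest as the sequence for
   5, (X << 2) + X, followed by a negation (negate_variant); the VAL - 1
   variant mainly helps with the large "magic" constants used for division
   by a constant.  */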
3193 bool
3194 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3195 struct algorithm *alg, enum mult_variant *variant,
3196 int mult_cost)
3198 struct algorithm alg2;
3199 struct mult_cost limit;
3200 int op_cost;
3201 bool speed = optimize_insn_for_speed_p ();
3203 /* Fail quickly for impossible bounds. */
3204 if (mult_cost < 0)
3205 return false;
3207 /* Ensure that mult_cost provides a reasonable upper bound.
3208 Any constant multiplication can be performed with less
3209 than 2 * bits additions. */
3210 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3211 if (mult_cost > op_cost)
3212 mult_cost = op_cost;
3214 *variant = basic_variant;
3215 limit.cost = mult_cost;
3216 limit.latency = mult_cost;
3217 synth_mult (alg, val, &limit, mode);
3219 /* This works only if the inverted value actually fits in an
3220 `unsigned int'. */
3221 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3223 op_cost = neg_cost (speed, mode);
3224 if (MULT_COST_LESS (&alg->cost, mult_cost))
3226 limit.cost = alg->cost.cost - op_cost;
3227 limit.latency = alg->cost.latency - op_cost;
3229 else
3231 limit.cost = mult_cost - op_cost;
3232 limit.latency = mult_cost - op_cost;
3235 synth_mult (&alg2, -val, &limit, mode);
3236 alg2.cost.cost += op_cost;
3237 alg2.cost.latency += op_cost;
3238 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3239 *alg = alg2, *variant = negate_variant;
3242 /* This proves very useful for division-by-constant. */
3243 op_cost = add_cost (speed, mode);
3244 if (MULT_COST_LESS (&alg->cost, mult_cost))
3246 limit.cost = alg->cost.cost - op_cost;
3247 limit.latency = alg->cost.latency - op_cost;
3249 else
3251 limit.cost = mult_cost - op_cost;
3252 limit.latency = mult_cost - op_cost;
3255 synth_mult (&alg2, val - 1, &limit, mode);
3256 alg2.cost.cost += op_cost;
3257 alg2.cost.latency += op_cost;
3258 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3259 *alg = alg2, *variant = add_variant;
3261 return MULT_COST_LESS (&alg->cost, mult_cost);
3264 /* A subroutine of expand_mult, used for constant multiplications.
3265 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3266 convenient. Use the shift/add sequence described by ALG and apply
3267 the final fixup specified by VARIANT. */
3269 static rtx
3270 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3271 rtx target, const struct algorithm *alg,
3272 enum mult_variant variant)
3274 unsigned HOST_WIDE_INT val_so_far;
3275 rtx_insn *insn;
3276 rtx accum, tem;
3277 int opno;
3278 machine_mode nmode;
3280 /* Avoid referencing memory over and over and invalid sharing
3281 on SUBREGs. */
3282 op0 = force_reg (mode, op0);
3284 /* ACCUM starts out either as OP0 or as a zero, depending on
3285 the first operation. */
3287 if (alg->op[0] == alg_zero)
3289 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3290 val_so_far = 0;
3292 else if (alg->op[0] == alg_m)
3294 accum = copy_to_mode_reg (mode, op0);
3295 val_so_far = 1;
3297 else
3298 gcc_unreachable ();
3300 for (opno = 1; opno < alg->ops; opno++)
3302 int log = alg->log[opno];
3303 rtx shift_subtarget = optimize ? 0 : accum;
3304 rtx add_target
3305 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3306 && !optimize)
3307 ? target : 0;
3308 rtx accum_target = optimize ? 0 : accum;
3309 rtx accum_inner;
3311 switch (alg->op[opno])
3313 case alg_shift:
3314 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3315 /* REG_EQUAL note will be attached to the following insn. */
3316 emit_move_insn (accum, tem);
3317 val_so_far <<= log;
3318 break;
3320 case alg_add_t_m2:
3321 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3322 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3323 add_target ? add_target : accum_target);
3324 val_so_far += HOST_WIDE_INT_1U << log;
3325 break;
3327 case alg_sub_t_m2:
3328 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3329 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3330 add_target ? add_target : accum_target);
3331 val_so_far -= HOST_WIDE_INT_1U << log;
3332 break;
3334 case alg_add_t2_m:
3335 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3336 log, shift_subtarget, 0);
3337 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3338 add_target ? add_target : accum_target);
3339 val_so_far = (val_so_far << log) + 1;
3340 break;
3342 case alg_sub_t2_m:
3343 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3344 log, shift_subtarget, 0);
3345 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3346 add_target ? add_target : accum_target);
3347 val_so_far = (val_so_far << log) - 1;
3348 break;
3350 case alg_add_factor:
3351 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3352 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3353 add_target ? add_target : accum_target);
3354 val_so_far += val_so_far << log;
3355 break;
3357 case alg_sub_factor:
3358 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3359 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3360 (add_target
3361 ? add_target : (optimize ? 0 : tem)));
3362 val_so_far = (val_so_far << log) - val_so_far;
3363 break;
3365 default:
3366 gcc_unreachable ();
3369 if (SCALAR_INT_MODE_P (mode))
3371 /* Write a REG_EQUAL note on the last insn so that we can cse
3372 multiplication sequences. Note that if ACCUM is a SUBREG,
3373 we've set the inner register and must properly indicate that. */
3374 tem = op0, nmode = mode;
3375 accum_inner = accum;
3376 if (GET_CODE (accum) == SUBREG)
3378 accum_inner = SUBREG_REG (accum);
3379 nmode = GET_MODE (accum_inner);
3380 tem = gen_lowpart (nmode, op0);
3383 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3384 In that case, only the low bits of accum would be guaranteed to
3385 be equal to the content of the REG_EQUAL note, the upper bits
3386 can be anything. */
3387 if (!paradoxical_subreg_p (tem))
3389 insn = get_last_insn ();
3390 wide_int wval_so_far
3391 = wi::uhwi (val_so_far,
3392 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3393 rtx c = immed_wide_int_const (wval_so_far, nmode);
3394 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3395 accum_inner);
3400 if (variant == negate_variant)
3402 val_so_far = -val_so_far;
3403 accum = expand_unop (mode, neg_optab, accum, target, 0);
3405 else if (variant == add_variant)
3407 val_so_far = val_so_far + 1;
3408 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3411 /* Compare only the bits of val and val_so_far that are significant
3412 in the result mode, to avoid sign-/zero-extension confusion. */
3413 nmode = GET_MODE_INNER (mode);
3414 val &= GET_MODE_MASK (nmode);
3415 val_so_far &= GET_MODE_MASK (nmode);
3416 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3418 return accum;
3421 /* Perform a multiplication and return an rtx for the result.
3422 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3423 TARGET is a suggestion for where to store the result (an rtx).
3425 We check specially for a constant integer as OP1.
3426 If you want this check for OP0 as well, then before calling
3427 you should swap the two operands if OP0 would be constant. */
3430 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3431 int unsignedp, bool no_libcall)
3433 enum mult_variant variant;
3434 struct algorithm algorithm;
3435 rtx scalar_op1;
3436 int max_cost;
3437 bool speed = optimize_insn_for_speed_p ();
3438 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3440 if (CONSTANT_P (op0))
3441 std::swap (op0, op1);
3443 /* For vectors, there are several simplifications that can be made if
3444 all elements of the vector constant are identical. */
3445 scalar_op1 = unwrap_const_vec_duplicate (op1);
3447 if (INTEGRAL_MODE_P (mode))
3449 rtx fake_reg;
3450 HOST_WIDE_INT coeff;
3451 bool is_neg;
3452 int mode_bitsize;
3454 if (op1 == CONST0_RTX (mode))
3455 return op1;
3456 if (op1 == CONST1_RTX (mode))
3457 return op0;
3458 if (op1 == CONSTM1_RTX (mode))
3459 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3460 op0, target, 0);
3462 if (do_trapv)
3463 goto skip_synth;
3465 /* If mode is an integer vector mode, check if the backend supports
3466 vector lshift (by scalar or vector) at all. If not, we can't use
3467 a synthesized multiply. */
3468 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3469 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3470 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3471 goto skip_synth;
3473 /* These are the operations that are potentially turned into
3474 a sequence of shifts and additions. */
3475 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3477 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3478 less than or equal in size to `unsigned int' this doesn't matter.
3479 If the mode is larger than `unsigned int', then synth_mult works
3480 only if the constant value exactly fits in an `unsigned int' without
3481 any truncation. This means that multiplying by negative values does
3482 not work; results are off by 2^32 on a 32 bit machine. */
3483 if (CONST_INT_P (scalar_op1))
3485 coeff = INTVAL (scalar_op1);
3486 is_neg = coeff < 0;
3488 #if TARGET_SUPPORTS_WIDE_INT
3489 else if (CONST_WIDE_INT_P (scalar_op1))
3490 #else
3491 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3492 #endif
3494 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3495 /* Perfect power of 2 (other than 1, which is handled above). */
3496 if (shift > 0)
3497 return expand_shift (LSHIFT_EXPR, mode, op0,
3498 shift, target, unsignedp);
3499 else
3500 goto skip_synth;
3502 else
3503 goto skip_synth;
3505 /* We used to test optimize here, on the grounds that it's better to
3506 produce a smaller program when -O is not used. But this causes
3507 such a terrible slowdown sometimes that it seems better to always
3508 use synth_mult. */
3510 /* Special case powers of two. */
3511 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3512 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3513 return expand_shift (LSHIFT_EXPR, mode, op0,
3514 floor_log2 (coeff), target, unsignedp);
3516 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3518 /* Attempt to handle multiplication of DImode values by negative
3519 coefficients, by performing the multiplication by a positive
3520 multiplier and then negating the result. */
3521 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3523 /* It's safe to use -coeff even for INT_MIN, as the
3524 result is interpreted as an unsigned coefficient.
3525 Exclude cost of op0 from max_cost to match the cost
3526 calculation of the synth_mult. */
3527 coeff = -(unsigned HOST_WIDE_INT) coeff;
3528 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3529 mode, speed)
3530 - neg_cost (speed, mode));
3531 if (max_cost <= 0)
3532 goto skip_synth;
3534 /* Special case powers of two. */
3535 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3537 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3538 floor_log2 (coeff), target, unsignedp);
3539 return expand_unop (mode, neg_optab, temp, target, 0);
3542 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3543 max_cost))
3545 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3546 &algorithm, variant);
3547 return expand_unop (mode, neg_optab, temp, target, 0);
3549 goto skip_synth;
3552 /* Exclude cost of op0 from max_cost to match the cost
3553 calculation of the synth_mult. */
3554 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3555 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3556 return expand_mult_const (mode, op0, coeff, target,
3557 &algorithm, variant);
3559 skip_synth:
3561 /* Expand x*2.0 as x+x. */
3562 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3563 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3565 op0 = force_reg (GET_MODE (op0), op0);
3566 return expand_binop (mode, add_optab, op0, op0,
3567 target, unsignedp,
3568 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3571 /* This used to use umul_optab if unsigned, but for non-widening multiply
3572 there is no difference between signed and unsigned. */
3573 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3574 op0, op1, target, unsignedp,
3575 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3576 gcc_assert (op0 || no_libcall);
3577 return op0;
3580 /* Return a cost estimate for multiplying a register by the given
3581 COEFFicient in the given MODE and SPEED. */
3584 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3586 int max_cost;
3587 struct algorithm algorithm;
3588 enum mult_variant variant;
3590 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3591 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3592 mode, speed);
3593 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3594 return algorithm.cost.cost;
3595 else
3596 return max_cost;
3599 /* Perform a widening multiplication and return an rtx for the result.
3600 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3601 TARGET is a suggestion for where to store the result (an rtx).
3602 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3603 or smul_widen_optab.
3605 We check specially for a constant integer as OP1, comparing the
3606 cost of a widening multiply against the cost of a sequence of shifts
3607 and adds. */
3610 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3611 int unsignedp, optab this_optab)
3613 bool speed = optimize_insn_for_speed_p ();
3614 rtx cop1;
3616 if (CONST_INT_P (op1)
3617 && GET_MODE (op0) != VOIDmode
3618 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3619 this_optab == umul_widen_optab))
3620 && CONST_INT_P (cop1)
3621 && (INTVAL (cop1) >= 0
3622 || HWI_COMPUTABLE_MODE_P (mode)))
3624 HOST_WIDE_INT coeff = INTVAL (cop1);
3625 int max_cost;
3626 enum mult_variant variant;
3627 struct algorithm algorithm;
3629 if (coeff == 0)
3630 return CONST0_RTX (mode);
3632 /* Special case powers of two. */
3633 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3635 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3636 return expand_shift (LSHIFT_EXPR, mode, op0,
3637 floor_log2 (coeff), target, unsignedp);
3640 /* Exclude cost of op0 from max_cost to match the cost
3641 calculation of the synth_mult. */
3642 max_cost = mul_widen_cost (speed, mode);
3643 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3644 max_cost))
3646 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3647 return expand_mult_const (mode, op0, coeff, target,
3648 &algorithm, variant);
3651 return expand_binop (mode, this_optab, op0, op1, target,
3652 unsignedp, OPTAB_LIB_WIDEN);
3655 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3656 replace division by D, and put the least significant N bits of the result
3657 in *MULTIPLIER_PTR and return the most significant bit.
3659 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
3660 needed precision is in PRECISION (should be <= N).
3662 PRECISION should be as small as possible so this function can choose the
3663 multiplier more freely.
3665 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3666 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3668 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3669 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
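/* For example, with D = 7 and N = PRECISION = 32: lgup = 3,
   mlow = 2^35/7 = 0x124924924 and mhigh = (2^35 + 2^3)/7 = 0x124924925.
   The reduction loop stops with *POST_SHIFT_PTR = 3, *MULTIPLIER_PTR
   = 0x24924925 and a return value of 1 (the full multiplier needs N + 1
   bits), so a caller such as expand_divmod below computes
   x / 7 = (((x - hi) >> 1) + hi) >> 2, where hi is the high half of
   x * 0x24924925.  */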
3671 unsigned HOST_WIDE_INT
3672 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3673 unsigned HOST_WIDE_INT *multiplier_ptr,
3674 int *post_shift_ptr, int *lgup_ptr)
3676 int lgup, post_shift;
3677 int pow, pow2;
3679 /* lgup = ceil(log2(divisor)); */
3680 lgup = ceil_log2 (d);
3682 gcc_assert (lgup <= n);
3684 pow = n + lgup;
3685 pow2 = n + lgup - precision;
3687 /* mlow = 2^(N + lgup)/d */
3688 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3689 wide_int mlow = wi::udiv_trunc (val, d);
3691 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3692 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3693 wide_int mhigh = wi::udiv_trunc (val, d);
3695 /* If precision == N, then mlow, mhigh exceed 2^N
3696 (but they do not exceed 2^(N+1)). */
3698 /* Reduce to lowest terms. */
3699 for (post_shift = lgup; post_shift > 0; post_shift--)
3701 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3702 HOST_BITS_PER_WIDE_INT);
3703 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3704 HOST_BITS_PER_WIDE_INT);
3705 if (ml_lo >= mh_lo)
3706 break;
3708 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3709 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3712 *post_shift_ptr = post_shift;
3713 *lgup_ptr = lgup;
3714 if (n < HOST_BITS_PER_WIDE_INT)
3716 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3717 *multiplier_ptr = mhigh.to_uhwi () & mask;
3718 return mhigh.to_uhwi () > mask;
3720 else
3722 *multiplier_ptr = mhigh.to_uhwi ();
3723 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3727 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3728 congruent to 1 (mod 2**N). */
3730 static unsigned HOST_WIDE_INT
3731 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3733 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3735 /* The algorithm notes that the choice y = x satisfies
3736 x*y == 1 mod 2^3, since x is assumed odd.
3737 Each iteration doubles the number of bits of significance in y. */
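/* For example, with x = 3 and n = 8: start with y = 3, which satisfies
   3*3 == 1 (mod 8).  The first iteration gives y = 3*(2 - 9) mod 256
   = 235, correct modulo 2^6; the second gives y = 235*(2 - 705) mod 256
   = 171, and indeed 3*171 = 513 == 1 (mod 256).  */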
3739 unsigned HOST_WIDE_INT mask;
3740 unsigned HOST_WIDE_INT y = x;
3741 int nbit = 3;
3743 mask = (n == HOST_BITS_PER_WIDE_INT
3744 ? HOST_WIDE_INT_M1U
3745 : (HOST_WIDE_INT_1U << n) - 1);
3747 while (nbit < n)
3749 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3750 nbit *= 2;
3752 return y;
3755 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3756 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3757 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3758 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3759 become signed.
3761 The result is put in TARGET if that is convenient.
3763 MODE is the mode of operation. */
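/* The adjustment below uses the identity, taken modulo 2^N where N is
   the bit size of MODE:
   unsigned_high (OP0, OP1) = signed_high (OP0, OP1)
   + (OP0 < 0 ? OP1 : 0) + (OP1 < 0 ? OP0 : 0)
   applied in one direction or the other according to UNSIGNEDP.  */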
3766 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3767 rtx op1, rtx target, int unsignedp)
3769 rtx tem;
3770 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3772 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3773 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3774 tem = expand_and (mode, tem, op1, NULL_RTX);
3775 adj_operand
3776 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3777 adj_operand);
3779 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3780 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3781 tem = expand_and (mode, tem, op0, NULL_RTX);
3782 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3783 target);
3785 return target;
3788 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3790 static rtx
3791 extract_high_half (scalar_int_mode mode, rtx op)
3793 if (mode == word_mode)
3794 return gen_highpart (mode, op);
3796 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3798 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3799 GET_MODE_BITSIZE (mode), 0, 1);
3800 return convert_modes (mode, wider_mode, op, 0);
3803 /* Like expmed_mult_highpart, but only consider using a multiplication
3804 optab. OP1 is an rtx for the constant operand. */
3806 static rtx
3807 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3808 rtx target, int unsignedp, int max_cost)
3810 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3811 optab moptab;
3812 rtx tem;
3813 int size;
3814 bool speed = optimize_insn_for_speed_p ();
3816 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3818 size = GET_MODE_BITSIZE (mode);
3820 /* Firstly, try using a multiplication insn that only generates the needed
3821 high part of the product, and in the sign flavor of unsignedp. */
3822 if (mul_highpart_cost (speed, mode) < max_cost)
3824 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3825 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3826 unsignedp, OPTAB_DIRECT);
3827 if (tem)
3828 return tem;
3831 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3832 Need to adjust the result after the multiplication. */
3833 if (size - 1 < BITS_PER_WORD
3834 && (mul_highpart_cost (speed, mode)
3835 + 2 * shift_cost (speed, mode, size-1)
3836 + 4 * add_cost (speed, mode) < max_cost))
3838 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3839 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3840 unsignedp, OPTAB_DIRECT);
3841 if (tem)
3842 /* We used the wrong signedness. Adjust the result. */
3843 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3844 tem, unsignedp);
3847 /* Try widening multiplication. */
3848 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3849 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3850 && mul_widen_cost (speed, wider_mode) < max_cost)
3852 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3853 unsignedp, OPTAB_WIDEN);
3854 if (tem)
3855 return extract_high_half (mode, tem);
3858 /* Try widening the mode and performing a non-widening multiplication. */
3859 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3860 && size - 1 < BITS_PER_WORD
3861 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3862 < max_cost))
3864 rtx_insn *insns;
3865 rtx wop0, wop1;
3867 /* We need to widen the operands, for example to ensure the
3868 constant multiplier is correctly sign or zero extended.
3869 Use a sequence to clean up any instructions emitted by
3870 the conversions if things don't work out. */
3871 start_sequence ();
3872 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3873 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3874 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3875 unsignedp, OPTAB_WIDEN);
3876 insns = get_insns ();
3877 end_sequence ();
3879 if (tem)
3881 emit_insn (insns);
3882 return extract_high_half (mode, tem);
3886 /* Try widening multiplication of opposite signedness, and adjust. */
3887 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3888 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3889 && size - 1 < BITS_PER_WORD
3890 && (mul_widen_cost (speed, wider_mode)
3891 + 2 * shift_cost (speed, mode, size-1)
3892 + 4 * add_cost (speed, mode) < max_cost))
3894 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3895 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3896 if (tem != 0)
3898 tem = extract_high_half (mode, tem);
3899 /* We used the wrong signedness. Adjust the result. */
3900 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3901 target, unsignedp);
3905 return 0;
3908 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3909 putting the high half of the result in TARGET if that is convenient,
3910 and return where the result is. If the operation cannot be performed,
3911 0 is returned.
3913 MODE is the mode of operation and result.
3915 UNSIGNEDP nonzero means unsigned multiply.
3917 MAX_COST is the total allowed cost for the expanded RTL. */
3919 static rtx
3920 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3921 rtx target, int unsignedp, int max_cost)
3923 unsigned HOST_WIDE_INT cnst1;
3924 int extra_cost;
3925 bool sign_adjust = false;
3926 enum mult_variant variant;
3927 struct algorithm alg;
3928 rtx tem;
3929 bool speed = optimize_insn_for_speed_p ();
3931 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3932 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3934 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3936 /* We can't optimize modes wider than BITS_PER_WORD.
3937 ??? We might be able to perform double-word arithmetic if
3938 mode == word_mode; however, all the cost calculations in
3939 synth_mult etc. assume single-word operations. */
3940 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3941 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3942 return expmed_mult_highpart_optab (mode, op0, op1, target,
3943 unsignedp, max_cost);
3945 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3947 /* Check whether we try to multiply by a negative constant. */
3948 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3950 sign_adjust = true;
3951 extra_cost += add_cost (speed, mode);
3954 /* See whether shift/add multiplication is cheap enough. */
3955 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3956 max_cost - extra_cost))
3958 /* See whether the specialized multiplication optabs are
3959 cheaper than the shift/add version. */
3960 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3961 alg.cost.cost + extra_cost);
3962 if (tem)
3963 return tem;
3965 tem = convert_to_mode (wider_mode, op0, unsignedp);
3966 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3967 tem = extract_high_half (mode, tem);
3969 /* Adjust result for signedness. */
3970 if (sign_adjust)
3971 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3973 return tem;
3975 return expmed_mult_highpart_optab (mode, op0, op1, target,
3976 unsignedp, max_cost);
3980 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3982 static rtx
3983 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3985 rtx result, temp, shift;
3986 rtx_code_label *label;
3987 int logd;
3988 int prec = GET_MODE_PRECISION (mode);
3990 logd = floor_log2 (d);
3991 result = gen_reg_rtx (mode);
3993 /* Avoid conditional branches when they're expensive. */
3994 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3995 && optimize_insn_for_speed_p ())
3997 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3998 mode, 0, -1);
3999 if (signmask)
4001 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4002 signmask = force_reg (mode, signmask);
4003 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4005 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4006 which instruction sequence to use. If logical right shifts
4007 are expensive then use 2 XORs, 2 SUBs and an AND; otherwise
4008 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
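/* Both sequences produce the remainder with the sign of OP0, as in C.
   For example, with OP0 = -13 and D = 8, SIGNMASK is -1; the XOR
   variant computes ((-13 ^ -1) - -1) & 7 = 5 and then (5 ^ -1) - -1
   = -5, while the shift variant computes ((-13 + 7) & 7) - 7 = -5.  */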
4010 temp = gen_rtx_LSHIFTRT (mode, result, shift);
4011 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4012 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4013 > COSTS_N_INSNS (2)))
4015 temp = expand_binop (mode, xor_optab, op0, signmask,
4016 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4017 temp = expand_binop (mode, sub_optab, temp, signmask,
4018 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4019 temp = expand_binop (mode, and_optab, temp,
4020 gen_int_mode (masklow, mode),
4021 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4022 temp = expand_binop (mode, xor_optab, temp, signmask,
4023 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4024 temp = expand_binop (mode, sub_optab, temp, signmask,
4025 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4027 else
4029 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4030 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4031 signmask = force_reg (mode, signmask);
4033 temp = expand_binop (mode, add_optab, op0, signmask,
4034 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4035 temp = expand_binop (mode, and_optab, temp,
4036 gen_int_mode (masklow, mode),
4037 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4038 temp = expand_binop (mode, sub_optab, temp, signmask,
4039 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4041 return temp;
4045 /* Mask contains the mode's signbit and the significant bits of the
4046 modulus. By including the signbit in the operation, many targets
4047 can avoid an explicit compare operation in the following comparison
4048 against zero. */
4049 wide_int mask = wi::mask (logd, false, prec);
4050 mask = wi::set_bit (mask, prec - 1);
4052 temp = expand_binop (mode, and_optab, op0,
4053 immed_wide_int_const (mask, mode),
4054 result, 1, OPTAB_LIB_WIDEN);
4055 if (temp != result)
4056 emit_move_insn (result, temp);
4058 label = gen_label_rtx ();
4059 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4061 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4062 0, OPTAB_LIB_WIDEN);
4064 mask = wi::mask (logd, true, prec);
4065 temp = expand_binop (mode, ior_optab, temp,
4066 immed_wide_int_const (mask, mode),
4067 result, 1, OPTAB_LIB_WIDEN);
4068 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4069 0, OPTAB_LIB_WIDEN);
4070 if (temp != result)
4071 emit_move_insn (result, temp);
4072 emit_label (label);
4073 return result;
4076 /* Expand signed division of OP0 by a power of two D in mode MODE.
4077 This routine is only called for positive values of D. */
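/* The sequences below rely on the identity
   x / 2^k == (x + (x < 0 ? 2^k - 1 : 0)) >> k (arithmetic shift);
   for example -13 / 4 is computed as (-13 + 3) >> 2 = -10 >> 2 = -3,
   matching C's truncating division.  The final fallback applies the
   same adjustment with an explicit compare and jump.  */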
4079 static rtx
4080 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4082 rtx temp;
4083 rtx_code_label *label;
4084 int logd;
4086 logd = floor_log2 (d);
4088 if (d == 2
4089 && BRANCH_COST (optimize_insn_for_speed_p (),
4090 false) >= 1)
4092 temp = gen_reg_rtx (mode);
4093 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4094 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4095 0, OPTAB_LIB_WIDEN);
4096 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4099 if (HAVE_conditional_move
4100 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4102 rtx temp2;
4104 start_sequence ();
4105 temp2 = copy_to_mode_reg (mode, op0);
4106 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4107 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4108 temp = force_reg (mode, temp);
4110 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4111 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4112 mode, temp, temp2, mode, 0);
4113 if (temp2)
4115 rtx_insn *seq = get_insns ();
4116 end_sequence ();
4117 emit_insn (seq);
4118 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4120 end_sequence ();
4123 if (BRANCH_COST (optimize_insn_for_speed_p (),
4124 false) >= 2)
4126 int ushift = GET_MODE_BITSIZE (mode) - logd;
4128 temp = gen_reg_rtx (mode);
4129 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4130 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4131 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4132 > COSTS_N_INSNS (1))
4133 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4134 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4135 else
4136 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4137 ushift, NULL_RTX, 1);
4138 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4139 0, OPTAB_LIB_WIDEN);
4140 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4143 label = gen_label_rtx ();
4144 temp = copy_to_mode_reg (mode, op0);
4145 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4146 expand_inc (temp, gen_int_mode (d - 1, mode));
4147 emit_label (label);
4148 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4151 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4152 if that is convenient, and returning where the result is.
4153 You may request either the quotient or the remainder as the result;
4154 specify REM_FLAG nonzero to get the remainder.
4156 CODE is the expression code for which kind of division this is;
4157 it controls how rounding is done. MODE is the machine mode to use.
4158 UNSIGNEDP nonzero means do unsigned division. */
4160 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4161 and then correct it by or'ing in missing high bits
4162 if result of ANDI is nonzero.
4163 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4164 This could optimize to a bfexts instruction.
4165 But C doesn't use these operations, so their optimizations are
4166 left for later. */
4167 /* ??? For modulo, we don't actually need the highpart of the first product,
4168 the low part will do nicely. And for small divisors, the second multiply
4169 can also be a low-part only multiply or even be completely left out.
4170 E.g. to calculate the remainder of a division by 3 with a 32 bit
4171 multiply, multiply with 0x55555556 and extract the upper two bits;
4172 the result is exact for inputs up to 0x1fffffff.
4173 The input range can be reduced by using cross-sum rules.
4174 For odd divisors >= 3, the following table gives right shift counts
4175 so that if a number is shifted by an integer multiple of the given
4176 amount, the remainder stays the same:
4177 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4178 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4179 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4180 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4181 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4183 Cross-sum rules for even numbers can be derived by leaving as many bits
4184 to the right alone as the divisor has zeros to the right.
4185 E.g. if x is an unsigned 32 bit number:
4186 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
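As a quick check of the mod-3 trick: for x = 100 the low 32 bits of
100 * 0x55555556 are 0x55555598, and 0x55555598 >> 30 = 1, which is
indeed 100 mod 3.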
4190 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4191 rtx op0, rtx op1, rtx target, int unsignedp)
4193 machine_mode compute_mode;
4194 rtx tquotient;
4195 rtx quotient = 0, remainder = 0;
4196 rtx_insn *last;
4197 rtx_insn *insn;
4198 optab optab1, optab2;
4199 int op1_is_constant, op1_is_pow2 = 0;
4200 int max_cost, extra_cost;
4201 static HOST_WIDE_INT last_div_const = 0;
4202 bool speed = optimize_insn_for_speed_p ();
4204 op1_is_constant = CONST_INT_P (op1);
4205 if (op1_is_constant)
4207 wide_int ext_op1 = rtx_mode_t (op1, mode);
4208 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4209 || (! unsignedp
4210 && wi::popcount (wi::neg (ext_op1)) == 1));
4214 This is the structure of expand_divmod:
4216 First comes code to fix up the operands so we can perform the operations
4217 correctly and efficiently.
4219 Second comes a switch statement with code specific for each rounding mode.
4220 For some special operands this code emits all RTL for the desired
4221 operation, for other cases, it generates only a quotient and stores it in
4222 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4223 to indicate that it has not done anything.
4225 Last comes code that finishes the operation. If QUOTIENT is set and
4226 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4227 QUOTIENT is not set, it is computed using trunc rounding.
4229 We try to generate special code for division and remainder when OP1 is a
4230 constant. If |OP1| = 2**n we can use shifts and some other fast
4231 operations. For other values of OP1, we compute a carefully selected
4232 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4233 by m.
4235 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4236 half of the product. Different strategies for generating the product are
4237 implemented in expmed_mult_highpart.
4239 If what we actually want is the remainder, we generate that by another
4240 by-constant multiplication and a subtraction. */
4242 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4243 code below will malfunction if we are, so check here and handle
4244 the special case if so. */
4245 if (op1 == const1_rtx)
4246 return rem_flag ? const0_rtx : op0;
4248 /* When dividing by -1, we could get an overflow.
4249 negv_optab can handle overflows. */
4250 if (! unsignedp && op1 == constm1_rtx)
4252 if (rem_flag)
4253 return const0_rtx;
4254 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4255 ? negv_optab : neg_optab, op0, target, 0);
4258 if (target
4259 /* Don't use the function value register as a target
4260 since we have to read it as well as write it,
4261 and function-inlining gets confused by this. */
4262 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4263 /* Don't clobber an operand while doing a multi-step calculation. */
4264 || ((rem_flag || op1_is_constant)
4265 && (reg_mentioned_p (target, op0)
4266 || (MEM_P (op0) && MEM_P (target))))
4267 || reg_mentioned_p (target, op1)
4268 || (MEM_P (op1) && MEM_P (target))))
4269 target = 0;
4271 /* Get the mode in which to perform this computation. Normally it will
4272 be MODE, but sometimes we can't do the desired operation in MODE.
4273 If so, pick a wider mode in which we can do the operation. Convert
4274 to that mode at the start to avoid repeated conversions.
4276 First see what operations we need. These depend on the expression
4277 we are evaluating. (We assume that divxx3 insns exist under the
4278 same conditions that modxx3 insns and that these insns don't normally
4279 fail. If these assumptions are not correct, we may generate less
4280 efficient code in some cases.)
4282 Then see if we find a mode in which we can open-code that operation
4283 (either a division, modulus, or shift). Finally, check for the smallest
4284 mode for which we can do the operation with a library call. */
4286 /* We might want to refine this now that we have division-by-constant
4287 optimization. Since expmed_mult_highpart tries so many variants, it is
4288 not straightforward to generalize this. Maybe we should make an array
4289 of possible modes in init_expmed? Save this for GCC 2.7. */
4291 optab1 = (op1_is_pow2
4292 ? (unsignedp ? lshr_optab : ashr_optab)
4293 : (unsignedp ? udiv_optab : sdiv_optab));
4294 optab2 = (op1_is_pow2 ? optab1
4295 : (unsignedp ? udivmod_optab : sdivmod_optab));
4297 FOR_EACH_MODE_FROM (compute_mode, mode)
4298 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4299 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4300 break;
4302 if (compute_mode == VOIDmode)
4303 FOR_EACH_MODE_FROM (compute_mode, mode)
4304 if (optab_libfunc (optab1, compute_mode)
4305 || optab_libfunc (optab2, compute_mode))
4306 break;
4308 /* If we still couldn't find a mode, use MODE, but expand_binop will
4309 probably die. */
4310 if (compute_mode == VOIDmode)
4311 compute_mode = mode;
4313 if (target && GET_MODE (target) == compute_mode)
4314 tquotient = target;
4315 else
4316 tquotient = gen_reg_rtx (compute_mode);
4318 #if 0
4319 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4320 (mode), and thereby get better code when OP1 is a constant. Do that
4321 later. It will require going over all usages of SIZE below. */
4322 size = GET_MODE_BITSIZE (mode);
4323 #endif
4325 /* Only deduct something for a REM if the last divide done was
4326 for a different constant. Then set the constant of the last
4327 divide. */
4328 max_cost = (unsignedp
4329 ? udiv_cost (speed, compute_mode)
4330 : sdiv_cost (speed, compute_mode));
4331 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4332 && INTVAL (op1) == last_div_const))
4333 max_cost -= (mul_cost (speed, compute_mode)
4334 + add_cost (speed, compute_mode));
4336 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4338 /* Now convert to the best mode to use. */
4339 if (compute_mode != mode)
4341 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4342 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4344 /* convert_modes may have placed op1 into a register, so we
4345 must recompute the following. */
4346 op1_is_constant = CONST_INT_P (op1);
4347 if (op1_is_constant)
4349 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4350 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4351 || (! unsignedp
4352 && wi::popcount (wi::neg (ext_op1)) == 1));
4354 else
4355 op1_is_pow2 = 0;
4358 /* If one of the operands is a volatile MEM, copy it into a register. */
4360 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4361 op0 = force_reg (compute_mode, op0);
4362 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4363 op1 = force_reg (compute_mode, op1);
4365 /* If we need the remainder or if OP1 is constant, we need to
4366 put OP0 in a register in case it has any queued subexpressions. */
4367 if (rem_flag || op1_is_constant)
4368 op0 = force_reg (compute_mode, op0);
4370 last = get_last_insn ();
4372 /* Promote floor rounding to trunc rounding for unsigned operations. */
4373 if (unsignedp)
4375 if (code == FLOOR_DIV_EXPR)
4376 code = TRUNC_DIV_EXPR;
4377 if (code == FLOOR_MOD_EXPR)
4378 code = TRUNC_MOD_EXPR;
4379 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4380 code = TRUNC_DIV_EXPR;
4383 if (op1 != const0_rtx)
4384 switch (code)
4386 case TRUNC_MOD_EXPR:
4387 case TRUNC_DIV_EXPR:
4388 if (op1_is_constant)
4390 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4391 int size = GET_MODE_BITSIZE (int_mode);
4392 if (unsignedp)
4394 unsigned HOST_WIDE_INT mh, ml;
4395 int pre_shift, post_shift;
4396 int dummy;
4397 wide_int wd = rtx_mode_t (op1, int_mode);
4398 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4400 if (wi::popcount (wd) == 1)
4402 pre_shift = floor_log2 (d);
4403 if (rem_flag)
4405 unsigned HOST_WIDE_INT mask
4406 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4407 remainder
4408 = expand_binop (int_mode, and_optab, op0,
4409 gen_int_mode (mask, int_mode),
4410 remainder, 1,
4411 OPTAB_LIB_WIDEN);
4412 if (remainder)
4413 return gen_lowpart (mode, remainder);
4415 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4416 pre_shift, tquotient, 1);
4418 else if (size <= HOST_BITS_PER_WIDE_INT)
4420 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4422 /* Most significant bit of divisor is set; emit an scc
4423 insn. */
4424 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4425 int_mode, 1, 1);
4427 else
4429 /* Find a suitable multiplier and right shift count
4430 instead of multiplying with D. */
4432 mh = choose_multiplier (d, size, size,
4433 &ml, &post_shift, &dummy);
4435 /* If the suggested multiplier is more than SIZE bits,
4436 we can do better for even divisors, using an
4437 initial right shift. */
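/* For example, a 32-bit division by 14 would need a 33-bit
   multiplier, but after a right shift by 1 the divisor 7 needs
   only the 32-bit multiplier 0x92492493 with a post-shift of 2.  */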
4438 if (mh != 0 && (d & 1) == 0)
4440 pre_shift = ctz_or_zero (d);
4441 mh = choose_multiplier (d >> pre_shift, size,
4442 size - pre_shift,
4443 &ml, &post_shift, &dummy);
4444 gcc_assert (!mh);
4446 else
4447 pre_shift = 0;
4449 if (mh != 0)
4451 rtx t1, t2, t3, t4;
4453 if (post_shift - 1 >= BITS_PER_WORD)
4454 goto fail1;
4456 extra_cost
4457 = (shift_cost (speed, int_mode, post_shift - 1)
4458 + shift_cost (speed, int_mode, 1)
4459 + 2 * add_cost (speed, int_mode));
4460 t1 = expmed_mult_highpart
4461 (int_mode, op0, gen_int_mode (ml, int_mode),
4462 NULL_RTX, 1, max_cost - extra_cost);
4463 if (t1 == 0)
4464 goto fail1;
4465 t2 = force_operand (gen_rtx_MINUS (int_mode,
4466 op0, t1),
4467 NULL_RTX);
4468 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4469 t2, 1, NULL_RTX, 1);
4470 t4 = force_operand (gen_rtx_PLUS (int_mode,
4471 t1, t3),
4472 NULL_RTX);
4473 quotient = expand_shift
4474 (RSHIFT_EXPR, int_mode, t4,
4475 post_shift - 1, tquotient, 1);
4477 else
4479 rtx t1, t2;
4481 if (pre_shift >= BITS_PER_WORD
4482 || post_shift >= BITS_PER_WORD)
4483 goto fail1;
4485 t1 = expand_shift
4486 (RSHIFT_EXPR, int_mode, op0,
4487 pre_shift, NULL_RTX, 1);
4488 extra_cost
4489 = (shift_cost (speed, int_mode, pre_shift)
4490 + shift_cost (speed, int_mode, post_shift));
4491 t2 = expmed_mult_highpart
4492 (int_mode, t1,
4493 gen_int_mode (ml, int_mode),
4494 NULL_RTX, 1, max_cost - extra_cost);
4495 if (t2 == 0)
4496 goto fail1;
4497 quotient = expand_shift
4498 (RSHIFT_EXPR, int_mode, t2,
4499 post_shift, tquotient, 1);
4503 else /* Too wide mode to use tricky code */
4504 break;
4506 insn = get_last_insn ();
4507 if (insn != last)
4508 set_dst_reg_note (insn, REG_EQUAL,
4509 gen_rtx_UDIV (int_mode, op0, op1),
4510 quotient);
4512 else /* TRUNC_DIV, signed */
4514 unsigned HOST_WIDE_INT ml;
4515 int lgup, post_shift;
4516 rtx mlr;
4517 HOST_WIDE_INT d = INTVAL (op1);
4518 unsigned HOST_WIDE_INT abs_d;
4520 /* Not prepared to handle division/remainder by
4521 0xffffffffffffffff8000000000000000 etc. */
4522 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4523 break;
4525 /* Since d might be INT_MIN, we have to cast to
4526 unsigned HOST_WIDE_INT before negating to avoid
4527 undefined signed overflow. */
4528 abs_d = (d >= 0
4529 ? (unsigned HOST_WIDE_INT) d
4530 : - (unsigned HOST_WIDE_INT) d);
4532 /* n rem d = n rem -d */
4533 if (rem_flag && d < 0)
4535 d = abs_d;
4536 op1 = gen_int_mode (abs_d, int_mode);
4539 if (d == 1)
4540 quotient = op0;
4541 else if (d == -1)
4542 quotient = expand_unop (int_mode, neg_optab, op0,
4543 tquotient, 0);
4544 else if (size <= HOST_BITS_PER_WIDE_INT
4545 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4547 /* This case is not handled correctly below. */
4548 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4549 int_mode, 1, 1);
4550 if (quotient == 0)
4551 goto fail1;
4553 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4554 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4555 && (rem_flag
4556 ? smod_pow2_cheap (speed, int_mode)
4557 : sdiv_pow2_cheap (speed, int_mode))
4558 /* We assume that the cheap metric is true if the
4559 optab has an expander for this mode. */
4560 && ((optab_handler ((rem_flag ? smod_optab
4561 : sdiv_optab),
4562 int_mode)
4563 != CODE_FOR_nothing)
4564 || (optab_handler (sdivmod_optab, int_mode)
4565 != CODE_FOR_nothing)))
4567 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4569 if (rem_flag)
4571 remainder = expand_smod_pow2 (int_mode, op0, d);
4572 if (remainder)
4573 return gen_lowpart (mode, remainder);
4576 if (sdiv_pow2_cheap (speed, int_mode)
4577 && ((optab_handler (sdiv_optab, int_mode)
4578 != CODE_FOR_nothing)
4579 || (optab_handler (sdivmod_optab, int_mode)
4580 != CODE_FOR_nothing)))
4581 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4582 int_mode, op0,
4583 gen_int_mode (abs_d,
4584 int_mode),
4585 NULL_RTX, 0);
4586 else
4587 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4589 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4590 negate the quotient. */
4591 if (d < 0)
4593 insn = get_last_insn ();
4594 if (insn != last
4595 && abs_d < (HOST_WIDE_INT_1U
4596 << (HOST_BITS_PER_WIDE_INT - 1)))
4597 set_dst_reg_note (insn, REG_EQUAL,
4598 gen_rtx_DIV (int_mode, op0,
4599 gen_int_mode
4600 (abs_d,
4601 int_mode)),
4602 quotient);
4604 quotient = expand_unop (int_mode, neg_optab,
4605 quotient, quotient, 0);
4608 else if (size <= HOST_BITS_PER_WIDE_INT)
4610 choose_multiplier (abs_d, size, size - 1,
4611 &ml, &post_shift, &lgup);
4612 if (ml < HOST_WIDE_INT_1U << (size - 1))
4614 rtx t1, t2, t3;
4616 if (post_shift >= BITS_PER_WORD
4617 || size - 1 >= BITS_PER_WORD)
4618 goto fail1;
4620 extra_cost = (shift_cost (speed, int_mode, post_shift)
4621 + shift_cost (speed, int_mode, size - 1)
4622 + add_cost (speed, int_mode));
4623 t1 = expmed_mult_highpart
4624 (int_mode, op0, gen_int_mode (ml, int_mode),
4625 NULL_RTX, 0, max_cost - extra_cost);
4626 if (t1 == 0)
4627 goto fail1;
4628 t2 = expand_shift
4629 (RSHIFT_EXPR, int_mode, t1,
4630 post_shift, NULL_RTX, 0);
4631 t3 = expand_shift
4632 (RSHIFT_EXPR, int_mode, op0,
4633 size - 1, NULL_RTX, 0);
4634 if (d < 0)
4635 quotient
4636 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4637 tquotient);
4638 else
4639 quotient
4640 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4641 tquotient);
4643 else
4645 rtx t1, t2, t3, t4;
4647 if (post_shift >= BITS_PER_WORD
4648 || size - 1 >= BITS_PER_WORD)
4649 goto fail1;
4651 ml |= HOST_WIDE_INT_M1U << (size - 1);
4652 mlr = gen_int_mode (ml, int_mode);
4653 extra_cost = (shift_cost (speed, int_mode, post_shift)
4654 + shift_cost (speed, int_mode, size - 1)
4655 + 2 * add_cost (speed, int_mode));
4656 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4657 NULL_RTX, 0,
4658 max_cost - extra_cost);
4659 if (t1 == 0)
4660 goto fail1;
4661 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4662 NULL_RTX);
4663 t3 = expand_shift
4664 (RSHIFT_EXPR, int_mode, t2,
4665 post_shift, NULL_RTX, 0);
4666 t4 = expand_shift
4667 (RSHIFT_EXPR, int_mode, op0,
4668 size - 1, NULL_RTX, 0);
4669 if (d < 0)
4670 quotient
4671 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4672 tquotient);
4673 else
4674 quotient
4675 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4676 tquotient);
4679 else /* Too wide mode to use tricky code */
4680 break;
4682 insn = get_last_insn ();
4683 if (insn != last)
4684 set_dst_reg_note (insn, REG_EQUAL,
4685 gen_rtx_DIV (int_mode, op0, op1),
4686 quotient);
4688 break;
4690 fail1:
4691 delete_insns_since (last);
4692 break;
4694 case FLOOR_DIV_EXPR:
4695 case FLOOR_MOD_EXPR:
4696 /* We will come here only for signed operations. */
4697 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4699 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4700 int size = GET_MODE_BITSIZE (int_mode);
4701 unsigned HOST_WIDE_INT mh, ml;
4702 int pre_shift, lgup, post_shift;
4703 HOST_WIDE_INT d = INTVAL (op1);
4705 if (d > 0)
4707 /* We could just as easily deal with negative constants here,
4708 but it does not seem worth the trouble for GCC 2.6. */
4709 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4711 pre_shift = floor_log2 (d);
4712 if (rem_flag)
4714 unsigned HOST_WIDE_INT mask
4715 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4716 remainder = expand_binop
4717 (int_mode, and_optab, op0,
4718 gen_int_mode (mask, int_mode),
4719 remainder, 0, OPTAB_LIB_WIDEN);
4720 if (remainder)
4721 return gen_lowpart (mode, remainder);
4723 quotient = expand_shift
4724 (RSHIFT_EXPR, int_mode, op0,
4725 pre_shift, tquotient, 0);
4727 else
4729 rtx t1, t2, t3, t4;
4731 mh = choose_multiplier (d, size, size - 1,
4732 &ml, &post_shift, &lgup);
4733 gcc_assert (!mh);
4735 if (post_shift < BITS_PER_WORD
4736 && size - 1 < BITS_PER_WORD)
4738 t1 = expand_shift
4739 (RSHIFT_EXPR, int_mode, op0,
4740 size - 1, NULL_RTX, 0);
4741 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4742 NULL_RTX, 0, OPTAB_WIDEN);
4743 extra_cost = (shift_cost (speed, int_mode, post_shift)
4744 + shift_cost (speed, int_mode, size - 1)
4745 + 2 * add_cost (speed, int_mode));
4746 t3 = expmed_mult_highpart
4747 (int_mode, t2, gen_int_mode (ml, int_mode),
4748 NULL_RTX, 1, max_cost - extra_cost);
4749 if (t3 != 0)
4751 t4 = expand_shift
4752 (RSHIFT_EXPR, int_mode, t3,
4753 post_shift, NULL_RTX, 1);
4754 quotient = expand_binop (int_mode, xor_optab,
4755 t4, t1, tquotient, 0,
4756 OPTAB_WIDEN);
4761 else
4763 rtx nsign, t1, t2, t3, t4;
4764 t1 = force_operand (gen_rtx_PLUS (int_mode,
4765 op0, constm1_rtx), NULL_RTX);
4766 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4767 0, OPTAB_WIDEN);
4768 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4769 size - 1, NULL_RTX, 0);
4770 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4771 NULL_RTX);
4772 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4773 NULL_RTX, 0);
4774 if (t4)
4776 rtx t5;
4777 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4778 NULL_RTX, 0);
4779 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4780 tquotient);
4785 if (quotient != 0)
4786 break;
4787 delete_insns_since (last);
4789 /* Try using an instruction that produces both the quotient and
4790 remainder, using truncation. We can easily compensate the quotient
4791 or remainder to get floor rounding, once we have the remainder.
4792 Notice that we compute also the final remainder value here,
4793 and return the result right away. */
4794 if (target == 0 || GET_MODE (target) != compute_mode)
4795 target = gen_reg_rtx (compute_mode);
4797 if (rem_flag)
4799 remainder
4800 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4801 quotient = gen_reg_rtx (compute_mode);
4803 else
4805 quotient
4806 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4807 remainder = gen_reg_rtx (compute_mode);
4810 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4811 quotient, remainder, 0))
4813 /* This could be computed with a branch-less sequence.
4814 Save that for later. */
4815 rtx tem;
4816 rtx_code_label *label = gen_label_rtx ();
4817 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4818 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4819 NULL_RTX, 0, OPTAB_WIDEN);
4820 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4821 expand_dec (quotient, const1_rtx);
4822 expand_inc (remainder, op1);
4823 emit_label (label);
4824 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4827 /* No luck with division elimination or divmod. Have to do it
4828 by conditionally adjusting op0 *and* the result. */
4830 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4831 rtx adjusted_op0;
4832 rtx tem;
4834 quotient = gen_reg_rtx (compute_mode);
4835 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4836 label1 = gen_label_rtx ();
4837 label2 = gen_label_rtx ();
4838 label3 = gen_label_rtx ();
4839 label4 = gen_label_rtx ();
4840 label5 = gen_label_rtx ();
4841 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4842 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4843 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4844 quotient, 0, OPTAB_LIB_WIDEN);
4845 if (tem != quotient)
4846 emit_move_insn (quotient, tem);
4847 emit_jump_insn (targetm.gen_jump (label5));
4848 emit_barrier ();
4849 emit_label (label1);
4850 expand_inc (adjusted_op0, const1_rtx);
4851 emit_jump_insn (targetm.gen_jump (label4));
4852 emit_barrier ();
4853 emit_label (label2);
4854 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4855 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4856 quotient, 0, OPTAB_LIB_WIDEN);
4857 if (tem != quotient)
4858 emit_move_insn (quotient, tem);
4859 emit_jump_insn (targetm.gen_jump (label5));
4860 emit_barrier ();
4861 emit_label (label3);
4862 expand_dec (adjusted_op0, const1_rtx);
4863 emit_label (label4);
4864 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4865 quotient, 0, OPTAB_LIB_WIDEN);
4866 if (tem != quotient)
4867 emit_move_insn (quotient, tem);
4868 expand_dec (quotient, const1_rtx);
4869 emit_label (label5);
4871 break;
4873 case CEIL_DIV_EXPR:
4874 case CEIL_MOD_EXPR:
4875 if (unsignedp)
4877 if (op1_is_constant
4878 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4879 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4880 || INTVAL (op1) >= 0))
4882 scalar_int_mode int_mode
4883 = as_a <scalar_int_mode> (compute_mode);
4884 rtx t1, t2, t3;
4885 unsigned HOST_WIDE_INT d = INTVAL (op1);
4886 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4887 floor_log2 (d), tquotient, 1);
4888 t2 = expand_binop (int_mode, and_optab, op0,
4889 gen_int_mode (d - 1, int_mode),
4890 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4891 t3 = gen_reg_rtx (int_mode);
4892 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4893 if (t3 == 0)
4895 rtx_code_label *lab;
4896 lab = gen_label_rtx ();
4897 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4898 expand_inc (t1, const1_rtx);
4899 emit_label (lab);
4900 quotient = t1;
4902 else
4903 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4904 tquotient);
4905 break;
4908 /* Try using an instruction that produces both the quotient and
4909 remainder, using truncation. We can easily compensate the
4910 quotient or remainder to get ceiling rounding, once we have the
4911 remainder. Notice that we compute also the final remainder
4912 value here, and return the result right away. */
4913 if (target == 0 || GET_MODE (target) != compute_mode)
4914 target = gen_reg_rtx (compute_mode);
4916 if (rem_flag)
4918 remainder = (REG_P (target)
4919 ? target : gen_reg_rtx (compute_mode));
4920 quotient = gen_reg_rtx (compute_mode);
4922 else
4924 quotient = (REG_P (target)
4925 ? target : gen_reg_rtx (compute_mode));
4926 remainder = gen_reg_rtx (compute_mode);
4929 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4930 remainder, 1))
4932 /* This could be computed with a branch-less sequence.
4933 Save that for later. */
4934 rtx_code_label *label = gen_label_rtx ();
4935 do_cmp_and_jump (remainder, const0_rtx, EQ,
4936 compute_mode, label);
4937 expand_inc (quotient, const1_rtx);
4938 expand_dec (remainder, op1);
4939 emit_label (label);
4940 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4943 /* No luck with division elimination or divmod. Have to do it
4944 by conditionally adjusting op0 *and* the result. */
4946 rtx_code_label *label1, *label2;
4947 rtx adjusted_op0, tem;
4949 quotient = gen_reg_rtx (compute_mode);
4950 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4951 label1 = gen_label_rtx ();
4952 label2 = gen_label_rtx ();
4953 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4954 compute_mode, label1);
4955 emit_move_insn (quotient, const0_rtx);
4956 emit_jump_insn (targetm.gen_jump (label2));
4957 emit_barrier ();
4958 emit_label (label1);
4959 expand_dec (adjusted_op0, const1_rtx);
4960 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4961 quotient, 1, OPTAB_LIB_WIDEN);
4962 if (tem != quotient)
4963 emit_move_insn (quotient, tem);
4964 expand_inc (quotient, const1_rtx);
4965 emit_label (label2);
4968 else /* signed */
4970 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4971 && INTVAL (op1) >= 0)
4973 /* This is extremely similar to the code for the unsigned case
4974 above. For 2.7 we should merge these variants, but for
4975 2.6.1 I don't want to touch the code for unsigned since that
4976 gets used in C. The signed case will only be used by other
4977 languages (Ada). */
4979 rtx t1, t2, t3;
4980 unsigned HOST_WIDE_INT d = INTVAL (op1);
4981 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4982 floor_log2 (d), tquotient, 0);
4983 t2 = expand_binop (compute_mode, and_optab, op0,
4984 gen_int_mode (d - 1, compute_mode),
4985 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4986 t3 = gen_reg_rtx (compute_mode);
4987 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4988 compute_mode, 1, 1);
4989 if (t3 == 0)
4991 rtx_code_label *lab;
4992 lab = gen_label_rtx ();
4993 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4994 expand_inc (t1, const1_rtx);
4995 emit_label (lab);
4996 quotient = t1;
4998 else
4999 quotient = force_operand (gen_rtx_PLUS (compute_mode,
5000 t1, t3),
5001 tquotient);
5002 break;
5005 /* Try using an instruction that produces both the quotient and
5006 remainder, using truncation. We can easily compensate the
5007 quotient or remainder to get ceiling rounding, once we have the
5008 remainder. Notice that we compute also the final remainder
5009 value here, and return the result right away. */
5010 if (target == 0 || GET_MODE (target) != compute_mode)
5011 target = gen_reg_rtx (compute_mode);
5012 if (rem_flag)
5014 remainder = (REG_P (target)
5015 ? target : gen_reg_rtx (compute_mode));
5016 quotient = gen_reg_rtx (compute_mode);
5018 else
5020 quotient = (REG_P (target)
5021 ? target : gen_reg_rtx (compute_mode));
5022 remainder = gen_reg_rtx (compute_mode);
5025 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5026 remainder, 0))
5028 /* This could be computed with a branch-less sequence.
5029 Save that for later. */
5030 rtx tem;
5031 rtx_code_label *label = gen_label_rtx ();
5032 do_cmp_and_jump (remainder, const0_rtx, EQ,
5033 compute_mode, label);
5034 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5035 NULL_RTX, 0, OPTAB_WIDEN);
5036 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5037 expand_inc (quotient, const1_rtx);
5038 expand_dec (remainder, op1);
5039 emit_label (label);
5040 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5043 /* No luck with division elimination or divmod. Have to do it
5044 by conditionally adjusting op0 *and* the result. */
5046 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5047 rtx adjusted_op0;
5048 rtx tem;
5050 quotient = gen_reg_rtx (compute_mode);
5051 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5052 label1 = gen_label_rtx ();
5053 label2 = gen_label_rtx ();
5054 label3 = gen_label_rtx ();
5055 label4 = gen_label_rtx ();
5056 label5 = gen_label_rtx ();
5057 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5058 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5059 compute_mode, label1);
5060 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5061 quotient, 0, OPTAB_LIB_WIDEN);
5062 if (tem != quotient)
5063 emit_move_insn (quotient, tem);
5064 emit_jump_insn (targetm.gen_jump (label5));
5065 emit_barrier ();
5066 emit_label (label1);
5067 expand_dec (adjusted_op0, const1_rtx);
5068 emit_jump_insn (targetm.gen_jump (label4));
5069 emit_barrier ();
5070 emit_label (label2);
5071 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5072 compute_mode, label3);
5073 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5074 quotient, 0, OPTAB_LIB_WIDEN);
5075 if (tem != quotient)
5076 emit_move_insn (quotient, tem);
5077 emit_jump_insn (targetm.gen_jump (label5));
5078 emit_barrier ();
5079 emit_label (label3);
5080 expand_inc (adjusted_op0, const1_rtx);
5081 emit_label (label4);
5082 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5083 quotient, 0, OPTAB_LIB_WIDEN);
5084 if (tem != quotient)
5085 emit_move_insn (quotient, tem);
5086 expand_inc (quotient, const1_rtx);
5087 emit_label (label5);
5090 break;
5092 case EXACT_DIV_EXPR:
5093 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5095 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5096 int size = GET_MODE_BITSIZE (int_mode);
5097 HOST_WIDE_INT d = INTVAL (op1);
5098 unsigned HOST_WIDE_INT ml;
5099 int pre_shift;
5100 rtx t1;
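/* Since OP0 is known to be a multiple of OP1, divide exactly by first
   shifting out OP1's trailing zero bits and then multiplying by the
   inverse of its odd part modulo 2^SIZE.  For example, in an 8-bit
   mode 21 / 3 becomes 21 * 171 mod 256 = 7, 171 being the inverse of 3
   computed by invert_mod2n.  */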
5102 pre_shift = ctz_or_zero (d);
5103 ml = invert_mod2n (d >> pre_shift, size);
5104 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5105 pre_shift, NULL_RTX, unsignedp);
5106 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5107 NULL_RTX, 1);
5109 insn = get_last_insn ();
5110 set_dst_reg_note (insn, REG_EQUAL,
5111 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5112 int_mode, op0, op1),
5113 quotient);
5115 break;
5117 case ROUND_DIV_EXPR:
5118 case ROUND_MOD_EXPR:
5119 if (unsignedp)
5121 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5122 rtx tem;
5123 rtx_code_label *label;
5124 label = gen_label_rtx ();
5125 quotient = gen_reg_rtx (int_mode);
5126 remainder = gen_reg_rtx (int_mode);
5127 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5129 rtx tem;
5130 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5131 quotient, 1, OPTAB_LIB_WIDEN);
5132 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5133 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5134 remainder, 1, OPTAB_LIB_WIDEN);
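/* Round the quotient to nearest: bump it (and correct the remainder)
   whenever 2 * REMAINDER > OP1 - 1, i.e. whenever the remainder is
   closer to OP1 than to zero, with halfway cases rounded up.  */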
5136 tem = plus_constant (int_mode, op1, -1);
5137 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5138 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5139 expand_inc (quotient, const1_rtx);
5140 expand_dec (remainder, op1);
5141 emit_label (label);
5143 else
5145 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5146 int size = GET_MODE_BITSIZE (int_mode);
5147 rtx abs_rem, abs_op1, tem, mask;
5148 rtx_code_label *label;
5149 label = gen_label_rtx ();
5150 quotient = gen_reg_rtx (int_mode);
5151 remainder = gen_reg_rtx (int_mode);
5152 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5154 rtx tem;
5155 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5156 quotient, 0, OPTAB_LIB_WIDEN);
5157 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5158 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5159 remainder, 0, OPTAB_LIB_WIDEN);
5161 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5162 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5163 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5164 1, NULL_RTX, 1);
5165 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5166 tem = expand_binop (int_mode, xor_optab, op0, op1,
5167 NULL_RTX, 0, OPTAB_WIDEN);
5168 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5169 size - 1, NULL_RTX, 0);
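/* MASK is 0 if OP0 and OP1 have the same sign and -1 otherwise, so
   (MASK ^ 1) - MASK below is +1 or -1 and (MASK ^ OP1) - MASK is OP1
   or -OP1: the quotient is moved one step away from zero and the
   remainder adjusted so that OP0 == QUOTIENT * OP1 + REMAINDER still
   holds.  */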
5170 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5171 NULL_RTX, 0, OPTAB_WIDEN);
5172 tem = expand_binop (int_mode, sub_optab, tem, mask,
5173 NULL_RTX, 0, OPTAB_WIDEN);
5174 expand_inc (quotient, tem);
5175 tem = expand_binop (int_mode, xor_optab, mask, op1,
5176 NULL_RTX, 0, OPTAB_WIDEN);
5177 tem = expand_binop (int_mode, sub_optab, tem, mask,
5178 NULL_RTX, 0, OPTAB_WIDEN);
5179 expand_dec (remainder, tem);
5180 emit_label (label);
5182 return gen_lowpart (mode, rem_flag ? remainder : quotient);
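/* Worked example of the round-to-nearest adjustment above.  Unsigned
   case: for 7 / 2 the truncating division gives quotient 3 and
   remainder 1; since 1 > (2 - 1) >> 1 == 0, the quotient is bumped to
   4 and the remainder becomes -1, i.e. 3.5 rounds to 4.  For 7 / 3 the
   remainder 1 is not greater than (3 - 1) >> 1 == 1, so 2.33 stays 2.
   Signed case: for -7 / 2, 2 * |rem| == 2 is not less than |2|, and the
   sign mask computed from op0 ^ op1 turns the adjustment into
   quotient -= 1 and remainder += 2, giving -4 and 1, i.e. -3.5 rounds
   away from zero. */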
5184 default:
5185 gcc_unreachable ();
5188 if (quotient == 0)
5190 if (target && GET_MODE (target) != compute_mode)
5191 target = 0;
5193 if (rem_flag)
5195 /* Try to produce the remainder without producing the quotient.
5196 If we seem to have a divmod pattern that does not require widening,
5197 don't try widening here. We should really have a WIDEN argument
5198 to expand_twoval_binop, since what we'd really like to do here is
5199 1) try a mod insn in compute_mode
5200 2) try a divmod insn in compute_mode
5201 3) try a div insn in compute_mode and multiply-subtract to get
5202 remainder
5203 4) try the same things with widening allowed. */
5204 remainder
5205 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5206 op0, op1, target,
5207 unsignedp,
5208 ((optab_handler (optab2, compute_mode)
5209 != CODE_FOR_nothing)
5210 ? OPTAB_DIRECT : OPTAB_WIDEN));
5211 if (remainder == 0)
5213 /* No luck there. Can we do remainder and divide at once
5214 without a library call? */
5215 remainder = gen_reg_rtx (compute_mode);
5216 if (! expand_twoval_binop ((unsignedp
5217 ? udivmod_optab
5218 : sdivmod_optab),
5219 op0, op1,
5220 NULL_RTX, remainder, unsignedp))
5221 remainder = 0;
5224 if (remainder)
5225 return gen_lowpart (mode, remainder);
5228 /* Produce the quotient. Try a quotient insn, but not a library call.
5229 If we have a divmod in this mode, use it in preference to widening
5230 the div (for this test we assume it will not fail). Note that optab2
5231 is set to the one of the two optabs that the call below will use. */
5232 quotient
5233 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5234 op0, op1, rem_flag ? NULL_RTX : target,
5235 unsignedp,
5236 ((optab_handler (optab2, compute_mode)
5237 != CODE_FOR_nothing)
5238 ? OPTAB_DIRECT : OPTAB_WIDEN));
5240 if (quotient == 0)
5242 /* No luck there. Try a quotient-and-remainder insn,
5243 keeping the quotient alone. */
5244 quotient = gen_reg_rtx (compute_mode);
5245 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5246 op0, op1,
5247 quotient, NULL_RTX, unsignedp))
5249 quotient = 0;
5250 if (! rem_flag)
5251 /* Still no luck. If we are not computing the remainder,
5252 use a library call for the quotient. */
5253 quotient = sign_expand_binop (compute_mode,
5254 udiv_optab, sdiv_optab,
5255 op0, op1, target,
5256 unsignedp, OPTAB_LIB_WIDEN);
5261 if (rem_flag)
5263 if (target && GET_MODE (target) != compute_mode)
5264 target = 0;
5266 if (quotient == 0)
5268 /* No divide instruction either. Use library for remainder. */
5269 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5270 op0, op1, target,
5271 unsignedp, OPTAB_LIB_WIDEN);
5272 /* No remainder function. Try a quotient-and-remainder
5273 function, keeping the remainder. */
5274 if (!remainder)
5276 remainder = gen_reg_rtx (compute_mode);
5277 if (!expand_twoval_binop_libfunc
5278 (unsignedp ? udivmod_optab : sdivmod_optab,
5279 op0, op1,
5280 NULL_RTX, remainder,
5281 unsignedp ? UMOD : MOD))
5282 remainder = NULL_RTX;
5285 else
5287 /* We divided. Now finish doing X - Y * (X / Y). */
5288 remainder = expand_mult (compute_mode, quotient, op1,
5289 NULL_RTX, unsignedp);
5290 remainder = expand_binop (compute_mode, sub_optab, op0,
5291 remainder, target, unsignedp,
5292 OPTAB_LIB_WIDEN);
5296 return gen_lowpart (mode, rem_flag ? remainder : quotient);
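/* Illustrative sketch, not used by the code above: when only a quotient
   instruction is available, the remainder is recovered with the
   multiply-subtract identity X - (X / Y) * Y, as in the final branch of
   expand_divmod.  The hypothetical helper below shows the identity for
   plain ints; it assumes Y != 0 and no INT_MIN / -1 overflow. */
static inline int
remainder_via_mulsub_sketch (int x, int y)
{
  /* Truncating division satisfies x == (x / y) * y + x % y,
     hence x % y == x - (x / y) * y. */
  return x - (x / y) * y;
}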
5299 /* Return a tree node with data type TYPE, describing the value of X.
5300 Usually this is a VAR_DECL, if there is no obvious better choice.
5301 X may be an expression; however, we only support those expressions
5302 generated by loop.c. */
5304 tree
5305 make_tree (tree type, rtx x)
5307 tree t;
5309 switch (GET_CODE (x))
5311 case CONST_INT:
5312 case CONST_WIDE_INT:
5313 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5314 return t;
5316 case CONST_DOUBLE:
5317 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5318 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5319 t = wide_int_to_tree (type,
5320 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5321 HOST_BITS_PER_WIDE_INT * 2));
5322 else
5323 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5325 return t;
5327 case CONST_VECTOR:
5329 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5330 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5331 tree itype = TREE_TYPE (type);
5333 /* Build a tree with vector elements. */
5334 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5335 unsigned int count = elts.encoded_nelts ();
5336 for (unsigned int i = 0; i < count; ++i)
5338 rtx elt = CONST_VECTOR_ELT (x, i);
5339 elts.quick_push (make_tree (itype, elt));
5342 return elts.build ();
5345 case PLUS:
5346 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5347 make_tree (type, XEXP (x, 1)));
5349 case MINUS:
5350 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5351 make_tree (type, XEXP (x, 1)));
5353 case NEG:
5354 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5356 case MULT:
5357 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5358 make_tree (type, XEXP (x, 1)));
5360 case ASHIFT:
5361 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5362 make_tree (type, XEXP (x, 1)));
5364 case LSHIFTRT:
5365 t = unsigned_type_for (type);
5366 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5367 make_tree (t, XEXP (x, 0)),
5368 make_tree (type, XEXP (x, 1))));
5370 case ASHIFTRT:
5371 t = signed_type_for (type);
5372 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5373 make_tree (t, XEXP (x, 0)),
5374 make_tree (type, XEXP (x, 1))));
5376 case DIV:
5377 if (TREE_CODE (type) != REAL_TYPE)
5378 t = signed_type_for (type);
5379 else
5380 t = type;
5382 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5383 make_tree (t, XEXP (x, 0)),
5384 make_tree (t, XEXP (x, 1))));
5385 case UDIV:
5386 t = unsigned_type_for (type);
5387 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5388 make_tree (t, XEXP (x, 0)),
5389 make_tree (t, XEXP (x, 1))));
5391 case SIGN_EXTEND:
5392 case ZERO_EXTEND:
5393 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5394 GET_CODE (x) == ZERO_EXTEND);
5395 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5397 case CONST:
5398 return make_tree (type, XEXP (x, 0));
5400 case SYMBOL_REF:
5401 t = SYMBOL_REF_DECL (x);
5402 if (t)
5403 return fold_convert (type, build_fold_addr_expr (t));
5404 /* fall through. */
5406 default:
5407 if (CONST_POLY_INT_P (x))
5408 return wide_int_to_tree (type, const_poly_int_value (x));
5410 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5412 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5413 address mode to pointer mode. */
5414 if (POINTER_TYPE_P (type))
5415 x = convert_memory_address_addr_space
5416 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5418 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5419 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5420 t->decl_with_rtl.rtl = x;
5422 return t;
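/* Illustrative sketch, not part of make_tree: the LSHIFTRT and ASHIFTRT
   cases above model the two flavours of right shift by switching the
   signedness of the operand type, which in C corresponds to the
   hypothetical helpers below (assuming 32-bit int and the usual
   arithmetic behaviour of >> on signed operands). */
static inline int
lshiftrt_sketch (int x, int n)
{
  /* Logical shift: zero-fill, via an unsigned intermediate type. */
  return (int) ((unsigned int) x >> n);
}

static inline int
ashiftrt_sketch (int x, int n)
{
  /* Arithmetic shift: sign-fill, done in the signed type itself. */
  return x >> n;
}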
5426 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5427 and returning TARGET.
5429 If TARGET is 0, a pseudo-register or constant is returned. */
5432 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5434 rtx tem = 0;
5436 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5437 tem = simplify_binary_operation (AND, mode, op0, op1);
5438 if (tem == 0)
5439 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5441 if (target == 0)
5442 target = tem;
5443 else if (tem != target)
5444 emit_move_insn (target, tem);
5445 return target;
5448 /* Helper function for emit_store_flag. */
5450 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5451 machine_mode mode, machine_mode compare_mode,
5452 int unsignedp, rtx x, rtx y, int normalizep,
5453 machine_mode target_mode)
5455 class expand_operand ops[4];
5456 rtx op0, comparison, subtarget;
5457 rtx_insn *last;
5458 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5459 scalar_int_mode int_target_mode;
5461 last = get_last_insn ();
5462 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5463 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5464 if (!x || !y)
5466 delete_insns_since (last);
5467 return NULL_RTX;
5470 if (target_mode == VOIDmode)
5471 int_target_mode = result_mode;
5472 else
5473 int_target_mode = as_a <scalar_int_mode> (target_mode);
5474 if (!target)
5475 target = gen_reg_rtx (int_target_mode);
5477 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5479 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5480 create_fixed_operand (&ops[1], comparison);
5481 create_fixed_operand (&ops[2], x);
5482 create_fixed_operand (&ops[3], y);
5483 if (!maybe_expand_insn (icode, 4, ops))
5485 delete_insns_since (last);
5486 return NULL_RTX;
5488 subtarget = ops[0].value;
5490 /* If we are converting to a wider mode, first convert to
5491 INT_TARGET_MODE, then normalize. This produces better combining
5492 opportunities on machines that have a SIGN_EXTRACT when we are
5493 testing a single bit. This mostly benefits the 68k.
5495 If STORE_FLAG_VALUE does not have the sign bit set when
5496 interpreted in MODE, we can do this conversion as unsigned, which
5497 is usually more efficient. */
5498 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5500 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5501 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5503 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5504 convert_move (target, subtarget, unsignedp);
5506 op0 = target;
5507 result_mode = int_target_mode;
5509 else
5510 op0 = subtarget;
5512 /* If we want to keep subexpressions around, don't reuse our last
5513 target. */
5514 if (optimize)
5515 subtarget = 0;
5517 /* Now normalize to the proper value in MODE. Sometimes we don't
5518 have to do anything. */
5519 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5521 /* STORE_FLAG_VALUE might be the most negative number, so write
5522 the comparison this way to avoid a compile-time warning. */
5523 else if (- normalizep == STORE_FLAG_VALUE)
5524 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5526 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5527 it hard to use a value of just the sign bit due to ANSI integer
5528 constant typing rules. */
5529 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5530 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5531 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5532 normalizep == 1);
5533 else
5535 gcc_assert (STORE_FLAG_VALUE & 1);
5537 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5538 if (normalizep == -1)
5539 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5542 /* If we were converting to a smaller mode, do the conversion now. */
5543 if (int_target_mode != result_mode)
5545 convert_move (target, op0, 0);
5546 return target;
5548 else
5549 return op0;
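/* Worked example of the normalization above: suppose the cstore pattern
   produces STORE_FLAG_VALUE == -1 on success and 0 on failure.  For
   NORMALIZEP == 1 the raw value is simply negated; on a target whose
   STORE_FLAG_VALUE is the sign bit (e.g. 0x80000000 in SImode), a
   logical right shift by 31 yields 0/1 and an arithmetic shift yields
   0/-1; otherwise the value is ANDed with 1 and, for NORMALIZEP == -1,
   negated afterwards. */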
5553 /* A subroutine of emit_store_flag only including "tricks" that do not
5554 need a recursive call. These are kept separate to avoid infinite
5555 loops. */
5557 static rtx
5558 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5559 machine_mode mode, int unsignedp, int normalizep,
5560 machine_mode target_mode)
5562 rtx subtarget;
5563 enum insn_code icode;
5564 machine_mode compare_mode;
5565 enum mode_class mclass;
5566 enum rtx_code scode;
5568 if (unsignedp)
5569 code = unsigned_condition (code);
5570 scode = swap_condition (code);
5572 /* If one operand is constant, make it the second one. Only do this
5573 if the other operand is not constant as well. */
5575 if (swap_commutative_operands_p (op0, op1))
5577 std::swap (op0, op1);
5578 code = swap_condition (code);
5581 if (mode == VOIDmode)
5582 mode = GET_MODE (op0);
5584 if (CONST_SCALAR_INT_P (op1))
5585 canonicalize_comparison (mode, &code, &op1);
5587 /* For some comparisons with 1 and -1, we can convert this to
5588 comparisons with zero. This will often produce more opportunities for
5589 store-flag insns. */
5591 switch (code)
5593 case LT:
5594 if (op1 == const1_rtx)
5595 op1 = const0_rtx, code = LE;
5596 break;
5597 case LE:
5598 if (op1 == constm1_rtx)
5599 op1 = const0_rtx, code = LT;
5600 break;
5601 case GE:
5602 if (op1 == const1_rtx)
5603 op1 = const0_rtx, code = GT;
5604 break;
5605 case GT:
5606 if (op1 == constm1_rtx)
5607 op1 = const0_rtx, code = GE;
5608 break;
5609 case GEU:
5610 if (op1 == const1_rtx)
5611 op1 = const0_rtx, code = NE;
5612 break;
5613 case LTU:
5614 if (op1 == const1_rtx)
5615 op1 = const0_rtx, code = EQ;
5616 break;
5617 default:
5618 break;
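/* Spelled out, the rewrites above are: x < 1 becomes x <= 0,
   x <= -1 becomes x < 0, x >= 1 becomes x > 0, x > -1 becomes x >= 0,
   and for unsigned operands x >= 1 becomes x != 0 while x < 1 becomes
   x == 0, all of which compare against zero and therefore match more
   store-flag patterns. */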
5621 /* If we are comparing a double-word integer with zero or -1, we can
5622 convert the comparison into one involving a single word. */
5623 scalar_int_mode int_mode;
5624 if (is_int_mode (mode, &int_mode)
5625 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5626 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5628 rtx tem;
5629 if ((code == EQ || code == NE)
5630 && (op1 == const0_rtx || op1 == constm1_rtx))
5632 rtx op00, op01;
5634 /* Do a logical OR or AND of the two words and compare the
5635 result. */
5636 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5637 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5638 tem = expand_binop (word_mode,
5639 op1 == const0_rtx ? ior_optab : and_optab,
5640 op00, op01, NULL_RTX, unsignedp,
5641 OPTAB_DIRECT);
5643 if (tem != 0)
5644 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5645 unsignedp, normalizep);
5647 else if ((code == LT || code == GE) && op1 == const0_rtx)
5649 rtx op0h;
5651 /* If testing the sign bit, can just test on high word. */
5652 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5653 subreg_highpart_offset (word_mode,
5654 int_mode));
5655 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5656 unsignedp, normalizep);
5658 else
5659 tem = NULL_RTX;
5661 if (tem)
5663 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5664 return tem;
5665 if (!target)
5666 target = gen_reg_rtx (target_mode);
5668 convert_move (target, tem,
5669 !val_signbit_known_set_p (word_mode,
5670 (normalizep ? normalizep
5671 : STORE_FLAG_VALUE)));
5672 return target;
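/* Worked example for the double-word case above, with 32-bit words and
   a 64-bit operand split into LO and HI: X == 0 is tested as
   (LO | HI) == 0, X == -1 as (LO & HI) == -1, and X < 0 (or X >= 0)
   only needs the sign of HI, so a single-word store-flag suffices. */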
5676 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5677 complement of A (for GE) and shifting the sign bit to the low bit. */
5678 if (op1 == const0_rtx && (code == LT || code == GE)
5679 && is_int_mode (mode, &int_mode)
5680 && (normalizep || STORE_FLAG_VALUE == 1
5681 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5683 scalar_int_mode int_target_mode;
5684 subtarget = target;
5686 if (!target)
5687 int_target_mode = int_mode;
5688 else
5690 /* If the result is to be wider than OP0, it is best to convert it
5691 first. If it is to be narrower, it is *incorrect* to convert it
5692 first. */
5693 int_target_mode = as_a <scalar_int_mode> (target_mode);
5694 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5696 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5697 int_mode = int_target_mode;
5701 if (int_target_mode != int_mode)
5702 subtarget = 0;
5704 if (code == GE)
5705 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5706 ((STORE_FLAG_VALUE == 1 || normalizep)
5707 ? 0 : subtarget), 0);
5709 if (STORE_FLAG_VALUE == 1 || normalizep)
5710 /* If we are supposed to produce a 0/1 value, we want to do
5711 a logical shift from the sign bit to the low-order bit; for
5712 a -1/0 value, we do an arithmetic shift. */
5713 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5714 GET_MODE_BITSIZE (int_mode) - 1,
5715 subtarget, normalizep != -1);
5717 if (int_mode != int_target_mode)
5718 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5720 return op0;
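/* Worked example for the sign-bit trick above, in 32-bit SImode:
   "x < 0" as a 0/1 value is (unsigned) x >> 31, and as a 0/-1 value is
   the arithmetic shift x >> 31; "x >= 0" first complements x, so it
   becomes (unsigned) ~x >> 31 (or ~x >> 31 arithmetically). */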
5723 mclass = GET_MODE_CLASS (mode);
5724 FOR_EACH_MODE_FROM (compare_mode, mode)
5726 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5727 icode = optab_handler (cstore_optab, optab_mode);
5728 if (icode != CODE_FOR_nothing)
5730 do_pending_stack_adjust ();
5731 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5732 unsignedp, op0, op1, normalizep, target_mode);
5733 if (tem)
5734 return tem;
5736 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5738 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5739 unsignedp, op1, op0, normalizep, target_mode);
5740 if (tem)
5741 return tem;
5743 break;
5747 return 0;
5750 /* Subroutine of emit_store_flag that handles cases in which the operands
5751 are scalar integers. SUBTARGET is the target to use for temporary
5752 operations and TRUEVAL is the value to store when the condition is
5753 true. All other arguments are as for emit_store_flag. */
5756 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5757 rtx op1, scalar_int_mode mode, int unsignedp,
5758 int normalizep, rtx trueval)
5760 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5761 rtx_insn *last = get_last_insn ();
5763 /* If this is an equality comparison of integers, we can try to exclusive-or
5764 (or subtract) the two operands and use a recursive call to try the
5765 comparison with zero. Don't do any of these cases if branches are
5766 very cheap. */
5768 if ((code == EQ || code == NE) && op1 != const0_rtx)
5770 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5771 OPTAB_WIDEN);
5773 if (tem == 0)
5774 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5775 OPTAB_WIDEN);
5776 if (tem != 0)
5777 tem = emit_store_flag (target, code, tem, const0_rtx,
5778 mode, unsignedp, normalizep);
5779 if (tem != 0)
5780 return tem;
5782 delete_insns_since (last);
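/* The rewrite above relies on the identity x == y iff (x ^ y) == 0
   (equivalently (x - y) == 0), so an EQ or NE against an arbitrary
   operand is reduced to the zero comparisons handled further down;
   e.g. 12 ^ 12 == 0 while 12 ^ 13 == 1. */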
5785 /* For integer comparisons, try the reverse comparison. However, for
5786 small X, and if we would have to extend it anyway, implementing "X != 0"
5787 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5788 rtx_code rcode = reverse_condition (code);
5789 if (can_compare_p (rcode, mode, ccp_store_flag)
5790 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5791 && code == NE
5792 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5793 && op1 == const0_rtx))
5795 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5796 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5798 /* Again, for the reverse comparison, use either an addition or a XOR. */
5799 if (want_add
5800 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5801 optimize_insn_for_speed_p ()) == 0)
5803 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5804 STORE_FLAG_VALUE, target_mode);
5805 if (tem != 0)
5806 tem = expand_binop (target_mode, add_optab, tem,
5807 gen_int_mode (normalizep, target_mode),
5808 target, 0, OPTAB_WIDEN);
5809 if (tem != 0)
5810 return tem;
5812 else if (!want_add
5813 && rtx_cost (trueval, mode, XOR, 1,
5814 optimize_insn_for_speed_p ()) == 0)
5816 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5817 normalizep, target_mode);
5818 if (tem != 0)
5819 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5820 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5821 if (tem != 0)
5822 return tem;
5825 delete_insns_since (last);
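/* Worked example of the reverse-comparison rewrite above: with
   STORE_FLAG_VALUE == 1, to get "x != y" normalized to 0/-1 we emit the
   cheaper reverse test "x == y" (giving 1/0) and add -1, yielding -1
   when x != y and 0 when x == y.  The XOR variant instead flips a 1/0
   result with the TRUEVAL constant when that constant is free. */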
5828 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5829 the constant zero. Reject all other comparisons at this point. Only
5830 do LE and GT if branches are expensive since they are expensive on
5831 2-operand machines. */
5833 if (op1 != const0_rtx
5834 || (code != EQ && code != NE
5835 && (BRANCH_COST (optimize_insn_for_speed_p (),
5836 false) <= 1 || (code != LE && code != GT))))
5837 return 0;
5839 /* Try to put the result of the comparison in the sign bit. Assume we can't
5840 do the necessary operation below. */
5842 rtx tem = 0;
5844 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5845 the sign bit set. */
5847 if (code == LE)
5849 /* This is destructive, so SUBTARGET can't be OP0. */
5850 if (rtx_equal_p (subtarget, op0))
5851 subtarget = 0;
5853 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5854 OPTAB_WIDEN);
5855 if (tem)
5856 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5857 OPTAB_WIDEN);
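/* Checking the claim above on a few values: for A == 5, 5 | 4 == 5 and
   the sign bit is clear; for A == 0, 0 | -1 == -1 and the sign bit is
   set; for A == -3, -3 | -4 == -3 and the sign bit is set.  So the sign
   bit of A | (A - 1) is exactly the value of A <= 0. */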
5860 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5861 number of bits in the mode of OP0, minus one. */
5863 if (code == GT)
5865 if (rtx_equal_p (subtarget, op0))
5866 subtarget = 0;
5868 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5869 GET_MODE_BITSIZE (mode) - 1,
5870 subtarget, 0);
5871 if (tem)
5872 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5873 OPTAB_WIDEN);
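/* Checking the claim above in 32-bit arithmetic: for A == 5,
   (5 >> 31) - 5 == -5 and the sign bit is set; for A == 0 the result is
   0; for A == -3, (-3 >> 31) - (-3) == -1 + 3 == 2 and the sign bit is
   clear.  So the sign bit of (A >> 31) - A is exactly A > 0. */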
5876 if (code == EQ || code == NE)
5878 /* For EQ or NE, one way to do the comparison is to apply an operation
5879 that converts the operand into a positive number if it is nonzero
5880 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5881 for NE we negate. This puts the result in the sign bit. Then we
5882 normalize with a shift, if needed.
5884 Two operations that can do the above actions are ABS and FFS, so try
5885 them. If that doesn't work, and MODE is smaller than a full word,
5886 we can use zero-extension to the wider mode (an unsigned conversion)
5887 as the operation. */
5889 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5890 that is compensated by the subsequent overflow when subtracting
5891 one / negating. */
5893 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5894 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5895 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5896 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5897 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5899 tem = convert_modes (word_mode, mode, op0, 1);
5900 mode = word_mode;
5903 if (tem != 0)
5905 if (code == EQ)
5906 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5907 0, OPTAB_WIDEN);
5908 else
5909 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5912 /* If we couldn't do it that way, for NE we can "or" the two's complement
5913 of the value with itself. For EQ, we take the one's complement of
5914 that "or", which is an extra insn, so we only handle EQ if branches
5915 are expensive. */
5917 if (tem == 0
5918 && (code == NE
5919 || BRANCH_COST (optimize_insn_for_speed_p (),
5920 false) > 1))
5922 if (rtx_equal_p (subtarget, op0))
5923 subtarget = 0;
5925 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5926 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5927 OPTAB_WIDEN);
5929 if (tem && code == EQ)
5930 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
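/* Checking the claim above: -A | A has its sign bit set exactly when
   A != 0, because for nonzero A at least one of A and -A is negative
   (both are, for A == INT_MIN), while for A == 0 the result is 0.
   Complementing that "or" therefore gives the EQ flavour, at the cost
   of one extra instruction. */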
5934 if (tem && normalizep)
5935 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5936 GET_MODE_BITSIZE (mode) - 1,
5937 subtarget, normalizep == 1);
5939 if (tem)
5941 if (!target)
5943 else if (GET_MODE (tem) != target_mode)
5945 convert_move (target, tem, 0);
5946 tem = target;
5948 else if (!subtarget)
5950 emit_move_insn (target, tem);
5951 tem = target;
5954 else
5955 delete_insns_since (last);
5957 return tem;
5960 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5961 and storing in TARGET. Normally return TARGET.
5962 Return 0 if that cannot be done.
5964 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5965 it is VOIDmode, they cannot both be CONST_INT.
5967 UNSIGNEDP is for the case where we have to widen the operands
5968 to perform the operation. It says to use zero-extension.
5970 NORMALIZEP is 1 if we should convert the result to be either zero
5971 or one. NORMALIZEP is -1 if we should convert the result to be
5972 either zero or -1. If NORMALIZEP is zero, the result will be left
5973 "raw" out of the scc insn. */
5976 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5977 machine_mode mode, int unsignedp, int normalizep)
5979 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5980 enum rtx_code rcode;
5981 rtx subtarget;
5982 rtx tem, trueval;
5983 rtx_insn *last;
5985 /* If we compare constants, we shouldn't use a store-flag operation,
5986 but a constant load. We can get there via the vanilla route that
5987 usually generates a compare-branch sequence, but will in this case
5988 fold the comparison to a constant, and thus elide the branch. */
5989 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5990 return NULL_RTX;
5992 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5993 target_mode);
5994 if (tem)
5995 return tem;
5997 /* If we reached here, we can't do this with a scc insn, however there
5998 are some comparisons that can be done in other ways. Don't do any
5999 of these cases if branches are very cheap. */
6000 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6001 return 0;
6003 /* See what we need to return. We can only return a 1, -1, or the
6004 sign bit. */
6006 if (normalizep == 0)
6008 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6009 normalizep = STORE_FLAG_VALUE;
6011 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6013 else
6014 return 0;
6017 last = get_last_insn ();
6019 /* If optimizing, use different pseudo registers for each insn, instead
6020 of reusing the same pseudo. This leads to better CSE, but slows
6021 down the compiler, since there are more pseudos. */
6022 subtarget = (!optimize
6023 && (target_mode == mode)) ? target : NULL_RTX;
6024 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6026 /* For floating-point comparisons, try the reverse comparison or try
6027 changing the "orderedness" of the comparison. */
6028 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6030 enum rtx_code first_code;
6031 bool and_them;
6033 rcode = reverse_condition_maybe_unordered (code);
6034 if (can_compare_p (rcode, mode, ccp_store_flag)
6035 && (code == ORDERED || code == UNORDERED
6036 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6037 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6039 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6040 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6042 /* For the reverse comparison, use either an addition or a XOR. */
6043 if (want_add
6044 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6045 optimize_insn_for_speed_p ()) == 0)
6047 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6048 STORE_FLAG_VALUE, target_mode);
6049 if (tem)
6050 return expand_binop (target_mode, add_optab, tem,
6051 gen_int_mode (normalizep, target_mode),
6052 target, 0, OPTAB_WIDEN);
6054 else if (!want_add
6055 && rtx_cost (trueval, mode, XOR, 1,
6056 optimize_insn_for_speed_p ()) == 0)
6058 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6059 normalizep, target_mode);
6060 if (tem)
6061 return expand_binop (target_mode, xor_optab, tem, trueval,
6062 target, INTVAL (trueval) >= 0,
6063 OPTAB_WIDEN);
6067 delete_insns_since (last);
6069 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6070 if (code == ORDERED || code == UNORDERED)
6071 return 0;
6073 and_them = split_comparison (code, mode, &first_code, &code);
6075 /* If there are no NaNs, the first comparison should always fall through.
6076 Effectively change the comparison to the other one. */
6077 if (!HONOR_NANS (mode))
6079 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6080 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6081 target_mode);
6084 if (!HAVE_conditional_move)
6085 return 0;
6087 /* Do not turn a trapping comparison into a non-trapping one. */
6088 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6089 && flag_trapping_math)
6090 return 0;
6092 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6093 conditional move. */
6094 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6095 normalizep, target_mode);
6096 if (tem == 0)
6097 return 0;
6099 if (and_them)
6100 tem = emit_conditional_move (target, code, op0, op1, mode,
6101 tem, const0_rtx, GET_MODE (tem), 0);
6102 else
6103 tem = emit_conditional_move (target, code, op0, op1, mode,
6104 trueval, tem, GET_MODE (tem), 0);
6106 if (tem == 0)
6107 delete_insns_since (last);
6108 return tem;
6111 /* The remaining tricks only apply to integer comparisons. */
6113 scalar_int_mode int_mode;
6114 if (is_int_mode (mode, &int_mode))
6115 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6116 unsignedp, normalizep, trueval);
6118 return 0;
6121 /* Like emit_store_flag, but always succeeds. */
6124 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6125 machine_mode mode, int unsignedp, int normalizep)
6127 rtx tem;
6128 rtx_code_label *label;
6129 rtx trueval, falseval;
6131 /* First see if emit_store_flag can do the job. */
6132 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6133 if (tem != 0)
6134 return tem;
6136 /* If one operand is constant, make it the second one. Only do this
6137 if the other operand is not constant as well. */
6138 if (swap_commutative_operands_p (op0, op1))
6140 std::swap (op0, op1);
6141 code = swap_condition (code);
6144 if (mode == VOIDmode)
6145 mode = GET_MODE (op0);
6147 if (!target)
6148 target = gen_reg_rtx (word_mode);
6150 /* If this failed, we have to do this with set/compare/jump/set code.
6151 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6152 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6153 if (code == NE
6154 && GET_MODE_CLASS (mode) == MODE_INT
6155 && REG_P (target)
6156 && op0 == target
6157 && op1 == const0_rtx)
6159 label = gen_label_rtx ();
6160 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6161 NULL_RTX, NULL, label,
6162 profile_probability::uninitialized ());
6163 emit_move_insn (target, trueval);
6164 emit_label (label);
6165 return target;
6168 if (!REG_P (target)
6169 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6170 target = gen_reg_rtx (GET_MODE (target));
6172 /* Jump in the right direction if the target cannot implement CODE
6173 but can jump on its reverse condition. */
6174 falseval = const0_rtx;
6175 if (! can_compare_p (code, mode, ccp_jump)
6176 && (! FLOAT_MODE_P (mode)
6177 || code == ORDERED || code == UNORDERED
6178 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6179 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6181 enum rtx_code rcode;
6182 if (FLOAT_MODE_P (mode))
6183 rcode = reverse_condition_maybe_unordered (code);
6184 else
6185 rcode = reverse_condition (code);
6187 /* Canonicalize to UNORDERED for the libcall. */
6188 if (can_compare_p (rcode, mode, ccp_jump)
6189 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6191 falseval = trueval;
6192 trueval = const0_rtx;
6193 code = rcode;
6197 emit_move_insn (target, trueval);
6198 label = gen_label_rtx ();
6199 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6200 label, profile_probability::uninitialized ());
6202 emit_move_insn (target, falseval);
6203 emit_label (label);
6205 return target;
6208 /* Helper function for canonicalize_comparison. Swap between inclusive
6209 and exclusive ranges in order to create an equivalent comparison. See
6210 canonicalize_comparison for the possible cases. */
6212 static enum rtx_code
6213 equivalent_cmp_code (enum rtx_code code)
6215 switch (code)
6217 case GT:
6218 return GE;
6219 case GE:
6220 return GT;
6221 case LT:
6222 return LE;
6223 case LE:
6224 return LT;
6225 case GTU:
6226 return GEU;
6227 case GEU:
6228 return GTU;
6229 case LTU:
6230 return LEU;
6231 case LEU:
6232 return LTU;
6234 default:
6235 return code;
6239 /* Choose the more appropriate immediate in scalar integer comparisons. The
6240 purpose of this is to end up with an immediate which can be loaded into a
6241 register in fewer moves, if possible.
6243 For each integer comparison there exists an equivalent choice:
6244 i) a > b or a >= b + 1
6245 ii) a <= b or a < b + 1
6246 iii) a >= b or a > b - 1
6247 iv) a < b or a <= b - 1
6249 MODE is the mode of the first operand.
6250 CODE points to the comparison code.
6251 IMM points to the rtx containing the immediate. *IMM must satisfy
6252 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6253 on exit. */
6255 void
6256 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6258 if (!SCALAR_INT_MODE_P (mode))
6259 return;
6261 int to_add = 0;
6262 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6264 /* Extract the immediate value from the rtx. */
6265 wide_int imm_val = rtx_mode_t (*imm, mode);
6267 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6268 to_add = 1;
6269 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6270 to_add = -1;
6271 else
6272 return;
6274 /* Check for overflow/underflow in the case of signed values and
6275 wrapping around in the case of unsigned values. If any occur
6276 cancel the optimization. */
6277 wi::overflow_type overflow = wi::OVF_NONE;
6278 wide_int imm_modif;
6280 if (to_add == 1)
6281 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6282 else
6283 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6285 if (overflow)
6286 return;
6288 /* The following creates a pseudo; if we cannot do that, bail out. */
6289 if (!can_create_pseudo_p ())
6290 return;
6292 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6293 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6295 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6296 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6298 /* Update the immediate and the code. */
6299 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6301 *code = equivalent_cmp_code (*code);
6302 *imm = new_imm;
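/* Worked example of the canonicalization above: "a > 7" and "a >= 8"
   are equivalent (case i with b == 7), as are "a < 16" and "a <= 15"
   (case iv), so whichever of the two immediates is cheaper to
   materialize is chosen, provided the +1/-1 adjustment neither
   overflows a signed immediate nor wraps an unsigned one. */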
6308 /* Perform possibly multi-word comparison and conditional jump to LABEL
6309 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6310 now a thin wrapper around do_compare_rtx_and_jump. */
6312 static void
6313 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6314 rtx_code_label *label)
6316 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6317 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6318 NULL, label, profile_probability::uninitialized ());