gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2021 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* Work around tree-optimization/91825.  */
  22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "backend.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "predict.h"
  32 #include "memmodel.h"
  33 #include "tm_p.h"
  34 #include "optabs.h"
  35 #include "expmed.h"
  36 #include "regs.h"
  37 #include "emit-rtl.h"
  38 #include "diagnostic-core.h"
  39 #include "fold-const.h"
  40 #include "stor-layout.h"
  41 #include "dojump.h"
  42 #include "explow.h"
  43 #include "expr.h"
  44 #include "langhooks.h"
  45 #include "tree-vector-builder.h"
  46
/* Default instance of the expmed target state.  When SWITCHABLE_TARGET
   is set, this_target_expmed initially points at this instance.  */
struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif
  51
/* Forward declarations of the file-local helpers used for storing and
   extracting bit-fields and for expanding divisions and comparisons.  */
static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT,
                                      poly_uint64, poly_uint64,
                                      machine_mode, rtx, bool, bool);
static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   poly_uint64, poly_uint64,
                                   rtx, scalar_int_mode, bool);
static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
                                     unsigned HOST_WIDE_INT,
                                     unsigned HOST_WIDE_INT,
                                     rtx, scalar_int_mode, bool);
static void store_split_bit_field (rtx, opt_scalar_int_mode,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   poly_uint64, poly_uint64,
                                   rtx, scalar_int_mode, bool);
static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
                                       unsigned HOST_WIDE_INT,
                                       unsigned HOST_WIDE_INT, int, rtx,
                                       machine_mode, machine_mode, bool, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  88
  89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  90    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  91    The mask is truncated if necessary to the width of mode MODE.  The
  92    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  93
  94 static inline rtx
  95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
  96 {
  97   return immed_wide_int_const
  98     (wi::shifted_mask (bitpos, bitsize, complement,
  99                        GET_MODE_PRECISION (mode)), mode);
 100 }
 101
/* Test whether a value is zero or a power of two.  X is evaluated
   twice, so it should be free of side effects.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
 105
/* RTL templates used while computing operation costs.  init_expmed
   builds each expression once; init_expmed_one_mode and
   init_expmed_one_conv then change the modes (and some operands) in
   place before passing them to set_src_cost.  */
struct init_expmed_rtl
{
  rtx reg;          /* Register operand shared by the templates below.  */
  rtx plus;         /* (plus reg reg).  */
  rtx neg;          /* (neg reg).  */
  rtx mult;         /* (mult reg reg).  */
  rtx sdiv;         /* (div reg reg).  */
  rtx udiv;         /* (udiv reg reg).  */
  rtx sdiv_32;      /* (div reg pow2[5]), i.e. signed division by 32.  */
  rtx smod_32;      /* (mod reg pow2[5]), i.e. signed modulus by 32.  */
  rtx wide_mult;    /* (mult zext zext).  */
  rtx wide_lshr;    /* (lshiftrt wide_mult N); N is set per mode.  */
  rtx wide_trunc;   /* (truncate wide_lshr).  */
  rtx shift;        /* (ashift reg N); N is set to cint[m].  */
  rtx shift_mult;   /* (mult reg N); N is set to pow2[m].  */
  rtx shift_add;    /* (plus shift_mult reg).  */
  rtx shift_sub0;   /* (minus shift_mult reg).  */
  rtx shift_sub1;   /* (minus reg shift_mult).  */
  rtx zext;         /* (zero_extend reg).  */
  rtx trunc;        /* (truncate reg).  */

  /* pow2[m] is the constant 1 << m and cint[m] is the constant m, for
     m >= 1.  Index 0 is left cleared by init_expmed.  */
  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};
 130
 131 static void
 132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
 133                       scalar_int_mode from_mode, bool speed)
 134 {
 135   int to_size, from_size;
 136   rtx which;
 137
 138   to_size = GET_MODE_PRECISION (to_mode);
 139   from_size = GET_MODE_PRECISION (from_mode);
 140
 141   /* Most partial integers have a precision less than the "full"
 142      integer it requires for storage.  In case one doesn't, for
 143      comparison purposes here, reduce the bit size by one in that
 144      case.  */
 145   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 146       && pow2p_hwi (to_size))
 147     to_size --;
 148   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 149       && pow2p_hwi (from_size))
 150     from_size --;
 151
 152   /* Assume cost of zero-extend and sign-extend is the same.  */
 153   which = (to_size < from_size ? all->trunc : all->zext);
 154
 155   PUT_MODE (all->reg, from_mode);
 156   set_convert_cost (to_mode, from_mode, speed,
 157                     set_src_cost (which, to_mode, speed));
 158   /* Restore all->reg's mode.  */
 159   PUT_MODE (all->reg, to_mode);
 160 }
 161
/* Record the operation costs for mode MODE under the SPEED setting,
   using the RTL templates in ALL.  This covers add, negate, multiply,
   signed and unsigned divide, whether division/modulus by a power of
   two is cheap, and shift, shift-add and shift-subtract costs for each
   shift count.  For a scalar integer MODE it also records the cost of
   converting from every mode in the MIN_MODE_INT..MAX_MODE_INT range
   and, when a wider MODE_INT mode exists, the widening-multiply and
   highpart-multiply costs.  */

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
                      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  /* Retarget the templates to MODE.  wide_mult and wide_lshr are not
     touched here; they are only set up in the wider-mode case below.  */
  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  /* Division by 32 counts as cheap if it costs at most two additions,
     modulus by 32 if it costs at most four.  These rely on the add
     cost recorded just above.  */
  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
                                     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
                                     <= 4 * add_cost (speed, mode)));

  /* A shift by zero is free, and a shift-add or shift-subtract with a
     zero shift count costs the same as a plain addition.  */
  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  /* Cost every nonzero shift count below both the unit bitsize of MODE
     and MAX_BITS_PER_WORD.  The shift templates are updated in place:
     SHIFT takes the count itself, SHIFT_MULT the matching power of two
     (which SHIFT_ADD, SHIFT_SUB0 and SHIFT_SUB1 contain).  */
  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
                                                       speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
                                                        speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
                                                        speed));
    }

  /* The remaining costs are only recorded for scalar integer modes.  */
  scalar_int_mode int_mode_to;
  if (is_a <scalar_int_mode> (mode, &int_mode_to))
    {
      /* Conversion cost from each mode in the MODE_INT range to MODE.  */
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
           mode_from = (machine_mode)(mode_from + 1))
        init_expmed_one_conv (all, int_mode_to,
                              as_a <scalar_int_mode> (mode_from), speed);

      scalar_int_mode wider_mode;
      if (GET_MODE_CLASS (int_mode_to) == MODE_INT
          && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
        {
          /* Keep the operand in MODE but do the extension, multiply and
             right shift in WIDER_MODE.  WIDE_TRUNC, still in MODE from
             above, then takes the product shifted right by the bitsize
             of MODE.  */
          PUT_MODE (all->reg, mode);
          PUT_MODE (all->zext, wider_mode);
          PUT_MODE (all->wide_mult, wider_mode);
          PUT_MODE (all->wide_lshr, wider_mode);
          XEXP (all->wide_lshr, 1)
            = gen_int_shift_amount (wider_mode, mode_bitsize);

          set_mul_widen_cost (speed, wider_mode,
                              set_src_cost (all->wide_mult, wider_mode, speed));
          set_mul_highpart_cost (speed, int_mode_to,
                                 set_src_cost (all->wide_trunc,
                                               int_mode_to, speed));
        }
    }
}
 249
/* Initialize the cost tables used by this file.  Build a set of RTL
   templates, record the operation costs of every integer, partial
   integer and vector integer mode for both speed settings, clear the
   algorithm hash table (or mark it as in use on the first call), and
   finally free the templates.  */

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  /* Entry 0 of pow2 and cint stays cleared; the loop starts at 1.  */
  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  /* SPEED is 0 on the first pass and 1 on the second.  */
  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      /* NOTE(review): MODE is QImode only on the first pass; on the
         second pass it holds whatever value the mode loops below left
         in it.  Confirm that the zero cost does not depend on it.  */
      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
           mode = (machine_mode)(mode + 1))
        init_expmed_one_mode (&all, mode, speed);

      /* The partial integer and vector integer classes are skipped
         when their MIN_MODE_* value is VOIDmode.  */
      if (MIN_MODE_PARTIAL_INT != VOIDmode)
        for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
        for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);
    }

  /* If the algorithm hash table has been used before, wipe all of its
     entries; otherwise just mark it as used.  */
  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  /* The templates are no longer needed; release them, in the reverse
     of the order in which they were created.  */
  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}
 332
 333 /* Return an rtx representing minus the value of X.
 334    MODE is the intended mode of the result,
 335    useful if X is a CONST_INT.  */
 336
 337 rtx
 338 negate_rtx (machine_mode mode, rtx x)
 339 {
 340   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 341
 342   if (result == 0)
 343     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 344
 345   return result;
 346 }
 347
 348 /* Whether reverse storage order is supported on the target.  */
 349 static int reverse_storage_order_supported = -1;
 350
 351 /* Check whether reverse storage order is supported on the target.  */
 352
 353 static void
 354 check_reverse_storage_order_support (void)
 355 {
 356   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 357     {
 358       reverse_storage_order_supported = 0;
 359       sorry ("reverse scalar storage order");
 360     }
 361   else
 362     reverse_storage_order_supported = 1;
 363 }
 364
 365 /* Whether reverse FP storage order is supported on the target.  */
 366 static int reverse_float_storage_order_supported = -1;
 367
 368 /* Check whether reverse FP storage order is supported on the target.  */
 369
 370 static void
 371 check_reverse_float_storage_order_support (void)
 372 {
 373   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 374     {
 375       reverse_float_storage_order_supported = 0;
 376       sorry ("reverse floating-point scalar storage order");
 377     }
 378   else
 379     reverse_float_storage_order_supported = 1;
 380 }
 381
 382 /* Return an rtx representing value of X with reverse storage order.
 383    MODE is the intended mode of the result,
 384    useful if X is a CONST_INT.  */
 385
 386 rtx
 387 flip_storage_order (machine_mode mode, rtx x)
 388 {
 389   scalar_int_mode int_mode;
 390   rtx result;
 391
 392   if (mode == QImode)
 393     return x;
 394
 395   if (COMPLEX_MODE_P (mode))
 396     {
 397       rtx real = read_complex_part (x, false);
 398       rtx imag = read_complex_part (x, true);
 399
 400       real = flip_storage_order (GET_MODE_INNER (mode), real);
 401       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 402
 403       return gen_rtx_CONCAT (mode, real, imag);
 404     }
 405
 406   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 407     check_reverse_storage_order_support ();
 408
 409   if (!is_a <scalar_int_mode> (mode, &int_mode))
 410     {
 411       if (FLOAT_MODE_P (mode)
 412           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 413         check_reverse_float_storage_order_support ();
 414
 415       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
 416           || !targetm.scalar_mode_supported_p (int_mode))
 417         {
 418           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 419           return x;
 420         }
 421       x = gen_lowpart (int_mode, x);
 422     }
 423
 424   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 425   if (result == 0)
 426     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 427
 428   if (int_mode != mode)
 429     result = gen_lowpart (mode, result);
 430
 431   return result;
 432 }
 433
 434 /* If MODE is set, adjust bitfield memory MEM so that it points to the
 435    first unit of mode MODE that contains a bitfield of size BITSIZE at
 436    bit position BITNUM.  If MODE is not set, return a BLKmode reference
 437    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
 438    of the field within the new memory.  */
 439
 440 static rtx
 441 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
 442                       unsigned HOST_WIDE_INT bitsize,
 443                       unsigned HOST_WIDE_INT bitnum,
 444                       unsigned HOST_WIDE_INT *new_bitnum)
 445 {
 446   scalar_int_mode imode;
 447   if (mode.exists (&imode))
 448     {
 449       unsigned int unit = GET_MODE_BITSIZE (imode);
 450       *new_bitnum = bitnum % unit;
 451       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 452       return adjust_bitfield_address (mem, imode, offset);
 453     }
 454   else
 455     {
 456       *new_bitnum = bitnum % BITS_PER_UNIT;
 457       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 458       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 459                             / BITS_PER_UNIT);
 460       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
 461     }
 462 }
 463
 464 /* The caller wants to perform insertion or extraction PATTERN on a
 465    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 466    BITREGION_START and BITREGION_END are as for store_bit_field
 467    and FIELDMODE is the natural mode of the field.
 468
 469    Search for a mode that is compatible with the memory access
 470    restrictions and (where applicable) with a register insertion or
 471    extraction.  Return the new memory on success, storing the adjusted
 472    bit position in *NEW_BITNUM.  Return null otherwise.  */
 473
 474 static rtx
 475 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 476                               rtx op0, HOST_WIDE_INT bitsize,
 477                               HOST_WIDE_INT bitnum,
 478                               poly_uint64 bitregion_start,
 479                               poly_uint64 bitregion_end,
 480                               machine_mode fieldmode,
 481                               unsigned HOST_WIDE_INT *new_bitnum)
 482 {
 483   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 484                                 bitregion_end, MEM_ALIGN (op0),
 485                                 MEM_VOLATILE_P (op0));
 486   scalar_int_mode best_mode;
 487   if (iter.next_mode (&best_mode))
 488     {
 489       /* We can use a memory in BEST_MODE.  See whether this is true for
 490          any wider modes.  All other things being equal, we prefer to
 491          use the widest mode possible because it tends to expose more
 492          CSE opportunities.  */
 493       if (!iter.prefer_smaller_modes ())
 494         {
 495           /* Limit the search to the mode required by the corresponding
 496              register insertion or extraction instruction, if any.  */
 497           scalar_int_mode limit_mode = word_mode;
 498           extraction_insn insn;
 499           if (get_best_reg_extraction_insn (&insn, pattern,
 500                                             GET_MODE_BITSIZE (best_mode),
 501                                             fieldmode))
 502             limit_mode = insn.field_mode;
 503
 504           scalar_int_mode wider_mode;
 505           while (iter.next_mode (&wider_mode)
 506                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 507             best_mode = wider_mode;
 508         }
 509       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 510                                    new_bitnum);
 511     }
 512   return NULL_RTX;
 513 }
 514
 515 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 516    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 517    offset is then BITNUM / BITS_PER_UNIT.  */
 518
 519 static bool
 520 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
 521                      machine_mode struct_mode)
 522 {
 523   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
 524   if (BYTES_BIG_ENDIAN)
 525     return (multiple_p (bitnum, BITS_PER_UNIT)
 526             && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
 527                 || multiple_p (bitnum + bitsize,
 528                                regsize * BITS_PER_UNIT)));
 529   else
 530     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
 531 }
 532
 533 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 534    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 535    Return false if the access would touch memory outside the range
 536    BITREGION_START to BITREGION_END for conformance to the C++ memory
 537    model.  */
 538
 539 static bool
 540 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 541                             unsigned HOST_WIDE_INT bitnum,
 542                             scalar_int_mode fieldmode,
 543                             poly_uint64 bitregion_start,
 544                             poly_uint64 bitregion_end)
 545 {
 546   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 547
 548   /* -fstrict-volatile-bitfields must be enabled and we must have a
 549      volatile MEM.  */
 550   if (!MEM_P (op0)
 551       || !MEM_VOLATILE_P (op0)
 552       || flag_strict_volatile_bitfields <= 0)
 553     return false;
 554
 555   /* The bit size must not be larger than the field mode, and
 556      the field mode must not be larger than a word.  */
 557   if (bitsize > modesize || modesize > BITS_PER_WORD)
 558     return false;
 559
 560   /* Check for cases of unaligned fields that must be split.  */
 561   if (bitnum % modesize + bitsize > modesize)
 562     return false;
 563
 564   /* The memory must be sufficiently aligned for a MODESIZE access.
 565      This condition guarantees, that the memory access will not
 566      touch anything after the end of the structure.  */
 567   if (MEM_ALIGN (op0) < modesize)
 568     return false;
 569
 570   /* Check for cases where the C++ memory model applies.  */
 571   if (maybe_ne (bitregion_end, 0U)
 572       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
 573           || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
 574                        bitregion_end)))
 575     return false;
 576
 577   return true;
 578 }
 579
 580 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 581    bit number BITNUM can be treated as a simple value of mode MODE.
 582    Store the byte offset in *BYTENUM if so.  */
 583
 584 static bool
 585 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
 586                        machine_mode mode, poly_uint64 *bytenum)
 587 {
 588   return (MEM_P (op0)
 589           && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
 590           && known_eq (bitsize, GET_MODE_BITSIZE (mode))
 591           && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
 592               || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
 593                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 594 }
 595 \f
/* Try to use instruction INSV to store VALUE into a field of OP0.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
   BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
   are as for store_bit_field.

   Return true if the insertion was emitted.  Return false if the field
   cannot be handled this way; any insns emitted by a failed expansion
   attempt are deleted again.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
                            opt_scalar_int_mode op0_mode,
                            unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            rtx value, scalar_int_mode value_mode)
{
  class expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  /* Remember where we started so a failed attempt can be undone.  */
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  /* The field must be non-empty and fit in the mode INSV operates on.  */
  scalar_int_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
                                 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If xop0 is a register, we need it in OP_MODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        {
          /* If such a SUBREG can't be created, give up.  */
          if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
                                SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
            return false;
          /* We can't just change the mode, because this might clobber op0,
             and we will need the original value of op0 if insv fails.  */
          xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
                                 SUBREG_BYTE (xop0));
        }
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
                                         op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but it only handles the case where the field lies completely
     outside the register.  The field may also lie partially in the
     register, in which case BITSIZE must be reduced to the part that
     fits.  Without this adjustment, pr48335-2.c breaks on big-endian
     targets that support bit-insert instructions, such as arm and
     aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
               "destination object, data truncated into %wu-bit",
               bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (value_mode != op_mode)
    {
      if (GET_MODE_BITSIZE (value_mode) >= bitsize)
        {
          rtx tmp;
          /* Optimization: Don't bother really extending VALUE
             if it has all the bits we will actually use.  However,
             if we must narrow it, be sure we do it correctly.  */

          if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
            {
              /* VALUE is narrower than OP_MODE: use a subreg of it,
                 forcing it into a register first if necessary.  */
              tmp = simplify_subreg (op_mode, value1, value_mode, 0);
              if (! tmp)
                tmp = simplify_gen_subreg (op_mode,
                                           force_reg (value_mode, value1),
                                           value_mode, 0);
            }
          else
            {
              /* VALUE is at least as wide as OP_MODE: take its low
                 part, forcing it into a register first if necessary.  */
              tmp = gen_lowpart_if_possible (op_mode, value1);
              if (! tmp)
                tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
            }
          value1 = tmp;
        }
      else if (CONST_INT_P (value))
        value1 = gen_int_mode (INTVAL (value), op_mode);
      else
        /* Parse phase is supposed to make VALUE's data type
           match that of the component reference, which is a type
           at least as wide as the field; so VALUE should have
           a mode that corresponds to that type.  */
        gcc_assert (CONSTANT_P (value));
    }

  /* Operands passed to INSV: destination, field width, field position
     and the value to insert.  */
  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      /* The insertion went into a temporary; move it to the real
         destination.  */
      if (copy_back)
        convert_move (op0, xop0, true);
      return true;
    }
  /* The expansion failed; discard anything emitted along the way.  */
  delete_insns_since (last);
  return false;
}
 735
 736 /* A subroutine of store_bit_field, with the same arguments.  Return true
 737    if the operation could be implemented.
 738
 739    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 740    no other way of implementing the operation.  If FALLBACK_P is false,
 741    return false instead.

        The strategies are tried in this order:
        - nothing at all, if the field lies wholly outside a register
          destination;
        - a vec_set pattern, for a whole element of a vector that is not
          in memory;
        - a move through a SUBREG, for suitable fields of a non-MEM
          destination;
        - a plain move, for a MEM field accepted by simple_mem_bitfield_p.
        Anything else requires constant BITSIZE and BITNUM and is passed on
        to store_integral_bit_field, after OP0 has been punned to an
        integer mode if necessary.  */
 742
 743 static bool
 744 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
 745                    poly_uint64 bitregion_start, poly_uint64 bitregion_end,
 746                    machine_mode fieldmode,
 747                    rtx value, bool reverse, bool fallback_p)
 748 {
 749   rtx op0 = str_rtx;
 750
       /* Look through SUBREGs, folding each one's offset (converted from
          bytes to bits) into BITNUM, so that BITNUM ends up relative to
          the innermost object.  */
 751   while (GET_CODE (op0) == SUBREG)
 752     {
 753       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
 754       op0 = SUBREG_REG (op0);
 755     }
 756
 757   /* No action is needed if the target is a register and if the field
 758      lies completely outside that register.  This can occur if the source
 759      code contains an out-of-bounds access to a small array.  */
 760   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
 761     return true;
 762
 763   /* Use vec_set patterns for inserting parts of vectors whenever
 764      available.  */
 765   machine_mode outermode = GET_MODE (op0);
 766   scalar_mode innermode = GET_MODE_INNER (outermode);
 767   poly_uint64 pos;
 768   if (VECTOR_MODE_P (outermode)
 769       && !MEM_P (op0)
 770       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
 771       && fieldmode == innermode
 772       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
 773       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
 774     {
 775       class expand_operand ops[3];
 776       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 777
           /* POS, set by multiple_p above, is BITNUM divided by the element
              width, i.e. the index of the element being replaced.  */
 778       create_fixed_operand (&ops[0], op0);
 779       create_input_operand (&ops[1], value, innermode);
 780       create_integer_operand (&ops[2], pos);
           /* If the pattern cannot be expanded, carry on with the generic
              strategies below.  */
 781       if (maybe_expand_insn (icode, 3, ops))
 782         return true;
 783     }
 784
 785   /* If the target is a register, overwriting the entire object, or storing
 786      a full-word or multi-word field can be done with just a SUBREG.  */
 787   if (!MEM_P (op0)
 788       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
 789     {
 790       /* Use the subreg machinery either to narrow OP0 to the required
 791          words or to cope with mode punning between equal-sized modes.
 792          In the latter case, use subreg on the rhs side, not lhs.  */
 793       rtx sub;
 794       HOST_WIDE_INT regnum;
 795       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
 796       if (known_eq (bitnum, 0U)
 797           && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
 798         {
 799           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 800           if (sub)
 801             {
 802               if (reverse)
 803                 sub = flip_storage_order (GET_MODE (op0), sub);
 804               emit_move_insn (op0, sub);
 805               return true;
 806             }
 807         }
 808       else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
 809                && multiple_p (bitsize, regsize * BITS_PER_UNIT)
 810                && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
 811         {
               /* The field starts REGNUM natural-size registers into OP0 and
                  covers a whole number of them, so store VALUE directly into
                  that part of OP0.  */
 812           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 813                                      regnum * regsize);
 814           if (sub)
 815             {
 816               if (reverse)
 817                 value = flip_storage_order (fieldmode, value);
 818               emit_move_insn (sub, value);
 819               return true;
 820             }
 821         }
 822     }
 823
 824   /* If the target is memory, storing any naturally aligned field can be
 825      done with a simple store.  For targets that support fast unaligned
 826      memory, any naturally sized, unit aligned field can be done directly.  */
 827   poly_uint64 bytenum;
 828   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
 829     {
 830       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
 831       if (reverse)
 832         value = flip_storage_order (fieldmode, value);
 833       emit_move_insn (op0, value);
 834       return true;
 835     }
 836
 837   /* It's possible we'll need to handle other cases here for
 838      polynomial bitnum and bitsize.  */
 839
 840   /* From here on we need to be looking at a fixed-size insertion.  */
 841   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
 842   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
 843
 844   /* Make sure we are playing with integral modes.  Pun with subregs
 845      if we aren't.  This must come after the entire register case above,
 846      since that case is valid for any mode.  The following cases are only
 847      valid for integral modes.  */
 848   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
 849   scalar_int_mode imode;
 850   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
 851     {
 852       if (MEM_P (op0))
             /* A MEM is simply re-addressed, at offset 0 and with its size
                unchanged, in OP0_MODE if that exists (else_blk supplies the
                alternative otherwise -- presumably BLKmode).  */
 853         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
 854                                             0, MEM_SIZE (op0));
 855       else if (!op0_mode.exists ())
 856         {
               /* OP0 is not a MEM and no integer mode corresponds to its
                  mode.  If all of OP0 is being replaced by a value that lives
                  in memory, just load that memory in OP0's mode.  */
 857           if (ibitnum == 0
 858               && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
 859               && MEM_P (value)
 860               && !reverse)
 861             {
 862               value = adjust_address (value, GET_MODE (op0), 0);
 863               emit_move_insn (op0, value);
 864               return true;
 865             }
 866           if (!fallback_p)
 867             return false;
               /* Otherwise go through memory: copy OP0 to a stack temporary,
                  insert the field into the temporary and copy it back.
                  FALLBACK_P is known to be true here; the result of the
                  recursive call is not checked.  No bit region is passed
                  on for the temporary.  */
 868           rtx temp = assign_stack_temp (GET_MODE (op0),
 869                                         GET_MODE_SIZE (GET_MODE (op0)));
 870           emit_move_insn (temp, op0);
 871           store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
 872                              reverse, fallback_p);
 873           emit_move_insn (op0, temp);
 874           return true;
 875         }
 876       else
             /* An integer mode exists but differs from OP0's own mode:
                access OP0 in that integer mode.  */
 877         op0 = gen_lowpart (op0_mode.require (), op0);
 878     }
 879
 880   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
 881                                    bitregion_start, bitregion_end,
 882                                    fieldmode, value, reverse, fallback_p);
 883 }
 884
 885 /* Subroutine of store_bit_field_1, with the same arguments, except
 886    that BITSIZE and BITNUM are constant.  Handle cases specific to
 887    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
 888    otherwise OP0 is a BLKmode MEM.

        Return true if the store was implemented.  Every explicit
        "return false" below is either guarded by !FALLBACK_P or forwards
        the failure of a recursive call made with the same FALLBACK_P.

        The strategies are tried in this order: a movstrict instruction,
        word-by-word stores for fields wider than a word, an insv pattern
        on a register, an insv pattern on memory, insertion into a register
        copy of the memory, and finally store_fixed_bit_field.  */
 889
 890 static bool
 891 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
 892                           unsigned HOST_WIDE_INT bitsize,
 893                           unsigned HOST_WIDE_INT bitnum,
 894                           poly_uint64 bitregion_start,
 895                           poly_uint64 bitregion_end,
 896                           machine_mode fieldmode,
 897                           rtx value, bool reverse, bool fallback_p)
 898 {
 899   /* Storing an lsb-aligned field in a register
 900      can be done with a movstrict instruction.  */
 901
 902   if (!MEM_P (op0)
 903       && !reverse
 904       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
 905       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
 906       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 907     {
 908       class expand_operand ops[2];
 909       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 910       rtx arg0 = op0;
 911       unsigned HOST_WIDE_INT subreg_off;
 912
 913       if (GET_CODE (arg0) == SUBREG)
 914         {
 915           /* Else we've got some float mode source being extracted into
 916              a different float mode destination -- this combination of
 917              subregs results in Severe Tire Damage.  */
 918           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 919                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 920                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 921           arg0 = SUBREG_REG (arg0);
 922         }
 923
           /* Byte offset of the field within ARG0.  */
 924       subreg_off = bitnum / BITS_PER_UNIT;
 925       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
 926           /* STRICT_LOW_PART must have a non-paradoxical subreg as
 927              operand.  */
 928           && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
 929         {
 930           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 931
 932           create_fixed_operand (&ops[0], arg0);
 933           /* Shrink the source operand to FIELDMODE.  */
 934           create_convert_operand_to (&ops[1], value, fieldmode, false);
 935           if (maybe_expand_insn (icode, 2, ops))
 936             return true;
 937         }
 938     }
 939
 940   /* Handle fields bigger than a word.  */
 941
 942   if (bitsize > BITS_PER_WORD)
 943     {
 944       /* Here we transfer the words of the field
 945          in the order least significant first.
 946          This is because the most significant word is the one which may
 947          be less than full.
 948          However, only do that if the value is not BLKmode.  */
 949
 950       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 951       const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 952       rtx_insn *last;
 953
 954       /* This is the mode we must force value to, so that there will be enough
 955          subwords to extract.  Note that fieldmode will often (always?) be
 956          VOIDmode, because that is what store_field uses to indicate that this
 957          is a bit field, but passing VOIDmode to operand_subword_force
 958          is not allowed.
 959
 960          The mode must be fixed-size, since insertions into variable-sized
 961          objects are meant to be handled before calling this function.  */
 962       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
 963       if (value_mode == VOIDmode)
 964         value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
 965
           /* Remember where the insn stream ends now, so that everything
              emitted by the loop can be deleted again if one of the word
              stores fails.  */
 966       last = get_last_insn ();
 967       for (int i = 0; i < nwords; i++)
 968         {
 969           /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
 970              except maybe for the last iteration.  */
 971           const unsigned HOST_WIDE_INT new_bitsize
 972             = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 973           /* Bit offset from the starting bit number in the target.  */
 974           const unsigned int bit_offset
 975             = backwards ^ reverse
 976               ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
 977               : i * BITS_PER_WORD;
 978           /* Starting word number in the value.  */
 979           const unsigned int wordnum
 980             = backwards
 981               ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
 982               : i;
 983           /* The chunk of the value in word_mode.  We use bit-field extraction
 984               in BLKmode to handle unaligned memory references and to shift the
 985               last chunk right on big-endian machines if need be.  */
 986           rtx value_word
 987             = fieldmode == BLKmode
 988               ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
 989                                    1, NULL_RTX, word_mode, word_mode, false,
 990                                    NULL)
 991               : operand_subword_force (value, wordnum, value_mode);
 992
 993           if (!store_bit_field_1 (op0, new_bitsize,
 994                                   bitnum + bit_offset,
 995                                   bitregion_start, bitregion_end,
 996                                   word_mode,
 997                                   value_word, reverse, fallback_p))
 998             {
 999               delete_insns_since (last);
1000               return false;
1001             }
1002         }
1003       return true;
1004     }
1005
1006   /* If VALUE has a floating-point or complex mode, access it as an
1007      integer of the corresponding size.  This can occur on a machine
1008      with 64 bit registers that uses SFmode for float.  It can also
1009      occur for unaligned float or complex fields.  */
       /* ORIG_VALUE keeps VALUE as it was passed in; it is what the
          recursive store_bit_field_1 call in the MEM case below is given.  */
1010   rtx orig_value = value;
1011   scalar_int_mode value_mode;
1012   if (GET_MODE (value) == VOIDmode)
1013     /* By this point we've dealt with values that are bigger than a word,
1014        so word_mode is a conservatively correct choice.  */
1015     value_mode = word_mode;
1016   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1017     {
1018       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1019       value = gen_reg_rtx (value_mode);
1020       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1021     }
1022
1023   /* If OP0 is a multi-word register, narrow it to the affected word.
1024      If the region spans two words, defer to store_split_bit_field.
1025      Don't do this if op0 is a single hard register wider than word
1026      such as a float or vector register.  */
1027   if (!MEM_P (op0)
1028       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1029       && (!REG_P (op0)
1030           || !HARD_REGISTER_P (op0)
1031           || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1032     {
1033       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1034         {
1035           if (!fallback_p)
1036             return false;
1037
1038           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1039                                  bitregion_start, bitregion_end,
1040                                  value, value_mode, reverse);
1041           return true;
1042         }
           /* The field lies within one word: switch OP0 to that word and
              make BITNUM relative to it.  */
1043       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1044                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1045       gcc_assert (op0);
1046       op0_mode = word_mode;
1047       bitnum %= BITS_PER_WORD;
1048     }
1049
1050   /* From here on we can assume that the field to be stored in fits
1051      within a word.  If the destination is a register, it too fits
1052      in a word.  */
1053
1054   extraction_insn insv;
1055   if (!MEM_P (op0)
1056       && !reverse
1057       && get_best_reg_extraction_insn (&insv, EP_insv,
1058                                        GET_MODE_BITSIZE (op0_mode.require ()),
1059                                        fieldmode)
1060       && store_bit_field_using_insv (&insv, op0, op0_mode,
1061                                      bitsize, bitnum, value, value_mode))
1062     return true;
1063
1064   /* If OP0 is a memory, try copying it to a register and seeing if a
1065      cheap register alternative is available.  */
1066   if (MEM_P (op0) && !reverse)
1067     {
           /* First see whether an insv pattern can be used on the memory
              itself.  */
1068       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1069                                         fieldmode)
1070           && store_bit_field_using_insv (&insv, op0, op0_mode,
1071                                          bitsize, bitnum, value, value_mode))
1072         return true;
1073
           /* Rollback point for the register attempt below.  */
1074       rtx_insn *last = get_last_insn ();
1075
1076       /* Try loading part of OP0 into a register, inserting the bitfield
1077          into that, and then copying the result back to OP0.  */
1078       unsigned HOST_WIDE_INT bitpos;
1079       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1080                                                bitregion_start, bitregion_end,
1081                                                fieldmode, &bitpos);
1082       if (xop0)
1083         {
1084           rtx tempreg = copy_to_reg (xop0);
               /* FALLBACK_P is false for this call, so it fails rather than
                  resorting to store_fixed_bit_field; in that case the insns
                  emitted since LAST are deleted and we fall through.  */
1085           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1086                                  bitregion_start, bitregion_end,
1087                                  fieldmode, orig_value, reverse, false))
1088             {
1089               emit_move_insn (xop0, tempreg);
1090               return true;
1091             }
1092           delete_insns_since (last);
1093         }
1094     }
1095
1096   if (!fallback_p)
1097     return false;
1098
       /* Last resort: store the field with shifts and boolean operations.  */
1099   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1100                          bitregion_end, value, value_mode, reverse);
1101   return true;
1102 }
1103
1104 /* Generate code to store value from rtx VALUE
1105    into a bit-field within structure STR_RTX
1106    containing BITSIZE bits starting at bit BITNUM.
1107
1108    BITREGION_START is bitpos of the first bitfield in this region.
1109    BITREGION_END is the bitpos of the ending bitfield in this region.
1110    These two fields are 0, if the C++ memory model does not apply,
1111    or we are not interested in keeping track of bitfield regions.
1112
1113    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1114
1115    If REVERSE is true, the store is to be done in reverse order.

        The work is done by store_bit_field_1 with fallback enabled, which
        is expected always to succeed (a failure hits gcc_unreachable).  */
1116
1117 void
1118 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1119                  poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1120                  machine_mode fieldmode,
1121                  rtx value, bool reverse)
1122 {
1123   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1124   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1125   scalar_int_mode int_mode;
1126   if (bitsize.is_constant (&ibitsize)
1127       && bitnum.is_constant (&ibitnum)
1128       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1129       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1130                                      bitregion_start, bitregion_end))
1131     {
1132       /* Storing of a full word can be done with a simple store.
1133          We know here that the field can be accessed with one single
1134          instruction.  For targets that support unaligned memory,
1135          an unaligned access may be necessary.  */
1136       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1137         {
1138           str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1139                                              ibitnum / BITS_PER_UNIT);
1140           if (reverse)
1141             value = flip_storage_order (int_mode, value);
               /* The byte offset used above is only exact for a field that
                  starts on a byte boundary.  */
1142           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1143           emit_move_insn (str_rtx, value);
1144         }
1145       else
1146         {
1147           rtx temp;
1148
               /* Do a read-modify-write in INT_MODE: narrow the MEM to an
                  INT_MODE access (which also updates IBITNUM), load it
                  into a register, insert the field there and store the
                  register back.  */
1149           str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1150                                           ibitnum, &ibitnum);
1151           gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1152           temp = copy_to_reg (str_rtx);
1153           if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1154                                   int_mode, value, reverse, true))
1155             gcc_unreachable ();
1156
1157           emit_move_insn (str_rtx, temp);
1158         }
1159
1160       return;
1161     }
1162
1163   /* Under the C++11 memory model, we must not touch bits outside the
1164      bit region.  Adjust the address to start at the beginning of the
1165      bit region.  */
1166   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1167     {
1168       scalar_int_mode best_mode;
1169       machine_mode addr_mode = VOIDmode;
1170
           /* OFFSET is the start of the region in bytes.  BITNUM and
              BITREGION_END are made relative to the start of the region,
              and SIZE is the number of bytes from there to the end of the
              field, rounded up.  */
1171       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1172       bitnum -= bitregion_start;
1173       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1174       bitregion_end -= bitregion_start;
1175       bitregion_start = 0;
           /* ADDR_MODE stays VOIDmode unless BITSIZE and BITNUM are
              constants and get_best_mode finds a mode for the access.  */
1176       if (bitsize.is_constant (&ibitsize)
1177           && bitnum.is_constant (&ibitnum)
1178           && get_best_mode (ibitsize, ibitnum,
1179                             bitregion_start, bitregion_end,
1180                             MEM_ALIGN (str_rtx), INT_MAX,
1181                             MEM_VOLATILE_P (str_rtx), &best_mode))
1182         addr_mode = best_mode;
1183       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1184                                               offset, size);
1185     }
1186
1187   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1188                           bitregion_start, bitregion_end,
1189                           fieldmode, value, reverse, true))
1190     gcc_unreachable ();
1191 }
1192 \f
1193 /* Store VALUE (of mode VALUE_MODE) into the bit field of OP0 that is
1194    BITSIZE bits wide and starts at bit BITNUM, using shifts and boolean
1195    operations.  OP0_MODE, if defined, is the mode of OP0; if it is not
1196    defined, OP0 is a BLKmode MEM.
1197
1198    If REVERSE is true, the store is to be done in reverse order.  */
1199
1200 static void
1201 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1202                        unsigned HOST_WIDE_INT bitsize,
1203                        unsigned HOST_WIDE_INT bitnum,
1204                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1205                        rtx value, scalar_int_mode value_mode, bool reverse)
1206 {
1207   /* One case is not handled here: a structure whose known alignment is
1208      just a halfword, with a field split across two aligned halfwords
1209      inside it, or likewise a structure whose known alignment is just a
1210      byte, with a field split across two bytes.  Such cases are not
1211      supposed to be able to occur.  */
1212
1213   /* A destination that is not a MEM is accessed in its own mode.  */
1214   if (!MEM_P (op0))
1215     {
1216       store_fixed_bit_field_1 (op0, op0_mode.require (), bitsize, bitnum,
1217                                value, value_mode, reverse);
1218       return;
1219     }
1220
1221   /* Cap the access at a word and at OP0's own mode, if it has one.  */
1222   unsigned int width_cap = BITS_PER_WORD;
1223   scalar_int_mode mem_mode;
1224   if (op0_mode.exists (&mem_mode))
1225     width_cap = MIN (width_cap, GET_MODE_BITSIZE (mem_mode));
1226
1227   scalar_int_mode access_mode;
1228   if (get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1229                      MEM_ALIGN (op0), width_cap, MEM_VOLATILE_P (op0),
1230                      &access_mode))
1231     {
1232       op0 = narrow_bit_field_mem (op0, access_mode, bitsize, bitnum, &bitnum);
1233       store_fixed_bit_field_1 (op0, access_mode, bitsize, bitnum,
1234                                value, value_mode, reverse);
1235       return;
1236     }
1237
1238   /* The only way this should occur is if the field spans word boundaries.  */
1239   store_split_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1240                          bitregion_end, value, value_mode, reverse);
1241 }
1242
1243 /* Helper function for store_fixed_bit_field, stores
1244    the bit field always using MODE, which is the mode of OP0.  The other
1245    arguments are as for store_fixed_bit_field.

        The store is a read-modify-write of OP0 in MODE: VALUE is shifted
        into position, the bits of the field are cleared in a register copy
        of OP0 with an AND, VALUE is merged in with an IOR, and the result
        is moved back to OP0.  The AND is skipped when VALUE is a constant
        with all BITSIZE bits set, and the IOR when VALUE is a constant
        whose BITSIZE low bits are all zero.  */
1246
1247 static void
1248 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1249                          unsigned HOST_WIDE_INT bitsize,
1250                          unsigned HOST_WIDE_INT bitnum,
1251                          rtx value, scalar_int_mode value_mode, bool reverse)
1252 {
1253   rtx temp;
       /* Set for a constant VALUE whose field bits are all zero
          (resp. all one); see the AND and IOR steps below.  */
1254   int all_zero = 0;
1255   int all_one = 0;
1256
1257   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1258      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1259
1260   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1261     /* BITNUM is the distance between our msb
1262        and that of the containing datum.
1263        Convert it to the distance from the lsb.  */
1264     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1265
1266   /* Now BITNUM is always the distance between our lsb
1267      and that of OP0.  */
1268
1269   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1270      we must first convert its mode to MODE.  */
1271
1272   if (CONST_INT_P (value))
1273     {
1274       unsigned HOST_WIDE_INT v = UINTVAL (value);
1275
           /* Keep only the low BITSIZE bits.  The shift is guarded because
              BITSIZE may equal the width of HOST_WIDE_INT.  */
1276       if (bitsize < HOST_BITS_PER_WIDE_INT)
1277         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1278
1279       if (v == 0)
1280         all_zero = 1;
1281       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1282                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1283                || (bitsize == HOST_BITS_PER_WIDE_INT
1284                    && v == HOST_WIDE_INT_M1U))
1285         all_one = 1;
1286
1287       value = lshift_value (mode, v, bitnum);
1288     }
1289   else
1290     {
           /* No masking is needed if VALUE_MODE is exactly BITSIZE bits
              wide, or if the field extends to the most significant bit of
              MODE (presumably because the shift below then discards the
              excess bits).  */
1291       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1292                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1293
1294       if (value_mode != mode)
1295         value = convert_to_mode (mode, value, 1);
1296
1297       if (must_and)
1298         value = expand_binop (mode, and_optab, value,
1299                               mask_rtx (mode, 0, bitsize, 0),
1300                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1301       if (bitnum > 0)
1302         value = expand_shift (LSHIFT_EXPR, mode, value,
1303                               bitnum, NULL_RTX, 1);
1304     }
1305
1306   if (reverse)
1307     value = flip_storage_order (mode, value);
1308
1309   /* Now clear the chosen bits in OP0,
1310      except that if VALUE is -1 we need not bother.  */
1311   /* We keep the intermediates in registers to allow CSE to combine
1312      consecutive bitfield assignments.  */
1313
1314   temp = force_reg (mode, op0);
1315
1316   if (! all_one)
1317     {
           /* The mask is flipped in the same way as VALUE was above.  */
1318       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1319       if (reverse)
1320         mask = flip_storage_order (mode, mask);
1321       temp = expand_binop (mode, and_optab, temp, mask,
1322                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1323       temp = force_reg (mode, temp);
1324     }
1325
1326   /* Now logical-or VALUE into OP0, unless it is zero.  */
1327
1328   if (! all_zero)
1329     {
1330       temp = expand_binop (mode, ior_optab, temp, value,
1331                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1332       temp = force_reg (mode, temp);
1333     }
1334
       /* Write the result back unless it already is OP0 itself; the move
          is emitted into a copy of the OP0 rtx.  */
1335   if (op0 != temp)
1336     {
1337       op0 = copy_rtx (op0);
1338       emit_move_insn (op0, temp);
1339     }
1340 }
1341 \f
1342 /* Store a bit field that is split across multiple accessible memory objects.
1343
1344    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1345    BITSIZE is the field width; BITPOS the position of its first bit
1346    (within the word).
1347    VALUE is the value to store, which has mode VALUE_MODE.
1348    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1349    a BLKmode MEM.
        BITREGION_START and BITREGION_END are passed on to
        store_fixed_bit_field; a nonzero BITREGION_END also limits how far
        each access to a MEM may extend.
1350
1351    If REVERSE is true, the store is to be done in reverse order.
1352
1353    This does not yet handle fields wider than BITS_PER_WORD.  */
1354
1355 static void
1356 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1357                        unsigned HOST_WIDE_INT bitsize,
1358                        unsigned HOST_WIDE_INT bitpos,
1359                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1360                        rtx value, scalar_int_mode value_mode, bool reverse)
1361 {
1362   unsigned int unit, total_bits, bitsdone = 0;
1363
1364   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1365      much at a time.  */
1366   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1367     unit = BITS_PER_WORD;
1368   else
1369     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1370
1371   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1372      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1373      again, and we will mutually recurse forever.  */
1374   if (MEM_P (op0) && op0_mode.exists ())
1375     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1376
1377   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1378      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1379      that VALUE might be a floating-point constant.  */
1380   if (CONSTANT_P (value) && !CONST_INT_P (value))
1381     {
1382       rtx word = gen_lowpart_common (word_mode, value);
1383
1384       if (word && (value != word))
1385         value = word;
1386       else
1387         value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1388       value_mode = word_mode;
1389     }
1390
1391   total_bits = GET_MODE_BITSIZE (value_mode);
1392
       /* Store the field one piece at a time.  Each piece is THISSIZE bits
          wide and lies within a single UNIT-bit chunk of OP0; BITSDONE
          counts the bits stored so far.  */
1393   while (bitsdone < bitsize)
1394     {
1395       unsigned HOST_WIDE_INT thissize;
1396       unsigned HOST_WIDE_INT thispos;
1397       unsigned HOST_WIDE_INT offset;
1398       rtx part;
1399
1400       offset = (bitpos + bitsdone) / unit;
1401       thispos = (bitpos + bitsdone) % unit;
1402
1403       /* When region of bytes we can touch is restricted, decrease
1404          UNIT close to the end of the region as needed.  If op0 is a REG
1405          or SUBREG of REG, don't do this, as there can't be data races
1406          on a register and we can expand shorter code in some cases.  */
1407       if (maybe_ne (bitregion_end, 0U)
1408           && unit > BITS_PER_UNIT
1409           && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1410           && !REG_P (op0)
1411           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1412         {
1413           unit = unit / 2;
1414           continue;
1415         }
1416
1417       /* THISSIZE must not overrun a word boundary.  Otherwise,
1418          store_fixed_bit_field will call us again, and we will mutually
1419          recurse forever.  */
1420       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1421       thissize = MIN (thissize, unit - thispos);
1422
           /* Mask selecting the low THISSIZE bits of a constant VALUE.  It
              is built in unsigned arithmetic, with the full-width case
              handled separately, so that neither the shift nor the
              subtraction can overflow when THISSIZE is within one bit of
              the width of HOST_WIDE_INT.  */
           const unsigned HOST_WIDE_INT part_mask
             = (thissize < HOST_BITS_PER_WIDE_INT
                ? (HOST_WIDE_INT_1U << thissize) - 1
                : HOST_WIDE_INT_M1U);

1423       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1424         {
1425           /* Fetch successively less significant portions.  */
1426           if (CONST_INT_P (value))
1427             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1428                              >> (bitsize - bitsdone - thissize))
1429                             & part_mask);
1430           /* Likewise, but the source is little-endian.  */
1431           else if (reverse)
1432             part = extract_fixed_bit_field (word_mode, value, value_mode,
1433                                             thissize,
1434                                             bitsize - bitsdone - thissize,
1435                                             NULL_RTX, 1, false);
1436           else
1437             /* The args are chosen so that the last part includes the
1438                lsb.  Give extract_fixed_bit_field the value it needs (with
1439                endianness compensation) to fetch the piece we want.  */
1440             part = extract_fixed_bit_field (word_mode, value, value_mode,
1441                                             thissize,
1442                                             total_bits - bitsize + bitsdone,
1443                                             NULL_RTX, 1, false);
1444         }
1445       else
1446         {
1447           /* Fetch successively more significant portions.  */
1448           if (CONST_INT_P (value))
1449             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1450                              >> bitsdone)
1451                             & part_mask);
1452           /* Likewise, but the source is big-endian.  */
1453           else if (reverse)
1454             part = extract_fixed_bit_field (word_mode, value, value_mode,
1455                                             thissize,
1456                                             total_bits - bitsdone - thissize,
1457                                             NULL_RTX, 1, false);
1458           else
1459             part = extract_fixed_bit_field (word_mode, value, value_mode,
1460                                             thissize, bitsdone, NULL_RTX,
1461                                             1, false);
1462         }
1463
1464       /* If OP0 is a register, then handle OFFSET here.  */
1465       rtx op0_piece = op0;
1466       opt_scalar_int_mode op0_piece_mode = op0_mode;
1467       if (SUBREG_P (op0) || REG_P (op0))
1468         {
1469           scalar_int_mode imode;
1470           if (op0_mode.exists (&imode)
1471               && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1472             {
                   /* OP0 is narrower than a word, so a nonzero OFFSET lies
                      outside it; const0_rtx marks that case for the check
                      below.  */
1473               if (offset)
1474                 op0_piece = const0_rtx;
1475             }
1476           else
1477             {
1478               op0_piece = operand_subword_force (op0,
1479                                                  offset * unit / BITS_PER_WORD,
1480                                                  GET_MODE (op0));
1481               op0_piece_mode = word_mode;
1482             }
               /* Reduce OFFSET to a position within the selected word.  */
1483           offset &= BITS_PER_WORD / unit - 1;
1484         }
1485
1486       /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is const0_rtx,
1487          it is just an out-of-bounds access.  Ignore it.  */
1488       if (op0_piece != const0_rtx)
1489         store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1490                                offset * unit + thispos, bitregion_start,
1491                                bitregion_end, part, word_mode, reverse);
1492       bitsdone += thissize;
1493     }
1494 }
1495 \f
1496 /* A subroutine of extract_bit_field_1 that converts return value X
1497    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1498    to extract_bit_field.  */
1499
1500 static rtx
1501 convert_extracted_bit_field (rtx x, machine_mode mode,
1502                              machine_mode tmode, bool unsignedp)
1503 {
1504   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1505     return x;
1506
1507   /* If the x mode is not a scalar integral, first convert to the
1508      integer mode of that size and then access it as a floating-point
1509      value via a SUBREG.  */
1510   if (!SCALAR_INT_MODE_P (tmode))
1511     {
1512       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1513       x = convert_to_mode (int_mode, x, unsignedp);
1514       x = force_reg (int_mode, x);
1515       return gen_lowpart (tmode, x);
1516     }
1517
1518   return convert_to_mode (tmode, x, unsignedp);
1519 }
1520
1521 /* Try to use an ext(z)v pattern to extract a field from OP0.
1522    Return the extracted value on success, otherwise return null.
1523    EXTV describes the extraction instruction to use.  If OP0_MODE
1524    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1525    The other arguments are as for extract_bit_field.  */
1526
1527 static rtx
1528 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1529                               opt_scalar_int_mode op0_mode,
1530                               unsigned HOST_WIDE_INT bitsize,
1531                               unsigned HOST_WIDE_INT bitnum,
1532                               int unsignedp, rtx target,
1533                               machine_mode mode, machine_mode tmode)
1534 {
1535   class expand_operand ops[4];
1536   rtx spec_target = target;
1537   rtx spec_target_subreg = 0;
1538   scalar_int_mode ext_mode = extv->field_mode;
1539   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1540
1541   if (bitsize == 0 || unit < bitsize)
1542     return NULL_RTX;
1543
1544   if (MEM_P (op0))
1545     /* Get a reference to the first byte of the field.  */
1546     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1547                                 &bitnum);
1548   else
1549     {
1550       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1551       if (BYTES_BIG_ENDIAN)
1552         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1553
1554       /* If op0 is a register, we need it in EXT_MODE to make it
1555          acceptable to the format of ext(z)v.  */
1556       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1557         return NULL_RTX;
1558       if (REG_P (op0) && op0_mode.require () != ext_mode)
1559         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1560     }
1561
1562   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1563      "backwards" from the size of the unit we are extracting from.
1564      Otherwise, we count bits from the most significant on a
1565      BYTES/BITS_BIG_ENDIAN machine.  */
1566
1567   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1568     bitnum = unit - bitsize - bitnum;
1569
1570   if (target == 0)
1571     target = spec_target = gen_reg_rtx (tmode);
1572
1573   if (GET_MODE (target) != ext_mode)
1574     {
1575       rtx temp;
1576       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1577          between the mode of the extraction (word_mode) and the target
1578          mode.  Instead, create a temporary and use convert_move to set
1579          the target.  */
1580       if (REG_P (target)
1581           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1582           && (temp = gen_lowpart_if_possible (ext_mode, target)))
1583         {
1584           target = temp;
1585           if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1586             spec_target_subreg = target;
1587         }
1588       else
1589         target = gen_reg_rtx (ext_mode);
1590     }
1591
1592   create_output_operand (&ops[0], target, ext_mode);
1593   create_fixed_operand (&ops[1], op0);
1594   create_integer_operand (&ops[2], bitsize);
1595   create_integer_operand (&ops[3], bitnum);
1596   if (maybe_expand_insn (extv->icode, 4, ops))
1597     {
1598       target = ops[0].value;
1599       if (target == spec_target)
1600         return target;
1601       if (target == spec_target_subreg)
1602         return spec_target;
1603       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1604     }
1605   return NULL_RTX;
1606 }
1607
1608 /* See whether it would be valid to extract the part of OP0 described
1609    by BITNUM and BITSIZE into a value of mode MODE using a subreg
1610    operation.  Return the subreg if so, otherwise return null.  */
1611
1612 static rtx
1613 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1614                              poly_uint64 bitsize, poly_uint64 bitnum)
1615 {
1616   poly_uint64 bytenum;
1617   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1618       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1619       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1620       && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1621     return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1622   return NULL_RTX;
1623 }
1624
1625 /* A subroutine of extract_bit_field, with the same arguments.
1626    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1627    if we can find no other means of implementing the operation.
1628    if FALLBACK_P is false, return NULL instead.  */
1629
1630 static rtx
1631 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1632                      int unsignedp, rtx target, machine_mode mode,
1633                      machine_mode tmode, bool reverse, bool fallback_p,
1634                      rtx *alt_rtl)
1635 {
1636   rtx op0 = str_rtx;
1637   machine_mode mode1;
1638
1639   if (tmode == VOIDmode)
1640     tmode = mode;
1641
1642   while (GET_CODE (op0) == SUBREG)
1643     {
1644       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1645       op0 = SUBREG_REG (op0);
1646     }
1647
1648   /* If we have an out-of-bounds access to a register, just return an
1649      uninitialized register of the required mode.  This can occur if the
1650      source code contains an out-of-bounds access to a small array.  */
1651   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1652     return gen_reg_rtx (tmode);
1653
1654   if (REG_P (op0)
1655       && mode == GET_MODE (op0)
1656       && known_eq (bitnum, 0U)
1657       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1658     {
1659       if (reverse)
1660         op0 = flip_storage_order (mode, op0);
1661       /* We're trying to extract a full register from itself.  */
1662       return op0;
1663     }
1664
1665   /* First try to check for vector from vector extractions.  */
1666   if (VECTOR_MODE_P (GET_MODE (op0))
1667       && !MEM_P (op0)
1668       && VECTOR_MODE_P (tmode)
1669       && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1670       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1671     {
1672       machine_mode new_mode = GET_MODE (op0);
1673       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1674         {
1675           scalar_mode inner_mode = GET_MODE_INNER (tmode);
1676           poly_uint64 nunits;
1677           if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1678                            GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1679               || !related_vector_mode (tmode, inner_mode,
1680                                        nunits).exists (&new_mode)
1681               || maybe_ne (GET_MODE_SIZE (new_mode),
1682                            GET_MODE_SIZE (GET_MODE (op0))))
1683             new_mode = VOIDmode;
1684         }
1685       poly_uint64 pos;
1686       if (new_mode != VOIDmode
1687           && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1688               != CODE_FOR_nothing)
1689           && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1690         {
1691           class expand_operand ops[3];
1692           machine_mode outermode = new_mode;
1693           machine_mode innermode = tmode;
1694           enum insn_code icode
1695             = convert_optab_handler (vec_extract_optab, outermode, innermode);
1696
1697           if (new_mode != GET_MODE (op0))
1698             op0 = gen_lowpart (new_mode, op0);
1699           create_output_operand (&ops[0], target, innermode);
1700           ops[0].target = 1;
1701           create_input_operand (&ops[1], op0, outermode);
1702           create_integer_operand (&ops[2], pos);
1703           if (maybe_expand_insn (icode, 3, ops))
1704             {
1705               if (alt_rtl && ops[0].target)
1706                 *alt_rtl = target;
1707               target = ops[0].value;
1708               if (GET_MODE (target) != mode)
1709                 return gen_lowpart (tmode, target);
1710               return target;
1711             }
1712         }
1713     }
1714
1715   /* See if we can get a better vector mode before extracting.  */
1716   if (VECTOR_MODE_P (GET_MODE (op0))
1717       && !MEM_P (op0)
1718       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1719     {
1720       machine_mode new_mode;
1721
1722       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1723         new_mode = MIN_MODE_VECTOR_FLOAT;
1724       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1725         new_mode = MIN_MODE_VECTOR_FRACT;
1726       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1727         new_mode = MIN_MODE_VECTOR_UFRACT;
1728       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1729         new_mode = MIN_MODE_VECTOR_ACCUM;
1730       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1731         new_mode = MIN_MODE_VECTOR_UACCUM;
1732       else
1733         new_mode = MIN_MODE_VECTOR_INT;
1734
1735       FOR_EACH_MODE_FROM (new_mode, new_mode)
1736         if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1737             && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1738             && targetm.vector_mode_supported_p (new_mode)
1739             && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1740           break;
1741       if (new_mode != VOIDmode)
1742         op0 = gen_lowpart (new_mode, op0);
1743     }
1744
1745   /* Use vec_extract patterns for extracting parts of vectors whenever
1746      available.  If that fails, see whether the current modes and bitregion
1747      give a natural subreg.  */
1748   machine_mode outermode = GET_MODE (op0);
1749   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1750     {
1751       scalar_mode innermode = GET_MODE_INNER (outermode);
1752       enum insn_code icode
1753         = convert_optab_handler (vec_extract_optab, outermode, innermode);
1754       poly_uint64 pos;
1755       if (icode != CODE_FOR_nothing
1756           && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1757           && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1758         {
1759           class expand_operand ops[3];
1760
1761           create_output_operand (&ops[0], target, innermode);
1762           ops[0].target = 1;
1763           create_input_operand (&ops[1], op0, outermode);
1764           create_integer_operand (&ops[2], pos);
1765           if (maybe_expand_insn (icode, 3, ops))
1766             {
1767               if (alt_rtl && ops[0].target)
1768                 *alt_rtl = target;
1769               target = ops[0].value;
1770               if (GET_MODE (target) != mode)
1771                 return gen_lowpart (tmode, target);
1772               return target;
1773             }
1774         }
1775       /* Using subregs is useful if we're extracting one register vector
1776          from a multi-register vector.  extract_bit_field_as_subreg checks
1777          for valid bitsize and bitnum, so we don't need to do that here.  */
1778       if (VECTOR_MODE_P (mode))
1779         {
1780           rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1781           if (sub)
1782             return sub;
1783         }
1784     }
1785
1786   /* Make sure we are playing with integral modes.  Pun with subregs
1787      if we aren't.  */
1788   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1789   scalar_int_mode imode;
1790   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1791     {
1792       if (MEM_P (op0))
1793         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1794                                             0, MEM_SIZE (op0));
1795       else if (op0_mode.exists (&imode))
1796         {
1797           op0 = gen_lowpart (imode, op0);
1798
1799           /* If we got a SUBREG, force it into a register since we
1800              aren't going to be able to do another SUBREG on it.  */
1801           if (GET_CODE (op0) == SUBREG)
1802             op0 = force_reg (imode, op0);
1803         }
1804       else
1805         {
1806           poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1807           rtx mem = assign_stack_temp (GET_MODE (op0), size);
1808           emit_move_insn (mem, op0);
1809           op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1810         }
1811     }
1812
1813   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1814      If that's wrong, the solution is to test for it and set TARGET to 0
1815      if needed.  */
1816
1817   /* Get the mode of the field to use for atomic access or subreg
1818      conversion.  */
1819   if (!SCALAR_INT_MODE_P (tmode)
1820       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1821     mode1 = mode;
1822   gcc_assert (mode1 != BLKmode);
1823
1824   /* Extraction of a full MODE1 value can be done with a subreg as long
1825      as the least significant bit of the value is the least significant
1826      bit of either OP0 or a word of OP0.  */
1827   if (!MEM_P (op0) && !reverse)
1828     {
1829       rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1830       if (sub)
1831         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1832     }
1833
1834   /* Extraction of a full MODE1 value can be done with a load as long as
1835      the field is on a byte boundary and is sufficiently aligned.  */
1836   poly_uint64 bytenum;
1837   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1838     {
1839       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1840       if (reverse)
1841         op0 = flip_storage_order (mode1, op0);
1842       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1843     }
1844
1845   /* If we have a memory source and a non-constant bit offset, restrict
1846      the memory to the referenced bytes.  This is a worst-case fallback
1847      but is useful for things like vector booleans.  */
1848   if (MEM_P (op0) && !bitnum.is_constant ())
1849     {
1850       bytenum = bits_to_bytes_round_down (bitnum);
1851       bitnum = num_trailing_bits (bitnum);
1852       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1853       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1854       op0_mode = opt_scalar_int_mode ();
1855     }
1856
1857   /* It's possible we'll need to handle other cases here for
1858      polynomial bitnum and bitsize.  */
1859
1860   /* From here on we need to be looking at a fixed-size insertion.  */
1861   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1862                                      bitnum.to_constant (), unsignedp,
1863                                      target, mode, tmode, reverse, fallback_p);
1864 }
1865
1866 /* Subroutine of extract_bit_field_1, with the same arguments, except
1867    that BITSIZE and BITNUM are constant.  Handle cases specific to
1868    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1869    otherwise OP0 is a BLKmode MEM.  */
1870
1871 static rtx
1872 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1873                             unsigned HOST_WIDE_INT bitsize,
1874                             unsigned HOST_WIDE_INT bitnum, int unsignedp,
1875                             rtx target, machine_mode mode, machine_mode tmode,
1876                             bool reverse, bool fallback_p)
1877 {
1878   /* Handle fields bigger than a word.  */
1879
1880   if (bitsize > BITS_PER_WORD)
1881     {
1882       /* Here we transfer the words of the field
1883          in the order least significant first.
1884          This is because the most significant word is the one which may
1885          be less than full.  */
1886
1887       const bool backwards = WORDS_BIG_ENDIAN;
1888       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1889       unsigned int i;
1890       rtx_insn *last;
1891
1892       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1893         target = gen_reg_rtx (mode);
1894
1895       /* In case we're about to clobber a base register or something
1896          (see gcc.c-torture/execute/20040625-1.c).   */
1897       if (reg_mentioned_p (target, op0))
1898         target = gen_reg_rtx (mode);
1899
1900       /* Indicate for flow that the entire target reg is being set.  */
1901       emit_clobber (target);
1902
1903       /* The mode must be fixed-size, since extract_bit_field_1 handles
1904          extractions from variable-sized objects before calling this
1905          function.  */
1906       unsigned int target_size
1907         = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1908       last = get_last_insn ();
1909       for (i = 0; i < nwords; i++)
1910         {
1911           /* If I is 0, use the low-order word in both field and target;
1912              if I is 1, use the next to lowest word; and so on.  */
1913           /* Word number in TARGET to use.  */
1914           unsigned int wordnum
1915             = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1916           /* Offset from start of field in OP0.  */
1917           unsigned int bit_offset = (backwards ^ reverse
1918                                      ? MAX ((int) bitsize - ((int) i + 1)
1919                                             * BITS_PER_WORD,
1920                                             0)
1921                                      : (int) i * BITS_PER_WORD);
1922           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1923           rtx result_part
1924             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1925                                              bitsize - i * BITS_PER_WORD),
1926                                    bitnum + bit_offset, 1, target_part,
1927                                    mode, word_mode, reverse, fallback_p, NULL);
1928
1929           gcc_assert (target_part);
1930           if (!result_part)
1931             {
1932               delete_insns_since (last);
1933               return NULL;
1934             }
1935
1936           if (result_part != target_part)
1937             emit_move_insn (target_part, result_part);
1938         }
1939
1940       if (unsignedp)
1941         {
1942           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1943              need to be zero'd out.  */
1944           if (target_size > nwords * UNITS_PER_WORD)
1945             {
1946               unsigned int i, total_words;
1947
1948               total_words = target_size / UNITS_PER_WORD;
1949               for (i = nwords; i < total_words; i++)
1950                 emit_move_insn
1951                   (operand_subword (target,
1952                                     backwards ? total_words - i - 1 : i,
1953                                     1, VOIDmode),
1954                    const0_rtx);
1955             }
1956           return target;
1957         }
1958
1959       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1960       target = expand_shift (LSHIFT_EXPR, mode, target,
1961                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1962       return expand_shift (RSHIFT_EXPR, mode, target,
1963                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1964     }
1965
1966   /* If OP0 is a multi-word register, narrow it to the affected word.
1967      If the region spans two words, defer to extract_split_bit_field.  */
1968   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1969     {
1970       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1971         {
1972           if (!fallback_p)
1973             return NULL_RTX;
1974           target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1975                                             unsignedp, reverse);
1976           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1977         }
1978       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1979                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1980       op0_mode = word_mode;
1981       bitnum %= BITS_PER_WORD;
1982     }
1983
1984   /* From here on we know the desired field is smaller than a word.
1985      If OP0 is a register, it too fits within a word.  */
1986   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1987   extraction_insn extv;
1988   if (!MEM_P (op0)
1989       && !reverse
1990       /* ??? We could limit the structure size to the part of OP0 that
1991          contains the field, with appropriate checks for endianness
1992          and TARGET_TRULY_NOOP_TRUNCATION.  */
1993       && get_best_reg_extraction_insn (&extv, pattern,
1994                                        GET_MODE_BITSIZE (op0_mode.require ()),
1995                                        tmode))
1996     {
1997       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1998                                                  bitsize, bitnum,
1999                                                  unsignedp, target, mode,
2000                                                  tmode);
2001       if (result)
2002         return result;
2003     }
2004
2005   /* If OP0 is a memory, try copying it to a register and seeing if a
2006      cheap register alternative is available.  */
2007   if (MEM_P (op0) & !reverse)
2008     {
2009       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2010                                         tmode))
2011         {
2012           rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2013                                                      bitsize, bitnum,
2014                                                      unsignedp, target, mode,
2015                                                      tmode);
2016           if (result)
2017             return result;
2018         }
2019
2020       rtx_insn *last = get_last_insn ();
2021
2022       /* Try loading part of OP0 into a register and extracting the
2023          bitfield from that.  */
2024       unsigned HOST_WIDE_INT bitpos;
2025       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2026                                                0, 0, tmode, &bitpos);
2027       if (xop0)
2028         {
2029           xop0 = copy_to_reg (xop0);
2030           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2031                                             unsignedp, target,
2032                                             mode, tmode, reverse, false, NULL);
2033           if (result)
2034             return result;
2035           delete_insns_since (last);
2036         }
2037     }
2038
2039   if (!fallback_p)
2040     return NULL;
2041
2042   /* Find a correspondingly-sized integer field, so we can apply
2043      shifts and masks to it.  */
2044   scalar_int_mode int_mode;
2045   if (!int_mode_for_mode (tmode).exists (&int_mode))
2046     /* If this fails, we should probably push op0 out to memory and then
2047        do a load.  */
2048     int_mode = int_mode_for_mode (mode).require ();
2049
2050   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2051                                     bitnum, target, unsignedp, reverse);
2052
2053   /* Complex values must be reversed piecewise, so we need to undo the global
2054      reversal, convert to the complex mode and reverse again.  */
2055   if (reverse && COMPLEX_MODE_P (tmode))
2056     {
2057       target = flip_storage_order (int_mode, target);
2058       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2059       target = flip_storage_order (tmode, target);
2060     }
2061   else
2062     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2063
2064   return target;
2065 }
2066
2067 /* Generate code to extract a byte-field from STR_RTX
2068    containing BITSIZE bits, starting at BITNUM,
2069    and put it in TARGET if possible (if TARGET is nonzero).
2070    Regardless of TARGET, we return the rtx for where the value is placed.
2071
2072    STR_RTX is the structure containing the byte (a REG or MEM).
2073    UNSIGNEDP is nonzero if this is an unsigned bit field.
2074    MODE is the natural mode of the field value once extracted.
2075    TMODE is the mode the caller would like the value to have;
2076    but the value may be returned with type MODE instead.
2077
2078    If REVERSE is true, the extraction is to be done in reverse order.
2079
2080    If a TARGET is specified and we can store in it at no extra cost,
2081    we do so, and return TARGET.
2082    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2083    if they are equally easy.
2084
2085    If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2086    then *ALT_RTL is set to TARGET (before legitimziation).  */
2087
2088 rtx
2089 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2090                    int unsignedp, rtx target, machine_mode mode,
2091                    machine_mode tmode, bool reverse, rtx *alt_rtl)
2092 {
2093   machine_mode mode1;
2094
2095   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2096   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2097     mode1 = GET_MODE (str_rtx);
2098   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2099     mode1 = GET_MODE (target);
2100   else
2101     mode1 = tmode;
2102
2103   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2104   scalar_int_mode int_mode;
2105   if (bitsize.is_constant (&ibitsize)
2106       && bitnum.is_constant (&ibitnum)
2107       && is_a <scalar_int_mode> (mode1, &int_mode)
2108       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2109                                      int_mode, 0, 0))
2110     {
2111       /* Extraction of a full INT_MODE value can be done with a simple load.
2112          We know here that the field can be accessed with one single
2113          instruction.  For targets that support unaligned memory,
2114          an unaligned access may be necessary.  */
2115       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2116         {
2117           rtx result = adjust_bitfield_address (str_rtx, int_mode,
2118                                                 ibitnum / BITS_PER_UNIT);
2119           if (reverse)
2120             result = flip_storage_order (int_mode, result);
2121           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2122           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2123         }
2124
2125       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2126                                       &ibitnum);
2127       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2128       str_rtx = copy_to_reg (str_rtx);
2129       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2130                                   target, mode, tmode, reverse, true, alt_rtl);
2131     }
2132
2133   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2134                               target, mode, tmode, reverse, true, alt_rtl);
2135 }
2136 \f
2137 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2138    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2139    otherwise OP0 is a BLKmode MEM.
2140
2141    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2142    If REVERSE is true, the extraction is to be done in reverse order.
2143
2144    If TARGET is nonzero, attempts to store the value there
2145    and return TARGET, but this is not guaranteed.
2146    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2147
2148 static rtx
2149 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2150                          opt_scalar_int_mode op0_mode,
2151                          unsigned HOST_WIDE_INT bitsize,
2152                          unsigned HOST_WIDE_INT bitnum, rtx target,
2153                          int unsignedp, bool reverse)
2154 {
2155   scalar_int_mode mode;
2156   if (MEM_P (op0))
2157     {
2158       if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2159                           BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2160         /* The only way this should occur is if the field spans word
2161            boundaries.  */
2162         return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2163                                         unsignedp, reverse);
2164
2165       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2166     }
2167   else
2168     mode = op0_mode.require ();
2169
2170   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2171                                     target, unsignedp, reverse);
2172 }
2173
2174 /* Helper function for extract_fixed_bit_field, extracts
2175    the bit field always using MODE, which is the mode of OP0.
2176    The other arguments are as for extract_fixed_bit_field.  */
     /* Two strategies are used, selected by UNSIGNEDP:

        - UNSIGNEDP nonzero: shift OP0 right (logically) so the field
          starts at bit 0, convert the value to TMODE, and AND it with a
          BITSIZE-wide mask unless the field already ended at the msb
          of MODE.

        - UNSIGNEDP zero: move the value into the narrowest MODE_INT mode
          that has at least BITSIZE + BITNUM bits, shift left so the
          field's msb becomes that mode's msb, then shift right
          arithmetically.  The value returned on this path is in that
          narrowed mode; this function does not convert it to TMODE.  */
2177
2178 static rtx
2179 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2180                            unsigned HOST_WIDE_INT bitsize,
2181                            unsigned HOST_WIDE_INT bitnum, rtx target,
2182                            int unsignedp, bool reverse)
2183 {
2184   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2185      for invalid input, such as extract equivalent of f5 from
2186      gcc.dg/pr48335-2.c.  */
2187
2188   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2189     /* BITNUM is the distance between our msb and that of OP0.
2190        Convert it to the distance from the lsb.  */
2191     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2192
2193   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2194      We have reduced the big-endian case to the little-endian case.  */
2195   if (reverse)
2196     op0 = flip_storage_order (mode, op0);
2197
2198   if (unsignedp)
2199     {
2200       if (bitnum)
2201         {
2202           /* If the field does not already start at the lsb,
2203              shift it so it does.  */
2204           /* Maybe propagate the target for the shift.  */
2205           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
               /* The shift is done in MODE, so TARGET is only offered as
                  its destination when the result mode TMODE is MODE too.  */
2206           if (tmode != mode)
2207             subtarget = 0;
               /* Last argument 1 requests a logical (zero-filling) shift.  */
2208           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2209         }
2210       /* Convert the value to the desired mode.  TMODE must also be a
2211          scalar integer for this conversion to make sense, since we
2212          shouldn't reinterpret the bits.  */
2213       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2214       if (mode != new_mode)
2215         op0 = convert_to_mode (new_mode, op0, 1);
2216
2217       /* Unless the msb of the field used to be the msb when we shifted,
2218          mask out the upper bits.  */
2219
           /* BITNUM here is the (possibly endian-adjusted) lsb position, so
              BITNUM + BITSIZE is one past the field's msb within MODE.  */
2220       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2221         return expand_binop (new_mode, and_optab, op0,
2222                              mask_rtx (new_mode, 0, bitsize, 0),
2223                              target, 1, OPTAB_LIB_WIDEN);
2224       return op0;
2225     }
2226
2227   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2228      then arithmetic-shift its lsb to the lsb of the word.  */
2229   op0 = force_reg (mode, op0);
2230
2231   /* Find the narrowest integer mode that contains the field.  */
2232
       /* The loop stops at the first MODE_INT mode whose width covers
          BITSIZE + BITNUM; MODE is then replaced by that mode.  */
2233   opt_scalar_int_mode mode_iter;
2234   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2235     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2236       break;
2237
2238   mode = mode_iter.require ();
2239   op0 = convert_to_mode (mode, op0, 0);
2240
       /* From here on the shifts are done in the (possibly narrowed) MODE,
          so TARGET is dropped when its mode TMODE does not match.  */
2241   if (mode != tmode)
2242     target = 0;
2243
2244   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2245     {
           /* Number of bits between the field's msb and the msb of MODE.  */
2246       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2247       /* Maybe propagate the target for the shift.  */
2248       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2249       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2250     }
2251
       /* The field's msb is now the msb of MODE.  The final right shift is
          arithmetic (last argument 0) and brings the field's lsb to bit 0.  */
2252   return expand_shift (RSHIFT_EXPR, mode, op0,
2253                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2254 }
2255
2256 /* Build and return the integer constant rtx (CONST_INT or CONST_DOUBLE)
2257    of mode MODE whose value is VALUE shifted left by BITPOS bits.  */
2258
2259 static rtx
2260 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value, int bitpos)
2261 {
2262   /* Do the shift as a wide-int operation, then wrap the result as an rtx.  */
2263   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2264 }
2265 \f
2266 /* Extract a bit field that is split across two words
2267    and return an RTX for the result.
2268
2269    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2270    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2271    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2272    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2273    a BLKmode MEM.
2274
2275    If REVERSE is true, the extraction is to be done in reverse order.  */
     /* The field is read in pieces that never cross a UNIT boundary.  Each
        piece is extracted as an unsigned word_mode value, shifted to its
        place and IORed into the running result, so the value returned is
        in word_mode.  Sign extension, if requested, is applied once at the
        end with a left shift followed by an arithmetic right shift.  */
2276
2277 static rtx
2278 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2279                          unsigned HOST_WIDE_INT bitsize,
2280                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2281                          bool reverse)
2282 {
       /* UNIT is the width in bits of the chunks OP0 is accessed in;
          BITSDONE counts the field bits extracted so far.  */
2283   unsigned int unit;
2284   unsigned int bitsdone = 0;
2285   rtx result = NULL_RTX;
2286   int first = 1;
2287
2288   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2289      much at a time.  */
2290   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2291     unit = BITS_PER_WORD;
2292   else
2293     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2294
2295   while (bitsdone < bitsize)
2296     {
2297       unsigned HOST_WIDE_INT thissize;
2298       rtx part;
2299       unsigned HOST_WIDE_INT thispos;
2300       unsigned HOST_WIDE_INT offset;
2301
           /* OFFSET is the index of the UNIT holding the next bit to
              extract and THISPOS is that bit's position inside the UNIT.  */
2302       offset = (bitpos + bitsdone) / unit;
2303       thispos = (bitpos + bitsdone) % unit;
2304
2305       /* THISSIZE must not overrun a word boundary.  Otherwise,
2306          extract_fixed_bit_field will call us again, and we will mutually
2307          recurse forever.  */
2308       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2309       thissize = MIN (thissize, unit - thispos);
2310
2311       /* If OP0 is a register, then handle OFFSET here.  */
2312       rtx op0_piece = op0;
2313       opt_scalar_int_mode op0_piece_mode = op0_mode;
2314       if (SUBREG_P (op0) || REG_P (op0))
2315         {
               /* For registers UNIT is BITS_PER_WORD, so OFFSET is a word
                  index.  Once that word has been selected the remaining
                  offset within the piece is zero.  */
2316           op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2317           op0_piece_mode = word_mode;
2318           offset = 0;
2319         }
2320
2321       /* Extract the parts in bit-counting order,
2322          whose meaning is determined by BYTES_PER_UNIT.
2323          OFFSET is in UNITs, and UNIT is in bits.  */
           /* The piece is always extracted unsigned (argument 1) and with
              no suggested target (argument 0), whatever UNSIGNEDP is.  */
2324       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2325                                       thissize, offset * unit + thispos,
2326                                       0, 1, reverse);
2327       bitsdone += thissize;
2328
2329       /* Shift this part into place for the result.  */
2330       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2331         {
               /* Earlier pieces are more significant: shift left by the
                  number of field bits still to be extracted.  */
2332           if (bitsize != bitsdone)
2333             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2334                                  bitsize - bitsdone, 0, 1);
2335         }
2336       else
2337         {
               /* Earlier pieces are less significant: shift left by the
                  number of field bits extracted before this piece.  */
2338           if (bitsdone != thissize)
2339             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2340                                  bitsdone - thissize, 0, 1);
2341         }
2342
2343       if (first)
2344         result = part;
2345       else
2346         /* Combine the parts with bitwise or.  This works
2347            because we extracted each part as an unsigned bit field.  */
2348         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2349                                OPTAB_LIB_WIDEN);
2350
2351       first = 0;
2352     }
2353
2354   /* Unsigned bit field: we are done.  */
2355   if (unsignedp)
2356     return result;
2357   /* Signed bit field: sign-extend with two arithmetic shifts.  */
       /* Both shifts use the same count, BITS_PER_WORD - BITSIZE: the first
          puts the field's msb at the msb of the word, the second brings the
          field back down.  */
2358   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2359                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2360   return expand_shift (RSHIFT_EXPR, word_mode, result,
2361                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2362 }
2363 \f
2364 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2365    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2366    MODE, fill the upper bits with zeros.  Fail if the layout of either
2367    mode is unknown (as for CC modes) or if the extraction would involve
2368    unprofitable mode punning.  Return the value on success, otherwise
2369    return null.
2370
2371    This is different from gen_lowpart* in these respects:
2372
2373      - the returned value must always be considered an rvalue
2374
2375      - when MODE is wider than SRC_MODE, the extraction involves
2376        a zero extension
2377
2378      - when MODE is smaller than SRC_MODE, the extraction involves
2379        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2380
2381    In other words, this routine performs a computation, whereas the
2382    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2383    operations.  */
     /* The cases are tried in this order: identical modes, constant SRC,
        rejection of MODE_CC modes, same-size tieable modes, and finally a
        conversion that goes through the integer modes obtained from
        int_mode_for_mode for SRC_MODE and MODE.  */
2384
2385 rtx
2386 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2387 {
2388   scalar_int_mode int_mode, src_int_mode;
2389
       /* Nothing to do when the modes already agree.  */
2390   if (mode == src_mode)
2391     return src;
2392
2393   if (CONSTANT_P (src))
2394     {
2395       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2396          fails, it will happily create (subreg (symbol_ref)) or similar
2397          invalid SUBREGs.  */
2398       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2399       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2400       if (ret)
2401         return ret;
2402
           /* Give up on mode-less constants and on subregs that
              validate_subreg rejects.  */
2403       if (GET_MODE (src) == VOIDmode
2404           || !validate_subreg (mode, src_mode, src, byte))
2405         return NULL_RTX;
2406
           /* The constant could not be simplified: put it in a register
              and take an explicit SUBREG of that register.  */
2407       src = force_reg (GET_MODE (src), src);
2408       return gen_rtx_SUBREG (mode, src, byte);
2409     }
2410
2411   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2412     return NULL_RTX;
2413
       /* Same-size, tieable modes: use a plain lowpart if one can be formed;
          otherwise fall through to the integer-mode path below.  */
2414   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2415       && targetm.modes_tieable_p (mode, src_mode))
2416     {
2417       rtx x = gen_lowpart_common (mode, src);
2418       if (x)
2419         return x;
2420     }
2421
       /* Both modes need an integer counterpart for the conversion below.  */
2422   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2423       || !int_mode_for_mode (mode).exists (&int_mode))
2424     return NULL_RTX;
2425
       /* Each mode must be tieable with its integer counterpart.  */
2426   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2427     return NULL_RTX;
2428   if (!targetm.modes_tieable_p (int_mode, mode))
2429     return NULL_RTX;
2430
2431   src = gen_lowpart (src_int_mode, src);
2432   if (!validate_subreg (int_mode, src_int_mode, src,
2433                         subreg_lowpart_offset (int_mode, src_int_mode)))
2434     return NULL_RTX;
2435
       /* Convert between the two integer modes with UNSIGNEDP true, then
          view the converted value in MODE.  */
2436   src = convert_modes (int_mode, src_int_mode, src, true);
2437   src = gen_lowpart (mode, src);
2438   return src;
2439 }
2440 \f
2441 /* Emit code computing TARGET + INC and leaving the sum in TARGET.  */
2442
2443 void
2444 expand_inc (rtx target, rtx inc)
2445 {
2446   machine_mode mode = GET_MODE (target);
2447   rtx sum = expand_binop (mode, add_optab, target, inc, target, 0,
2448                           OPTAB_LIB_WIDEN);
2449   if (sum != target)
2450     emit_move_insn (target, sum);
2451 }
2452
2453 /* Emit code computing TARGET - DEC and leaving the difference in TARGET.  */
2454
2455 void
2456 expand_dec (rtx target, rtx dec)
2457 {
2458   machine_mode mode = GET_MODE (target);
2459   rtx diff = expand_binop (mode, sub_optab, target, dec, target, 0,
2460                            OPTAB_LIB_WIDEN);
2461   if (diff != target)
2462     emit_move_insn (target, diff);
2463 }
2464 \f
2465 /* Output a shift instruction for expression code CODE,
2466    with SHIFTED being the rtx for the value to shift,
2467    and AMOUNT the rtx for the amount to shift by.
2468    Store the result in the rtx TARGET, if that is convenient.
2469    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2470    Return the rtx for where the value is.
2471    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2472    in which case 0 is returned.  */
     /* CODE is also allowed to be LROTATE_EXPR or RROTATE_EXPR, in which
        case a rotate is expanded.  MODE may be a vector mode; when AMOUNT
        has a vector mode as well, the vector/vector optabs are used.

        After a few special cases (count canonicalization, a 16-bit rotate
        by BITS_PER_UNIT done as a byte swap, a zero count, a cheap left
        shift done as repeated additions) the expansion is attempted with
        OPTAB_DIRECT, then OPTAB_WIDEN, then OPTAB_LIB_WIDEN.  */
2473
2474 static rtx
2475 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2476                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2477 {
       /* TEMP receives the result of the attempt loop below and stays 0
          until an attempt succeeds.  LEFT and ROTATE classify CODE.  The
          five optab variables start as the scalar-count optabs.  */
2478   rtx op1, temp = 0;
2479   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2480   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2481   optab lshift_optab = ashl_optab;
2482   optab rshift_arith_optab = ashr_optab;
2483   optab rshift_uns_optab = lshr_optab;
2484   optab lrotate_optab = rotl_optab;
2485   optab rrotate_optab = rotr_optab;
2486   machine_mode op1_mode;
2487   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2488   int attempt;
2489   bool speed = optimize_insn_for_speed_p ();
2490
2491   op1 = amount;
2492   op1_mode = GET_MODE (op1);
2493
2494   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2495      shift amount is a vector, use the vector/vector shift patterns.  */
2496   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2497     {
2498       lshift_optab = vashl_optab;
2499       rshift_arith_optab = vashr_optab;
2500       rshift_uns_optab = vlshr_optab;
2501       lrotate_optab = vrotl_optab;
2502       rrotate_optab = vrotr_optab;
2503     }
2504
2505   /* Previously detected shift-counts computed by NEGATE_EXPR
2506      and shifted in the other direction; but that does not work
2507      on all machines.  */
2508
       /* When SHIFT_COUNT_TRUNCATED is nonzero, reduce an out-of-range
          constant count modulo the element width, or use the inner value
          of a lowpart SUBREG between scalar integer modes as the count.  */
2509   if (SHIFT_COUNT_TRUNCATED)
2510     {
2511       if (CONST_INT_P (op1)
2512           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2513               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2514         op1 = gen_int_shift_amount (mode,
2515                                     (unsigned HOST_WIDE_INT) INTVAL (op1)
2516                                     % GET_MODE_BITSIZE (scalar_mode));
2517       else if (GET_CODE (op1) == SUBREG
2518                && subreg_lowpart_p (op1)
2519                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2520                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2521         op1 = SUBREG_REG (op1);
2522     }
2523
2524   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2525      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2526      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2527      amount instead.  */
       /* Adding LEFT to the lower bound keeps a left rotate by exactly
          bitsize / 2 unchanged, while a right rotate by that amount falls
          inside the range and is turned into a left rotate.  */
2528   if (rotate
2529       && CONST_INT_P (op1)
2530       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2531                    GET_MODE_BITSIZE (scalar_mode) - 1))
2532     {
2533       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2534                                          - INTVAL (op1)));
2535       left = !left;
2536       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2537     }
2538
2539   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2540      Note that this is not the case for bigger values.  For instance a rotation
2541      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2542      0x04030201 (bswapsi).  */
2543   if (rotate
2544       && CONST_INT_P (op1)
2545       && INTVAL (op1) == BITS_PER_UNIT
2546       && GET_MODE_SIZE (scalar_mode) == 2
2547       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2548     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2549
       /* A zero count needs no code at all.  */
2550   if (op1 == const0_rtx)
2551     return shifted;
2552
2553   /* Check whether it's cheaper to implement a left shift by a constant
2554      bit count by a sequence of additions.  */
2555   if (code == LSHIFT_EXPR
2556       && CONST_INT_P (op1)
2557       && INTVAL (op1) > 0
2558       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2559       && INTVAL (op1) < MAX_BITS_PER_WORD
2560       && (shift_cost (speed, mode, INTVAL (op1))
2561           > INTVAL (op1) * add_cost (speed, mode))
2562       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2563     {
           /* Each iteration adds the value to itself, i.e. doubles it, so
              INTVAL (op1) iterations shift left by that many bits.  */
2564       int i;
2565       for (i = 0; i < INTVAL (op1); i++)
2566         {
2567           temp = force_reg (mode, shifted);
2568           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2569                                   unsignedp, OPTAB_LIB_WIDEN);
2570         }
2571       return shifted;
2572     }
2573
       /* Try increasingly permissive optab methods; the loop ends as soon
          as one attempt leaves a result in TEMP.  */
2574   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2575     {
2576       enum optab_methods methods;
2577
2578       if (attempt == 0)
2579         methods = OPTAB_DIRECT;
2580       else if (attempt == 1)
2581         methods = OPTAB_WIDEN;
2582       else
2583         methods = OPTAB_LIB_WIDEN;
2584
2585       if (rotate)
2586         {
2587           /* Widening does not work for rotation.  */
2588           if (methods == OPTAB_WIDEN)
2589             continue;
2590           else if (methods == OPTAB_LIB_WIDEN)
2591             {
2592               /* If we have been unable to open-code this by a rotation,
2593                  do it as the IOR of two shifts.  I.e., to rotate A
2594                  by N bits, compute
2595                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2596                  where C is the bitsize of A.
2597
2598                  It is theoretically possible that the target machine might
2599                  not be able to perform either shift and hence we would
2600                  be making two libcalls rather than just the one for the
2601                  shift (similarly if IOR could not be done).  We will allow
2602                  this extremely unlikely lossage to avoid complicating the
2603                  code below.  */
2604
                   /* TARGET is not offered to the second shift when it is
                      SHIFTED itself, since SHIFTED is an input to both
                      shifts.  */
2605               rtx subtarget = target == shifted ? 0 : target;
2606               rtx new_amount, other_amount;
2607               rtx temp1;
2608
2609               new_amount = op1;
                   /* NOTE(review): op1 == const0_rtx already returned before
                      the attempt loop and OP1 is not reassigned in between,
                      so this test looks unreachable -- confirm before
                      removing.  */
2610               if (op1 == const0_rtx)
2611                 return shifted;
2612               else if (CONST_INT_P (op1))
2613                 other_amount = gen_int_shift_amount
2614                   (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2615               else
2616                 {
                       /* Variable count: the opposite count is
                          (-OP1) & (precision - 1), built in OP1's mode.  */
2617                   other_amount
2618                     = simplify_gen_unary (NEG, GET_MODE (op1),
2619                                           op1, GET_MODE (op1));
2620                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2621                   other_amount
2622                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2623                                            gen_int_mode (mask, GET_MODE (op1)));
2624                 }
2625
2626               shifted = force_reg (mode, shifted);
2627
                   /* Both halves are logical shifts (UNSIGNEDP argument 1),
                      in opposite directions, and are then IORed together.  */
2628               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2629                                      mode, shifted, new_amount, 0, 1);
2630               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2631                                       mode, shifted, other_amount,
2632                                       subtarget, 1);
2633               return expand_binop (mode, ior_optab, temp, temp1, target,
2634                                    unsignedp, methods);
2635             }
2636
               /* OPTAB_DIRECT attempt: use the rotate optab itself.  */
2637           temp = expand_binop (mode,
2638                                left ? lrotate_optab : rrotate_optab,
2639                                shifted, op1, target, unsignedp, methods);
2640         }
2641       else if (unsignedp)
2642         temp = expand_binop (mode,
2643                              left ? lshift_optab : rshift_uns_optab,
2644                              shifted, op1, target, unsignedp, methods);
2645
2646       /* Do arithmetic shifts.
2647          Also, if we are going to widen the operand, we can just as well
2648          use an arithmetic right-shift instead of a logical one.  */
2649       if (temp == 0 && ! rotate
2650           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2651         {
2652           enum optab_methods methods1 = methods;
2653
2654           /* If trying to widen a log shift to an arithmetic shift,
2655              don't accept an arithmetic shift of the same size.  */
2656           if (unsignedp)
2657             methods1 = OPTAB_MUST_WIDEN;
2658
2659           /* Arithmetic shift */
2660
2661           temp = expand_binop (mode,
2662                                left ? lshift_optab : rshift_arith_optab,
2663                                shifted, op1, target, unsignedp, methods1);
2664         }
2665
2666       /* We used to try extzv here for logical right shifts, but that was
2667          only useful for one machine, the VAX, and caused poor code
2668          generation there for lshrdi3, so the code was deleted and a
2669          define_expand for lshrsi3 was added to vax.md.  */
2670     }
2671
       /* Every attempt failed if TEMP is still null; that is only tolerated
          when the caller passed MAY_FAIL.  */
2672   gcc_assert (temp != NULL_RTX || may_fail);
2673   return temp;
2674 }
2675
2676 /* Expand a shift or rotate (selected by CODE) of SHIFTED, a value of
2677    mode MODE, by the count AMOUNT, which is given as a poly_int64
2678    rather than as an rtx.  TARGET is only a suggestion for where to
2679    put the result.  A nonzero UNSIGNEDP requests a logical shift,
2680    zero an arithmetic one.  The rtx holding the result is returned;
2681    failure to expand aborts the compilation.  */
2682
2683 rtx
2684 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2685               poly_int64 amount, rtx target, int unsignedp)
2686 {
2687   /* Turn the count into an rtx with gen_int_shift_amount.  */
2688   rtx count = gen_int_shift_amount (mode, amount);
2689   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2690 }
2691
2692 /* Like expand_shift, but give back 0 instead of aborting on failure.  */
2693
2694 static rtx
2695 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2696                     int amount, rtx target, int unsignedp)
2697 {
2698   rtx count = GEN_INT (amount);
2699   return expand_shift_1 (code, mode, shifted, count, target, unsignedp, true);
2700 }
2701
2702 /* Expand a shift or rotate (selected by CODE) of SHIFTED, a value of
2703    mode MODE, where the count is given as the tree AMOUNT rather than
2704    as an rtx.  AMOUNT is first expanded with expand_normal.  TARGET is
2705    only a suggestion for where to put the result.  A nonzero UNSIGNEDP
2706    requests a logical shift, zero an arithmetic one.  The rtx holding
2707    the result is returned.  */
2708
2709 rtx
2710 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2711                        tree amount, rtx target, int unsignedp)
2712 {
2713   rtx amount_rtx = expand_normal (amount);
2714   return expand_shift_1 (code, mode, shifted, amount_rtx, target, unsignedp);
2715 }
2716
2717 \f
2718 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2719                         const struct mult_cost *, machine_mode mode);
2720 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2721                               const struct algorithm *, enum mult_variant);
2722 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2723 static rtx extract_high_half (scalar_int_mode, rtx);
2724 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2725 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2726                                        int, int);
2727 /* Compute the best algorithm for multiplying by T and store it in *ALG_OUT.
2728    The algorithm must cost less than COST_LIMIT.
2729    If ALG_OUT->cost >= COST_LIMIT, no algorithm was found and all
2730    other fields of *ALG_OUT are undefined.
2731    MODE is the machine mode of the multiplication.  */
2732
2733 static void
2734 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2735             const struct mult_cost *cost_limit, machine_mode mode)
2736 {
2737   int m;
2738   struct algorithm *alg_in, *best_alg;
2739   struct mult_cost best_cost;
2740   struct mult_cost new_limit;
2741   int op_cost, op_latency;
2742   unsigned HOST_WIDE_INT orig_t = t;
2743   unsigned HOST_WIDE_INT q;
2744   int maxm, hash_index;
2745   bool cache_hit = false;
2746   enum alg_code cache_alg = alg_zero;
2747   bool speed = optimize_insn_for_speed_p ();
2748   scalar_int_mode imode;
2749   struct alg_hash_entry *entry_ptr;
2750
2751   /* Indicate that no algorithm is yet found.  If no algorithm
2752      is found, this value will be returned and indicate failure.  */
2753   alg_out->cost.cost = cost_limit->cost + 1;
2754   alg_out->cost.latency = cost_limit->latency + 1;
2755
2756   if (cost_limit->cost < 0
2757       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2758     return;
2759
2760   /* Be prepared for vector modes.  */
2761   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2762
2763   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2764
2765   /* Restrict the bits of "t" to the multiplication's mode.  */
2766   t &= GET_MODE_MASK (imode);
2767
2768   /* t == 1 can be done in zero cost.  */
2769   if (t == 1)
2770     {
2771       alg_out->ops = 1;
2772       alg_out->cost.cost = 0;
2773       alg_out->cost.latency = 0;
2774       alg_out->op[0] = alg_m;
2775       return;
2776     }
2777
2778   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2779      fail now.  */
2780   if (t == 0)
2781     {
2782       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2783         return;
2784       else
2785         {
2786           alg_out->ops = 1;
2787           alg_out->cost.cost = zero_cost (speed);
2788           alg_out->cost.latency = zero_cost (speed);
2789           alg_out->op[0] = alg_zero;
2790           return;
2791         }
2792     }
2793
2794   /* We'll be needing a couple extra algorithm structures now.  */
2795
2796   alg_in = XALLOCA (struct algorithm);
2797   best_alg = XALLOCA (struct algorithm);
2798   best_cost = *cost_limit;
2799
2800   /* Compute the hash index.  */
2801   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2802
2803   /* See if we already know what to do for T.  */
2804   entry_ptr = alg_hash_entry_ptr (hash_index);
2805   if (entry_ptr->t == t
2806       && entry_ptr->mode == mode
2807       && entry_ptr->speed == speed
2808       && entry_ptr->alg != alg_unknown)
2809     {
2810       cache_alg = entry_ptr->alg;
2811
2812       if (cache_alg == alg_impossible)
2813         {
2814           /* The cache tells us that it's impossible to synthesize
2815              multiplication by T within entry_ptr->cost.  */
2816           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2817             /* COST_LIMIT is at least as restrictive as the one
2818                recorded in the hash table, in which case we have no
2819                hope of synthesizing a multiplication.  Just
2820                return.  */
2821             return;
2822
2823           /* If we get here, COST_LIMIT is less restrictive than the
2824              one recorded in the hash table, so we may be able to
2825              synthesize a multiplication.  Proceed as if we didn't
2826              have the cache entry.  */
2827         }
2828       else
2829         {
2830           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2831             /* The cached algorithm shows that this multiplication
2832                requires more cost than COST_LIMIT.  Just return.  This
2833                way, we don't clobber this cache entry with
2834                alg_impossible but retain useful information.  */
2835             return;
2836
2837           cache_hit = true;
2838
2839           switch (cache_alg)
2840             {
2841             case alg_shift:
2842               goto do_alg_shift;
2843
2844             case alg_add_t_m2:
2845             case alg_sub_t_m2:
2846               goto do_alg_addsub_t_m2;
2847
2848             case alg_add_factor:
2849             case alg_sub_factor:
2850               goto do_alg_addsub_factor;
2851
2852             case alg_add_t2_m:
2853               goto do_alg_add_t2_m;
2854
2855             case alg_sub_t2_m:
2856               goto do_alg_sub_t2_m;
2857
2858             default:
2859               gcc_unreachable ();
2860             }
2861         }
2862     }
2863
2864   /* If we have a group of zero bits at the low-order part of T, try
2865      multiplying by the remaining bits and then doing a shift.  */
2866
2867   if ((t & 1) == 0)
2868     {
2869     do_alg_shift:
2870       m = ctz_or_zero (t); /* m = number of low zero bits */
2871       if (m < maxm)
2872         {
2873           q = t >> m;
2874           /* The function expand_shift will choose between a shift and
2875              a sequence of additions, so the observed cost is given as
2876              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2877           op_cost = m * add_cost (speed, mode);
2878           if (shift_cost (speed, mode, m) < op_cost)
2879             op_cost = shift_cost (speed, mode, m);
2880           new_limit.cost = best_cost.cost - op_cost;
2881           new_limit.latency = best_cost.latency - op_cost;
2882           synth_mult (alg_in, q, &new_limit, mode);
2883
2884           alg_in->cost.cost += op_cost;
2885           alg_in->cost.latency += op_cost;
2886           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2887             {
2888               best_cost = alg_in->cost;
2889               std::swap (alg_in, best_alg);
2890               best_alg->log[best_alg->ops] = m;
2891               best_alg->op[best_alg->ops] = alg_shift;
2892             }
2893
2894           /* See if treating ORIG_T as a signed number yields a better
2895              sequence.  Try this sequence only for a negative ORIG_T
2896              as it would be useless for a non-negative ORIG_T.  */
2897           if ((HOST_WIDE_INT) orig_t < 0)
2898             {
2899               /* Shift ORIG_T as follows because a right shift of a
2900                  negative-valued signed type is implementation
2901                  defined.  */
2902               q = ~(~orig_t >> m);
2903               /* The function expand_shift will choose between a shift
2904                  and a sequence of additions, so the observed cost is
2905                  given as MIN (m * add_cost(speed, mode),
2906                  shift_cost(speed, mode, m)).  */
2907               op_cost = m * add_cost (speed, mode);
2908               if (shift_cost (speed, mode, m) < op_cost)
2909                 op_cost = shift_cost (speed, mode, m);
2910               new_limit.cost = best_cost.cost - op_cost;
2911               new_limit.latency = best_cost.latency - op_cost;
2912               synth_mult (alg_in, q, &new_limit, mode);
2913
2914               alg_in->cost.cost += op_cost;
2915               alg_in->cost.latency += op_cost;
2916               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2917                 {
2918                   best_cost = alg_in->cost;
2919                   std::swap (alg_in, best_alg);
2920                   best_alg->log[best_alg->ops] = m;
2921                   best_alg->op[best_alg->ops] = alg_shift;
2922                 }
2923             }
2924         }
2925       if (cache_hit)
2926         goto done;
2927     }
2928
2929   /* If we have an odd number, add or subtract one.  */
2930   if ((t & 1) != 0)
2931     {
2932       unsigned HOST_WIDE_INT w;
2933
2934     do_alg_addsub_t_m2:
2935       for (w = 1; (w & t) != 0; w <<= 1)
2936         ;
2937       /* If T was -1, then W will be zero after the loop.  This is another
2938          case where T ends with ...111.  Handling this with (T + 1) and
2939          subtract 1 produces slightly better code and results in algorithm
2940          selection much faster than treating it like the ...0111 case
2941          below.  */
2942       if (w == 0
2943           || (w > 2
2944               /* Reject the case where t is 3.
2945                  Thus we prefer addition in that case.  */
2946               && t != 3))
2947         {
2948           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2949
2950           op_cost = add_cost (speed, mode);
2951           new_limit.cost = best_cost.cost - op_cost;
2952           new_limit.latency = best_cost.latency - op_cost;
2953           synth_mult (alg_in, t + 1, &new_limit, mode);
2954
2955           alg_in->cost.cost += op_cost;
2956           alg_in->cost.latency += op_cost;
2957           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2958             {
2959               best_cost = alg_in->cost;
2960               std::swap (alg_in, best_alg);
2961               best_alg->log[best_alg->ops] = 0;
2962               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2963             }
2964         }
2965       else
2966         {
2967           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2968
2969           op_cost = add_cost (speed, mode);
2970           new_limit.cost = best_cost.cost - op_cost;
2971           new_limit.latency = best_cost.latency - op_cost;
2972           synth_mult (alg_in, t - 1, &new_limit, mode);
2973
2974           alg_in->cost.cost += op_cost;
2975           alg_in->cost.latency += op_cost;
2976           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2977             {
2978               best_cost = alg_in->cost;
2979               std::swap (alg_in, best_alg);
2980               best_alg->log[best_alg->ops] = 0;
2981               best_alg->op[best_alg->ops] = alg_add_t_m2;
2982             }
2983         }
2984
2985       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2986          quickly with a - a * n for some appropriate constant n.  */
2987       m = exact_log2 (-orig_t + 1);
2988       if (m >= 0 && m < maxm)
2989         {
2990           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2991           /* If the target has a cheap shift-and-subtract insn use
2992              that in preference to a shift insn followed by a sub insn.
2993              Assume that the shift-and-sub is "atomic" with a latency
2994              equal to it's cost, otherwise assume that on superscalar
2995              hardware the shift may be executed concurrently with the
2996              earlier steps in the algorithm.  */
2997           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2998             {
2999               op_cost = shiftsub1_cost (speed, mode, m);
3000               op_latency = op_cost;
3001             }
3002           else
3003             op_latency = add_cost (speed, mode);
3004
3005           new_limit.cost = best_cost.cost - op_cost;
3006           new_limit.latency = best_cost.latency - op_latency;
3007           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3008                       &new_limit, mode);
3009
3010           alg_in->cost.cost += op_cost;
3011           alg_in->cost.latency += op_latency;
3012           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3013             {
3014               best_cost = alg_in->cost;
3015               std::swap (alg_in, best_alg);
3016               best_alg->log[best_alg->ops] = m;
3017               best_alg->op[best_alg->ops] = alg_sub_t_m2;
3018             }
3019         }
3020
3021       if (cache_hit)
3022         goto done;
3023     }
3024
3025   /* Look for factors of t of the form
3026      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3027      If we find such a factor, we can multiply by t using an algorithm that
3028      multiplies by q, shift the result by m and add/subtract it to itself.
3029
3030      We search for large factors first and loop down, even if large factors
3031      are less probable than small; if we find a large factor we will find a
3032      good sequence quickly, and therefore be able to prune (by decreasing
3033      COST_LIMIT) the search.  */
3034
3035  do_alg_addsub_factor:
3036   for (m = floor_log2 (t - 1); m >= 2; m--)
3037     {
3038       unsigned HOST_WIDE_INT d;
3039
3040       d = (HOST_WIDE_INT_1U << m) + 1;
3041       if (t % d == 0 && t > d && m < maxm
3042           && (!cache_hit || cache_alg == alg_add_factor))
3043         {
3044           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3045           if (shiftadd_cost (speed, mode, m) <= op_cost)
3046             op_cost = shiftadd_cost (speed, mode, m);
3047
3048           op_latency = op_cost;
3049
3050
3051           new_limit.cost = best_cost.cost - op_cost;
3052           new_limit.latency = best_cost.latency - op_latency;
3053           synth_mult (alg_in, t / d, &new_limit, mode);
3054
3055           alg_in->cost.cost += op_cost;
3056           alg_in->cost.latency += op_latency;
3057           if (alg_in->cost.latency < op_cost)
3058             alg_in->cost.latency = op_cost;
3059           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3060             {
3061               best_cost = alg_in->cost;
3062               std::swap (alg_in, best_alg);
3063               best_alg->log[best_alg->ops] = m;
3064               best_alg->op[best_alg->ops] = alg_add_factor;
3065             }
3066           /* Other factors will have been taken care of in the recursion.  */
3067           break;
3068         }
3069
3070       d = (HOST_WIDE_INT_1U << m) - 1;
3071       if (t % d == 0 && t > d && m < maxm
3072           && (!cache_hit || cache_alg == alg_sub_factor))
3073         {
3074           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3075           if (shiftsub0_cost (speed, mode, m) <= op_cost)
3076             op_cost = shiftsub0_cost (speed, mode, m);
3077
3078           op_latency = op_cost;
3079
3080           new_limit.cost = best_cost.cost - op_cost;
3081           new_limit.latency = best_cost.latency - op_latency;
3082           synth_mult (alg_in, t / d, &new_limit, mode);
3083
3084           alg_in->cost.cost += op_cost;
3085           alg_in->cost.latency += op_latency;
3086           if (alg_in->cost.latency < op_cost)
3087             alg_in->cost.latency = op_cost;
3088           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3089             {
3090               best_cost = alg_in->cost;
3091               std::swap (alg_in, best_alg);
3092               best_alg->log[best_alg->ops] = m;
3093               best_alg->op[best_alg->ops] = alg_sub_factor;
3094             }
3095           break;
3096         }
3097     }
3098   if (cache_hit)
3099     goto done;
3100
3101   /* Try shift-and-add (load effective address) instructions,
3102      i.e. do a*3, a*5, a*9.  */
3103   if ((t & 1) != 0)
3104     {
3105     do_alg_add_t2_m:
3106       q = t - 1;
3107       m = ctz_hwi (q);
3108       if (q && m < maxm)
3109         {
3110           op_cost = shiftadd_cost (speed, mode, m);
3111           new_limit.cost = best_cost.cost - op_cost;
3112           new_limit.latency = best_cost.latency - op_cost;
3113           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3114
3115           alg_in->cost.cost += op_cost;
3116           alg_in->cost.latency += op_cost;
3117           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3118             {
3119               best_cost = alg_in->cost;
3120               std::swap (alg_in, best_alg);
3121               best_alg->log[best_alg->ops] = m;
3122               best_alg->op[best_alg->ops] = alg_add_t2_m;
3123             }
3124         }
3125       if (cache_hit)
3126         goto done;
3127
3128     do_alg_sub_t2_m:
3129       q = t + 1;
3130       m = ctz_hwi (q);
3131       if (q && m < maxm)
3132         {
3133           op_cost = shiftsub0_cost (speed, mode, m);
3134           new_limit.cost = best_cost.cost - op_cost;
3135           new_limit.latency = best_cost.latency - op_cost;
3136           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3137
3138           alg_in->cost.cost += op_cost;
3139           alg_in->cost.latency += op_cost;
3140           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3141             {
3142               best_cost = alg_in->cost;
3143               std::swap (alg_in, best_alg);
3144               best_alg->log[best_alg->ops] = m;
3145               best_alg->op[best_alg->ops] = alg_sub_t2_m;
3146             }
3147         }
3148       if (cache_hit)
3149         goto done;
3150     }
3151
3152  done:
3153   /* If best_cost has not decreased, we have not found any algorithm.  */
3154   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3155     {
3156       /* We failed to find an algorithm.  Record alg_impossible for
3157          this case (that is, <T, MODE, COST_LIMIT>) so that next time
3158          we are asked to find an algorithm for T within the same or
3159          lower COST_LIMIT, we can immediately return to the
3160          caller.  */
3161       entry_ptr->t = t;
3162       entry_ptr->mode = mode;
3163       entry_ptr->speed = speed;
3164       entry_ptr->alg = alg_impossible;
3165       entry_ptr->cost = *cost_limit;
3166       return;
3167     }
3168
3169   /* Cache the result.  */
3170   if (!cache_hit)
3171     {
3172       entry_ptr->t = t;
3173       entry_ptr->mode = mode;
3174       entry_ptr->speed = speed;
3175       entry_ptr->alg = best_alg->op[best_alg->ops];
3176       entry_ptr->cost.cost = best_cost.cost;
3177       entry_ptr->cost.latency = best_cost.latency;
3178     }
3179
3180   /* If we are getting a too long sequence for `struct algorithm'
3181      to record, make this search fail.  */
3182   if (best_alg->ops == MAX_BITS_PER_WORD)
3183     return;
3184
3185   /* Copy the algorithm from temporary space to the space at alg_out.
3186      We avoid using structure assignment because the majority of
3187      best_alg is normally undefined, and this is a critical function.  */
3188   alg_out->ops = best_alg->ops + 1;
3189   alg_out->cost = best_cost;
3190   memcpy (alg_out->op, best_alg->op,
3191           alg_out->ops * sizeof *alg_out->op);
3192   memcpy (alg_out->log, best_alg->log,
3193           alg_out->ops * sizeof *alg_out->log);
3194 }
3195 \f
3196 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3197    Try three variations:
3198
3199        - a shift/add sequence based on VAL itself
3200        - a shift/add sequence based on -VAL, followed by a negation
3201        - a shift/add sequence based on VAL - 1, followed by an addition.
3202
3203    Return true if the cheapest of these cost less than MULT_COST,
3204    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3205
3206 bool
3207 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3208                      struct algorithm *alg, enum mult_variant *variant,
3209                      int mult_cost)
3210 {
3211   struct algorithm alg2;
3212   struct mult_cost limit;
3213   int op_cost;
3214   bool speed = optimize_insn_for_speed_p ();
3215
3216   /* Fail quickly for impossible bounds.  */
3217   if (mult_cost < 0)
3218     return false;
3219
3220   /* Ensure that mult_cost provides a reasonable upper bound.
3221      Any constant multiplication can be performed with less
3222      than 2 * bits additions.  */
3223   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3224   if (mult_cost > op_cost)
3225     mult_cost = op_cost;
3226
3227   *variant = basic_variant;
3228   limit.cost = mult_cost;
3229   limit.latency = mult_cost;
3230   synth_mult (alg, val, &limit, mode);
3231
3232   /* This works only if the inverted value actually fits in an
3233      `unsigned int' */
3234   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3235     {
3236       op_cost = neg_cost (speed, mode);
3237       if (MULT_COST_LESS (&alg->cost, mult_cost))
3238         {
3239           limit.cost = alg->cost.cost - op_cost;
3240           limit.latency = alg->cost.latency - op_cost;
3241         }
3242       else
3243         {
3244           limit.cost = mult_cost - op_cost;
3245           limit.latency = mult_cost - op_cost;
3246         }
3247
3248       synth_mult (&alg2, -val, &limit, mode);
3249       alg2.cost.cost += op_cost;
3250       alg2.cost.latency += op_cost;
3251       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3252         *alg = alg2, *variant = negate_variant;
3253     }
3254
3255   /* This proves very useful for division-by-constant.  */
3256   op_cost = add_cost (speed, mode);
3257   if (MULT_COST_LESS (&alg->cost, mult_cost))
3258     {
3259       limit.cost = alg->cost.cost - op_cost;
3260       limit.latency = alg->cost.latency - op_cost;
3261     }
3262   else
3263     {
3264       limit.cost = mult_cost - op_cost;
3265       limit.latency = mult_cost - op_cost;
3266     }
3267
3268   synth_mult (&alg2, val - 1, &limit, mode);
3269   alg2.cost.cost += op_cost;
3270   alg2.cost.latency += op_cost;
3271   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3272     *alg = alg2, *variant = add_variant;
3273
3274   return MULT_COST_LESS (&alg->cost, mult_cost);
3275 }
3276
3277 /* A subroutine of expand_mult, used for constant multiplications.
3278    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3279    convenient.  Use the shift/add sequence described by ALG and apply
3280    the final fixup specified by VARIANT.  */
3281
3282 static rtx
3283 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3284                    rtx target, const struct algorithm *alg,
3285                    enum mult_variant variant)
3286 {
3287   unsigned HOST_WIDE_INT val_so_far;
3288   rtx_insn *insn;
3289   rtx accum, tem;
3290   int opno;
3291   machine_mode nmode;
3292
3293   /* Avoid referencing memory over and over and invalid sharing
3294      on SUBREGs.  */
3295   op0 = force_reg (mode, op0);
3296
3297   /* ACCUM starts out either as OP0 or as a zero, depending on
3298      the first operation.  */
3299
3300   if (alg->op[0] == alg_zero)
3301     {
3302       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3303       val_so_far = 0;
3304     }
3305   else if (alg->op[0] == alg_m)
3306     {
3307       accum = copy_to_mode_reg (mode, op0);
3308       val_so_far = 1;
3309     }
3310   else
3311     gcc_unreachable ();
3312
3313   for (opno = 1; opno < alg->ops; opno++)
3314     {
3315       int log = alg->log[opno];
3316       rtx shift_subtarget = optimize ? 0 : accum;
3317       rtx add_target
3318         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3319            && !optimize)
3320           ? target : 0;
3321       rtx accum_target = optimize ? 0 : accum;
3322       rtx accum_inner;
3323
3324       switch (alg->op[opno])
3325         {
3326         case alg_shift:
3327           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3328           /* REG_EQUAL note will be attached to the following insn.  */
3329           emit_move_insn (accum, tem);
3330           val_so_far <<= log;
3331           break;
3332
3333         case alg_add_t_m2:
3334           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3335           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3336                                  add_target ? add_target : accum_target);
3337           val_so_far += HOST_WIDE_INT_1U << log;
3338           break;
3339
3340         case alg_sub_t_m2:
3341           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3342           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3343                                  add_target ? add_target : accum_target);
3344           val_so_far -= HOST_WIDE_INT_1U << log;
3345           break;
3346
3347         case alg_add_t2_m:
3348           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3349                                 log, shift_subtarget, 0);
3350           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3351                                  add_target ? add_target : accum_target);
3352           val_so_far = (val_so_far << log) + 1;
3353           break;
3354
3355         case alg_sub_t2_m:
3356           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3357                                 log, shift_subtarget, 0);
3358           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3359                                  add_target ? add_target : accum_target);
3360           val_so_far = (val_so_far << log) - 1;
3361           break;
3362
3363         case alg_add_factor:
3364           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3365           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3366                                  add_target ? add_target : accum_target);
3367           val_so_far += val_so_far << log;
3368           break;
3369
3370         case alg_sub_factor:
3371           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3372           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3373                                  (add_target
3374                                   ? add_target : (optimize ? 0 : tem)));
3375           val_so_far = (val_so_far << log) - val_so_far;
3376           break;
3377
3378         default:
3379           gcc_unreachable ();
3380         }
3381
3382       if (SCALAR_INT_MODE_P (mode))
3383         {
3384           /* Write a REG_EQUAL note on the last insn so that we can cse
3385              multiplication sequences.  Note that if ACCUM is a SUBREG,
3386              we've set the inner register and must properly indicate that.  */
3387           tem = op0, nmode = mode;
3388           accum_inner = accum;
3389           if (GET_CODE (accum) == SUBREG)
3390             {
3391               accum_inner = SUBREG_REG (accum);
3392               nmode = GET_MODE (accum_inner);
3393               tem = gen_lowpart (nmode, op0);
3394             }
3395
3396           /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3397              In that case, only the low bits of accum would be guaranteed to
3398              be equal to the content of the REG_EQUAL note, the upper bits
3399              can be anything.  */
3400           if (!paradoxical_subreg_p (tem))
3401             {
3402               insn = get_last_insn ();
3403               wide_int wval_so_far
3404                 = wi::uhwi (val_so_far,
3405                             GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3406               rtx c = immed_wide_int_const (wval_so_far, nmode);
3407               set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3408                                 accum_inner);
3409             }
3410         }
3411     }
3412
3413   if (variant == negate_variant)
3414     {
3415       val_so_far = -val_so_far;
3416       accum = expand_unop (mode, neg_optab, accum, target, 0);
3417     }
3418   else if (variant == add_variant)
3419     {
3420       val_so_far = val_so_far + 1;
3421       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3422     }
3423
3424   /* Compare only the bits of val and val_so_far that are significant
3425      in the result mode, to avoid sign-/zero-extension confusion.  */
3426   nmode = GET_MODE_INNER (mode);
3427   val &= GET_MODE_MASK (nmode);
3428   val_so_far &= GET_MODE_MASK (nmode);
3429   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3430
3431   return accum;
3432 }
3433
3434 /* Perform a multiplication and return an rtx for the result.
3435    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3436    TARGET is a suggestion for where to store the result (an rtx).
3437
3438    We check specially for a constant integer as OP1.
3439    If you want this check for OP0 as well, then before calling
3440    you should swap the two operands if OP0 would be constant.  */
3441
3442 rtx
3443 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3444              int unsignedp, bool no_libcall)
3445 {
3446   enum mult_variant variant;
3447   struct algorithm algorithm;
3448   rtx scalar_op1;
3449   int max_cost;
3450   bool speed = optimize_insn_for_speed_p ();
3451   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3452
3453   if (CONSTANT_P (op0))
3454     std::swap (op0, op1);
3455
3456   /* For vectors, there are several simplifications that can be made if
3457      all elements of the vector constant are identical.  */
3458   scalar_op1 = unwrap_const_vec_duplicate (op1);
3459
3460   if (INTEGRAL_MODE_P (mode))
3461     {
3462       rtx fake_reg;
3463       HOST_WIDE_INT coeff;
3464       bool is_neg;
3465       int mode_bitsize;
3466
3467       if (op1 == CONST0_RTX (mode))
3468         return op1;
3469       if (op1 == CONST1_RTX (mode))
3470         return op0;
3471       if (op1 == CONSTM1_RTX (mode))
3472         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3473                             op0, target, 0);
3474
3475       if (do_trapv)
3476         goto skip_synth;
3477
3478       /* If mode is integer vector mode, check if the backend supports
3479          vector lshift (by scalar or vector) at all.  If not, we can't use
3480          synthetized multiply.  */
3481       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3482           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3483           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3484         goto skip_synth;
3485
3486       /* These are the operations that are potentially turned into
3487          a sequence of shifts and additions.  */
3488       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3489
3490       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3491          less than or equal in size to `unsigned int' this doesn't matter.
3492          If the mode is larger than `unsigned int', then synth_mult works
3493          only if the constant value exactly fits in an `unsigned int' without
3494          any truncation.  This means that multiplying by negative values does
3495          not work; results are off by 2^32 on a 32 bit machine.  */
3496       if (CONST_INT_P (scalar_op1))
3497         {
3498           coeff = INTVAL (scalar_op1);
3499           is_neg = coeff < 0;
3500         }
3501 #if TARGET_SUPPORTS_WIDE_INT
3502       else if (CONST_WIDE_INT_P (scalar_op1))
3503 #else
3504       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3505 #endif
3506         {
3507           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3508           /* Perfect power of 2 (other than 1, which is handled above).  */
3509           if (shift > 0)
3510             return expand_shift (LSHIFT_EXPR, mode, op0,
3511                                  shift, target, unsignedp);
3512           else
3513             goto skip_synth;
3514         }
3515       else
3516         goto skip_synth;
3517
3518       /* We used to test optimize here, on the grounds that it's better to
3519          produce a smaller program when -O is not used.  But this causes
3520          such a terrible slowdown sometimes that it seems better to always
3521          use synth_mult.  */
3522
3523       /* Special case powers of two.  */
3524       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3525           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3526         return expand_shift (LSHIFT_EXPR, mode, op0,
3527                              floor_log2 (coeff), target, unsignedp);
3528
3529       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3530
3531       /* Attempt to handle multiplication of DImode values by negative
3532          coefficients, by performing the multiplication by a positive
3533          multiplier and then inverting the result.  */
3534       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3535         {
3536           /* Its safe to use -coeff even for INT_MIN, as the
3537              result is interpreted as an unsigned coefficient.
3538              Exclude cost of op0 from max_cost to match the cost
3539              calculation of the synth_mult.  */
3540           coeff = -(unsigned HOST_WIDE_INT) coeff;
3541           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3542                                     mode, speed)
3543                       - neg_cost (speed, mode));
3544           if (max_cost <= 0)
3545             goto skip_synth;
3546
3547           /* Special case powers of two.  */
3548           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3549             {
3550               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3551                                        floor_log2 (coeff), target, unsignedp);
3552               return expand_unop (mode, neg_optab, temp, target, 0);
3553             }
3554
3555           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3556                                    max_cost))
3557             {
3558               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3559                                             &algorithm, variant);
3560               return expand_unop (mode, neg_optab, temp, target, 0);
3561             }
3562           goto skip_synth;
3563         }
3564
3565       /* Exclude cost of op0 from max_cost to match the cost
3566          calculation of the synth_mult.  */
3567       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3568       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3569         return expand_mult_const (mode, op0, coeff, target,
3570                                   &algorithm, variant);
3571     }
3572  skip_synth:
3573
3574   /* Expand x*2.0 as x+x.  */
3575   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3576       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3577     {
3578       op0 = force_reg (GET_MODE (op0), op0);
3579       return expand_binop (mode, add_optab, op0, op0,
3580                            target, unsignedp,
3581                            no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3582     }
3583
3584   /* This used to use umul_optab if unsigned, but for non-widening multiply
3585      there is no difference between signed and unsigned.  */
3586   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3587                       op0, op1, target, unsignedp,
3588                       no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3589   gcc_assert (op0 || no_libcall);
3590   return op0;
3591 }
3592
3593 /* Return a cost estimate for multiplying a register by the given
3594    COEFFicient in the given MODE and SPEED.  */
3595
3596 int
3597 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3598 {
3599   int max_cost;
3600   struct algorithm algorithm;
3601   enum mult_variant variant;
3602
3603   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3604   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3605                            mode, speed);
3606   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3607     return algorithm.cost.cost;
3608   else
3609     return max_cost;
3610 }
3611
3612 /* Perform a widening multiplication and return an rtx for the result.
3613    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3614    TARGET is a suggestion for where to store the result (an rtx).
3615    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3616    or smul_widen_optab.
3617
3618    We check specially for a constant integer as OP1, comparing the
3619    cost of a widening multiply against the cost of a sequence of shifts
3620    and adds.  */
3621
3622 rtx
3623 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3624                       int unsignedp, optab this_optab)
3625 {
3626   bool speed = optimize_insn_for_speed_p ();
3627   rtx cop1;
3628
3629   if (CONST_INT_P (op1)
3630       && GET_MODE (op0) != VOIDmode
3631       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3632                                 this_optab == umul_widen_optab))
3633       && CONST_INT_P (cop1)
3634       && (INTVAL (cop1) >= 0
3635           || HWI_COMPUTABLE_MODE_P (mode)))
3636     {
3637       HOST_WIDE_INT coeff = INTVAL (cop1);
3638       int max_cost;
3639       enum mult_variant variant;
3640       struct algorithm algorithm;
3641
3642       if (coeff == 0)
3643         return CONST0_RTX (mode);
3644
3645       /* Special case powers of two.  */
3646       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3647         {
3648           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3649           return expand_shift (LSHIFT_EXPR, mode, op0,
3650                                floor_log2 (coeff), target, unsignedp);
3651         }
3652
3653       /* Exclude cost of op0 from max_cost to match the cost
3654          calculation of the synth_mult.  */
3655       max_cost = mul_widen_cost (speed, mode);
3656       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3657                                max_cost))
3658         {
3659           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3660           return expand_mult_const (mode, op0, coeff, target,
3661                                     &algorithm, variant);
3662         }
3663     }
3664   return expand_binop (mode, this_optab, op0, op1, target,
3665                        unsignedp, OPTAB_LIB_WIDEN);
3666 }
3667 \f
3668 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3669    replace division by D, and put the least significant N bits of the result
3670    in *MULTIPLIER_PTR and return the most significant bit.
3671
3672    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3673    needed precision is in PRECISION (should be <= N).
3674
3675    PRECISION should be as small as possible so this function can choose
3676    multiplier more freely.
3677
3678    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3679    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3680
3681    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3682    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3683
3684 unsigned HOST_WIDE_INT
3685 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3686                    unsigned HOST_WIDE_INT *multiplier_ptr,
3687                    int *post_shift_ptr, int *lgup_ptr)
3688 {
3689   int lgup, post_shift;
3690   int pow, pow2;
3691
3692   /* lgup = ceil(log2(divisor)); */
3693   lgup = ceil_log2 (d);
3694
3695   gcc_assert (lgup <= n);
3696
3697   pow = n + lgup;
3698   pow2 = n + lgup - precision;
3699
3700   /* mlow = 2^(N + lgup)/d */
3701   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3702   wide_int mlow = wi::udiv_trunc (val, d);
3703
3704   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3705   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3706   wide_int mhigh = wi::udiv_trunc (val, d);
3707
3708   /* If precision == N, then mlow, mhigh exceed 2^N
3709      (but they do not exceed 2^(N+1)).  */
3710
3711   /* Reduce to lowest terms.  */
3712   for (post_shift = lgup; post_shift > 0; post_shift--)
3713     {
3714       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3715                                                        HOST_BITS_PER_WIDE_INT);
3716       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3717                                                        HOST_BITS_PER_WIDE_INT);
3718       if (ml_lo >= mh_lo)
3719         break;
3720
3721       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3722       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3723     }
3724
3725   *post_shift_ptr = post_shift;
3726   *lgup_ptr = lgup;
3727   if (n < HOST_BITS_PER_WIDE_INT)
3728     {
3729       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3730       *multiplier_ptr = mhigh.to_uhwi () & mask;
3731       return mhigh.to_uhwi () > mask;
3732     }
3733   else
3734     {
3735       *multiplier_ptr = mhigh.to_uhwi ();
3736       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3737     }
3738 }
3739
3740 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3741    congruent to 1 (mod 2**N).  */
3742
3743 static unsigned HOST_WIDE_INT
3744 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3745 {
3746   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3747
3748   /* The algorithm notes that the choice y = x satisfies
3749      x*y == 1 mod 2^3, since x is assumed odd.
3750      Each iteration doubles the number of bits of significance in y.  */
3751
3752   unsigned HOST_WIDE_INT mask;
3753   unsigned HOST_WIDE_INT y = x;
3754   int nbit = 3;
3755
3756   mask = (n == HOST_BITS_PER_WIDE_INT
3757           ? HOST_WIDE_INT_M1U
3758           : (HOST_WIDE_INT_1U << n) - 1);
3759
3760   while (nbit < n)
3761     {
3762       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3763       nbit *= 2;
3764     }
3765   return y;
3766 }
3767
3768 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3769    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3770    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3771    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3772    become signed.  Two correction terms are added (UNSIGNEDP) or subtracted.
3773
3774    The result is put in TARGET if that is convenient, and is returned.
3775
3776    MODE is the mode of operation.  */
3777
3778 rtx
3779 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3780                              rtx op1, rtx target, int unsignedp)
3781 {
3782   rtx tem;
3783   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3784
3785   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3786                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3787   tem = expand_and (mode, tem, op1, NULL_RTX);  /* (OP0 >> (bits - 1)) & OP1 */
3788   adj_operand
3789     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3790                      adj_operand);
3791
3792   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3793                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3794   tem = expand_and (mode, tem, op0, NULL_RTX);  /* (OP1 >> (bits - 1)) & OP0 */
3795   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3796                           target);
3797
3798   return target;
3799 }
3800
3801 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3802
3803 static rtx
3804 extract_high_half (scalar_int_mode mode, rtx op)
3805 {
3806   if (mode == word_mode)
3807     return gen_highpart (mode, op);     /* Taken directly, no shift emitted.  */
3808
3809   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3810
3811   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3812                      GET_MODE_BITSIZE (mode), 0, 1);  /* Move high half down.  */
3813   return convert_modes (mode, wider_mode, op, 0);     /* Narrow to MODE.  */
3814 }
3815
3816 /* Like expmed_mult_highpart, but only consider using a multiplication
3817    optab.  OP1 is an rtx for the constant operand.  Return 0 on failure.  */
3818
3819 static rtx
3820 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3821                             rtx target, int unsignedp, int max_cost)
3822 {
3823   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);  /* OP1 as a MODE constant.  */
3824   optab moptab;
3825   rtx tem;
3826   int size;
3827   bool speed = optimize_insn_for_speed_p ();
3828
3829   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3830
3831   size = GET_MODE_BITSIZE (mode);
3832
3833   /* Firstly, try using a multiplication insn that only generates the needed
3834      high part of the product, and in the sign flavor of unsignedp.  */
3835   if (mul_highpart_cost (speed, mode) < max_cost)
3836     {
3837       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3838       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3839                           unsignedp, OPTAB_DIRECT);
3840       if (tem)
3841         return tem;
3842     }
3843
3844   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3845      Need to adjust the result after the multiplication.  */
3846   if (size - 1 < BITS_PER_WORD
3847       && (mul_highpart_cost (speed, mode)
3848           + 2 * shift_cost (speed, mode, size-1)
3849           + 4 * add_cost (speed, mode) < max_cost))
3850     {
3851       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3852       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3853                           unsignedp, OPTAB_DIRECT);
3854       if (tem)
3855         /* We used the wrong signedness.  Adjust the result.  */
3856         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3857                                             tem, unsignedp);
3858     }
3859
3860   /* Try widening multiplication, keeping only the high half.  */
3861   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3862   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3863       && mul_widen_cost (speed, wider_mode) < max_cost)
3864     {
3865       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3866                           unsignedp, OPTAB_WIDEN);
3867       if (tem)
3868         return extract_high_half (mode, tem);
3869     }
3870
3871   /* Try widening the mode and perform a non-widening multiplication.  */
3872   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3873       && size - 1 < BITS_PER_WORD
3874       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3875           < max_cost))
3876     {
3877       rtx_insn *insns;
3878       rtx wop0, wop1;
3879
3880       /* We need to widen the operands, for example to ensure the
3881          constant multiplier is correctly sign or zero extended.
3882          Use a sequence to clean-up any instructions emitted by
3883          the conversions if things don't work out.  */
3884       start_sequence ();
3885       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3886       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3887       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3888                           unsignedp, OPTAB_WIDEN);
3889       insns = get_insns ();
3890       end_sequence ();
3891
3892       if (tem)
3893         {
3894           emit_insn (insns);   /* Only now commit the buffered insns.  */
3895           return extract_high_half (mode, tem);
3896         }
3897     }
3898
3899   /* Try widening multiplication of opposite signedness, and adjust.  */
3900   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3901   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3902       && size - 1 < BITS_PER_WORD
3903       && (mul_widen_cost (speed, wider_mode)
3904           + 2 * shift_cost (speed, mode, size-1)
3905           + 4 * add_cost (speed, mode) < max_cost))
3906     {
3907       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3908                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3909       if (tem != 0)
3910         {
3911           tem = extract_high_half (mode, tem);
3912           /* We used the wrong signedness.  Adjust the result.  */
3913           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3914                                               target, unsignedp);
3915         }
3916     }
3917
3918   return 0;   /* None of the variants above produced a result.  */
3919 }
3920
3921 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3922    putting the high half of the result in TARGET if that is convenient,
3923    and return where the result is.  If the operation cannot be performed,
3924    0 is returned.
3925
3926    MODE is the mode of operation and result.
3927
3928    UNSIGNEDP nonzero means unsigned multiply.
3929
3930    MAX_COST is the total allowed cost for the expanded RTL.  */
3931
3932 static rtx
3933 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3934                       rtx target, int unsignedp, int max_cost)
3935 {
3936   unsigned HOST_WIDE_INT cnst1;
3937   int extra_cost;
3938   bool sign_adjust = false;
3939   enum mult_variant variant;
3940   struct algorithm alg;
3941   rtx tem;
3942   bool speed = optimize_insn_for_speed_p ();
3943
3944   /* We can't support modes that are not HWI-computable; CNST1 is a HWI.  */
3945   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3946
3947   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);  /* OP1 truncated to MODE.  */
3948
3949   /* We can't optimize modes wider than BITS_PER_WORD.
3950      ??? We might be able to perform double-word arithmetic if
3951      mode == word_mode, however all the cost calculations in
3952      synth_mult etc. assume single-word operations.  */
3953   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3954   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3955     return expmed_mult_highpart_optab (mode, op0, op1, target,
3956                                        unsignedp, max_cost);
3957
3958   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3959
3960   /* Check whether we try to multiply by a negative constant.  */
3961   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3962     {
3963       sign_adjust = true;
3964       extra_cost += add_cost (speed, mode);
3965     }
3966
3967   /* See whether shift/add multiplication is cheap enough.  */
3968   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3969                            max_cost - extra_cost))
3970     {
3971       /* See whether the specialized multiplication optabs are
3972          cheaper than the shift/add version.  */
3973       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3974                                         alg.cost.cost + extra_cost);
3975       if (tem)
3976         return tem;
3977
3978       tem = convert_to_mode (wider_mode, op0, unsignedp);
3979       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3980       tem = extract_high_half (mode, tem);
3981
3982       /* Adjust for signedness: CNST1 was OP1 + 2**bitsize, so remove OP0.  */
3983       if (sign_adjust)
3984         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3985
3986       return tem;
3987     }
3988   return expmed_mult_highpart_optab (mode, op0, op1, target,
3989                                      unsignedp, max_cost);
3990 }
3991
3992
3993 /* Expand signed modulus of OP0 by a power of two D in mode MODE; return it.  */
3994
3995 static rtx
3996 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3997 {
3998   rtx result, temp, shift;
3999   rtx_code_label *label;
4000   int logd;
4001   int prec = GET_MODE_PRECISION (mode);
4002
4003   logd = floor_log2 (d);        /* D is a power of two, so D == 2**LOGD.  */
4004   result = gen_reg_rtx (mode);
4005
4006   /* Avoid conditional branches when they're expensive.  */
4007   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4008       && optimize_insn_for_speed_p ())
4009     {
4010       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4011                                       mode, 0, -1);  /* Presumably -1 if OP0 < 0, else 0.  */
4012       if (signmask)
4013         {
4014           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4015           signmask = force_reg (mode, signmask);
4016           shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4017
4018           /* Use the rtx_cost of a LSHIFTRT instruction to determine
4019              which instruction sequence to use.  If logical right shifts
4020              are expensive or unavailable then use 2 XORs, 2 SUBs and an
4021              AND, otherwise use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
4022
4023           temp = gen_rtx_LSHIFTRT (mode, result, shift);
4024           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4025               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4026                   > COSTS_N_INSNS (2)))
4027             {  /* ((((OP0 ^ S) - S) & MASKLOW) ^ S) - S, with S = SIGNMASK.  */
4028               temp = expand_binop (mode, xor_optab, op0, signmask,
4029                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4030               temp = expand_binop (mode, sub_optab, temp, signmask,
4031                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4032               temp = expand_binop (mode, and_optab, temp,
4033                                    gen_int_mode (masklow, mode),
4034                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4035               temp = expand_binop (mode, xor_optab, temp, signmask,
4036                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4037               temp = expand_binop (mode, sub_optab, temp, signmask,
4038                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4039             }
4040           else
4041             {  /* ((OP0 + S') & MASKLOW) - S', with S' = SIGNMASK >> SHIFT.  */
4042               signmask = expand_binop (mode, lshr_optab, signmask, shift,
4043                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
4044               signmask = force_reg (mode, signmask);
4045
4046               temp = expand_binop (mode, add_optab, op0, signmask,
4047                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4048               temp = expand_binop (mode, and_optab, temp,
4049                                    gen_int_mode (masklow, mode),
4050                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4051               temp = expand_binop (mode, sub_optab, temp, signmask,
4052                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4053             }
4054           return temp;
4055         }
4056     }
4057
4058   /* Mask contains the mode's signbit and the significant bits of the
4059      modulus.  By including the signbit in the operation, many targets
4060      can avoid an explicit compare operation in the following comparison
4061      against zero.  */
4062   wide_int mask = wi::mask (logd, false, prec);
4063   mask = wi::set_bit (mask, prec - 1);
4064
4065   temp = expand_binop (mode, and_optab, op0,
4066                        immed_wide_int_const (mask, mode),
4067                        result, 1, OPTAB_LIB_WIDEN);
4068   if (temp != result)
4069     emit_move_insn (result, temp);
4070
4071   label = gen_label_rtx ();
4072   do_cmp_and_jump (result, const0_rtx, GE, mode, label);  /* Skip the fixup.  */
4073
4074   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4075                        0, OPTAB_LIB_WIDEN);
4076
4077   mask = wi::mask (logd, true, prec);  /* Presumably the bits above the low LOGD.  */
4078   temp = expand_binop (mode, ior_optab, temp,
4079                        immed_wide_int_const (mask, mode),
4080                        result, 1, OPTAB_LIB_WIDEN);
4081   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4082                        0, OPTAB_LIB_WIDEN);
4083   if (temp != result)
4084     emit_move_insn (result, temp);
4085   emit_label (label);
4086   return result;
4087 }
4088
4089 /* Expand signed division of OP0 by a power of two D in mode MODE and
4090    return the quotient.  This routine is only called for positive D.  */
4091
4092 static rtx
4093 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4094 {
4095   rtx temp;
4096   rtx_code_label *label;
4097   int logd;
4098
4099   logd = floor_log2 (d);   /* Every strategy ends with a right shift by LOGD.  */
4100
4101   if (d == 2
4102       && BRANCH_COST (optimize_insn_for_speed_p (),
4103                       false) >= 1)
4104     {  /* D - 1 == 1: add the OP0 < 0 flag (presumably 0/1) as the bias.  */
4105       temp = gen_reg_rtx (mode);
4106       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4107       if (temp != NULL_RTX)
4108         {
4109           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4110                                0, OPTAB_LIB_WIDEN);
4111           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4112         }
4113     }
4114
4115   if (HAVE_conditional_move
4116       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4117     {
4118       rtx temp2;
4119
4120       start_sequence ();   /* Buffer the insns; dropped if the cmove fails.  */
4121       temp2 = copy_to_mode_reg (mode, op0);
4122       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4123                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
4124       temp = force_reg (mode, temp);
4125
4126       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4127       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4128                                      mode, temp, temp2, mode, 0);
4129       if (temp2)
4130         {
4131           rtx_insn *seq = get_insns ();
4132           end_sequence ();
4133           emit_insn (seq);
4134           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4135         }
4136       end_sequence ();
4137     }
4138
4139   if (BRANCH_COST (optimize_insn_for_speed_p (),
4140                    false) >= 2)
4141     {  /* Derive the bias from a sign mask (presumably -1 if OP0 < 0, else 0).  */
4142       int ushift = GET_MODE_BITSIZE (mode) - logd;
4143
4144       temp = gen_reg_rtx (mode);
4145       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4146       if (temp != NULL_RTX)
4147         {
4148           if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4149               || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4150               > COSTS_N_INSNS (1))
4151             temp = expand_binop (mode, and_optab, temp,
4152                                  gen_int_mode (d - 1, mode),
4153                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
4154           else
4155             temp = expand_shift (RSHIFT_EXPR, mode, temp,
4156                                  ushift, NULL_RTX, 1);
4157           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4158                                0, OPTAB_LIB_WIDEN);
4159           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4160         }
4161     }
4162
4163   label = gen_label_rtx ();   /* Fallback: branch around adding the bias.  */
4164   temp = copy_to_mode_reg (mode, op0);
4165   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4166   expand_inc (temp, gen_int_mode (d - 1, mode));
4167   emit_label (label);
4168   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4169 }
4170 \f
4171 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4172    if that is convenient, and returning where the result is.
4173    You may request either the quotient or the remainder as the result;
4174    specify REM_FLAG nonzero to get the remainder.
4175
4176    CODE is the expression code for which kind of division this is;
4177    it controls how rounding is done.  MODE is the machine mode to use.
4178    UNSIGNEDP nonzero means do unsigned division.  METHODS is as for expand_binop.  */
4179
4180 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4181    and then correct it by or'ing in missing high bits
4182    if result of ANDI is nonzero.
4183    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4184    This could optimize to a bfexts instruction.
4185    But C doesn't use these operations, so their optimizations are
4186    left for later.  */
4187 /* ??? For modulo, we don't actually need the highpart of the first product,
4188    the low part will do nicely.  And for small divisors, the second multiply
4189    can also be a low-part only multiply or even be completely left out.
4190    E.g. to calculate the remainder of a division by 3 with a 32 bit
4191    multiply, multiply with 0x55555556 and extract the upper two bits;
4192    the result is exact for inputs up to 0x1fffffff.
4193    The input range can be reduced by using cross-sum rules.
4194    For odd divisors >= 3, the following table gives right shift counts
4195    so that if a number is shifted by an integer multiple of the given
4196    amount, the remainder stays the same:
4197    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4198    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4199    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4200    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4201    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4202
4203    Cross-sum rules for even numbers can be derived by leaving as many bits
4204    to the right alone as the divisor has zeros to the right.
4205    E.g. if x is an unsigned 32 bit number:
4206    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4207    */
4208
4209 rtx
4210 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4211                rtx op0, rtx op1, rtx target, int unsignedp,
4212                enum optab_methods methods)
4213 {
4214   machine_mode compute_mode;
4215   rtx tquotient;
4216   rtx quotient = 0, remainder = 0;
4217   rtx_insn *last;
4218   rtx_insn *insn;
4219   optab optab1, optab2;
4220   int op1_is_constant, op1_is_pow2 = 0;
4221   int max_cost, extra_cost;
4222   static HOST_WIDE_INT last_div_const = 0;
4223   bool speed = optimize_insn_for_speed_p ();
4224
4225   op1_is_constant = CONST_INT_P (op1);
4226   if (op1_is_constant)
4227     {
4228       wide_int ext_op1 = rtx_mode_t (op1, mode);
4229       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4230                      || (! unsignedp
4231                          && wi::popcount (wi::neg (ext_op1)) == 1));
4232     }
4233
4234   /*
4235      This is the structure of expand_divmod:
4236
4237      First comes code to fix up the operands so we can perform the operations
4238      correctly and efficiently.
4239
4240      Second comes a switch statement with code specific for each rounding mode.
4241      For some special operands this code emits all RTL for the desired
4242      operation, for other cases, it generates only a quotient and stores it in
4243      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4244      to indicate that it has not done anything.
4245
4246      Last comes code that finishes the operation.  If QUOTIENT is set and
4247      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4248      QUOTIENT is not set, it is computed using trunc rounding.
4249
4250      We try to generate special code for division and remainder when OP1 is a
4251      constant.  If |OP1| = 2**n we can use shifts and some other fast
4252      operations.  For other values of OP1, we compute a carefully selected
4253      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4254      by m.
4255
4256      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4257      half of the product.  Different strategies for generating the product are
4258      implemented in expmed_mult_highpart.
4259
4260      If what we actually want is the remainder, we generate that by another
4261      by-constant multiplication and a subtraction.  */
4262
4263   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4264      code below will malfunction if we are, so check here and handle
4265      the special case if so.  */
4266   if (op1 == const1_rtx)
4267     return rem_flag ? const0_rtx : op0;
4268
4269     /* When dividing by -1, we could get an overflow.
4270      negv_optab can handle overflows.  */
4271   if (! unsignedp && op1 == constm1_rtx)
4272     {
4273       if (rem_flag)
4274         return const0_rtx;
4275       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4276                           ? negv_optab : neg_optab, op0, target, 0);
4277     }
4278
4279   if (target
4280       /* Don't use the function value register as a target
4281          since we have to read it as well as write it,
4282          and function-inlining gets confused by this.  */
4283       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4284           /* Don't clobber an operand while doing a multi-step calculation.  */
4285           || ((rem_flag || op1_is_constant)
4286               && (reg_mentioned_p (target, op0)
4287                   || (MEM_P (op0) && MEM_P (target))))
4288           || reg_mentioned_p (target, op1)
4289           || (MEM_P (op1) && MEM_P (target))))
4290     target = 0;
4291
4292   /* Get the mode in which to perform this computation.  Normally it will
4293      be MODE, but sometimes we can't do the desired operation in MODE.
4294      If so, pick a wider mode in which we can do the operation.  Convert
4295      to that mode at the start to avoid repeated conversions.
4296
4297      First see what operations we need.  These depend on the expression
4298      we are evaluating.  (We assume that divxx3 insns exist under the
4299      same conditions that modxx3 insns and that these insns don't normally
4300      fail.  If these assumptions are not correct, we may generate less
4301      efficient code in some cases.)
4302
4303      Then see if we find a mode in which we can open-code that operation
4304      (either a division, modulus, or shift).  Finally, check for the smallest
4305      mode for which we can do the operation with a library call.  */
4306
4307   /* We might want to refine this now that we have division-by-constant
4308      optimization.  Since expmed_mult_highpart tries so many variants, it is
4309      not straightforward to generalize this.  Maybe we should make an array
4310      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4311
4312   optab1 = (op1_is_pow2
4313             ? (unsignedp ? lshr_optab : ashr_optab)
4314             : (unsignedp ? udiv_optab : sdiv_optab));
4315   optab2 = (op1_is_pow2 ? optab1
4316             : (unsignedp ? udivmod_optab : sdivmod_optab));
4317
4318   if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4319     {
4320       FOR_EACH_MODE_FROM (compute_mode, mode)
4321       if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4322           || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4323         break;
4324
4325       if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4326         FOR_EACH_MODE_FROM (compute_mode, mode)
4327           if (optab_libfunc (optab1, compute_mode)
4328               || optab_libfunc (optab2, compute_mode))
4329             break;
4330     }
4331   else
4332     compute_mode = mode;
4333
4334   /* If we still couldn't find a mode, use MODE, but expand_binop will
4335      probably die.  */
4336   if (compute_mode == VOIDmode)
4337     compute_mode = mode;
4338
4339   if (target && GET_MODE (target) == compute_mode)
4340     tquotient = target;
4341   else
4342     tquotient = gen_reg_rtx (compute_mode);
4343
4344 #if 0
4345   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4346      (mode), and thereby get better code when OP1 is a constant.  Do that
4347      later.  It will require going over all usages of SIZE below.  */
4348   size = GET_MODE_BITSIZE (mode);
4349 #endif
4350
4351   /* Only deduct something for a REM if the last divide done was
4352      for a different constant.   Then set the constant of the last
4353      divide.  */
4354   max_cost = (unsignedp
4355               ? udiv_cost (speed, compute_mode)
4356               : sdiv_cost (speed, compute_mode));
4357   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4358                      && INTVAL (op1) == last_div_const))
4359     max_cost -= (mul_cost (speed, compute_mode)
4360                  + add_cost (speed, compute_mode));
4361
4362   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4363
4364   /* Now convert to the best mode to use.  */
4365   if (compute_mode != mode)
4366     {
4367       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4368       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4369
4370       /* convert_modes may have placed op1 into a register, so we
4371          must recompute the following.  */
4372       op1_is_constant = CONST_INT_P (op1);
4373       if (op1_is_constant)
4374         {
4375           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4376           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4377                          || (! unsignedp
4378                              && wi::popcount (wi::neg (ext_op1)) == 1));
4379         }
4380       else
4381         op1_is_pow2 = 0;
4382     }
4383
4384   /* If one of the operands is a volatile MEM, copy it into a register.  */
4385
4386   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4387     op0 = force_reg (compute_mode, op0);
4388   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4389     op1 = force_reg (compute_mode, op1);
4390
4391   /* If we need the remainder or if OP1 is constant, we need to
4392      put OP0 in a register in case it has any queued subexpressions.  */
4393   if (rem_flag || op1_is_constant)
4394     op0 = force_reg (compute_mode, op0);
4395
4396   last = get_last_insn ();
4397
4398   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4399   if (unsignedp)
4400     {
4401       if (code == FLOOR_DIV_EXPR)
4402         code = TRUNC_DIV_EXPR;
4403       if (code == FLOOR_MOD_EXPR)
4404         code = TRUNC_MOD_EXPR;
4405       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4406         code = TRUNC_DIV_EXPR;
4407     }
4408
4409   if (op1 != const0_rtx)
4410     switch (code)
4411       {
4412       case TRUNC_MOD_EXPR:
4413       case TRUNC_DIV_EXPR:
4414         if (op1_is_constant)
4415           {
4416             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4417             int size = GET_MODE_BITSIZE (int_mode);
4418             if (unsignedp)
4419               {
4420                 unsigned HOST_WIDE_INT mh, ml;
4421                 int pre_shift, post_shift;
4422                 int dummy;
4423                 wide_int wd = rtx_mode_t (op1, int_mode);
4424                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4425
4426                 if (wi::popcount (wd) == 1)
4427                   {
4428                     pre_shift = floor_log2 (d);
4429                     if (rem_flag)
4430                       {
4431                         unsigned HOST_WIDE_INT mask
4432                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4433                         remainder
4434                           = expand_binop (int_mode, and_optab, op0,
4435                                           gen_int_mode (mask, int_mode),
4436                                           remainder, 1, methods);
4437                         if (remainder)
4438                           return gen_lowpart (mode, remainder);
4439                       }
4440                     quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4441                                              pre_shift, tquotient, 1);
4442                   }
4443                 else if (size <= HOST_BITS_PER_WIDE_INT)
4444                   {
4445                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4446                       {
4447                         /* Most significant bit of divisor is set; emit an scc
4448                            insn.  */
4449                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4450                                                           int_mode, 1, 1);
4451                       }
4452                     else
4453                       {
4454                         /* Find a suitable multiplier and right shift count
4455                            instead of multiplying with D.  */
4456
4457                         mh = choose_multiplier (d, size, size,
4458                                                 &ml, &post_shift, &dummy);
4459
4460                         /* If the suggested multiplier is more than SIZE bits,
4461                            we can do better for even divisors, using an
4462                            initial right shift.  */
4463                         if (mh != 0 && (d & 1) == 0)
4464                           {
4465                             pre_shift = ctz_or_zero (d);
4466                             mh = choose_multiplier (d >> pre_shift, size,
4467                                                     size - pre_shift,
4468                                                     &ml, &post_shift, &dummy);
4469                             gcc_assert (!mh);
4470                           }
4471                         else
4472                           pre_shift = 0;
4473
4474                         if (mh != 0)
4475                           {
4476                             rtx t1, t2, t3, t4;
4477
4478                             if (post_shift - 1 >= BITS_PER_WORD)
4479                               goto fail1;
4480
4481                             extra_cost
4482                               = (shift_cost (speed, int_mode, post_shift - 1)
4483                                  + shift_cost (speed, int_mode, 1)
4484                                  + 2 * add_cost (speed, int_mode));
4485                             t1 = expmed_mult_highpart
4486                               (int_mode, op0, gen_int_mode (ml, int_mode),
4487                                NULL_RTX, 1, max_cost - extra_cost);
4488                             if (t1 == 0)
4489                               goto fail1;
4490                             t2 = force_operand (gen_rtx_MINUS (int_mode,
4491                                                                op0, t1),
4492                                                 NULL_RTX);
4493                             t3 = expand_shift (RSHIFT_EXPR, int_mode,
4494                                                t2, 1, NULL_RTX, 1);
4495                             t4 = force_operand (gen_rtx_PLUS (int_mode,
4496                                                               t1, t3),
4497                                                 NULL_RTX);
4498                             quotient = expand_shift
4499                               (RSHIFT_EXPR, int_mode, t4,
4500                                post_shift - 1, tquotient, 1);
4501                           }
4502                         else
4503                           {
4504                             rtx t1, t2;
4505
4506                             if (pre_shift >= BITS_PER_WORD
4507                                 || post_shift >= BITS_PER_WORD)
4508                               goto fail1;
4509
4510                             t1 = expand_shift
4511                               (RSHIFT_EXPR, int_mode, op0,
4512                                pre_shift, NULL_RTX, 1);
4513                             extra_cost
4514                               = (shift_cost (speed, int_mode, pre_shift)
4515                                  + shift_cost (speed, int_mode, post_shift));
4516                             t2 = expmed_mult_highpart
4517                               (int_mode, t1,
4518                                gen_int_mode (ml, int_mode),
4519                                NULL_RTX, 1, max_cost - extra_cost);
4520                             if (t2 == 0)
4521                               goto fail1;
4522                             quotient = expand_shift
4523                               (RSHIFT_EXPR, int_mode, t2,
4524                                post_shift, tquotient, 1);
4525                           }
4526                       }
4527                   }
4528                 else            /* Too wide mode to use tricky code */
4529                   break;
4530
4531                 insn = get_last_insn ();
4532                 if (insn != last)
4533                   set_dst_reg_note (insn, REG_EQUAL,
4534                                     gen_rtx_UDIV (int_mode, op0, op1),
4535                                     quotient);
4536               }
4537             else                /* TRUNC_DIV, signed */
4538               {
4539                 unsigned HOST_WIDE_INT ml;
4540                 int lgup, post_shift;
4541                 rtx mlr;
4542                 HOST_WIDE_INT d = INTVAL (op1);
4543                 unsigned HOST_WIDE_INT abs_d;
4544
4545                 /* Not prepared to handle division/remainder by
4546                    0xffffffffffffffff8000000000000000 etc.  */
4547                 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4548                   break;
4549
4550                 /* Since d might be INT_MIN, we have to cast to
4551                    unsigned HOST_WIDE_INT before negating to avoid
4552                    undefined signed overflow.  */
4553                 abs_d = (d >= 0
4554                          ? (unsigned HOST_WIDE_INT) d
4555                          : - (unsigned HOST_WIDE_INT) d);
4556
4557                 /* n rem d = n rem -d */
4558                 if (rem_flag && d < 0)
4559                   {
4560                     d = abs_d;
4561                     op1 = gen_int_mode (abs_d, int_mode);
4562                   }
4563
4564                 if (d == 1)
4565                   quotient = op0;
4566                 else if (d == -1)
4567                   quotient = expand_unop (int_mode, neg_optab, op0,
4568                                           tquotient, 0);
4569                 else if (size <= HOST_BITS_PER_WIDE_INT
4570                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4571                   {
4572                     /* This case is not handled correctly below.  */
4573                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4574                                                 int_mode, 1, 1);
4575                     if (quotient == 0)
4576                       goto fail1;
4577                   }
4578                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4579                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4580                          && (rem_flag
4581                              ? smod_pow2_cheap (speed, int_mode)
4582                              : sdiv_pow2_cheap (speed, int_mode))
4583                          /* We assume that cheap metric is true if the
4584                             optab has an expander for this mode.  */
4585                          && ((optab_handler ((rem_flag ? smod_optab
4586                                               : sdiv_optab),
4587                                              int_mode)
4588                               != CODE_FOR_nothing)
4589                              || (optab_handler (sdivmod_optab, int_mode)
4590                                  != CODE_FOR_nothing)))
4591                   ;
4592                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4593                   {
4594                     if (rem_flag)
4595                       {
4596                         remainder = expand_smod_pow2 (int_mode, op0, d);
4597                         if (remainder)
4598                           return gen_lowpart (mode, remainder);
4599                       }
4600
4601                     if (sdiv_pow2_cheap (speed, int_mode)
4602                         && ((optab_handler (sdiv_optab, int_mode)
4603                              != CODE_FOR_nothing)
4604                             || (optab_handler (sdivmod_optab, int_mode)
4605                                 != CODE_FOR_nothing)))
4606                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4607                                                 int_mode, op0,
4608                                                 gen_int_mode (abs_d,
4609                                                               int_mode),
4610                                                 NULL_RTX, 0);
4611                     else
4612                       quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4613
4614                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4615                        negate the quotient.  */
4616                     if (d < 0)
4617                       {
4618                         insn = get_last_insn ();
4619                         if (insn != last
4620                             && abs_d < (HOST_WIDE_INT_1U
4621                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4622                           set_dst_reg_note (insn, REG_EQUAL,
4623                                             gen_rtx_DIV (int_mode, op0,
4624                                                          gen_int_mode
4625                                                            (abs_d,
4626                                                             int_mode)),
4627                                             quotient);
4628
4629                         quotient = expand_unop (int_mode, neg_optab,
4630                                                 quotient, quotient, 0);
4631                       }
4632                   }
4633                 else if (size <= HOST_BITS_PER_WIDE_INT)
4634                   {
4635                     choose_multiplier (abs_d, size, size - 1,
4636                                        &ml, &post_shift, &lgup);
4637                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4638                       {
4639                         rtx t1, t2, t3;
4640
4641                         if (post_shift >= BITS_PER_WORD
4642                             || size - 1 >= BITS_PER_WORD)
4643                           goto fail1;
4644
4645                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4646                                       + shift_cost (speed, int_mode, size - 1)
4647                                       + add_cost (speed, int_mode));
4648                         t1 = expmed_mult_highpart
4649                           (int_mode, op0, gen_int_mode (ml, int_mode),
4650                            NULL_RTX, 0, max_cost - extra_cost);
4651                         if (t1 == 0)
4652                           goto fail1;
4653                         t2 = expand_shift
4654                           (RSHIFT_EXPR, int_mode, t1,
4655                            post_shift, NULL_RTX, 0);
4656                         t3 = expand_shift
4657                           (RSHIFT_EXPR, int_mode, op0,
4658                            size - 1, NULL_RTX, 0);
4659                         if (d < 0)
4660                           quotient
4661                             = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4662                                              tquotient);
4663                         else
4664                           quotient
4665                             = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4666                                              tquotient);
4667                       }
4668                     else
4669                       {
4670                         rtx t1, t2, t3, t4;
4671
4672                         if (post_shift >= BITS_PER_WORD
4673                             || size - 1 >= BITS_PER_WORD)
4674                           goto fail1;
4675
4676                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4677                         mlr = gen_int_mode (ml, int_mode);
4678                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4679                                       + shift_cost (speed, int_mode, size - 1)
4680                                       + 2 * add_cost (speed, int_mode));
4681                         t1 = expmed_mult_highpart (int_mode, op0, mlr,
4682                                                    NULL_RTX, 0,
4683                                                    max_cost - extra_cost);
4684                         if (t1 == 0)
4685                           goto fail1;
4686                         t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4687                                             NULL_RTX);
4688                         t3 = expand_shift
4689                           (RSHIFT_EXPR, int_mode, t2,
4690                            post_shift, NULL_RTX, 0);
4691                         t4 = expand_shift
4692                           (RSHIFT_EXPR, int_mode, op0,
4693                            size - 1, NULL_RTX, 0);
4694                         if (d < 0)
4695                           quotient
4696                             = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4697                                              tquotient);
4698                         else
4699                           quotient
4700                             = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4701                                              tquotient);
4702                       }
4703                   }
4704                 else            /* Too wide mode to use tricky code */
4705                   break;
4706
4707                 insn = get_last_insn ();
4708                 if (insn != last)
4709                   set_dst_reg_note (insn, REG_EQUAL,
4710                                     gen_rtx_DIV (int_mode, op0, op1),
4711                                     quotient);
4712               }
4713             break;
4714           }
4715       fail1:
4716         delete_insns_since (last);
4717         break;
4718
4719       case FLOOR_DIV_EXPR:
4720       case FLOOR_MOD_EXPR:
4721       /* We will come here only for signed operations.  */
4722         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4723           {
4724             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4725             int size = GET_MODE_BITSIZE (int_mode);
4726             unsigned HOST_WIDE_INT mh, ml;
4727             int pre_shift, lgup, post_shift;
4728             HOST_WIDE_INT d = INTVAL (op1);
4729
4730             if (d > 0)
4731               {
4732                 /* We could just as easily deal with negative constants here,
4733                    but it does not seem worth the trouble for GCC 2.6.  */
4734                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4735                   {
4736                     pre_shift = floor_log2 (d);
4737                     if (rem_flag)
4738                       {
4739                         unsigned HOST_WIDE_INT mask
4740                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4741                         remainder = expand_binop
4742                           (int_mode, and_optab, op0,
4743                            gen_int_mode (mask, int_mode),
4744                            remainder, 0, methods);
4745                         if (remainder)
4746                           return gen_lowpart (mode, remainder);
4747                       }
4748                     quotient = expand_shift
4749                       (RSHIFT_EXPR, int_mode, op0,
4750                        pre_shift, tquotient, 0);
4751                   }
4752                 else
4753                   {
4754                     rtx t1, t2, t3, t4;
4755
4756                     mh = choose_multiplier (d, size, size - 1,
4757                                             &ml, &post_shift, &lgup);
4758                     gcc_assert (!mh);
4759
4760                     if (post_shift < BITS_PER_WORD
4761                         && size - 1 < BITS_PER_WORD)
4762                       {
4763                         t1 = expand_shift
4764                           (RSHIFT_EXPR, int_mode, op0,
4765                            size - 1, NULL_RTX, 0);
4766                         t2 = expand_binop (int_mode, xor_optab, op0, t1,
4767                                            NULL_RTX, 0, OPTAB_WIDEN);
4768                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4769                                       + shift_cost (speed, int_mode, size - 1)
4770                                       + 2 * add_cost (speed, int_mode));
4771                         t3 = expmed_mult_highpart
4772                           (int_mode, t2, gen_int_mode (ml, int_mode),
4773                            NULL_RTX, 1, max_cost - extra_cost);
4774                         if (t3 != 0)
4775                           {
4776                             t4 = expand_shift
4777                               (RSHIFT_EXPR, int_mode, t3,
4778                                post_shift, NULL_RTX, 1);
4779                             quotient = expand_binop (int_mode, xor_optab,
4780                                                      t4, t1, tquotient, 0,
4781                                                      OPTAB_WIDEN);
4782                           }
4783                       }
4784                   }
4785               }
4786             else
4787               {
4788                 rtx nsign, t1, t2, t3, t4;
4789                 t1 = force_operand (gen_rtx_PLUS (int_mode,
4790                                                   op0, constm1_rtx), NULL_RTX);
4791                 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4792                                    0, OPTAB_WIDEN);
4793                 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4794                                       size - 1, NULL_RTX, 0);
4795                 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4796                                     NULL_RTX);
4797                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4798                                     NULL_RTX, 0);
4799                 if (t4)
4800                   {
4801                     rtx t5;
4802                     t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4803                                       NULL_RTX, 0);
4804                     quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4805                                               tquotient);
4806                   }
4807               }
4808           }
4809
4810         if (quotient != 0)
4811           break;
4812         delete_insns_since (last);
4813
4814         /* Try using an instruction that produces both the quotient and
4815            remainder, using truncation.  We can easily compensate the quotient
4816            or remainder to get floor rounding, once we have the remainder.
4817            Notice that we compute also the final remainder value here,
4818            and return the result right away.  */
4819         if (target == 0 || GET_MODE (target) != compute_mode)
4820           target = gen_reg_rtx (compute_mode);
4821
4822         if (rem_flag)
4823           {
4824             remainder
4825               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4826             quotient = gen_reg_rtx (compute_mode);
4827           }
4828         else
4829           {
4830             quotient
4831               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4832             remainder = gen_reg_rtx (compute_mode);
4833           }
4834
4835         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4836                                  quotient, remainder, 0))
4837           {
4838             /* This could be computed with a branch-less sequence.
4839                Save that for later.  */
4840             rtx tem;
4841             rtx_code_label *label = gen_label_rtx ();
4842             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4843             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4844                                 NULL_RTX, 0, OPTAB_WIDEN);
4845             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4846             expand_dec (quotient, const1_rtx);
4847             expand_inc (remainder, op1);
4848             emit_label (label);
4849             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4850           }
4851
4852         /* No luck with division elimination or divmod.  Have to do it
4853            by conditionally adjusting op0 *and* the result.  */
4854         {
4855           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4856           rtx adjusted_op0;
4857           rtx tem;
4858
4859           quotient = gen_reg_rtx (compute_mode);
4860           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4861           label1 = gen_label_rtx ();
4862           label2 = gen_label_rtx ();
4863           label3 = gen_label_rtx ();
4864           label4 = gen_label_rtx ();
4865           label5 = gen_label_rtx ();
4866           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4867           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4868           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4869                               quotient, 0, methods);
4870           if (tem != quotient)
4871             emit_move_insn (quotient, tem);
4872           emit_jump_insn (targetm.gen_jump (label5));
4873           emit_barrier ();
4874           emit_label (label1);
4875           expand_inc (adjusted_op0, const1_rtx);
4876           emit_jump_insn (targetm.gen_jump (label4));
4877           emit_barrier ();
4878           emit_label (label2);
4879           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4880           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4881                               quotient, 0, methods);
4882           if (tem != quotient)
4883             emit_move_insn (quotient, tem);
4884           emit_jump_insn (targetm.gen_jump (label5));
4885           emit_barrier ();
4886           emit_label (label3);
4887           expand_dec (adjusted_op0, const1_rtx);
4888           emit_label (label4);
4889           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4890                               quotient, 0, methods);
4891           if (tem != quotient)
4892             emit_move_insn (quotient, tem);
4893           expand_dec (quotient, const1_rtx);
4894           emit_label (label5);
4895         }
4896         break;
4897
4898       case CEIL_DIV_EXPR:
4899       case CEIL_MOD_EXPR:
4900         if (unsignedp)
4901           {
4902             if (op1_is_constant
4903                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4904                 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4905                     || INTVAL (op1) >= 0))
4906               {
4907                 scalar_int_mode int_mode
4908                   = as_a <scalar_int_mode> (compute_mode);
4909                 rtx t1, t2, t3;
4910                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4911                 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4912                                    floor_log2 (d), tquotient, 1);
4913                 t2 = expand_binop (int_mode, and_optab, op0,
4914                                    gen_int_mode (d - 1, int_mode),
4915                                    NULL_RTX, 1, methods);
4916                 t3 = gen_reg_rtx (int_mode);
4917                 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4918                 if (t3 == 0)
4919                   {
4920                     rtx_code_label *lab;
4921                     lab = gen_label_rtx ();
4922                     do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4923                     expand_inc (t1, const1_rtx);
4924                     emit_label (lab);
4925                     quotient = t1;
4926                   }
4927                 else
4928                   quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4929                                             tquotient);
4930                 break;
4931               }
4932
4933             /* Try using an instruction that produces both the quotient and
4934                remainder, using truncation.  We can easily compensate the
4935                quotient or remainder to get ceiling rounding, once we have the
4936                remainder.  Notice that we compute also the final remainder
4937                value here, and return the result right away.  */
4938             if (target == 0 || GET_MODE (target) != compute_mode)
4939               target = gen_reg_rtx (compute_mode);
4940
4941             if (rem_flag)
4942               {
4943                 remainder = (REG_P (target)
4944                              ? target : gen_reg_rtx (compute_mode));
4945                 quotient = gen_reg_rtx (compute_mode);
4946               }
4947             else
4948               {
4949                 quotient = (REG_P (target)
4950                             ? target : gen_reg_rtx (compute_mode));
4951                 remainder = gen_reg_rtx (compute_mode);
4952               }
4953
4954             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4955                                      remainder, 1))
4956               {
4957                 /* This could be computed with a branch-less sequence.
4958                    Save that for later.  */
4959                 rtx_code_label *label = gen_label_rtx ();
4960                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4961                                  compute_mode, label);
4962                 expand_inc (quotient, const1_rtx);
4963                 expand_dec (remainder, op1);
4964                 emit_label (label);
4965                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4966               }
4967
4968             /* No luck with division elimination or divmod.  Have to do it
4969                by conditionally adjusting op0 *and* the result.  */
4970             {
4971               rtx_code_label *label1, *label2;
4972               rtx adjusted_op0, tem;
4973
4974               quotient = gen_reg_rtx (compute_mode);
4975               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4976               label1 = gen_label_rtx ();
4977               label2 = gen_label_rtx ();
4978               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4979                                compute_mode, label1);
4980               emit_move_insn  (quotient, const0_rtx);
4981               emit_jump_insn (targetm.gen_jump (label2));
4982               emit_barrier ();
4983               emit_label (label1);
4984               expand_dec (adjusted_op0, const1_rtx);
4985               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4986                                   quotient, 1, methods);
4987               if (tem != quotient)
4988                 emit_move_insn (quotient, tem);
4989               expand_inc (quotient, const1_rtx);
4990               emit_label (label2);
4991             }
4992           }
4993         else /* signed */
4994           {
4995             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4996                 && INTVAL (op1) >= 0)
4997               {
4998                 /* This is extremely similar to the code for the unsigned case
4999                    above.  For 2.7 we should merge these variants, but for
5000                    2.6.1 I don't want to touch the code for unsigned since that
5001                    gets used in C.  The signed case will only be used by other
5002                    languages (Ada).  */
5003
5004                 rtx t1, t2, t3;
5005                 unsigned HOST_WIDE_INT d = INTVAL (op1);
5006                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5007                                    floor_log2 (d), tquotient, 0);
5008                 t2 = expand_binop (compute_mode, and_optab, op0,
5009                                    gen_int_mode (d - 1, compute_mode),
5010                                    NULL_RTX, 1, methods);
5011                 t3 = gen_reg_rtx (compute_mode);
5012                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5013                                       compute_mode, 1, 1);
5014                 if (t3 == 0)
5015                   {
5016                     rtx_code_label *lab;
5017                     lab = gen_label_rtx ();
5018                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5019                     expand_inc (t1, const1_rtx);
5020                     emit_label (lab);
5021                     quotient = t1;
5022                   }
5023                 else
5024                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
5025                                                           t1, t3),
5026                                             tquotient);
5027                 break;
5028               }
5029
5030             /* Try using an instruction that produces both the quotient and
5031                remainder, using truncation.  We can easily compensate the
5032                quotient or remainder to get ceiling rounding, once we have the
5033                remainder.  Notice that we compute also the final remainder
5034                value here, and return the result right away.  */
5035             if (target == 0 || GET_MODE (target) != compute_mode)
5036               target = gen_reg_rtx (compute_mode);
5037             if (rem_flag)
5038               {
5039                 remainder= (REG_P (target)
5040                             ? target : gen_reg_rtx (compute_mode));
5041                 quotient = gen_reg_rtx (compute_mode);
5042               }
5043             else
5044               {
5045                 quotient = (REG_P (target)
5046                             ? target : gen_reg_rtx (compute_mode));
5047                 remainder = gen_reg_rtx (compute_mode);
5048               }
5049
5050             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5051                                      remainder, 0))
5052               {
5053                 /* This could be computed with a branch-less sequence.
5054                    Save that for later.  */
5055                 rtx tem;
5056                 rtx_code_label *label = gen_label_rtx ();
5057                 do_cmp_and_jump (remainder, const0_rtx, EQ,
5058                                  compute_mode, label);
5059                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5060                                     NULL_RTX, 0, OPTAB_WIDEN);
5061                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5062                 expand_inc (quotient, const1_rtx);
5063                 expand_dec (remainder, op1);
5064                 emit_label (label);
5065                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5066               }
5067
5068             /* No luck with division elimination or divmod.  Have to do it
5069                by conditionally adjusting op0 *and* the result.  */
5070             {
5071               rtx_code_label *label1, *label2, *label3, *label4, *label5;
5072               rtx adjusted_op0;
5073               rtx tem;
5074
5075               quotient = gen_reg_rtx (compute_mode);
5076               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5077               label1 = gen_label_rtx ();
5078               label2 = gen_label_rtx ();
5079               label3 = gen_label_rtx ();
5080               label4 = gen_label_rtx ();
5081               label5 = gen_label_rtx ();
5082               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5083               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5084                                compute_mode, label1);
5085               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5086                                   quotient, 0, methods);
5087               if (tem != quotient)
5088                 emit_move_insn (quotient, tem);
5089               emit_jump_insn (targetm.gen_jump (label5));
5090               emit_barrier ();
5091               emit_label (label1);
5092               expand_dec (adjusted_op0, const1_rtx);
5093               emit_jump_insn (targetm.gen_jump (label4));
5094               emit_barrier ();
5095               emit_label (label2);
5096               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5097                                compute_mode, label3);
5098               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5099                                   quotient, 0, methods);
5100               if (tem != quotient)
5101                 emit_move_insn (quotient, tem);
5102               emit_jump_insn (targetm.gen_jump (label5));
5103               emit_barrier ();
5104               emit_label (label3);
5105               expand_inc (adjusted_op0, const1_rtx);
5106               emit_label (label4);
5107               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5108                                   quotient, 0, methods);
5109               if (tem != quotient)
5110                 emit_move_insn (quotient, tem);
5111               expand_inc (quotient, const1_rtx);
5112               emit_label (label5);
5113             }
5114           }
5115         break;
5116
5117       case EXACT_DIV_EXPR:
5118         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5119           {
5120             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5121             int size = GET_MODE_BITSIZE (int_mode);
5122             HOST_WIDE_INT d = INTVAL (op1);
5123             unsigned HOST_WIDE_INT ml;
5124             int pre_shift;
5125             rtx t1;
5126
5127             pre_shift = ctz_or_zero (d);
5128             ml = invert_mod2n (d >> pre_shift, size);
5129             t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5130                                pre_shift, NULL_RTX, unsignedp);
5131             quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5132                                     NULL_RTX, 1);
5133
5134             insn = get_last_insn ();
5135             set_dst_reg_note (insn, REG_EQUAL,
5136                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5137                                               int_mode, op0, op1),
5138                               quotient);
5139           }
5140         break;
5141
5142       case ROUND_DIV_EXPR:
5143       case ROUND_MOD_EXPR:
5144         if (unsignedp)
5145           {
5146             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5147             rtx tem;
5148             rtx_code_label *label;
5149             label = gen_label_rtx ();
5150             quotient = gen_reg_rtx (int_mode);
5151             remainder = gen_reg_rtx (int_mode);
5152             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5153               {
5154                 rtx tem;
5155                 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5156                                          quotient, 1, methods);
5157                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5158                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5159                                           remainder, 1, methods);
5160               }
5161             tem = plus_constant (int_mode, op1, -1);
5162             tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5163             do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5164             expand_inc (quotient, const1_rtx);
5165             expand_dec (remainder, op1);
5166             emit_label (label);
5167           }
5168         else
5169           {
5170             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5171             int size = GET_MODE_BITSIZE (int_mode);
5172             rtx abs_rem, abs_op1, tem, mask;
5173             rtx_code_label *label;
5174             label = gen_label_rtx ();
5175             quotient = gen_reg_rtx (int_mode);
5176             remainder = gen_reg_rtx (int_mode);
5177             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5178               {
5179                 rtx tem;
5180                 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5181                                          quotient, 0, methods);
5182                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5183                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5184                                           remainder, 0, methods);
5185               }
5186             abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5187             abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5188             tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5189                                 1, NULL_RTX, 1);
5190             do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5191             tem = expand_binop (int_mode, xor_optab, op0, op1,
5192                                 NULL_RTX, 0, OPTAB_WIDEN);
5193             mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5194                                  size - 1, NULL_RTX, 0);
5195             tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5196                                 NULL_RTX, 0, OPTAB_WIDEN);
5197             tem = expand_binop (int_mode, sub_optab, tem, mask,
5198                                 NULL_RTX, 0, OPTAB_WIDEN);
5199             expand_inc (quotient, tem);
5200             tem = expand_binop (int_mode, xor_optab, mask, op1,
5201                                 NULL_RTX, 0, OPTAB_WIDEN);
5202             tem = expand_binop (int_mode, sub_optab, tem, mask,
5203                                 NULL_RTX, 0, OPTAB_WIDEN);
5204             expand_dec (remainder, tem);
5205             emit_label (label);
5206           }
5207         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5208
5209       default:
5210         gcc_unreachable ();
5211       }
5212
5213   if (quotient == 0)
5214     {
5215       if (target && GET_MODE (target) != compute_mode)
5216         target = 0;
5217
5218       if (rem_flag)
5219         {
5220           /* Try to produce the remainder without producing the quotient.
5221              If we seem to have a divmod pattern that does not require widening,
5222              don't try widening here.  We should really have a WIDEN argument
5223              to expand_twoval_binop, since what we'd really like to do here is
5224              1) try a mod insn in compute_mode
5225              2) try a divmod insn in compute_mode
5226              3) try a div insn in compute_mode and multiply-subtract to get
5227                 remainder
5228              4) try the same things with widening allowed.  */
5229           remainder
5230             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5231                                  op0, op1, target,
5232                                  unsignedp,
5233                                  ((optab_handler (optab2, compute_mode)
5234                                    != CODE_FOR_nothing)
5235                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5236           if (remainder == 0)
5237             {
5238               /* No luck there.  Can we do remainder and divide at once
5239                  without a library call?  */
5240               remainder = gen_reg_rtx (compute_mode);
5241               if (! expand_twoval_binop ((unsignedp
5242                                           ? udivmod_optab
5243                                           : sdivmod_optab),
5244                                          op0, op1,
5245                                          NULL_RTX, remainder, unsignedp))
5246                 remainder = 0;
5247             }
5248
5249           if (remainder)
5250             return gen_lowpart (mode, remainder);
5251         }
5252
5253       /* Produce the quotient.  Try a quotient insn, but not a library call.
5254          If we have a divmod in this mode, use it in preference to widening
5255          the div (for this test we assume it will not fail). Note that optab2
5256          is set to the one of the two optabs that the call below will use.  */
5257       quotient
5258         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5259                              op0, op1, rem_flag ? NULL_RTX : target,
5260                              unsignedp,
5261                              ((optab_handler (optab2, compute_mode)
5262                                != CODE_FOR_nothing)
5263                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5264
5265       if (quotient == 0)
5266         {
5267           /* No luck there.  Try a quotient-and-remainder insn,
5268              keeping the quotient alone.  */
5269           quotient = gen_reg_rtx (compute_mode);
5270           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5271                                      op0, op1,
5272                                      quotient, NULL_RTX, unsignedp))
5273             {
5274               quotient = 0;
5275               if (! rem_flag)
5276                 /* Still no luck.  If we are not computing the remainder,
5277                    use a library call for the quotient.  */
5278                 quotient = sign_expand_binop (compute_mode,
5279                                               udiv_optab, sdiv_optab,
5280                                               op0, op1, target,
5281                                               unsignedp, methods);
5282             }
5283         }
5284     }
5285
5286   if (rem_flag)
5287     {
5288       if (target && GET_MODE (target) != compute_mode)
5289         target = 0;
5290
5291       if (quotient == 0)
5292         {
5293           /* No divide instruction either.  Use library for remainder.  */
5294           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5295                                          op0, op1, target,
5296                                          unsignedp, methods);
5297           /* No remainder function.  Try a quotient-and-remainder
5298              function, keeping the remainder.  */
5299           if (!remainder
5300               && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5301             {
5302               remainder = gen_reg_rtx (compute_mode);
5303               if (!expand_twoval_binop_libfunc
5304                   (unsignedp ? udivmod_optab : sdivmod_optab,
5305                    op0, op1,
5306                    NULL_RTX, remainder,
5307                    unsignedp ? UMOD : MOD))
5308                 remainder = NULL_RTX;
5309             }
5310         }
5311       else
5312         {
5313           /* We divided.  Now finish doing X - Y * (X / Y).  */
5314           remainder = expand_mult (compute_mode, quotient, op1,
5315                                    NULL_RTX, unsignedp);
5316           remainder = expand_binop (compute_mode, sub_optab, op0,
5317                                     remainder, target, unsignedp,
5318                                     methods);
5319         }
5320     }
5321
5322   if (methods != OPTAB_LIB_WIDEN
5323       && (rem_flag ? remainder : quotient) == NULL_RTX)
5324     return NULL_RTX;
5325
5326   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5327 }
5328 \f
5329 /* Return a tree node with data type TYPE, describing the value of X.
5330    Usually this is an VAR_DECL, if there is no obvious better choice.
5331    X may be an expression, however we only support those expressions
5332    generated by loop.c.  */
5333
5334 tree
5335 make_tree (tree type, rtx x)
5336 {
5337   tree t;
5338
5339   switch (GET_CODE (x))
5340     {
5341     case CONST_INT:
5342     case CONST_WIDE_INT:
5343       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5344       return t;
5345
5346     case CONST_DOUBLE:
5347       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5348       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5349         t = wide_int_to_tree (type,
5350                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5351                                                     HOST_BITS_PER_WIDE_INT * 2));
5352       else
5353         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5354
5355       return t;
5356
5357     case CONST_VECTOR:
5358       {
5359         unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5360         unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5361         tree itype = TREE_TYPE (type);
5362
5363         /* Build a tree with vector elements.  */
5364         tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5365         unsigned int count = elts.encoded_nelts ();
5366         for (unsigned int i = 0; i < count; ++i)
5367           {
5368             rtx elt = CONST_VECTOR_ELT (x, i);
5369             elts.quick_push (make_tree (itype, elt));
5370           }
5371
5372         return elts.build ();
5373       }
5374
5375     case PLUS:
5376       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5377                           make_tree (type, XEXP (x, 1)));
5378
5379     case MINUS:
5380       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5381                           make_tree (type, XEXP (x, 1)));
5382
5383     case NEG:
5384       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5385
5386     case MULT:
5387       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5388                           make_tree (type, XEXP (x, 1)));
5389
5390     case ASHIFT:
5391       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5392                           make_tree (type, XEXP (x, 1)));
5393
5394     case LSHIFTRT:
5395       t = unsigned_type_for (type);
5396       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5397                                          make_tree (t, XEXP (x, 0)),
5398                                          make_tree (type, XEXP (x, 1))));
5399
5400     case ASHIFTRT:
5401       t = signed_type_for (type);
5402       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5403                                          make_tree (t, XEXP (x, 0)),
5404                                          make_tree (type, XEXP (x, 1))));
5405
5406     case DIV:
5407       if (TREE_CODE (type) != REAL_TYPE)
5408         t = signed_type_for (type);
5409       else
5410         t = type;
5411
5412       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5413                                          make_tree (t, XEXP (x, 0)),
5414                                          make_tree (t, XEXP (x, 1))));
5415     case UDIV:
5416       t = unsigned_type_for (type);
5417       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5418                                          make_tree (t, XEXP (x, 0)),
5419                                          make_tree (t, XEXP (x, 1))));
5420
5421     case SIGN_EXTEND:
5422     case ZERO_EXTEND:
5423       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5424                                           GET_CODE (x) == ZERO_EXTEND);
5425       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5426
5427     case CONST:
5428       return make_tree (type, XEXP (x, 0));
5429
5430     case SYMBOL_REF:
5431       t = SYMBOL_REF_DECL (x);
5432       if (t)
5433         return fold_convert (type, build_fold_addr_expr (t));
5434       /* fall through.  */
5435
5436     default:
5437       if (CONST_POLY_INT_P (x))
5438         return wide_int_to_tree (t, const_poly_int_value (x));
5439
5440       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5441
5442       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5443          address mode to pointer mode.  */
5444       if (POINTER_TYPE_P (type))
5445         x = convert_memory_address_addr_space
5446           (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5447
5448       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5449          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5450       t->decl_with_rtl.rtl = x;
5451
5452       return t;
5453     }
5454 }
5455 \f
5456 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5457    and returning TARGET.
5458
5459    If TARGET is 0, a pseudo-register or constant is returned.  */
5460
5461 rtx
5462 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5463 {
5464   rtx tem = 0;
5465
5466   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5467     tem = simplify_binary_operation (AND, mode, op0, op1);
5468   if (tem == 0)
5469     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5470
5471   if (target == 0)
5472     target = tem;
5473   else if (tem != target)
5474     emit_move_insn (target, tem);
5475   return target;
5476 }
5477
5478 /* Helper function for emit_store_flag.  */
5479 rtx
5480 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5481              machine_mode mode, machine_mode compare_mode,
5482              int unsignedp, rtx x, rtx y, int normalizep,
5483              machine_mode target_mode)
5484 {
5485   class expand_operand ops[4];
5486   rtx op0, comparison, subtarget;
5487   rtx_insn *last;
5488   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5489   scalar_int_mode int_target_mode;
5490
5491   last = get_last_insn ();
5492   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5493   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5494   if (!x || !y)
5495     {
5496       delete_insns_since (last);
5497       return NULL_RTX;
5498     }
5499
5500   if (target_mode == VOIDmode)
5501     int_target_mode = result_mode;
5502   else
5503     int_target_mode = as_a <scalar_int_mode> (target_mode);
5504   if (!target)
5505     target = gen_reg_rtx (int_target_mode);
5506
5507   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5508
5509   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5510   create_fixed_operand (&ops[1], comparison);
5511   create_fixed_operand (&ops[2], x);
5512   create_fixed_operand (&ops[3], y);
5513   if (!maybe_expand_insn (icode, 4, ops))
5514     {
5515       delete_insns_since (last);
5516       return NULL_RTX;
5517     }
5518   subtarget = ops[0].value;
5519
5520   /* If we are converting to a wider mode, first convert to
5521      INT_TARGET_MODE, then normalize.  This produces better combining
5522      opportunities on machines that have a SIGN_EXTRACT when we are
5523      testing a single bit.  This mostly benefits the 68k.
5524
5525      If STORE_FLAG_VALUE does not have the sign bit set when
5526      interpreted in MODE, we can do this conversion as unsigned, which
5527      is usually more efficient.  */
5528   if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5529     {
5530       gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5531                   || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5532
5533       bool unsignedp = (STORE_FLAG_VALUE >= 0);
5534       convert_move (target, subtarget, unsignedp);
5535
5536       op0 = target;
5537       result_mode = int_target_mode;
5538     }
5539   else
5540     op0 = subtarget;
5541
5542   /* If we want to keep subexpressions around, don't reuse our last
5543      target.  */
5544   if (optimize)
5545     subtarget = 0;
5546
5547   /* Now normalize to the proper value in MODE.  Sometimes we don't
5548      have to do anything.  */
5549   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5550     ;
5551   /* STORE_FLAG_VALUE might be the most negative number, so write
5552      the comparison this way to avoid a compiler-time warning.  */
5553   else if (- normalizep == STORE_FLAG_VALUE)
5554     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5555
5556   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5557      it hard to use a value of just the sign bit due to ANSI integer
5558      constant typing rules.  */
5559   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5560     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5561                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5562                         normalizep == 1);
5563   else
5564     {
5565       gcc_assert (STORE_FLAG_VALUE & 1);
5566
5567       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5568       if (normalizep == -1)
5569         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5570     }
5571
5572   /* If we were converting to a smaller mode, do the conversion now.  */
5573   if (int_target_mode != result_mode)
5574     {
5575       convert_move (target, op0, 0);
5576       return target;
5577     }
5578   else
5579     return op0;
5580 }
5581
5582
5583 /* A subroutine of emit_store_flag only including "tricks" that do not
5584    need a recursive call.  These are kept separate to avoid infinite
5585    loops.  */
5586
5587 static rtx
5588 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5589                    machine_mode mode, int unsignedp, int normalizep,
5590                    machine_mode target_mode)
5591 {
5592   rtx subtarget;
5593   enum insn_code icode;
5594   machine_mode compare_mode;
5595   enum mode_class mclass;
5596   enum rtx_code scode;
5597
5598   if (unsignedp)
5599     code = unsigned_condition (code);
5600   scode = swap_condition (code);
5601
5602   /* If one operand is constant, make it the second one.  Only do this
5603      if the other operand is not constant as well.  */
5604
5605   if (swap_commutative_operands_p (op0, op1))
5606     {
5607       std::swap (op0, op1);
5608       code = swap_condition (code);
5609     }
5610
5611   if (mode == VOIDmode)
5612     mode = GET_MODE (op0);
5613
5614   if (CONST_SCALAR_INT_P (op1))
5615     canonicalize_comparison (mode, &code, &op1);
5616
5617   /* For some comparisons with 1 and -1, we can convert this to
5618      comparisons with zero.  This will often produce more opportunities for
5619      store-flag insns.  */
5620
5621   switch (code)
5622     {
5623     case LT:
5624       if (op1 == const1_rtx)
5625         op1 = const0_rtx, code = LE;
5626       break;
5627     case LE:
5628       if (op1 == constm1_rtx)
5629         op1 = const0_rtx, code = LT;
5630       break;
5631     case GE:
5632       if (op1 == const1_rtx)
5633         op1 = const0_rtx, code = GT;
5634       break;
5635     case GT:
5636       if (op1 == constm1_rtx)
5637         op1 = const0_rtx, code = GE;
5638       break;
5639     case GEU:
5640       if (op1 == const1_rtx)
5641         op1 = const0_rtx, code = NE;
5642       break;
5643     case LTU:
5644       if (op1 == const1_rtx)
5645         op1 = const0_rtx, code = EQ;
5646       break;
5647     default:
5648       break;
5649     }
5650
5651   /* If we are comparing a double-word integer with zero or -1, we can
5652      convert the comparison into one involving a single word.  */
5653   scalar_int_mode int_mode;
5654   if (is_int_mode (mode, &int_mode)
5655       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5656       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5657     {
5658       rtx tem;
5659       if ((code == EQ || code == NE)
5660           && (op1 == const0_rtx || op1 == constm1_rtx))
5661         {
5662           rtx op00, op01;
5663
5664           /* Do a logical OR or AND of the two words and compare the
5665              result.  */
5666           op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5667           op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5668           tem = expand_binop (word_mode,
5669                               op1 == const0_rtx ? ior_optab : and_optab,
5670                               op00, op01, NULL_RTX, unsignedp,
5671                               OPTAB_DIRECT);
5672
5673           if (tem != 0)
5674             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5675                                    unsignedp, normalizep);
5676         }
5677       else if ((code == LT || code == GE) && op1 == const0_rtx)
5678         {
5679           rtx op0h;
5680
5681           /* If testing the sign bit, can just test on high word.  */
5682           op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5683                                       subreg_highpart_offset (word_mode,
5684                                                               int_mode));
5685           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5686                                  unsignedp, normalizep);
5687         }
5688       else
5689         tem = NULL_RTX;
5690
5691       if (tem)
5692         {
5693           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5694             return tem;
5695           if (!target)
5696             target = gen_reg_rtx (target_mode);
5697
5698           convert_move (target, tem,
5699                         !val_signbit_known_set_p (word_mode,
5700                                                   (normalizep ? normalizep
5701                                                    : STORE_FLAG_VALUE)));
5702           return target;
5703         }
5704     }
5705
5706   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5707      complement of A (for GE) and shifting the sign bit to the low bit.  */
5708   if (op1 == const0_rtx && (code == LT || code == GE)
5709       && is_int_mode (mode, &int_mode)
5710       && (normalizep || STORE_FLAG_VALUE == 1
5711           || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5712     {
5713       scalar_int_mode int_target_mode;
5714       subtarget = target;
5715
5716       if (!target)
5717         int_target_mode = int_mode;
5718       else
5719         {
5720           /* If the result is to be wider than OP0, it is best to convert it
5721              first.  If it is to be narrower, it is *incorrect* to convert it
5722              first.  */
5723           int_target_mode = as_a <scalar_int_mode> (target_mode);
5724           if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5725             {
5726               op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5727               int_mode = int_target_mode;
5728             }
5729         }
5730
5731       if (int_target_mode != int_mode)
5732         subtarget = 0;
5733
5734       if (code == GE)
5735         op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5736                            ((STORE_FLAG_VALUE == 1 || normalizep)
5737                             ? 0 : subtarget), 0);
5738
5739       if (STORE_FLAG_VALUE == 1 || normalizep)
5740         /* If we are supposed to produce a 0/1 value, we want to do
5741            a logical shift from the sign bit to the low-order bit; for
5742            a -1/0 value, we do an arithmetic shift.  */
5743         op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5744                             GET_MODE_BITSIZE (int_mode) - 1,
5745                             subtarget, normalizep != -1);
5746
5747       if (int_mode != int_target_mode)
5748         op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5749
5750       return op0;
5751     }
5752
5753   mclass = GET_MODE_CLASS (mode);
5754   FOR_EACH_MODE_FROM (compare_mode, mode)
5755     {
5756      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5757      icode = optab_handler (cstore_optab, optab_mode);
5758      if (icode != CODE_FOR_nothing)
5759         {
5760           do_pending_stack_adjust ();
5761           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5762                                  unsignedp, op0, op1, normalizep, target_mode);
5763           if (tem)
5764             return tem;
5765
5766           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5767             {
5768               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5769                                  unsignedp, op1, op0, normalizep, target_mode);
5770               if (tem)
5771                 return tem;
5772             }
5773           break;
5774         }
5775     }
5776
5777   return 0;
5778 }
5779
5780 /* Subroutine of emit_store_flag that handles cases in which the operands
5781    are scalar integers.  SUBTARGET is the target to use for temporary
5782    operations and TRUEVAL is the value to store when the condition is
5783    true.  All other arguments are as for emit_store_flag.  */
5784
5785 rtx
5786 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5787                      rtx op1, scalar_int_mode mode, int unsignedp,
5788                      int normalizep, rtx trueval)
5789 {
5790   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5791   rtx_insn *last = get_last_insn ();
5792
5793   /* If this is an equality comparison of integers, we can try to exclusive-or
5794      (or subtract) the two operands and use a recursive call to try the
5795      comparison with zero.  Don't do any of these cases if branches are
5796      very cheap.  */
5797
5798   if ((code == EQ || code == NE) && op1 != const0_rtx)
5799     {
5800       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5801                               OPTAB_WIDEN);
5802
5803       if (tem == 0)
5804         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5805                             OPTAB_WIDEN);
5806       if (tem != 0)
5807         tem = emit_store_flag (target, code, tem, const0_rtx,
5808                                mode, unsignedp, normalizep);
5809       if (tem != 0)
5810         return tem;
5811
5812       delete_insns_since (last);
5813     }
5814
5815   /* For integer comparisons, try the reverse comparison.  However, for
5816      small X and if we'd have anyway to extend, implementing "X != 0"
5817      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5818   rtx_code rcode = reverse_condition (code);
5819   if (can_compare_p (rcode, mode, ccp_store_flag)
5820       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5821             && code == NE
5822             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5823             && op1 == const0_rtx))
5824     {
5825       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5826                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5827
5828       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5829       if (want_add
5830           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5831                        optimize_insn_for_speed_p ()) == 0)
5832         {
5833           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5834                                        STORE_FLAG_VALUE, target_mode);
5835           if (tem != 0)
5836             tem = expand_binop (target_mode, add_optab, tem,
5837                                 gen_int_mode (normalizep, target_mode),
5838                                 target, 0, OPTAB_WIDEN);
5839           if (tem != 0)
5840             return tem;
5841         }
5842       else if (!want_add
5843                && rtx_cost (trueval, mode, XOR, 1,
5844                             optimize_insn_for_speed_p ()) == 0)
5845         {
5846           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5847                                        normalizep, target_mode);
5848           if (tem != 0)
5849             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5850                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5851           if (tem != 0)
5852             return tem;
5853         }
5854
5855       delete_insns_since (last);
5856     }
5857
5858   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5859      the constant zero.  Reject all other comparisons at this point.  Only
5860      do LE and GT if branches are expensive since they are expensive on
5861      2-operand machines.  */
5862
5863   if (op1 != const0_rtx
5864       || (code != EQ && code != NE
5865           && (BRANCH_COST (optimize_insn_for_speed_p (),
5866                            false) <= 1 || (code != LE && code != GT))))
5867     return 0;
5868
5869   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5870      do the necessary operation below.  */
5871
5872   rtx tem = 0;
5873
5874   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5875      the sign bit set.  */
5876
5877   if (code == LE)
5878     {
5879       /* This is destructive, so SUBTARGET can't be OP0.  */
5880       if (rtx_equal_p (subtarget, op0))
5881         subtarget = 0;
5882
5883       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5884                           OPTAB_WIDEN);
5885       if (tem)
5886         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5887                             OPTAB_WIDEN);
5888     }
5889
5890   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5891      number of bits in the mode of OP0, minus one.  */
5892
5893   if (code == GT)
5894     {
5895       if (rtx_equal_p (subtarget, op0))
5896         subtarget = 0;
5897
5898       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5899                                 GET_MODE_BITSIZE (mode) - 1,
5900                                 subtarget, 0);
5901       if (tem)
5902         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5903                             OPTAB_WIDEN);
5904     }
5905
5906   if (code == EQ || code == NE)
5907     {
5908       /* For EQ or NE, one way to do the comparison is to apply an operation
5909          that converts the operand into a positive number if it is nonzero
5910          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5911          for NE we negate.  This puts the result in the sign bit.  Then we
5912          normalize with a shift, if needed.
5913
5914          Two operations that can do the above actions are ABS and FFS, so try
5915          them.  If that doesn't work, and MODE is smaller than a full word,
5916          we can use zero-extension to the wider mode (an unsigned conversion)
5917          as the operation.  */
5918
5919       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5920          that is compensated by the subsequent overflow when subtracting
5921          one / negating.  */
5922
5923       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5924         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5925       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5926         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5927       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5928         {
5929           tem = convert_modes (word_mode, mode, op0, 1);
5930           mode = word_mode;
5931         }
5932
5933       if (tem != 0)
5934         {
5935           if (code == EQ)
5936             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5937                                 0, OPTAB_WIDEN);
5938           else
5939             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5940         }
5941
5942       /* If we couldn't do it that way, for NE we can "or" the two's complement
5943          of the value with itself.  For EQ, we take the one's complement of
5944          that "or", which is an extra insn, so we only handle EQ if branches
5945          are expensive.  */
5946
5947       if (tem == 0
5948           && (code == NE
5949               || BRANCH_COST (optimize_insn_for_speed_p (),
5950                               false) > 1))
5951         {
5952           if (rtx_equal_p (subtarget, op0))
5953             subtarget = 0;
5954
5955           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5956           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5957                               OPTAB_WIDEN);
5958
5959           if (tem && code == EQ)
5960             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5961         }
5962     }
5963
5964   if (tem && normalizep)
5965     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5966                               GET_MODE_BITSIZE (mode) - 1,
5967                               subtarget, normalizep == 1);
5968
5969   if (tem)
5970     {
5971       if (!target)
5972         ;
5973       else if (GET_MODE (tem) != target_mode)
5974         {
5975           convert_move (target, tem, 0);
5976           tem = target;
5977         }
5978       else if (!subtarget)
5979         {
5980           emit_move_insn (target, tem);
5981           tem = target;
5982         }
5983     }
5984   else
5985     delete_insns_since (last);
5986
5987   return tem;
5988 }
5989
5990 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5991    and storing in TARGET.  Normally return TARGET.
5992    Return 0 if that cannot be done.
5993
5994    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5995    it is VOIDmode, they cannot both be CONST_INT.
5996
5997    UNSIGNEDP is for the case where we have to widen the operands
5998    to perform the operation.  It says to use zero-extension.
5999
6000    NORMALIZEP is 1 if we should convert the result to be either zero
6001    or one.  NORMALIZEP is -1 if we should convert the result to be
6002    either zero or -1.  If NORMALIZEP is zero, the result will be left
6003    "raw" out of the scc insn.

        TARGET may be null; the target mode is then taken to be VOIDmode.

        Strategy: 0 is returned at once when both operands are constant.
        Otherwise emit_store_flag_1 is tried with the arguments as given.
        If that fails and BRANCH_COST is nonzero, MODE_FLOAT modes try the
        reversed comparison and then a split into two comparisons joined
        by a conditional move, while integer modes are handed to
        emit_store_flag_int.  Any other mode yields 0.  */
6004
6005 rtx
6006 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6007                  machine_mode mode, int unsignedp, int normalizep)
6008 {
6009   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6010   enum rtx_code rcode;
6011   rtx subtarget;
6012   rtx tem, trueval;
6013   rtx_insn *last;
6014
6015   /* If we compare constants, we shouldn't use a store-flag operation,
6016      but a constant load.  We can get there via the vanilla route that
6017      usually generates a compare-branch sequence, but will in this case
6018      fold the comparison to a constant, and thus elide the branch.  */
6019   if (CONSTANT_P (op0) && CONSTANT_P (op1))
6020     return NULL_RTX;
6021
6022   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6023                            target_mode);
6024   if (tem)
6025     return tem;
6026
6027   /* If we reached here, we can't do this with a scc insn, however there
6028      are some comparisons that can be done in other ways.  Don't do any
6029      of these cases if branches are very cheap.  */
6030   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6031     return 0;
6032
6033   /* See what we need to return.  We can only return a 1, -1, or the
6034      sign bit.  */
6035
6036   if (normalizep == 0)
6037     {
6038       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6039         normalizep = STORE_FLAG_VALUE;
6040
6041       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6042         ;
6043       else
6044         return 0;
6045     }
6046
       /* Remember the current end of the insn stream so that a failed
          attempt below can be rolled back with delete_insns_since.  */
6047   last = get_last_insn ();
6048
6049   /* If optimizing, use different pseudo registers for each insn, instead
6050      of reusing the same pseudo.  This leads to better CSE, but slows
6051      down the compiler, since there are more pseudos.  */
6052   subtarget = (!optimize
6053                && (target_mode == mode)) ? target : NULL_RTX;
6054   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6055
6056   /* For floating-point comparisons, try the reverse comparison or try
6057      changing the "orderedness" of the comparison.  */
6058   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6059     {
6060       enum rtx_code first_code;
6061       bool and_them;
6062
6063       rcode = reverse_condition_maybe_unordered (code);
6064       if (can_compare_p (rcode, mode, ccp_store_flag)
6065           && (code == ORDERED || code == UNORDERED
6066               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6067               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6068         {
               /* The reversed comparison produces its true value exactly
                  when CODE is false.  If the value wanted for CODE is the
                  negation of STORE_FLAG_VALUE, adding NORMALIZEP to the
                  STORE_FLAG_VALUE-normalized reversed result turns
                  STORE_FLAG_VALUE into 0 and 0 into NORMALIZEP.  Otherwise
                  XORing the reversed result with TRUEVAL flips it.  */
6069           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6070                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6071
6072           /* For the reverse comparison, use either an addition or a XOR.  */
6073           if (want_add
6074               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6075                            optimize_insn_for_speed_p ()) == 0)
6076             {
6077               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6078                                        STORE_FLAG_VALUE, target_mode);
6079               if (tem)
6080                 return expand_binop (target_mode, add_optab, tem,
6081                                      gen_int_mode (normalizep, target_mode),
6082                                      target, 0, OPTAB_WIDEN);
6083             }
6084           else if (!want_add
6085                    && rtx_cost (trueval, mode, XOR, 1,
6086                                 optimize_insn_for_speed_p ()) == 0)
6087             {
6088               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6089                                        normalizep, target_mode);
6090               if (tem)
6091                 return expand_binop (target_mode, xor_optab, tem, trueval,
6092                                      target, INTVAL (trueval) >= 0,
6093                                      OPTAB_WIDEN);
6094             }
6095         }
6096
           /* Discard anything emitted by the reversed-comparison attempt.  */
6097       delete_insns_since (last);
6098
6099       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6100       if (code == ORDERED || code == UNORDERED)
6101         return 0;
6102
           /* Split CODE into FIRST_CODE plus a second comparison, which
              replaces CODE from here on.  AND_THEM records whether the
              two pieces are combined with AND rather than OR (NOTE(review):
              inferred from the name and the assert below -- confirm
              against split_comparison).  */
6103       and_them = split_comparison (code, mode, &first_code, &code);
6104
6105       /* If there are no NaNs, the first comparison should always fall through.
6106          Effectively change the comparison to the other one.  */
6107       if (!HONOR_NANS (mode))
6108         {
6109           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6110           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6111                                     target_mode);
6112         }
6113
6114       if (!HAVE_conditional_move)
6115         return 0;
6116
6117       /* Do not turn a trapping comparison into a non-trapping one.  */
6118       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6119           && flag_trapping_math)
6120         return 0;
6121
6122       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6123          conditional move.  */
6124       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6125                                normalizep, target_mode);
6126       if (tem == 0)
6127         return 0;
6128
           /* The conditional move on the second comparison is given TEM and
              zero as its two source values when AND_THEM, and TRUEVAL and
              TEM otherwise.  */
6129       if (and_them)
6130         tem = emit_conditional_move (target, code, op0, op1, mode,
6131                                      tem, const0_rtx, GET_MODE (tem), 0);
6132       else
6133         tem = emit_conditional_move (target, code, op0, op1, mode,
6134                                      trueval, tem, GET_MODE (tem), 0);
6135
6136       if (tem == 0)
6137         delete_insns_since (last);
6138       return tem;
6139     }
6140
6141   /* The remaining tricks only apply to integer comparisons.  */
6142
6143   scalar_int_mode int_mode;
6144   if (is_int_mode (mode, &int_mode))
6145     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6146                                 unsignedp, normalizep, trueval);
6147
6148   return 0;
6149 }
6150
6151 /* Like emit_store_flag, but always succeeds.

        The arguments are those of emit_store_flag.  emit_store_flag is
        tried first; if it fails, the flag is materialized with an explicit
        set/compare/jump/set sequence.

        In the fallback, a null TARGET is replaced by a fresh word_mode
        pseudo, and a TARGET that is not a REG or that is mentioned in OP0
        or OP1 is replaced by a fresh pseudo of the same mode, so the rtx
        returned is not necessarily TARGET.  When NORMALIZEP is zero the
        fallback stores const1_rtx as the true value.  */
6152
6153 rtx
6154 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6155                        machine_mode mode, int unsignedp, int normalizep)
6156 {
6157   rtx tem;
6158   rtx_code_label *label;
6159   rtx trueval, falseval;
6160
6161   /* First see if emit_store_flag can do the job.  */
6162   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6163   if (tem != 0)
6164     return tem;
6165
6166   /* If one operand is constant, make it the second one.  Only do this
6167      if the other operand is not constant as well.  */
6168   if (swap_commutative_operands_p (op0, op1))
6169     {
6170       std::swap (op0, op1);
6171       code = swap_condition (code);
6172     }
6173
6174   if (mode == VOIDmode)
6175     mode = GET_MODE (op0);
6176
6177   if (!target)
6178     target = gen_reg_rtx (word_mode);
6179
6180   /* If this failed, we have to do this with set/compare/jump/set code.
6181      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6182   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6183   if (code == NE
6184       && GET_MODE_CLASS (mode) == MODE_INT
6185       && REG_P (target)
6186       && op0 == target
6187       && op1 == const0_rtx)
6188     {
           /* TARGET already holds the right value (zero) when the test
              fails, so jump over the store of TRUEVAL if TARGET == 0.  */
6189       label = gen_label_rtx ();
6190       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6191                                NULL_RTX, NULL, label,
6192                                profile_probability::uninitialized ());
6193       emit_move_insn (target, trueval);
6194       emit_label (label);
6195       return target;
6196     }
6197
       /* TARGET is written before the comparison is emitted, so it must be
          a register that neither operand refers to.  */
6198   if (!REG_P (target)
6199       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6200     target = gen_reg_rtx (GET_MODE (target));
6201
6202   /* Jump in the right direction if the target cannot implement CODE
6203      but can jump on its reverse condition.  */
6204   falseval = const0_rtx;
6205   if (! can_compare_p (code, mode, ccp_jump)
6206       && (! FLOAT_MODE_P (mode)
6207           || code == ORDERED || code == UNORDERED
6208           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6209           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6210     {
6211       enum rtx_code rcode;
6212       if (FLOAT_MODE_P (mode))
6213         rcode = reverse_condition_maybe_unordered (code);
6214       else
6215         rcode = reverse_condition (code);
6216
6217       /* Canonicalize to UNORDERED for the libcall.  */
6218       if (can_compare_p (rcode, mode, ccp_jump)
6219           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6220         {
               /* Testing the reversed condition, so exchange the values
                  stored on the two paths.  */
6221           falseval = trueval;
6222           trueval = const0_rtx;
6223           code = rcode;
6224         }
6225     }
6226
       /* Preload TARGET with TRUEVAL, then jump past the store of FALSEVAL
          when the comparison holds.  */
6227   emit_move_insn (target, trueval);
6228   label = gen_label_rtx ();
6229   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6230                            label, profile_probability::uninitialized ());
6231
6232   emit_move_insn (target, falseval);
6233   emit_label (label);
6234
6235   return target;
6236 }
6237
6238 /* Helper function for canonicalize_comparison.  Swap between inclusive
6239    and exclusive ranges in order to create an equivalent comparison.  See
6240    canonicalize_comparison for the possible cases.

        CODE is mapped GT <-> GE and LT <-> LE, and likewise GTU <-> GEU and
        LTU <-> LEU.  Any other code is returned unchanged.  The returned
        code is only equivalent to CODE once the caller has also adjusted
        the immediate operand by one in the matching direction.  */
6241
6242 static enum rtx_code
6243 equivalent_cmp_code (enum rtx_code code)
6244 {
6245   switch (code)
6246     {
6247     case GT:
6248       return GE;
6249     case GE:
6250       return GT;
6251     case LT:
6252       return LE;
6253     case LE:
6254       return LT;
6255     case GTU:
6256       return GEU;
6257     case GEU:
6258       return GTU;
6259     case LTU:
6260       return LEU;
6261     case LEU:
6262       return LTU;
6263
6264     default:
6265       return code;
6266     }
6267 }
6268
6269 /* Choose the more appropriate immediate in scalar integer comparisons.  The
6270    purpose of this is to end up with an immediate which can be loaded into a
6271    register in fewer moves, if possible.
6272
6273    For each integer comparison there exists an equivalent choice:
6274      i)   a >  b or a >= b + 1
6275      ii)  a <= b or a <  b + 1
6276      iii) a >= b or a >  b - 1
6277      iv)  a <  b or a <= b - 1
6278
6279    MODE is the mode of the first operand.
6280    CODE points to the comparison code.
6281    IMM points to the rtx containing the immediate.  *IMM must satisfy
6282    CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6283    on exit.

        *CODE and *IMM are left untouched when MODE is not a scalar integer
        mode, when *CODE is not one of the eight signed or unsigned ordering
        comparisons, when adjusting the immediate by one would overflow or
        wrap, when pseudos can no longer be created, or when the adjusted
        immediate is not strictly cheaper to move into a register.  */
6284
6285 void
6286 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6287 {
6288   if (!SCALAR_INT_MODE_P (mode))
6289     return;
6290
6291   int to_add = 0;
6292   enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6293
6294   /* Extract the immediate value from the rtx.  */
6295   wide_int imm_val = rtx_mode_t (*imm, mode);
6296
       /* Cases i) and ii) above move the immediate up by one, cases iii)
          and iv) move it down by one.  */
6297   if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6298     to_add = 1;
6299   else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6300     to_add = -1;
6301   else
6302     return;
6303
6304   /* Check for overflow/underflow in the case of signed values and
6305      wrapping around in the case of unsigned values.  If any occur
6306      cancel the optimization.  */
6307   wi::overflow_type overflow = wi::OVF_NONE;
6308   wide_int imm_modif;
6309
6310   if (to_add == 1)
6311     imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6312   else
6313     imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6314
6315   if (overflow)
6316     return;
6317
6318   /* The following creates a pseudo; if we cannot do that, bail out.  */
6319   if (!can_create_pseudo_p ())
6320     return;
6321
       /* Build a move of each candidate immediate into the same scratch
          register, purely so that the two moves can be costed against each
          other.  NOTE(review): the `true' passed to insn_cost presumably
          selects speed costing -- confirm against insn_cost.  */
6322   rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6323   rtx new_imm = immed_wide_int_const (imm_modif, mode);
6324
6325   rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6326   rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6327
6328   /* Update the immediate and the code.  */
6329   if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6330     {
6331       *code = equivalent_cmp_code (*code);
6332       *imm = new_imm;
6333     }
6334 }
6335
6336
6337 \f
6338 /* Perform possibly multi-word comparison and conditional jump to LABEL
6339    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6340    now a thin wrapper around do_compare_rtx_and_jump.

        The comparison is treated as unsigned exactly when OP is LTU, LEU,
        GTU or GEU.  Only LABEL is passed as a jump destination, and the
        branch probability is left uninitialized.  */
6341
6342 static void
6343 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6344                  rtx_code_label *label)
6345 {
6346   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6347   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6348                            NULL, label, profile_probability::uninitialized ());
6349 }