gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2021 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* Work around tree-optimization/91825.  */
  22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "backend.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "predict.h"
  32 #include "memmodel.h"
  33 #include "tm_p.h"
  34 #include "optabs.h"
  35 #include "expmed.h"
  36 #include "regs.h"
  37 #include "emit-rtl.h"
  38 #include "diagnostic-core.h"
  39 #include "fold-const.h"
  40 #include "stor-layout.h"
  41 #include "dojump.h"
  42 #include "explow.h"
  43 #include "expr.h"
  44 #include "langhooks.h"
  45 #include "tree-vector-builder.h"
  46
  47 struct target_expmed default_target_expmed;
  48 #if SWITCHABLE_TARGET
  49 struct target_expmed *this_target_expmed = &default_target_expmed;
  50 #endif
  51
  52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
  53                                       unsigned HOST_WIDE_INT,
  54                                       unsigned HOST_WIDE_INT,
  55                                       poly_uint64, poly_uint64,
  56                                       machine_mode, rtx, bool, bool);
  57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    poly_uint64, poly_uint64,
  61                                    rtx, scalar_int_mode, bool);
  62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
  63                                      unsigned HOST_WIDE_INT,
  64                                      unsigned HOST_WIDE_INT,
  65                                      rtx, scalar_int_mode, bool);
  66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
  67                                    unsigned HOST_WIDE_INT,
  68                                    unsigned HOST_WIDE_INT,
  69                                    poly_uint64, poly_uint64,
  70                                    rtx, scalar_int_mode, bool);
  71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
  72                                        unsigned HOST_WIDE_INT,
  73                                        unsigned HOST_WIDE_INT, int, rtx,
  74                                        machine_mode, machine_mode, bool, bool);
  75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
  76                                     unsigned HOST_WIDE_INT,
  77                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
  79                                       unsigned HOST_WIDE_INT,
  80                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
  83                                     unsigned HOST_WIDE_INT,
  84                                     unsigned HOST_WIDE_INT, int, bool);
  85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  88
  89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  90    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  91    The mask is truncated if necessary to the width of mode MODE.  The
  92    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  93
  94 static inline rtx
  95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
  96 {
  97   return immed_wide_int_const
  98     (wi::shifted_mask (bitpos, bitsize, complement,
  99                        GET_MODE_PRECISION (mode)), mode);
 100 }
 101
 102 /* Test whether a value is zero of a power of two.  */
 103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
 104   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
 105
 106 struct init_expmed_rtl
 107 {
 108   rtx reg;
 109   rtx plus;
 110   rtx neg;
 111   rtx mult;
 112   rtx sdiv;
 113   rtx udiv;
 114   rtx sdiv_32;
 115   rtx smod_32;
 116   rtx wide_mult;
 117   rtx wide_lshr;
 118   rtx wide_trunc;
 119   rtx shift;
 120   rtx shift_mult;
 121   rtx shift_add;
 122   rtx shift_sub0;
 123   rtx shift_sub1;
 124   rtx zext;
 125   rtx trunc;
 126
 127   rtx pow2[MAX_BITS_PER_WORD];
 128   rtx cint[MAX_BITS_PER_WORD];
 129 };
 130
 131 static void
 132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
 133                       scalar_int_mode from_mode, bool speed)
 134 {
 135   int to_size, from_size;
 136   rtx which;
 137
 138   to_size = GET_MODE_PRECISION (to_mode);
 139   from_size = GET_MODE_PRECISION (from_mode);
 140
 141   /* Most partial integers have a precision less than the "full"
 142      integer it requires for storage.  In case one doesn't, for
 143      comparison purposes here, reduce the bit size by one in that
 144      case.  */
 145   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 146       && pow2p_hwi (to_size))
 147     to_size --;
 148   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 149       && pow2p_hwi (from_size))
 150     from_size --;
 151
 152   /* Assume cost of zero-extend and sign-extend is the same.  */
 153   which = (to_size < from_size ? all->trunc : all->zext);
 154
 155   PUT_MODE (all->reg, from_mode);
 156   set_convert_cost (to_mode, from_mode, speed,
 157                     set_src_cost (which, to_mode, speed));
 158   /* Restore all->reg's mode.  */
 159   PUT_MODE (all->reg, to_mode);
 160 }
 161
 162 static void
 163 init_expmed_one_mode (struct init_expmed_rtl *all,
 164                       machine_mode mode, int speed)
 165 {
 166   int m, n, mode_bitsize;
 167   machine_mode mode_from;
 168
 169   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 170
 171   PUT_MODE (all->reg, mode);
 172   PUT_MODE (all->plus, mode);
 173   PUT_MODE (all->neg, mode);
 174   PUT_MODE (all->mult, mode);
 175   PUT_MODE (all->sdiv, mode);
 176   PUT_MODE (all->udiv, mode);
 177   PUT_MODE (all->sdiv_32, mode);
 178   PUT_MODE (all->smod_32, mode);
 179   PUT_MODE (all->wide_trunc, mode);
 180   PUT_MODE (all->shift, mode);
 181   PUT_MODE (all->shift_mult, mode);
 182   PUT_MODE (all->shift_add, mode);
 183   PUT_MODE (all->shift_sub0, mode);
 184   PUT_MODE (all->shift_sub1, mode);
 185   PUT_MODE (all->zext, mode);
 186   PUT_MODE (all->trunc, mode);
 187
 188   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 189   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 190   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 191   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 192   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 193
 194   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 195                                      <= 2 * add_cost (speed, mode)));
 196   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 197                                      <= 4 * add_cost (speed, mode)));
 198
 199   set_shift_cost (speed, mode, 0, 0);
 200   {
 201     int cost = add_cost (speed, mode);
 202     set_shiftadd_cost (speed, mode, 0, cost);
 203     set_shiftsub0_cost (speed, mode, 0, cost);
 204     set_shiftsub1_cost (speed, mode, 0, cost);
 205   }
 206
 207   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 208   for (m = 1; m < n; m++)
 209     {
 210       XEXP (all->shift, 1) = all->cint[m];
 211       XEXP (all->shift_mult, 1) = all->pow2[m];
 212
 213       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 214       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 215                                                        speed));
 216       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 217                                                         speed));
 218       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 219                                                         speed));
 220     }
 221
 222   scalar_int_mode int_mode_to;
 223   if (is_a <scalar_int_mode> (mode, &int_mode_to))
 224     {
 225       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 226            mode_from = (machine_mode)(mode_from + 1))
 227         init_expmed_one_conv (all, int_mode_to,
 228                               as_a <scalar_int_mode> (mode_from), speed);
 229
 230       scalar_int_mode wider_mode;
 231       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
 232           && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
 233         {
 234           PUT_MODE (all->reg, mode);
 235           PUT_MODE (all->zext, wider_mode);
 236           PUT_MODE (all->wide_mult, wider_mode);
 237           PUT_MODE (all->wide_lshr, wider_mode);
 238           XEXP (all->wide_lshr, 1)
 239             = gen_int_shift_amount (wider_mode, mode_bitsize);
 240
 241           set_mul_widen_cost (speed, wider_mode,
 242                               set_src_cost (all->wide_mult, wider_mode, speed));
 243           set_mul_highpart_cost (speed, int_mode_to,
 244                                  set_src_cost (all->wide_trunc,
 245                                                int_mode_to, speed));
 246         }
 247     }
 248 }
 249
 250 void
 251 init_expmed (void)
 252 {
 253   struct init_expmed_rtl all;
 254   machine_mode mode = QImode;
 255   int m, speed;
 256
 257   memset (&all, 0, sizeof all);
 258   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 259     {
 260       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 261       all.cint[m] = GEN_INT (m);
 262     }
 263
 264   /* Avoid using hard regs in ways which may be unsupported.  */
 265   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 266   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 267   all.neg = gen_rtx_NEG (mode, all.reg);
 268   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 269   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 270   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 271   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 272   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 273   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 274   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 275   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 276   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 277   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 278   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 279   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 280   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 281   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 282   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 283
 284   for (speed = 0; speed < 2; speed++)
 285     {
 286       crtl->maybe_hot_insn_p = speed;
 287       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 288
 289       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 290            mode = (machine_mode)(mode + 1))
 291         init_expmed_one_mode (&all, mode, speed);
 292
 293       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 294         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 295              mode = (machine_mode)(mode + 1))
 296           init_expmed_one_mode (&all, mode, speed);
 297
 298       if (MIN_MODE_VECTOR_INT != VOIDmode)
 299         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 300              mode = (machine_mode)(mode + 1))
 301           init_expmed_one_mode (&all, mode, speed);
 302     }
 303
 304   if (alg_hash_used_p ())
 305     {
 306       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 307       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 308     }
 309   else
 310     set_alg_hash_used_p (true);
 311   default_rtl_profile ();
 312
 313   ggc_free (all.trunc);
 314   ggc_free (all.shift_sub1);
 315   ggc_free (all.shift_sub0);
 316   ggc_free (all.shift_add);
 317   ggc_free (all.shift_mult);
 318   ggc_free (all.shift);
 319   ggc_free (all.wide_trunc);
 320   ggc_free (all.wide_lshr);
 321   ggc_free (all.wide_mult);
 322   ggc_free (all.zext);
 323   ggc_free (all.smod_32);
 324   ggc_free (all.sdiv_32);
 325   ggc_free (all.udiv);
 326   ggc_free (all.sdiv);
 327   ggc_free (all.mult);
 328   ggc_free (all.neg);
 329   ggc_free (all.plus);
 330   ggc_free (all.reg);
 331 }
 332
 333 /* Return an rtx representing minus the value of X.
 334    MODE is the intended mode of the result,
 335    useful if X is a CONST_INT.  */
 336
 337 rtx
 338 negate_rtx (machine_mode mode, rtx x)
 339 {
 340   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 341
 342   if (result == 0)
 343     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 344
 345   return result;
 346 }
 347
 348 /* Whether reverse storage order is supported on the target.  */
 349 static int reverse_storage_order_supported = -1;
 350
 351 /* Check whether reverse storage order is supported on the target.  */
 352
 353 static void
 354 check_reverse_storage_order_support (void)
 355 {
 356   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 357     {
 358       reverse_storage_order_supported = 0;
 359       sorry ("reverse scalar storage order");
 360     }
 361   else
 362     reverse_storage_order_supported = 1;
 363 }
 364
 365 /* Whether reverse FP storage order is supported on the target.  */
 366 static int reverse_float_storage_order_supported = -1;
 367
 368 /* Check whether reverse FP storage order is supported on the target.  */
 369
 370 static void
 371 check_reverse_float_storage_order_support (void)
 372 {
 373   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 374     {
 375       reverse_float_storage_order_supported = 0;
 376       sorry ("reverse floating-point scalar storage order");
 377     }
 378   else
 379     reverse_float_storage_order_supported = 1;
 380 }
 381
 382 /* Return an rtx representing value of X with reverse storage order.
 383    MODE is the intended mode of the result,
 384    useful if X is a CONST_INT.  */
 385
 386 rtx
 387 flip_storage_order (machine_mode mode, rtx x)
 388 {
 389   scalar_int_mode int_mode;
 390   rtx result;
 391
 392   if (mode == QImode)
 393     return x;
 394
 395   if (COMPLEX_MODE_P (mode))
 396     {
 397       rtx real = read_complex_part (x, false);
 398       rtx imag = read_complex_part (x, true);
 399
 400       real = flip_storage_order (GET_MODE_INNER (mode), real);
 401       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 402
 403       return gen_rtx_CONCAT (mode, real, imag);
 404     }
 405
 406   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 407     check_reverse_storage_order_support ();
 408
 409   if (!is_a <scalar_int_mode> (mode, &int_mode))
 410     {
 411       if (FLOAT_MODE_P (mode)
 412           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 413         check_reverse_float_storage_order_support ();
 414
 415       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
 416           || !targetm.scalar_mode_supported_p (int_mode))
 417         {
 418           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 419           return x;
 420         }
 421       x = gen_lowpart (int_mode, x);
 422     }
 423
 424   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 425   if (result == 0)
 426     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 427
 428   if (int_mode != mode)
 429     result = gen_lowpart (mode, result);
 430
 431   return result;
 432 }
 433
 434 /* If MODE is set, adjust bitfield memory MEM so that it points to the
 435    first unit of mode MODE that contains a bitfield of size BITSIZE at
 436    bit position BITNUM.  If MODE is not set, return a BLKmode reference
 437    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
 438    of the field within the new memory.  */
 439
 440 static rtx
 441 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
 442                       unsigned HOST_WIDE_INT bitsize,
 443                       unsigned HOST_WIDE_INT bitnum,
 444                       unsigned HOST_WIDE_INT *new_bitnum)
 445 {
 446   scalar_int_mode imode;
 447   if (mode.exists (&imode))
 448     {
 449       unsigned int unit = GET_MODE_BITSIZE (imode);
 450       *new_bitnum = bitnum % unit;
 451       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 452       return adjust_bitfield_address (mem, imode, offset);
 453     }
 454   else
 455     {
 456       *new_bitnum = bitnum % BITS_PER_UNIT;
 457       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 458       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 459                             / BITS_PER_UNIT);
 460       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
 461     }
 462 }
 463
 464 /* The caller wants to perform insertion or extraction PATTERN on a
 465    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 466    BITREGION_START and BITREGION_END are as for store_bit_field
 467    and FIELDMODE is the natural mode of the field.
 468
 469    Search for a mode that is compatible with the memory access
 470    restrictions and (where applicable) with a register insertion or
 471    extraction.  Return the new memory on success, storing the adjusted
 472    bit position in *NEW_BITNUM.  Return null otherwise.  */
 473
 474 static rtx
 475 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 476                               rtx op0, HOST_WIDE_INT bitsize,
 477                               HOST_WIDE_INT bitnum,
 478                               poly_uint64 bitregion_start,
 479                               poly_uint64 bitregion_end,
 480                               machine_mode fieldmode,
 481                               unsigned HOST_WIDE_INT *new_bitnum)
 482 {
 483   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 484                                 bitregion_end, MEM_ALIGN (op0),
 485                                 MEM_VOLATILE_P (op0));
 486   scalar_int_mode best_mode;
 487   if (iter.next_mode (&best_mode))
 488     {
 489       /* We can use a memory in BEST_MODE.  See whether this is true for
 490          any wider modes.  All other things being equal, we prefer to
 491          use the widest mode possible because it tends to expose more
 492          CSE opportunities.  */
 493       if (!iter.prefer_smaller_modes ())
 494         {
 495           /* Limit the search to the mode required by the corresponding
 496              register insertion or extraction instruction, if any.  */
 497           scalar_int_mode limit_mode = word_mode;
 498           extraction_insn insn;
 499           if (get_best_reg_extraction_insn (&insn, pattern,
 500                                             GET_MODE_BITSIZE (best_mode),
 501                                             fieldmode))
 502             limit_mode = insn.field_mode;
 503
 504           scalar_int_mode wider_mode;
 505           while (iter.next_mode (&wider_mode)
 506                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 507             best_mode = wider_mode;
 508         }
 509       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 510                                    new_bitnum);
 511     }
 512   return NULL_RTX;
 513 }
 514
 515 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 516    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 517    offset is then BITNUM / BITS_PER_UNIT.  */
 518
 519 static bool
 520 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
 521                      machine_mode struct_mode)
 522 {
 523   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
 524   if (BYTES_BIG_ENDIAN)
 525     return (multiple_p (bitnum, BITS_PER_UNIT)
 526             && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
 527                 || multiple_p (bitnum + bitsize,
 528                                regsize * BITS_PER_UNIT)));
 529   else
 530     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
 531 }
 532
 533 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 534    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 535    Return false if the access would touch memory outside the range
 536    BITREGION_START to BITREGION_END for conformance to the C++ memory
 537    model.  */
 538
 539 static bool
 540 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 541                             unsigned HOST_WIDE_INT bitnum,
 542                             scalar_int_mode fieldmode,
 543                             poly_uint64 bitregion_start,
 544                             poly_uint64 bitregion_end)
 545 {
 546   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 547
 548   /* -fstrict-volatile-bitfields must be enabled and we must have a
 549      volatile MEM.  */
 550   if (!MEM_P (op0)
 551       || !MEM_VOLATILE_P (op0)
 552       || flag_strict_volatile_bitfields <= 0)
 553     return false;
 554
 555   /* The bit size must not be larger than the field mode, and
 556      the field mode must not be larger than a word.  */
 557   if (bitsize > modesize || modesize > BITS_PER_WORD)
 558     return false;
 559
 560   /* Check for cases of unaligned fields that must be split.  */
 561   if (bitnum % modesize + bitsize > modesize)
 562     return false;
 563
 564   /* The memory must be sufficiently aligned for a MODESIZE access.
 565      This condition guarantees, that the memory access will not
 566      touch anything after the end of the structure.  */
 567   if (MEM_ALIGN (op0) < modesize)
 568     return false;
 569
 570   /* Check for cases where the C++ memory model applies.  */
 571   if (maybe_ne (bitregion_end, 0U)
 572       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
 573           || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
 574                        bitregion_end)))
 575     return false;
 576
 577   return true;
 578 }
 579
 580 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 581    bit number BITNUM can be treated as a simple value of mode MODE.
 582    Store the byte offset in *BYTENUM if so.  */
 583
 584 static bool
 585 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
 586                        machine_mode mode, poly_uint64 *bytenum)
 587 {
 588   return (MEM_P (op0)
 589           && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
 590           && known_eq (bitsize, GET_MODE_BITSIZE (mode))
 591           && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
 592               || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
 593                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 594 }
 595 \f
 596 /* Try to use instruction INSV to store VALUE into a field of OP0.
 597    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
 598    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
 599    are as for store_bit_field.  */
 600
 601 static bool
 602 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 603                             opt_scalar_int_mode op0_mode,
 604                             unsigned HOST_WIDE_INT bitsize,
 605                             unsigned HOST_WIDE_INT bitnum,
 606                             rtx value, scalar_int_mode value_mode)
 607 {
 608   class expand_operand ops[4];
 609   rtx value1;
 610   rtx xop0 = op0;
 611   rtx_insn *last = get_last_insn ();
 612   bool copy_back = false;
 613
 614   scalar_int_mode op_mode = insv->field_mode;
 615   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 616   if (bitsize == 0 || bitsize > unit)
 617     return false;
 618
 619   if (MEM_P (xop0))
 620     /* Get a reference to the first byte of the field.  */
 621     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 622                                  &bitnum);
 623   else
 624     {
 625       /* Convert from counting within OP0 to counting in OP_MODE.  */
 626       if (BYTES_BIG_ENDIAN)
 627         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
 628
 629       /* If xop0 is a register, we need it in OP_MODE
 630          to make it acceptable to the format of insv.  */
 631       if (GET_CODE (xop0) == SUBREG)
 632         {
 633           /* If such a SUBREG can't be created, give up.  */
 634           if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
 635                                 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
 636             return false;
 637           /* We can't just change the mode, because this might clobber op0,
 638              and we will need the original value of op0 if insv fails.  */
 639           xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
 640                                  SUBREG_BYTE (xop0));
 641         }
 642       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 643         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 644     }
 645
 646   /* If the destination is a paradoxical subreg such that we need a
 647      truncate to the inner mode, perform the insertion on a temporary and
 648      truncate the result to the original destination.  Note that we can't
 649      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 650      X) 0)) is (reg:N X).  */
 651   if (GET_CODE (xop0) == SUBREG
 652       && REG_P (SUBREG_REG (xop0))
 653       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 654                                          op_mode))
 655     {
 656       rtx tem = gen_reg_rtx (op_mode);
 657       emit_move_insn (tem, xop0);
 658       xop0 = tem;
 659       copy_back = true;
 660     }
 661
 662   /* There are similar overflow check at the start of store_bit_field_1,
 663      but that only check the situation where the field lies completely
 664      outside the register, while there do have situation where the field
 665      lies partialy in the register, we need to adjust bitsize for this
 666      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 667      will broken on those arch support bit insert instruction, like arm, aarch64
 668      etc.  */
 669   if (bitsize + bitnum > unit && bitnum < unit)
 670     {
 671       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 672                "destination object, data truncated into %wu-bit",
 673                bitsize, unit - bitnum);
 674       bitsize = unit - bitnum;
 675     }
 676
 677   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 678      "backwards" from the size of the unit we are inserting into.
 679      Otherwise, we count bits from the most significant on a
 680      BYTES/BITS_BIG_ENDIAN machine.  */
 681
 682   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 683     bitnum = unit - bitsize - bitnum;
 684
 685   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 686   value1 = value;
 687   if (value_mode != op_mode)
 688     {
 689       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
 690         {
 691           rtx tmp;
 692           /* Optimization: Don't bother really extending VALUE
 693              if it has all the bits we will actually use.  However,
 694              if we must narrow it, be sure we do it correctly.  */
 695
 696           if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
 697             {
 698               tmp = simplify_subreg (op_mode, value1, value_mode, 0);
 699               if (! tmp)
 700                 tmp = simplify_gen_subreg (op_mode,
 701                                            force_reg (value_mode, value1),
 702                                            value_mode, 0);
 703             }
 704           else
 705             {
 706               tmp = gen_lowpart_if_possible (op_mode, value1);
 707               if (! tmp)
 708                 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
 709             }
 710           value1 = tmp;
 711         }
 712       else if (CONST_INT_P (value))
 713         value1 = gen_int_mode (INTVAL (value), op_mode);
 714       else
 715         /* Parse phase is supposed to make VALUE's data type
 716            match that of the component reference, which is a type
 717            at least as wide as the field; so VALUE should have
 718            a mode that corresponds to that type.  */
 719         gcc_assert (CONSTANT_P (value));
 720     }
 721
 722   create_fixed_operand (&ops[0], xop0);
 723   create_integer_operand (&ops[1], bitsize);
 724   create_integer_operand (&ops[2], bitnum);
 725   create_input_operand (&ops[3], value1, op_mode);
 726   if (maybe_expand_insn (insv->icode, 4, ops))
 727     {
 728       if (copy_back)
 729         convert_move (op0, xop0, true);
 730       return true;
 731     }
 732   delete_insns_since (last);
 733   return false;
 734 }
 735
 736 /* A subroutine of store_bit_field, with the same arguments.  Return true
 737    if the operation could be implemented.
 738
 739    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 740    no other way of implementing the operation.  If FALLBACK_P is false,
 741    return false instead.  */
 742
 743 static bool
 744 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
 745                    poly_uint64 bitregion_start, poly_uint64 bitregion_end,
 746                    machine_mode fieldmode,
 747                    rtx value, bool reverse, bool fallback_p)
 748 {
 749   rtx op0 = str_rtx;
 750
 751   while (GET_CODE (op0) == SUBREG)
 752     {
 753       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
 754       op0 = SUBREG_REG (op0);
 755     }
 756
 757   /* No action is needed if the target is a register and if the field
 758      lies completely outside that register.  This can occur if the source
 759      code contains an out-of-bounds access to a small array.  */
 760   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
 761     return true;
 762
 763   /* Use vec_set patterns for inserting parts of vectors whenever
 764      available.  */
 765   machine_mode outermode = GET_MODE (op0);
 766   scalar_mode innermode = GET_MODE_INNER (outermode);
 767   poly_uint64 pos;
 768   if (VECTOR_MODE_P (outermode)
 769       && !MEM_P (op0)
 770       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
 771       && fieldmode == innermode
 772       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
 773       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
 774     {
 775       class expand_operand ops[3];
 776       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 777
 778       create_fixed_operand (&ops[0], op0);
 779       create_input_operand (&ops[1], value, innermode);
 780       create_integer_operand (&ops[2], pos);
 781       if (maybe_expand_insn (icode, 3, ops))
 782         return true;
 783     }
 784
 785   /* If the target is a register, overwriting the entire object, or storing
 786      a full-word or multi-word field can be done with just a SUBREG.  */
 787   if (!MEM_P (op0)
 788       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
 789     {
 790       /* Use the subreg machinery either to narrow OP0 to the required
 791          words or to cope with mode punning between equal-sized modes.
 792          In the latter case, use subreg on the rhs side, not lhs.  */
 793       rtx sub;
 794       HOST_WIDE_INT regnum;
 795       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
 796       if (known_eq (bitnum, 0U)
 797           && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
 798         {
 799           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 800           if (sub)
 801             {
 802               if (reverse)
 803                 sub = flip_storage_order (GET_MODE (op0), sub);
 804               emit_move_insn (op0, sub);
 805               return true;
 806             }
 807         }
 808       else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
 809                && multiple_p (bitsize, regsize * BITS_PER_UNIT)
 810                && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
 811         {
 812           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 813                                      regnum * regsize);
 814           if (sub)
 815             {
 816               if (reverse)
 817                 value = flip_storage_order (fieldmode, value);
 818               emit_move_insn (sub, value);
 819               return true;
 820             }
 821         }
 822     }
 823
 824   /* If the target is memory, storing any naturally aligned field can be
 825      done with a simple store.  For targets that support fast unaligned
 826      memory, any naturally sized, unit aligned field can be done directly.  */
 827   poly_uint64 bytenum;
 828   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
 829     {
 830       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
 831       if (reverse)
 832         value = flip_storage_order (fieldmode, value);
 833       emit_move_insn (op0, value);
 834       return true;
 835     }
 836
 837   /* It's possible we'll need to handle other cases here for
 838      polynomial bitnum and bitsize.  */
 839
 840   /* From here on we need to be looking at a fixed-size insertion.  */
 841   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
 842   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
 843
 844   /* Make sure we are playing with integral modes.  Pun with subregs
 845      if we aren't.  This must come after the entire register case above,
 846      since that case is valid for any mode.  The following cases are only
 847      valid for integral modes.  */
 848   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
 849   scalar_int_mode imode;
 850   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
 851     {
 852       if (MEM_P (op0))
 853         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
 854                                             0, MEM_SIZE (op0));
 855       else if (!op0_mode.exists ())
 856         {
 857           if (ibitnum == 0
 858               && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
 859               && MEM_P (value)
 860               && !reverse)
 861             {
 862               value = adjust_address (value, GET_MODE (op0), 0);
 863               emit_move_insn (op0, value);
 864               return true;
 865             }
 866           if (!fallback_p)
 867             return false;
 868           rtx temp = assign_stack_temp (GET_MODE (op0),
 869                                         GET_MODE_SIZE (GET_MODE (op0)));
 870           emit_move_insn (temp, op0);
 871           store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
 872                              reverse, fallback_p);
 873           emit_move_insn (op0, temp);
 874           return true;
 875         }
 876       else
 877         op0 = gen_lowpart (op0_mode.require (), op0);
 878     }
 879
 880   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
 881                                    bitregion_start, bitregion_end,
 882                                    fieldmode, value, reverse, fallback_p);
 883 }
 884
 885 /* Subroutine of store_bit_field_1, with the same arguments, except
 886    that BITSIZE and BITNUM are constant.  Handle cases specific to
 887    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
 888    otherwise OP0 is a BLKmode MEM.  */
 889
 890 static bool
 891 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
 892                           unsigned HOST_WIDE_INT bitsize,
 893                           unsigned HOST_WIDE_INT bitnum,
 894                           poly_uint64 bitregion_start,
 895                           poly_uint64 bitregion_end,
 896                           machine_mode fieldmode,
 897                           rtx value, bool reverse, bool fallback_p)
 898 {
 899   /* Storing an lsb-aligned field in a register
 900      can be done with a movstrict instruction.  */
 901
 902   if (!MEM_P (op0)
 903       && !reverse
 904       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
 905       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
 906       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 907     {
 908       class expand_operand ops[2];
 909       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 910       rtx arg0 = op0;
 911       unsigned HOST_WIDE_INT subreg_off;
 912
 913       if (GET_CODE (arg0) == SUBREG)
 914         {
 915           /* Else we've got some float mode source being extracted into
 916              a different float mode destination -- this combination of
 917              subregs results in Severe Tire Damage.  */
 918           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 919                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 920                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 921           arg0 = SUBREG_REG (arg0);
 922         }
 923
 924       subreg_off = bitnum / BITS_PER_UNIT;
 925       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
 926           /* STRICT_LOW_PART must have a non-paradoxical subreg as
 927              operand.  */
 928           && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
 929         {
 930           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 931
 932           create_fixed_operand (&ops[0], arg0);
 933           /* Shrink the source operand to FIELDMODE.  */
 934           create_convert_operand_to (&ops[1], value, fieldmode, false);
 935           if (maybe_expand_insn (icode, 2, ops))
 936             return true;
 937         }
 938     }
 939
 940   /* Handle fields bigger than a word.  */
 941
 942   if (bitsize > BITS_PER_WORD)
 943     {
 944       /* Here we transfer the words of the field
 945          in the order least significant first.
 946          This is because the most significant word is the one which may
 947          be less than full.
 948          However, only do that if the value is not BLKmode.  */
 949
 950       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 951       const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 952       rtx_insn *last;
 953
 954       /* This is the mode we must force value to, so that there will be enough
 955          subwords to extract.  Note that fieldmode will often (always?) be
 956          VOIDmode, because that is what store_field uses to indicate that this
 957          is a bit field, but passing VOIDmode to operand_subword_force
 958          is not allowed.
 959
 960          The mode must be fixed-size, since insertions into variable-sized
 961          objects are meant to be handled before calling this function.  */
 962       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
 963       if (value_mode == VOIDmode)
 964         value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
 965
 966       last = get_last_insn ();
 967       for (int i = 0; i < nwords; i++)
 968         {
 969           /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
 970              except maybe for the last iteration.  */
 971           const unsigned HOST_WIDE_INT new_bitsize
 972             = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 973           /* Bit offset from the starting bit number in the target.  */
 974           const unsigned int bit_offset
 975             = backwards ^ reverse
 976               ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
 977               : i * BITS_PER_WORD;
 978           /* Starting word number in the value.  */
 979           const unsigned int wordnum
 980             = backwards
 981               ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
 982               : i;
 983           /* The chunk of the value in word_mode.  We use bit-field extraction
 984               in BLKmode to handle unaligned memory references and to shift the
 985               last chunk right on big-endian machines if need be.  */
 986           rtx value_word
 987             = fieldmode == BLKmode
 988               ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
 989                                    1, NULL_RTX, word_mode, word_mode, false,
 990                                    NULL)
 991               : operand_subword_force (value, wordnum, value_mode);
 992
 993           if (!store_bit_field_1 (op0, new_bitsize,
 994                                   bitnum + bit_offset,
 995                                   bitregion_start, bitregion_end,
 996                                   word_mode,
 997                                   value_word, reverse, fallback_p))
 998             {
 999               delete_insns_since (last);
1000               return false;
1001             }
1002         }
1003       return true;
1004     }
1005
1006   /* If VALUE has a floating-point or complex mode, access it as an
1007      integer of the corresponding size.  This can occur on a machine
1008      with 64 bit registers that uses SFmode for float.  It can also
1009      occur for unaligned float or complex fields.  */
1010   rtx orig_value = value;
1011   scalar_int_mode value_mode;
1012   if (GET_MODE (value) == VOIDmode)
1013     /* By this point we've dealt with values that are bigger than a word,
1014        so word_mode is a conservatively correct choice.  */
1015     value_mode = word_mode;
1016   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1017     {
1018       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1019       value = gen_reg_rtx (value_mode);
1020       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1021     }
1022
1023   /* If OP0 is a multi-word register, narrow it to the affected word.
1024      If the region spans two words, defer to store_split_bit_field.
1025      Don't do this if op0 is a single hard register wider than word
1026      such as a float or vector register.  */
1027   if (!MEM_P (op0)
1028       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1029       && (!REG_P (op0)
1030           || !HARD_REGISTER_P (op0)
1031           || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1032     {
1033       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1034         {
1035           if (!fallback_p)
1036             return false;
1037
1038           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1039                                  bitregion_start, bitregion_end,
1040                                  value, value_mode, reverse);
1041           return true;
1042         }
1043       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1044                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1045       gcc_assert (op0);
1046       op0_mode = word_mode;
1047       bitnum %= BITS_PER_WORD;
1048     }
1049
1050   /* From here on we can assume that the field to be stored in fits
1051      within a word.  If the destination is a register, it too fits
1052      in a word.  */
1053
1054   extraction_insn insv;
1055   if (!MEM_P (op0)
1056       && !reverse
1057       && get_best_reg_extraction_insn (&insv, EP_insv,
1058                                        GET_MODE_BITSIZE (op0_mode.require ()),
1059                                        fieldmode)
1060       && store_bit_field_using_insv (&insv, op0, op0_mode,
1061                                      bitsize, bitnum, value, value_mode))
1062     return true;
1063
1064   /* If OP0 is a memory, try copying it to a register and seeing if a
1065      cheap register alternative is available.  */
1066   if (MEM_P (op0) && !reverse)
1067     {
1068       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1069                                         fieldmode)
1070           && store_bit_field_using_insv (&insv, op0, op0_mode,
1071                                          bitsize, bitnum, value, value_mode))
1072         return true;
1073
1074       rtx_insn *last = get_last_insn ();
1075
1076       /* Try loading part of OP0 into a register, inserting the bitfield
1077          into that, and then copying the result back to OP0.  */
1078       unsigned HOST_WIDE_INT bitpos;
1079       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1080                                                bitregion_start, bitregion_end,
1081                                                fieldmode, &bitpos);
1082       if (xop0)
1083         {
1084           rtx tempreg = copy_to_reg (xop0);
1085           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1086                                  bitregion_start, bitregion_end,
1087                                  fieldmode, orig_value, reverse, false))
1088             {
1089               emit_move_insn (xop0, tempreg);
1090               return true;
1091             }
1092           delete_insns_since (last);
1093         }
1094     }
1095
1096   if (!fallback_p)
1097     return false;
1098
1099   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1100                          bitregion_end, value, value_mode, reverse);
1101   return true;
1102 }
1103
1104 /* Generate code to store value from rtx VALUE
1105    into a bit-field within structure STR_RTX
1106    containing BITSIZE bits starting at bit BITNUM.
1107
1108    BITREGION_START is bitpos of the first bitfield in this region.
1109    BITREGION_END is the bitpos of the ending bitfield in this region.
1110    These two fields are 0, if the C++ memory model does not apply,
1111    or we are not interested in keeping track of bitfield regions.
1112
1113    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1114
1115    If REVERSE is true, the store is to be done in reverse order.  */
1116
1117 void
1118 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1119                  poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1120                  machine_mode fieldmode,
1121                  rtx value, bool reverse)
1122 {
1123   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1124   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1125   scalar_int_mode int_mode;
1126   if (bitsize.is_constant (&ibitsize)
1127       && bitnum.is_constant (&ibitnum)
1128       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1129       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1130                                      bitregion_start, bitregion_end))
1131     {
1132       /* Storing of a full word can be done with a simple store.
1133          We know here that the field can be accessed with one single
1134          instruction.  For targets that support unaligned memory,
1135          an unaligned access may be necessary.  */
1136       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1137         {
1138           str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1139                                              ibitnum / BITS_PER_UNIT);
1140           if (reverse)
1141             value = flip_storage_order (int_mode, value);
1142           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1143           emit_move_insn (str_rtx, value);
1144         }
1145       else
1146         {
1147           rtx temp;
1148
1149           str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1150                                           ibitnum, &ibitnum);
1151           gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1152           temp = copy_to_reg (str_rtx);
1153           if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1154                                   int_mode, value, reverse, true))
1155             gcc_unreachable ();
1156
1157           emit_move_insn (str_rtx, temp);
1158         }
1159
1160       return;
1161     }
1162
1163   /* Under the C++0x memory model, we must not touch bits outside the
1164      bit region.  Adjust the address to start at the beginning of the
1165      bit region.  */
1166   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1167     {
1168       scalar_int_mode best_mode;
1169       machine_mode addr_mode = VOIDmode;
1170
1171       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1172       bitnum -= bitregion_start;
1173       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1174       bitregion_end -= bitregion_start;
1175       bitregion_start = 0;
1176       if (bitsize.is_constant (&ibitsize)
1177           && bitnum.is_constant (&ibitnum)
1178           && get_best_mode (ibitsize, ibitnum,
1179                             bitregion_start, bitregion_end,
1180                             MEM_ALIGN (str_rtx), INT_MAX,
1181                             MEM_VOLATILE_P (str_rtx), &best_mode))
1182         addr_mode = best_mode;
1183       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1184                                               offset, size);
1185     }
1186
1187   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1188                           bitregion_start, bitregion_end,
1189                           fieldmode, value, reverse, true))
1190     gcc_unreachable ();
1191 }
1192 \f
1193 /* Use shifts and boolean operations to store VALUE into a bit field of
1194    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1195    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1196    the mode of VALUE.
1197
1198    If REVERSE is true, the store is to be done in reverse order.  */
1199
1200 static void
1201 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1202                        unsigned HOST_WIDE_INT bitsize,
1203                        unsigned HOST_WIDE_INT bitnum,
1204                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1205                        rtx value, scalar_int_mode value_mode, bool reverse)
1206 {
1207   /* There is a case not handled here:
1208      a structure with a known alignment of just a halfword
1209      and a field split across two aligned halfwords within the structure.
1210      Or likewise a structure with a known alignment of just a byte
1211      and a field split across two bytes.
1212      Such cases are not supposed to be able to occur.  */
1213
1214   scalar_int_mode best_mode;
1215   if (MEM_P (op0))
1216     {
1217       unsigned int max_bitsize = BITS_PER_WORD;
1218       scalar_int_mode imode;
1219       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1220         max_bitsize = GET_MODE_BITSIZE (imode);
1221
1222       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1223                           MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1224                           &best_mode))
1225         {
1226           /* The only way this should occur is if the field spans word
1227              boundaries.  */
1228           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1229                                  bitregion_start, bitregion_end,
1230                                  value, value_mode, reverse);
1231           return;
1232         }
1233
1234       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1235     }
1236   else
1237     best_mode = op0_mode.require ();
1238
1239   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1240                            value, value_mode, reverse);
1241 }
1242
1243 /* Helper function for store_fixed_bit_field, stores
1244    the bit field always using MODE, which is the mode of OP0.  The other
1245    arguments are as for store_fixed_bit_field.  */
1246
1247 static void
1248 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1249                          unsigned HOST_WIDE_INT bitsize,
1250                          unsigned HOST_WIDE_INT bitnum,
1251                          rtx value, scalar_int_mode value_mode, bool reverse)
1252 {
1253   rtx temp;
1254   int all_zero = 0;
1255   int all_one = 0;
1256
1257   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1258      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1259
1260   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1261     /* BITNUM is the distance between our msb
1262        and that of the containing datum.
1263        Convert it to the distance from the lsb.  */
1264     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1265
1266   /* Now BITNUM is always the distance between our lsb
1267      and that of OP0.  */
1268
1269   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1270      we must first convert its mode to MODE.  */
1271
1272   if (CONST_INT_P (value))
1273     {
1274       unsigned HOST_WIDE_INT v = UINTVAL (value);
1275
1276       if (bitsize < HOST_BITS_PER_WIDE_INT)
1277         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1278
1279       if (v == 0)
1280         all_zero = 1;
1281       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1282                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1283                || (bitsize == HOST_BITS_PER_WIDE_INT
1284                    && v == HOST_WIDE_INT_M1U))
1285         all_one = 1;
1286
1287       value = lshift_value (mode, v, bitnum);
1288     }
1289   else
1290     {
1291       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1292                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1293
1294       if (value_mode != mode)
1295         value = convert_to_mode (mode, value, 1);
1296
1297       if (must_and)
1298         value = expand_binop (mode, and_optab, value,
1299                               mask_rtx (mode, 0, bitsize, 0),
1300                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1301       if (bitnum > 0)
1302         value = expand_shift (LSHIFT_EXPR, mode, value,
1303                               bitnum, NULL_RTX, 1);
1304     }
1305
1306   if (reverse)
1307     value = flip_storage_order (mode, value);
1308
1309   /* Now clear the chosen bits in OP0,
1310      except that if VALUE is -1 we need not bother.  */
1311   /* We keep the intermediates in registers to allow CSE to combine
1312      consecutive bitfield assignments.  */
1313
1314   temp = force_reg (mode, op0);
1315
1316   if (! all_one)
1317     {
1318       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1319       if (reverse)
1320         mask = flip_storage_order (mode, mask);
1321       temp = expand_binop (mode, and_optab, temp, mask,
1322                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1323       temp = force_reg (mode, temp);
1324     }
1325
1326   /* Now logical-or VALUE into OP0, unless it is zero.  */
1327
1328   if (! all_zero)
1329     {
1330       temp = expand_binop (mode, ior_optab, temp, value,
1331                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1332       temp = force_reg (mode, temp);
1333     }
1334
1335   if (op0 != temp)
1336     {
1337       op0 = copy_rtx (op0);
1338       emit_move_insn (op0, temp);
1339     }
1340 }
1341 \f
1342 /* Store a bit field that is split across multiple accessible memory objects.
1343
1344    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1345    BITSIZE is the field width; BITPOS the position of its first bit
1346    (within the word).
1347    VALUE is the value to store, which has mode VALUE_MODE.
1348    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1349    a BLKmode MEM.
1350
1351    If REVERSE is true, the store is to be done in reverse order.
1352
1353    This does not yet handle fields wider than BITS_PER_WORD.  */
1354
1355 static void
1356 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1357                        unsigned HOST_WIDE_INT bitsize,
1358                        unsigned HOST_WIDE_INT bitpos,
1359                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1360                        rtx value, scalar_int_mode value_mode, bool reverse)
1361 {
1362   unsigned int unit, total_bits, bitsdone = 0;
1363
1364   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1365      much at a time.  */
1366   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1367     unit = BITS_PER_WORD;
1368   else
1369     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1370
1371   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1372      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1373      again, and we will mutually recurse forever.  */
1374   if (MEM_P (op0) && op0_mode.exists ())
1375     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1376
1377   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1378      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1379      that VALUE might be a floating-point constant.  */
1380   if (CONSTANT_P (value) && !CONST_INT_P (value))
1381     {
1382       rtx word = gen_lowpart_common (word_mode, value);
1383
1384       if (word && (value != word))
1385         value = word;
1386       else
1387         value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1388       value_mode = word_mode;
1389     }
1390
1391   total_bits = GET_MODE_BITSIZE (value_mode);
1392
1393   while (bitsdone < bitsize)
1394     {
1395       unsigned HOST_WIDE_INT thissize;
1396       unsigned HOST_WIDE_INT thispos;
1397       unsigned HOST_WIDE_INT offset;
1398       rtx part;
1399
1400       offset = (bitpos + bitsdone) / unit;
1401       thispos = (bitpos + bitsdone) % unit;
1402
1403       /* When region of bytes we can touch is restricted, decrease
1404          UNIT close to the end of the region as needed.  If op0 is a REG
1405          or SUBREG of REG, don't do this, as there can't be data races
1406          on a register and we can expand shorter code in some cases.  */
1407       if (maybe_ne (bitregion_end, 0U)
1408           && unit > BITS_PER_UNIT
1409           && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1410           && !REG_P (op0)
1411           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1412         {
1413           unit = unit / 2;
1414           continue;
1415         }
1416
1417       /* THISSIZE must not overrun a word boundary.  Otherwise,
1418          store_fixed_bit_field will call us again, and we will mutually
1419          recurse forever.  */
1420       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1421       thissize = MIN (thissize, unit - thispos);
1422
1423       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1424         {
1425           /* Fetch successively less significant portions.  */
1426           if (CONST_INT_P (value))
1427             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1428                              >> (bitsize - bitsdone - thissize))
1429                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1430           /* Likewise, but the source is little-endian.  */
1431           else if (reverse)
1432             part = extract_fixed_bit_field (word_mode, value, value_mode,
1433                                             thissize,
1434                                             bitsize - bitsdone - thissize,
1435                                             NULL_RTX, 1, false);
1436           else
1437             /* The args are chosen so that the last part includes the
1438                lsb.  Give extract_bit_field the value it needs (with
1439                endianness compensation) to fetch the piece we want.  */
1440             part = extract_fixed_bit_field (word_mode, value, value_mode,
1441                                             thissize,
1442                                             total_bits - bitsize + bitsdone,
1443                                             NULL_RTX, 1, false);
1444         }
1445       else
1446         {
1447           /* Fetch successively more significant portions.  */
1448           if (CONST_INT_P (value))
1449             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1450                              >> bitsdone)
1451                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1452           /* Likewise, but the source is big-endian.  */
1453           else if (reverse)
1454             part = extract_fixed_bit_field (word_mode, value, value_mode,
1455                                             thissize,
1456                                             total_bits - bitsdone - thissize,
1457                                             NULL_RTX, 1, false);
1458           else
1459             part = extract_fixed_bit_field (word_mode, value, value_mode,
1460                                             thissize, bitsdone, NULL_RTX,
1461                                             1, false);
1462         }
1463
1464       /* If OP0 is a register, then handle OFFSET here.  */
1465       rtx op0_piece = op0;
1466       opt_scalar_int_mode op0_piece_mode = op0_mode;
1467       if (SUBREG_P (op0) || REG_P (op0))
1468         {
1469           scalar_int_mode imode;
1470           if (op0_mode.exists (&imode)
1471               && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1472             {
1473               if (offset)
1474                 op0_piece = const0_rtx;
1475             }
1476           else
1477             {
1478               op0_piece = operand_subword_force (op0,
1479                                                  offset * unit / BITS_PER_WORD,
1480                                                  GET_MODE (op0));
1481               op0_piece_mode = word_mode;
1482             }
1483           offset &= BITS_PER_WORD / unit - 1;
1484         }
1485
1486       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1487          it is just an out-of-bounds access.  Ignore it.  */
1488       if (op0_piece != const0_rtx)
1489         store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1490                                offset * unit + thispos, bitregion_start,
1491                                bitregion_end, part, word_mode, reverse);
1492       bitsdone += thissize;
1493     }
1494 }
1495 \f
1496 /* A subroutine of extract_bit_field_1 that converts return value X
1497    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1498    to extract_bit_field.  */
1499
1500 static rtx
1501 convert_extracted_bit_field (rtx x, machine_mode mode,
1502                              machine_mode tmode, bool unsignedp)
1503 {
1504   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1505     return x;
1506
1507   /* If the x mode is not a scalar integral, first convert to the
1508      integer mode of that size and then access it as a floating-point
1509      value via a SUBREG.  */
1510   if (!SCALAR_INT_MODE_P (tmode))
1511     {
1512       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1513       x = convert_to_mode (int_mode, x, unsignedp);
1514       x = force_reg (int_mode, x);
1515       return gen_lowpart (tmode, x);
1516     }
1517
1518   return convert_to_mode (tmode, x, unsignedp);
1519 }
1520
1521 /* Try to use an ext(z)v pattern to extract a field from OP0.
1522    Return the extracted value on success, otherwise return null.
1523    EXTV describes the extraction instruction to use.  If OP0_MODE
1524    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1525    The other arguments are as for extract_bit_field.  */
1526
1527 static rtx
1528 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1529                               opt_scalar_int_mode op0_mode,
1530                               unsigned HOST_WIDE_INT bitsize,
1531                               unsigned HOST_WIDE_INT bitnum,
1532                               int unsignedp, rtx target,
1533                               machine_mode mode, machine_mode tmode)
1534 {
1535   class expand_operand ops[4];
1536   rtx spec_target = target;
1537   rtx spec_target_subreg = 0;
1538   scalar_int_mode ext_mode = extv->field_mode;
1539   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1540
1541   if (bitsize == 0 || unit < bitsize)
1542     return NULL_RTX;
1543
1544   if (MEM_P (op0))
1545     /* Get a reference to the first byte of the field.  */
1546     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1547                                 &bitnum);
1548   else
1549     {
1550       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1551       if (BYTES_BIG_ENDIAN)
1552         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1553
1554       /* If op0 is a register, we need it in EXT_MODE to make it
1555          acceptable to the format of ext(z)v.  */
1556       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1557         return NULL_RTX;
1558       if (REG_P (op0) && op0_mode.require () != ext_mode)
1559         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1560     }
1561
1562   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1563      "backwards" from the size of the unit we are extracting from.
1564      Otherwise, we count bits from the most significant on a
1565      BYTES/BITS_BIG_ENDIAN machine.  */
1566
1567   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1568     bitnum = unit - bitsize - bitnum;
1569
1570   if (target == 0)
1571     target = spec_target = gen_reg_rtx (tmode);
1572
1573   if (GET_MODE (target) != ext_mode)
1574     {
1575       rtx temp;
1576       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1577          between the mode of the extraction (word_mode) and the target
1578          mode.  Instead, create a temporary and use convert_move to set
1579          the target.  */
1580       if (REG_P (target)
1581           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1582           && (temp = gen_lowpart_if_possible (ext_mode, target)))
1583         {
1584           target = temp;
1585           if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1586             spec_target_subreg = target;
1587         }
1588       else
1589         target = gen_reg_rtx (ext_mode);
1590     }
1591
1592   create_output_operand (&ops[0], target, ext_mode);
1593   create_fixed_operand (&ops[1], op0);
1594   create_integer_operand (&ops[2], bitsize);
1595   create_integer_operand (&ops[3], bitnum);
1596   if (maybe_expand_insn (extv->icode, 4, ops))
1597     {
1598       target = ops[0].value;
1599       if (target == spec_target)
1600         return target;
1601       if (target == spec_target_subreg)
1602         return spec_target;
1603       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1604     }
1605   return NULL_RTX;
1606 }
1607
1608 /* See whether it would be valid to extract the part of OP0 described
1609    by BITNUM and BITSIZE into a value of mode MODE using a subreg
1610    operation.  Return the subreg if so, otherwise return null.  */
1611
1612 static rtx
1613 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1614                              poly_uint64 bitsize, poly_uint64 bitnum)
1615 {
1616   poly_uint64 bytenum;
1617   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1618       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1619       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1620       && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1621     return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1622   return NULL_RTX;
1623 }
1624
1625 /* A subroutine of extract_bit_field, with the same arguments.
1626    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1627    if we can find no other means of implementing the operation.
1628    if FALLBACK_P is false, return NULL instead.  */
1629
1630 static rtx
1631 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1632                      int unsignedp, rtx target, machine_mode mode,
1633                      machine_mode tmode, bool reverse, bool fallback_p,
1634                      rtx *alt_rtl)
1635 {
1636   rtx op0 = str_rtx;
1637   machine_mode mode1;
1638
1639   if (tmode == VOIDmode)
1640     tmode = mode;
1641
1642   while (GET_CODE (op0) == SUBREG)
1643     {
1644       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1645       op0 = SUBREG_REG (op0);
1646     }
1647
1648   /* If we have an out-of-bounds access to a register, just return an
1649      uninitialized register of the required mode.  This can occur if the
1650      source code contains an out-of-bounds access to a small array.  */
1651   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1652     return gen_reg_rtx (tmode);
1653
1654   if (REG_P (op0)
1655       && mode == GET_MODE (op0)
1656       && known_eq (bitnum, 0U)
1657       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1658     {
1659       if (reverse)
1660         op0 = flip_storage_order (mode, op0);
1661       /* We're trying to extract a full register from itself.  */
1662       return op0;
1663     }
1664
1665   /* First try to check for vector from vector extractions.  */
1666   if (VECTOR_MODE_P (GET_MODE (op0))
1667       && !MEM_P (op0)
1668       && VECTOR_MODE_P (tmode)
1669       && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1670       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1671     {
1672       machine_mode new_mode = GET_MODE (op0);
1673       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1674         {
1675           scalar_mode inner_mode = GET_MODE_INNER (tmode);
1676           poly_uint64 nunits;
1677           if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1678                            GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1679               || !related_vector_mode (tmode, inner_mode,
1680                                        nunits).exists (&new_mode)
1681               || maybe_ne (GET_MODE_SIZE (new_mode),
1682                            GET_MODE_SIZE (GET_MODE (op0))))
1683             new_mode = VOIDmode;
1684         }
1685       poly_uint64 pos;
1686       if (new_mode != VOIDmode
1687           && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1688               != CODE_FOR_nothing)
1689           && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1690         {
1691           class expand_operand ops[3];
1692           machine_mode outermode = new_mode;
1693           machine_mode innermode = tmode;
1694           enum insn_code icode
1695             = convert_optab_handler (vec_extract_optab, outermode, innermode);
1696
1697           if (new_mode != GET_MODE (op0))
1698             op0 = gen_lowpart (new_mode, op0);
1699           create_output_operand (&ops[0], target, innermode);
1700           ops[0].target = 1;
1701           create_input_operand (&ops[1], op0, outermode);
1702           create_integer_operand (&ops[2], pos);
1703           if (maybe_expand_insn (icode, 3, ops))
1704             {
1705               if (alt_rtl && ops[0].target)
1706                 *alt_rtl = target;
1707               target = ops[0].value;
1708               if (GET_MODE (target) != mode)
1709                 return gen_lowpart (tmode, target);
1710               return target;
1711             }
1712         }
1713     }
1714
1715   /* See if we can get a better vector mode before extracting.  */
1716   if (VECTOR_MODE_P (GET_MODE (op0))
1717       && !MEM_P (op0)
1718       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1719     {
1720       machine_mode new_mode;
1721
1722       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1723         new_mode = MIN_MODE_VECTOR_FLOAT;
1724       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1725         new_mode = MIN_MODE_VECTOR_FRACT;
1726       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1727         new_mode = MIN_MODE_VECTOR_UFRACT;
1728       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1729         new_mode = MIN_MODE_VECTOR_ACCUM;
1730       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1731         new_mode = MIN_MODE_VECTOR_UACCUM;
1732       else
1733         new_mode = MIN_MODE_VECTOR_INT;
1734
1735       FOR_EACH_MODE_FROM (new_mode, new_mode)
1736         if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1737             && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1738             && targetm.vector_mode_supported_p (new_mode))
1739           break;
1740       if (new_mode != VOIDmode)
1741         op0 = gen_lowpart (new_mode, op0);
1742     }
1743
1744   /* Use vec_extract patterns for extracting parts of vectors whenever
1745      available.  If that fails, see whether the current modes and bitregion
1746      give a natural subreg.  */
1747   machine_mode outermode = GET_MODE (op0);
1748   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1749     {
1750       scalar_mode innermode = GET_MODE_INNER (outermode);
1751       enum insn_code icode
1752         = convert_optab_handler (vec_extract_optab, outermode, innermode);
1753       poly_uint64 pos;
1754       if (icode != CODE_FOR_nothing
1755           && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1756           && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1757         {
1758           class expand_operand ops[3];
1759
1760           create_output_operand (&ops[0], target, innermode);
1761           ops[0].target = 1;
1762           create_input_operand (&ops[1], op0, outermode);
1763           create_integer_operand (&ops[2], pos);
1764           if (maybe_expand_insn (icode, 3, ops))
1765             {
1766               if (alt_rtl && ops[0].target)
1767                 *alt_rtl = target;
1768               target = ops[0].value;
1769               if (GET_MODE (target) != mode)
1770                 return gen_lowpart (tmode, target);
1771               return target;
1772             }
1773         }
1774       /* Using subregs is useful if we're extracting one register vector
1775          from a multi-register vector.  extract_bit_field_as_subreg checks
1776          for valid bitsize and bitnum, so we don't need to do that here.  */
1777       if (VECTOR_MODE_P (mode))
1778         {
1779           rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1780           if (sub)
1781             return sub;
1782         }
1783     }
1784
1785   /* Make sure we are playing with integral modes.  Pun with subregs
1786      if we aren't.  */
1787   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1788   scalar_int_mode imode;
1789   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1790     {
1791       if (MEM_P (op0))
1792         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1793                                             0, MEM_SIZE (op0));
1794       else if (op0_mode.exists (&imode))
1795         {
1796           op0 = gen_lowpart (imode, op0);
1797
1798           /* If we got a SUBREG, force it into a register since we
1799              aren't going to be able to do another SUBREG on it.  */
1800           if (GET_CODE (op0) == SUBREG)
1801             op0 = force_reg (imode, op0);
1802         }
1803       else
1804         {
1805           poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1806           rtx mem = assign_stack_temp (GET_MODE (op0), size);
1807           emit_move_insn (mem, op0);
1808           op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1809         }
1810     }
1811
1812   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1813      If that's wrong, the solution is to test for it and set TARGET to 0
1814      if needed.  */
1815
1816   /* Get the mode of the field to use for atomic access or subreg
1817      conversion.  */
1818   if (!SCALAR_INT_MODE_P (tmode)
1819       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1820     mode1 = mode;
1821   gcc_assert (mode1 != BLKmode);
1822
1823   /* Extraction of a full MODE1 value can be done with a subreg as long
1824      as the least significant bit of the value is the least significant
1825      bit of either OP0 or a word of OP0.  */
1826   if (!MEM_P (op0) && !reverse)
1827     {
1828       rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1829       if (sub)
1830         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1831     }
1832
1833   /* Extraction of a full MODE1 value can be done with a load as long as
1834      the field is on a byte boundary and is sufficiently aligned.  */
1835   poly_uint64 bytenum;
1836   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1837     {
1838       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1839       if (reverse)
1840         op0 = flip_storage_order (mode1, op0);
1841       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1842     }
1843
1844   /* If we have a memory source and a non-constant bit offset, restrict
1845      the memory to the referenced bytes.  This is a worst-case fallback
1846      but is useful for things like vector booleans.  */
1847   if (MEM_P (op0) && !bitnum.is_constant ())
1848     {
1849       bytenum = bits_to_bytes_round_down (bitnum);
1850       bitnum = num_trailing_bits (bitnum);
1851       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1852       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1853       op0_mode = opt_scalar_int_mode ();
1854     }
1855
1856   /* It's possible we'll need to handle other cases here for
1857      polynomial bitnum and bitsize.  */
1858
1859   /* From here on we need to be looking at a fixed-size insertion.  */
1860   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1861                                      bitnum.to_constant (), unsignedp,
1862                                      target, mode, tmode, reverse, fallback_p);
1863 }
1864
1865 /* Subroutine of extract_bit_field_1, with the same arguments, except
1866    that BITSIZE and BITNUM are constant.  Handle cases specific to
1867    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1868    otherwise OP0 is a BLKmode MEM.  */
1869
1870 static rtx
1871 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1872                             unsigned HOST_WIDE_INT bitsize,
1873                             unsigned HOST_WIDE_INT bitnum, int unsignedp,
1874                             rtx target, machine_mode mode, machine_mode tmode,
1875                             bool reverse, bool fallback_p)
1876 {
1877   /* Handle fields bigger than a word.  */
1878
1879   if (bitsize > BITS_PER_WORD)
1880     {
1881       /* Here we transfer the words of the field
1882          in the order least significant first.
1883          This is because the most significant word is the one which may
1884          be less than full.  */
1885
1886       const bool backwards = WORDS_BIG_ENDIAN;
1887       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1888       unsigned int i;
1889       rtx_insn *last;
1890
1891       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1892         target = gen_reg_rtx (mode);
1893
1894       /* In case we're about to clobber a base register or something
1895          (see gcc.c-torture/execute/20040625-1.c).   */
1896       if (reg_mentioned_p (target, op0))
1897         target = gen_reg_rtx (mode);
1898
1899       /* Indicate for flow that the entire target reg is being set.  */
1900       emit_clobber (target);
1901
1902       /* The mode must be fixed-size, since extract_bit_field_1 handles
1903          extractions from variable-sized objects before calling this
1904          function.  */
1905       unsigned int target_size
1906         = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1907       last = get_last_insn ();
1908       for (i = 0; i < nwords; i++)
1909         {
1910           /* If I is 0, use the low-order word in both field and target;
1911              if I is 1, use the next to lowest word; and so on.  */
1912           /* Word number in TARGET to use.  */
1913           unsigned int wordnum
1914             = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1915           /* Offset from start of field in OP0.  */
1916           unsigned int bit_offset = (backwards ^ reverse
1917                                      ? MAX ((int) bitsize - ((int) i + 1)
1918                                             * BITS_PER_WORD,
1919                                             0)
1920                                      : (int) i * BITS_PER_WORD);
1921           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1922           rtx result_part
1923             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1924                                              bitsize - i * BITS_PER_WORD),
1925                                    bitnum + bit_offset, 1, target_part,
1926                                    mode, word_mode, reverse, fallback_p, NULL);
1927
1928           gcc_assert (target_part);
1929           if (!result_part)
1930             {
1931               delete_insns_since (last);
1932               return NULL;
1933             }
1934
1935           if (result_part != target_part)
1936             emit_move_insn (target_part, result_part);
1937         }
1938
1939       if (unsignedp)
1940         {
1941           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1942              need to be zero'd out.  */
1943           if (target_size > nwords * UNITS_PER_WORD)
1944             {
1945               unsigned int i, total_words;
1946
1947               total_words = target_size / UNITS_PER_WORD;
1948               for (i = nwords; i < total_words; i++)
1949                 emit_move_insn
1950                   (operand_subword (target,
1951                                     backwards ? total_words - i - 1 : i,
1952                                     1, VOIDmode),
1953                    const0_rtx);
1954             }
1955           return target;
1956         }
1957
1958       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1959       target = expand_shift (LSHIFT_EXPR, mode, target,
1960                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1961       return expand_shift (RSHIFT_EXPR, mode, target,
1962                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1963     }
1964
1965   /* If OP0 is a multi-word register, narrow it to the affected word.
1966      If the region spans two words, defer to extract_split_bit_field.  */
1967   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1968     {
1969       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1970         {
1971           if (!fallback_p)
1972             return NULL_RTX;
1973           target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1974                                             unsignedp, reverse);
1975           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1976         }
1977       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1978                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1979       op0_mode = word_mode;
1980       bitnum %= BITS_PER_WORD;
1981     }
1982
1983   /* From here on we know the desired field is smaller than a word.
1984      If OP0 is a register, it too fits within a word.  */
1985   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1986   extraction_insn extv;
1987   if (!MEM_P (op0)
1988       && !reverse
1989       /* ??? We could limit the structure size to the part of OP0 that
1990          contains the field, with appropriate checks for endianness
1991          and TARGET_TRULY_NOOP_TRUNCATION.  */
1992       && get_best_reg_extraction_insn (&extv, pattern,
1993                                        GET_MODE_BITSIZE (op0_mode.require ()),
1994                                        tmode))
1995     {
1996       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1997                                                  bitsize, bitnum,
1998                                                  unsignedp, target, mode,
1999                                                  tmode);
2000       if (result)
2001         return result;
2002     }
2003
2004   /* If OP0 is a memory, try copying it to a register and seeing if a
2005      cheap register alternative is available.  */
2006   if (MEM_P (op0) & !reverse)
2007     {
2008       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2009                                         tmode))
2010         {
2011           rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2012                                                      bitsize, bitnum,
2013                                                      unsignedp, target, mode,
2014                                                      tmode);
2015           if (result)
2016             return result;
2017         }
2018
2019       rtx_insn *last = get_last_insn ();
2020
2021       /* Try loading part of OP0 into a register and extracting the
2022          bitfield from that.  */
2023       unsigned HOST_WIDE_INT bitpos;
2024       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2025                                                0, 0, tmode, &bitpos);
2026       if (xop0)
2027         {
2028           xop0 = copy_to_reg (xop0);
2029           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2030                                             unsignedp, target,
2031                                             mode, tmode, reverse, false, NULL);
2032           if (result)
2033             return result;
2034           delete_insns_since (last);
2035         }
2036     }
2037
2038   if (!fallback_p)
2039     return NULL;
2040
2041   /* Find a correspondingly-sized integer field, so we can apply
2042      shifts and masks to it.  */
2043   scalar_int_mode int_mode;
2044   if (!int_mode_for_mode (tmode).exists (&int_mode))
2045     /* If this fails, we should probably push op0 out to memory and then
2046        do a load.  */
2047     int_mode = int_mode_for_mode (mode).require ();
2048
2049   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2050                                     bitnum, target, unsignedp, reverse);
2051
2052   /* Complex values must be reversed piecewise, so we need to undo the global
2053      reversal, convert to the complex mode and reverse again.  */
2054   if (reverse && COMPLEX_MODE_P (tmode))
2055     {
2056       target = flip_storage_order (int_mode, target);
2057       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2058       target = flip_storage_order (tmode, target);
2059     }
2060   else
2061     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2062
2063   return target;
2064 }
2065
2066 /* Generate code to extract a byte-field from STR_RTX
2067    containing BITSIZE bits, starting at BITNUM,
2068    and put it in TARGET if possible (if TARGET is nonzero).
2069    Regardless of TARGET, we return the rtx for where the value is placed.
2070
2071    STR_RTX is the structure containing the byte (a REG or MEM).
2072    UNSIGNEDP is nonzero if this is an unsigned bit field.
2073    MODE is the natural mode of the field value once extracted.
2074    TMODE is the mode the caller would like the value to have;
2075    but the value may be returned with type MODE instead.
2076
2077    If REVERSE is true, the extraction is to be done in reverse order.
2078
2079    If a TARGET is specified and we can store in it at no extra cost,
2080    we do so, and return TARGET.
2081    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2082    if they are equally easy.
2083
2084    If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2085    then *ALT_RTL is set to TARGET (before legitimziation).  */
2086
2087 rtx
2088 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2089                    int unsignedp, rtx target, machine_mode mode,
2090                    machine_mode tmode, bool reverse, rtx *alt_rtl)
2091 {
2092   machine_mode mode1;
2093
2094   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2095   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2096     mode1 = GET_MODE (str_rtx);
2097   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2098     mode1 = GET_MODE (target);
2099   else
2100     mode1 = tmode;
2101
2102   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2103   scalar_int_mode int_mode;
2104   if (bitsize.is_constant (&ibitsize)
2105       && bitnum.is_constant (&ibitnum)
2106       && is_a <scalar_int_mode> (mode1, &int_mode)
2107       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2108                                      int_mode, 0, 0))
2109     {
2110       /* Extraction of a full INT_MODE value can be done with a simple load.
2111          We know here that the field can be accessed with one single
2112          instruction.  For targets that support unaligned memory,
2113          an unaligned access may be necessary.  */
2114       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2115         {
2116           rtx result = adjust_bitfield_address (str_rtx, int_mode,
2117                                                 ibitnum / BITS_PER_UNIT);
2118           if (reverse)
2119             result = flip_storage_order (int_mode, result);
2120           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2121           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2122         }
2123
2124       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2125                                       &ibitnum);
2126       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2127       str_rtx = copy_to_reg (str_rtx);
2128       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2129                                   target, mode, tmode, reverse, true, alt_rtl);
2130     }
2131
2132   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2133                               target, mode, tmode, reverse, true, alt_rtl);
2134 }
2135 \f
2136 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2137    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2138    otherwise OP0 is a BLKmode MEM.
2139
2140    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2141    If REVERSE is true, the extraction is to be done in reverse order.
2142
2143    If TARGET is nonzero, attempts to store the value there
2144    and return TARGET, but this is not guaranteed.
2145    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2146
2147 static rtx
2148 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2149                          opt_scalar_int_mode op0_mode,
2150                          unsigned HOST_WIDE_INT bitsize,
2151                          unsigned HOST_WIDE_INT bitnum, rtx target,
2152                          int unsignedp, bool reverse)
2153 {
2154   scalar_int_mode mode;
2155   if (MEM_P (op0))
2156     {
2157       if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2158                           BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2159         /* The only way this should occur is if the field spans word
2160            boundaries.  */
2161         return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2162                                         unsignedp, reverse);
2163
2164       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2165     }
2166   else
2167     mode = op0_mode.require ();
2168
2169   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2170                                     target, unsignedp, reverse);
2171 }
2172
2173 /* Helper function for extract_fixed_bit_field, extracts
2174    the bit field always using MODE, which is the mode of OP0.
2175    The other arguments are as for extract_fixed_bit_field.  */
2176
2177 static rtx
2178 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2179                            unsigned HOST_WIDE_INT bitsize,
2180                            unsigned HOST_WIDE_INT bitnum, rtx target,
2181                            int unsignedp, bool reverse)
2182 {
2183   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2184      for invalid input, such as extract equivalent of f5 from
2185      gcc.dg/pr48335-2.c.  */
2186
2187   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2188     /* BITNUM is the distance between our msb and that of OP0.
2189        Convert it to the distance from the lsb.  */
2190     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2191
2192   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2193      We have reduced the big-endian case to the little-endian case.  */
2194   if (reverse)
2195     op0 = flip_storage_order (mode, op0);
2196
2197   if (unsignedp)
2198     {
2199       if (bitnum)
2200         {
2201           /* If the field does not already start at the lsb,
2202              shift it so it does.  */
2203           /* Maybe propagate the target for the shift.  */
2204           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2205           if (tmode != mode)
2206             subtarget = 0;
2207           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2208         }
2209       /* Convert the value to the desired mode.  TMODE must also be a
2210          scalar integer for this conversion to make sense, since we
2211          shouldn't reinterpret the bits.  */
2212       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2213       if (mode != new_mode)
2214         op0 = convert_to_mode (new_mode, op0, 1);
2215
2216       /* Unless the msb of the field used to be the msb when we shifted,
2217          mask out the upper bits.  */
2218
2219       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2220         return expand_binop (new_mode, and_optab, op0,
2221                              mask_rtx (new_mode, 0, bitsize, 0),
2222                              target, 1, OPTAB_LIB_WIDEN);
2223       return op0;
2224     }
2225
2226   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2227      then arithmetic-shift its lsb to the lsb of the word.  */
2228   op0 = force_reg (mode, op0);
2229
2230   /* Find the narrowest integer mode that contains the field.  */
2231
2232   opt_scalar_int_mode mode_iter;
2233   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2234     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2235       break;
2236
2237   mode = mode_iter.require ();
2238   op0 = convert_to_mode (mode, op0, 0);
2239
2240   if (mode != tmode)
2241     target = 0;
2242
2243   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2244     {
2245       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2246       /* Maybe propagate the target for the shift.  */
2247       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2248       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2249     }
2250
2251   return expand_shift (RSHIFT_EXPR, mode, op0,
2252                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2253 }
2254
2255 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2256    VALUE << BITPOS.  */
2257
2258 static rtx
2259 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2260               int bitpos)
2261 {
2262   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2263 }
2264 \f
2265 /* Extract a bit field that is split across two words
2266    and return an RTX for the result.
2267
2268    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2269    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2270    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2271    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2272    a BLKmode MEM.
2273
2274    If REVERSE is true, the extraction is to be done in reverse order.  */
2275
2276 static rtx
2277 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2278                          unsigned HOST_WIDE_INT bitsize,
2279                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2280                          bool reverse)
2281 {
2282   unsigned int unit;
2283   unsigned int bitsdone = 0;
2284   rtx result = NULL_RTX;
2285   int first = 1;
2286
2287   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2288      much at a time.  */
2289   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2290     unit = BITS_PER_WORD;
2291   else
2292     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2293
2294   while (bitsdone < bitsize)
2295     {
2296       unsigned HOST_WIDE_INT thissize;
2297       rtx part;
2298       unsigned HOST_WIDE_INT thispos;
2299       unsigned HOST_WIDE_INT offset;
2300
2301       offset = (bitpos + bitsdone) / unit;
2302       thispos = (bitpos + bitsdone) % unit;
2303
2304       /* THISSIZE must not overrun a word boundary.  Otherwise,
2305          extract_fixed_bit_field will call us again, and we will mutually
2306          recurse forever.  */
2307       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2308       thissize = MIN (thissize, unit - thispos);
2309
2310       /* If OP0 is a register, then handle OFFSET here.  */
2311       rtx op0_piece = op0;
2312       opt_scalar_int_mode op0_piece_mode = op0_mode;
2313       if (SUBREG_P (op0) || REG_P (op0))
2314         {
2315           op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2316           op0_piece_mode = word_mode;
2317           offset = 0;
2318         }
2319
2320       /* Extract the parts in bit-counting order,
2321          whose meaning is determined by BYTES_PER_UNIT.
2322          OFFSET is in UNITs, and UNIT is in bits.  */
2323       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2324                                       thissize, offset * unit + thispos,
2325                                       0, 1, reverse);
2326       bitsdone += thissize;
2327
2328       /* Shift this part into place for the result.  */
2329       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2330         {
2331           if (bitsize != bitsdone)
2332             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2333                                  bitsize - bitsdone, 0, 1);
2334         }
2335       else
2336         {
2337           if (bitsdone != thissize)
2338             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2339                                  bitsdone - thissize, 0, 1);
2340         }
2341
2342       if (first)
2343         result = part;
2344       else
2345         /* Combine the parts with bitwise or.  This works
2346            because we extracted each part as an unsigned bit field.  */
2347         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2348                                OPTAB_LIB_WIDEN);
2349
2350       first = 0;
2351     }
2352
2353   /* Unsigned bit field: we are done.  */
2354   if (unsignedp)
2355     return result;
2356   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2357   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2358                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2359   return expand_shift (RSHIFT_EXPR, word_mode, result,
2360                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2361 }
2362 \f
2363 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2364    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2365    MODE, fill the upper bits with zeros.  Fail if the layout of either
2366    mode is unknown (as for CC modes) or if the extraction would involve
2367    unprofitable mode punning.  Return the value on success, otherwise
2368    return null.
2369
2370    This is different from gen_lowpart* in these respects:
2371
2372      - the returned value must always be considered an rvalue
2373
2374      - when MODE is wider than SRC_MODE, the extraction involves
2375        a zero extension
2376
2377      - when MODE is smaller than SRC_MODE, the extraction involves
2378        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2379
2380    In other words, this routine performs a computation, whereas the
2381    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2382    operations.  */
2383
2384 rtx
2385 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2386 {
2387   scalar_int_mode int_mode, src_int_mode;
2388
2389   if (mode == src_mode)
2390     return src;
2391
2392   if (CONSTANT_P (src))
2393     {
2394       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2395          fails, it will happily create (subreg (symbol_ref)) or similar
2396          invalid SUBREGs.  */
2397       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2398       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2399       if (ret)
2400         return ret;
2401
2402       if (GET_MODE (src) == VOIDmode
2403           || !validate_subreg (mode, src_mode, src, byte))
2404         return NULL_RTX;
2405
2406       src = force_reg (GET_MODE (src), src);
2407       return gen_rtx_SUBREG (mode, src, byte);
2408     }
2409
2410   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2411     return NULL_RTX;
2412
2413   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2414       && targetm.modes_tieable_p (mode, src_mode))
2415     {
2416       rtx x = gen_lowpart_common (mode, src);
2417       if (x)
2418         return x;
2419     }
2420
2421   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2422       || !int_mode_for_mode (mode).exists (&int_mode))
2423     return NULL_RTX;
2424
2425   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2426     return NULL_RTX;
2427   if (!targetm.modes_tieable_p (int_mode, mode))
2428     return NULL_RTX;
2429
2430   src = gen_lowpart (src_int_mode, src);
2431   if (!validate_subreg (int_mode, src_int_mode, src,
2432                         subreg_lowpart_offset (int_mode, src_int_mode)))
2433     return NULL_RTX;
2434
2435   src = convert_modes (int_mode, src_int_mode, src, true);
2436   src = gen_lowpart (mode, src);
2437   return src;
2438 }
2439 \f
2440 /* Add INC into TARGET.  */
2441
2442 void
2443 expand_inc (rtx target, rtx inc)
2444 {
2445   rtx value = expand_binop (GET_MODE (target), add_optab,
2446                             target, inc,
2447                             target, 0, OPTAB_LIB_WIDEN);
2448   if (value != target)
2449     emit_move_insn (target, value);
2450 }
2451
2452 /* Subtract DEC from TARGET.  */
2453
2454 void
2455 expand_dec (rtx target, rtx dec)
2456 {
2457   rtx value = expand_binop (GET_MODE (target), sub_optab,
2458                             target, dec,
2459                             target, 0, OPTAB_LIB_WIDEN);
2460   if (value != target)
2461     emit_move_insn (target, value);
2462 }
2463 \f
2464 /* Output a shift instruction for expression code CODE,
2465    with SHIFTED being the rtx for the value to shift,
2466    and AMOUNT the rtx for the amount to shift by.
2467    Store the result in the rtx TARGET, if that is convenient.
2468    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2469    Return the rtx for where the value is.
2470    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2471    in which case 0 is returned.  */
2472
2473 static rtx
2474 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2475                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2476 {
2477   rtx op1, temp = 0;
2478   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2479   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2480   optab lshift_optab = ashl_optab;
2481   optab rshift_arith_optab = ashr_optab;
2482   optab rshift_uns_optab = lshr_optab;
2483   optab lrotate_optab = rotl_optab;
2484   optab rrotate_optab = rotr_optab;
2485   machine_mode op1_mode;
2486   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2487   int attempt;
2488   bool speed = optimize_insn_for_speed_p ();
2489
2490   op1 = amount;
2491   op1_mode = GET_MODE (op1);
2492
2493   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2494      shift amount is a vector, use the vector/vector shift patterns.  */
2495   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2496     {
2497       lshift_optab = vashl_optab;
2498       rshift_arith_optab = vashr_optab;
2499       rshift_uns_optab = vlshr_optab;
2500       lrotate_optab = vrotl_optab;
2501       rrotate_optab = vrotr_optab;
2502     }
2503
2504   /* Previously detected shift-counts computed by NEGATE_EXPR
2505      and shifted in the other direction; but that does not work
2506      on all machines.  */
2507
2508   if (SHIFT_COUNT_TRUNCATED)
2509     {
2510       if (CONST_INT_P (op1)
2511           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2512               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2513         op1 = gen_int_shift_amount (mode,
2514                                     (unsigned HOST_WIDE_INT) INTVAL (op1)
2515                                     % GET_MODE_BITSIZE (scalar_mode));
2516       else if (GET_CODE (op1) == SUBREG
2517                && subreg_lowpart_p (op1)
2518                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2519                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2520         op1 = SUBREG_REG (op1);
2521     }
2522
2523   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2524      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2525      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2526      amount instead.  */
2527   if (rotate
2528       && CONST_INT_P (op1)
2529       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2530                    GET_MODE_BITSIZE (scalar_mode) - 1))
2531     {
2532       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2533                                          - INTVAL (op1)));
2534       left = !left;
2535       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2536     }
2537
2538   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2539      Note that this is not the case for bigger values.  For instance a rotation
2540      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2541      0x04030201 (bswapsi).  */
2542   if (rotate
2543       && CONST_INT_P (op1)
2544       && INTVAL (op1) == BITS_PER_UNIT
2545       && GET_MODE_SIZE (scalar_mode) == 2
2546       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2547     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2548
2549   if (op1 == const0_rtx)
2550     return shifted;
2551
2552   /* Check whether its cheaper to implement a left shift by a constant
2553      bit count by a sequence of additions.  */
2554   if (code == LSHIFT_EXPR
2555       && CONST_INT_P (op1)
2556       && INTVAL (op1) > 0
2557       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2558       && INTVAL (op1) < MAX_BITS_PER_WORD
2559       && (shift_cost (speed, mode, INTVAL (op1))
2560           > INTVAL (op1) * add_cost (speed, mode))
2561       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2562     {
2563       int i;
2564       for (i = 0; i < INTVAL (op1); i++)
2565         {
2566           temp = force_reg (mode, shifted);
2567           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2568                                   unsignedp, OPTAB_LIB_WIDEN);
2569         }
2570       return shifted;
2571     }
2572
2573   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2574     {
2575       enum optab_methods methods;
2576
2577       if (attempt == 0)
2578         methods = OPTAB_DIRECT;
2579       else if (attempt == 1)
2580         methods = OPTAB_WIDEN;
2581       else
2582         methods = OPTAB_LIB_WIDEN;
2583
2584       if (rotate)
2585         {
2586           /* Widening does not work for rotation.  */
2587           if (methods == OPTAB_WIDEN)
2588             continue;
2589           else if (methods == OPTAB_LIB_WIDEN)
2590             {
2591               /* If we have been unable to open-code this by a rotation,
2592                  do it as the IOR of two shifts.  I.e., to rotate A
2593                  by N bits, compute
2594                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2595                  where C is the bitsize of A.
2596
2597                  It is theoretically possible that the target machine might
2598                  not be able to perform either shift and hence we would
2599                  be making two libcalls rather than just the one for the
2600                  shift (similarly if IOR could not be done).  We will allow
2601                  this extremely unlikely lossage to avoid complicating the
2602                  code below.  */
2603
2604               rtx subtarget = target == shifted ? 0 : target;
2605               rtx new_amount, other_amount;
2606               rtx temp1;
2607
2608               new_amount = op1;
2609               if (op1 == const0_rtx)
2610                 return shifted;
2611               else if (CONST_INT_P (op1))
2612                 other_amount = gen_int_shift_amount
2613                   (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2614               else
2615                 {
2616                   other_amount
2617                     = simplify_gen_unary (NEG, GET_MODE (op1),
2618                                           op1, GET_MODE (op1));
2619                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2620                   other_amount
2621                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2622                                            gen_int_mode (mask, GET_MODE (op1)));
2623                 }
2624
2625               shifted = force_reg (mode, shifted);
2626
2627               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2628                                      mode, shifted, new_amount, 0, 1);
2629               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2630                                       mode, shifted, other_amount,
2631                                       subtarget, 1);
2632               return expand_binop (mode, ior_optab, temp, temp1, target,
2633                                    unsignedp, methods);
2634             }
2635
2636           temp = expand_binop (mode,
2637                                left ? lrotate_optab : rrotate_optab,
2638                                shifted, op1, target, unsignedp, methods);
2639         }
2640       else if (unsignedp)
2641         temp = expand_binop (mode,
2642                              left ? lshift_optab : rshift_uns_optab,
2643                              shifted, op1, target, unsignedp, methods);
2644
2645       /* Do arithmetic shifts.
2646          Also, if we are going to widen the operand, we can just as well
2647          use an arithmetic right-shift instead of a logical one.  */
2648       if (temp == 0 && ! rotate
2649           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2650         {
2651           enum optab_methods methods1 = methods;
2652
2653           /* If trying to widen a log shift to an arithmetic shift,
2654              don't accept an arithmetic shift of the same size.  */
2655           if (unsignedp)
2656             methods1 = OPTAB_MUST_WIDEN;
2657
2658           /* Arithmetic shift */
2659
2660           temp = expand_binop (mode,
2661                                left ? lshift_optab : rshift_arith_optab,
2662                                shifted, op1, target, unsignedp, methods1);
2663         }
2664
2665       /* We used to try extzv here for logical right shifts, but that was
2666          only useful for one machine, the VAX, and caused poor code
2667          generation there for lshrdi3, so the code was deleted and a
2668          define_expand for lshrsi3 was added to vax.md.  */
2669     }
2670
2671   gcc_assert (temp != NULL_RTX || may_fail);
2672   return temp;
2673 }
2674
2675 /* Output a shift instruction for expression code CODE,
2676    with SHIFTED being the rtx for the value to shift,
2677    and AMOUNT the amount to shift by.
2678    Store the result in the rtx TARGET, if that is convenient.
2679    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2680    Return the rtx for where the value is.  */
2681
2682 rtx
2683 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2684               poly_int64 amount, rtx target, int unsignedp)
2685 {
2686   return expand_shift_1 (code, mode, shifted,
2687                          gen_int_shift_amount (mode, amount),
2688                          target, unsignedp);
2689 }
2690
2691 /* Likewise, but return 0 if that cannot be done.  */
2692
2693 static rtx
2694 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2695                     int amount, rtx target, int unsignedp)
2696 {
2697   return expand_shift_1 (code, mode,
2698                          shifted, GEN_INT (amount), target, unsignedp, true);
2699 }
2700
2701 /* Output a shift instruction for expression code CODE,
2702    with SHIFTED being the rtx for the value to shift,
2703    and AMOUNT the tree for the amount to shift by.
2704    Store the result in the rtx TARGET, if that is convenient.
2705    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2706    Return the rtx for where the value is.  */
2707
2708 rtx
2709 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2710                        tree amount, rtx target, int unsignedp)
2711 {
2712   return expand_shift_1 (code, mode,
2713                          shifted, expand_normal (amount), target, unsignedp);
2714 }
2715
2716 \f
2717 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2718                         const struct mult_cost *, machine_mode mode);
2719 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2720                               const struct algorithm *, enum mult_variant);
2721 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2722 static rtx extract_high_half (scalar_int_mode, rtx);
2723 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2724 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2725                                        int, int);
2726 /* Compute and return the best algorithm for multiplying by T.
2727    The algorithm must cost less than cost_limit
2728    If retval.cost >= COST_LIMIT, no algorithm was found and all
2729    other field of the returned struct are undefined.
2730    MODE is the machine mode of the multiplication.  */
2731
2732 static void
2733 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2734             const struct mult_cost *cost_limit, machine_mode mode)
2735 {
2736   int m;
2737   struct algorithm *alg_in, *best_alg;
2738   struct mult_cost best_cost;
2739   struct mult_cost new_limit;
2740   int op_cost, op_latency;
2741   unsigned HOST_WIDE_INT orig_t = t;
2742   unsigned HOST_WIDE_INT q;
2743   int maxm, hash_index;
2744   bool cache_hit = false;
2745   enum alg_code cache_alg = alg_zero;
2746   bool speed = optimize_insn_for_speed_p ();
2747   scalar_int_mode imode;
2748   struct alg_hash_entry *entry_ptr;
2749
2750   /* Indicate that no algorithm is yet found.  If no algorithm
2751      is found, this value will be returned and indicate failure.  */
2752   alg_out->cost.cost = cost_limit->cost + 1;
2753   alg_out->cost.latency = cost_limit->latency + 1;
2754
2755   if (cost_limit->cost < 0
2756       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2757     return;
2758
2759   /* Be prepared for vector modes.  */
2760   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2761
2762   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2763
2764   /* Restrict the bits of "t" to the multiplication's mode.  */
2765   t &= GET_MODE_MASK (imode);
2766
2767   /* t == 1 can be done in zero cost.  */
2768   if (t == 1)
2769     {
2770       alg_out->ops = 1;
2771       alg_out->cost.cost = 0;
2772       alg_out->cost.latency = 0;
2773       alg_out->op[0] = alg_m;
2774       return;
2775     }
2776
2777   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2778      fail now.  */
2779   if (t == 0)
2780     {
2781       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2782         return;
2783       else
2784         {
2785           alg_out->ops = 1;
2786           alg_out->cost.cost = zero_cost (speed);
2787           alg_out->cost.latency = zero_cost (speed);
2788           alg_out->op[0] = alg_zero;
2789           return;
2790         }
2791     }
2792
2793   /* We'll be needing a couple extra algorithm structures now.  */
2794
2795   alg_in = XALLOCA (struct algorithm);
2796   best_alg = XALLOCA (struct algorithm);
2797   best_cost = *cost_limit;
2798
2799   /* Compute the hash index.  */
2800   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2801
2802   /* See if we already know what to do for T.  */
2803   entry_ptr = alg_hash_entry_ptr (hash_index);
2804   if (entry_ptr->t == t
2805       && entry_ptr->mode == mode
2806       && entry_ptr->speed == speed
2807       && entry_ptr->alg != alg_unknown)
2808     {
2809       cache_alg = entry_ptr->alg;
2810
2811       if (cache_alg == alg_impossible)
2812         {
2813           /* The cache tells us that it's impossible to synthesize
2814              multiplication by T within entry_ptr->cost.  */
2815           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2816             /* COST_LIMIT is at least as restrictive as the one
2817                recorded in the hash table, in which case we have no
2818                hope of synthesizing a multiplication.  Just
2819                return.  */
2820             return;
2821
2822           /* If we get here, COST_LIMIT is less restrictive than the
2823              one recorded in the hash table, so we may be able to
2824              synthesize a multiplication.  Proceed as if we didn't
2825              have the cache entry.  */
2826         }
2827       else
2828         {
2829           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2830             /* The cached algorithm shows that this multiplication
2831                requires more cost than COST_LIMIT.  Just return.  This
2832                way, we don't clobber this cache entry with
2833                alg_impossible but retain useful information.  */
2834             return;
2835
2836           cache_hit = true;
2837
2838           switch (cache_alg)
2839             {
2840             case alg_shift:
2841               goto do_alg_shift;
2842
2843             case alg_add_t_m2:
2844             case alg_sub_t_m2:
2845               goto do_alg_addsub_t_m2;
2846
2847             case alg_add_factor:
2848             case alg_sub_factor:
2849               goto do_alg_addsub_factor;
2850
2851             case alg_add_t2_m:
2852               goto do_alg_add_t2_m;
2853
2854             case alg_sub_t2_m:
2855               goto do_alg_sub_t2_m;
2856
2857             default:
2858               gcc_unreachable ();
2859             }
2860         }
2861     }
2862
2863   /* If we have a group of zero bits at the low-order part of T, try
2864      multiplying by the remaining bits and then doing a shift.  */
2865
2866   if ((t & 1) == 0)
2867     {
2868     do_alg_shift:
2869       m = ctz_or_zero (t); /* m = number of low zero bits */
2870       if (m < maxm)
2871         {
2872           q = t >> m;
2873           /* The function expand_shift will choose between a shift and
2874              a sequence of additions, so the observed cost is given as
2875              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2876           op_cost = m * add_cost (speed, mode);
2877           if (shift_cost (speed, mode, m) < op_cost)
2878             op_cost = shift_cost (speed, mode, m);
2879           new_limit.cost = best_cost.cost - op_cost;
2880           new_limit.latency = best_cost.latency - op_cost;
2881           synth_mult (alg_in, q, &new_limit, mode);
2882
2883           alg_in->cost.cost += op_cost;
2884           alg_in->cost.latency += op_cost;
2885           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2886             {
2887               best_cost = alg_in->cost;
2888               std::swap (alg_in, best_alg);
2889               best_alg->log[best_alg->ops] = m;
2890               best_alg->op[best_alg->ops] = alg_shift;
2891             }
2892
2893           /* See if treating ORIG_T as a signed number yields a better
2894              sequence.  Try this sequence only for a negative ORIG_T
2895              as it would be useless for a non-negative ORIG_T.  */
2896           if ((HOST_WIDE_INT) orig_t < 0)
2897             {
2898               /* Shift ORIG_T as follows because a right shift of a
2899                  negative-valued signed type is implementation
2900                  defined.  */
2901               q = ~(~orig_t >> m);
2902               /* The function expand_shift will choose between a shift
2903                  and a sequence of additions, so the observed cost is
2904                  given as MIN (m * add_cost(speed, mode),
2905                  shift_cost(speed, mode, m)).  */
2906               op_cost = m * add_cost (speed, mode);
2907               if (shift_cost (speed, mode, m) < op_cost)
2908                 op_cost = shift_cost (speed, mode, m);
2909               new_limit.cost = best_cost.cost - op_cost;
2910               new_limit.latency = best_cost.latency - op_cost;
2911               synth_mult (alg_in, q, &new_limit, mode);
2912
2913               alg_in->cost.cost += op_cost;
2914               alg_in->cost.latency += op_cost;
2915               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2916                 {
2917                   best_cost = alg_in->cost;
2918                   std::swap (alg_in, best_alg);
2919                   best_alg->log[best_alg->ops] = m;
2920                   best_alg->op[best_alg->ops] = alg_shift;
2921                 }
2922             }
2923         }
2924       if (cache_hit)
2925         goto done;
2926     }
2927
2928   /* If we have an odd number, add or subtract one.  */
2929   if ((t & 1) != 0)
2930     {
2931       unsigned HOST_WIDE_INT w;
2932
2933     do_alg_addsub_t_m2:
2934       for (w = 1; (w & t) != 0; w <<= 1)
2935         ;
2936       /* If T was -1, then W will be zero after the loop.  This is another
2937          case where T ends with ...111.  Handling this with (T + 1) and
2938          subtract 1 produces slightly better code and results in algorithm
2939          selection much faster than treating it like the ...0111 case
2940          below.  */
2941       if (w == 0
2942           || (w > 2
2943               /* Reject the case where t is 3.
2944                  Thus we prefer addition in that case.  */
2945               && t != 3))
2946         {
2947           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2948
2949           op_cost = add_cost (speed, mode);
2950           new_limit.cost = best_cost.cost - op_cost;
2951           new_limit.latency = best_cost.latency - op_cost;
2952           synth_mult (alg_in, t + 1, &new_limit, mode);
2953
2954           alg_in->cost.cost += op_cost;
2955           alg_in->cost.latency += op_cost;
2956           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2957             {
2958               best_cost = alg_in->cost;
2959               std::swap (alg_in, best_alg);
2960               best_alg->log[best_alg->ops] = 0;
2961               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2962             }
2963         }
2964       else
2965         {
2966           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2967
2968           op_cost = add_cost (speed, mode);
2969           new_limit.cost = best_cost.cost - op_cost;
2970           new_limit.latency = best_cost.latency - op_cost;
2971           synth_mult (alg_in, t - 1, &new_limit, mode);
2972
2973           alg_in->cost.cost += op_cost;
2974           alg_in->cost.latency += op_cost;
2975           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2976             {
2977               best_cost = alg_in->cost;
2978               std::swap (alg_in, best_alg);
2979               best_alg->log[best_alg->ops] = 0;
2980               best_alg->op[best_alg->ops] = alg_add_t_m2;
2981             }
2982         }
2983
2984       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2985          quickly with a - a * n for some appropriate constant n.  */
2986       m = exact_log2 (-orig_t + 1);
2987       if (m >= 0 && m < maxm)
2988         {
2989           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2990           /* If the target has a cheap shift-and-subtract insn use
2991              that in preference to a shift insn followed by a sub insn.
2992              Assume that the shift-and-sub is "atomic" with a latency
2993              equal to it's cost, otherwise assume that on superscalar
2994              hardware the shift may be executed concurrently with the
2995              earlier steps in the algorithm.  */
2996           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2997             {
2998               op_cost = shiftsub1_cost (speed, mode, m);
2999               op_latency = op_cost;
3000             }
3001           else
3002             op_latency = add_cost (speed, mode);
3003
3004           new_limit.cost = best_cost.cost - op_cost;
3005           new_limit.latency = best_cost.latency - op_latency;
3006           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3007                       &new_limit, mode);
3008
3009           alg_in->cost.cost += op_cost;
3010           alg_in->cost.latency += op_latency;
3011           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3012             {
3013               best_cost = alg_in->cost;
3014               std::swap (alg_in, best_alg);
3015               best_alg->log[best_alg->ops] = m;
3016               best_alg->op[best_alg->ops] = alg_sub_t_m2;
3017             }
3018         }
3019
3020       if (cache_hit)
3021         goto done;
3022     }
3023
3024   /* Look for factors of t of the form
3025      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3026      If we find such a factor, we can multiply by t using an algorithm that
3027      multiplies by q, shift the result by m and add/subtract it to itself.
3028
3029      We search for large factors first and loop down, even if large factors
3030      are less probable than small; if we find a large factor we will find a
3031      good sequence quickly, and therefore be able to prune (by decreasing
3032      COST_LIMIT) the search.  */
3033
3034  do_alg_addsub_factor:
3035   for (m = floor_log2 (t - 1); m >= 2; m--)
3036     {
3037       unsigned HOST_WIDE_INT d;
3038
3039       d = (HOST_WIDE_INT_1U << m) + 1;
3040       if (t % d == 0 && t > d && m < maxm
3041           && (!cache_hit || cache_alg == alg_add_factor))
3042         {
3043           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3044           if (shiftadd_cost (speed, mode, m) <= op_cost)
3045             op_cost = shiftadd_cost (speed, mode, m);
3046
3047           op_latency = op_cost;
3048
3049
3050           new_limit.cost = best_cost.cost - op_cost;
3051           new_limit.latency = best_cost.latency - op_latency;
3052           synth_mult (alg_in, t / d, &new_limit, mode);
3053
3054           alg_in->cost.cost += op_cost;
3055           alg_in->cost.latency += op_latency;
3056           if (alg_in->cost.latency < op_cost)
3057             alg_in->cost.latency = op_cost;
3058           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3059             {
3060               best_cost = alg_in->cost;
3061               std::swap (alg_in, best_alg);
3062               best_alg->log[best_alg->ops] = m;
3063               best_alg->op[best_alg->ops] = alg_add_factor;
3064             }
3065           /* Other factors will have been taken care of in the recursion.  */
3066           break;
3067         }
3068
3069       d = (HOST_WIDE_INT_1U << m) - 1;
3070       if (t % d == 0 && t > d && m < maxm
3071           && (!cache_hit || cache_alg == alg_sub_factor))
3072         {
3073           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3074           if (shiftsub0_cost (speed, mode, m) <= op_cost)
3075             op_cost = shiftsub0_cost (speed, mode, m);
3076
3077           op_latency = op_cost;
3078
3079           new_limit.cost = best_cost.cost - op_cost;
3080           new_limit.latency = best_cost.latency - op_latency;
3081           synth_mult (alg_in, t / d, &new_limit, mode);
3082
3083           alg_in->cost.cost += op_cost;
3084           alg_in->cost.latency += op_latency;
3085           if (alg_in->cost.latency < op_cost)
3086             alg_in->cost.latency = op_cost;
3087           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3088             {
3089               best_cost = alg_in->cost;
3090               std::swap (alg_in, best_alg);
3091               best_alg->log[best_alg->ops] = m;
3092               best_alg->op[best_alg->ops] = alg_sub_factor;
3093             }
3094           break;
3095         }
3096     }
3097   if (cache_hit)
3098     goto done;
3099
3100   /* Try shift-and-add (load effective address) instructions,
3101      i.e. do a*3, a*5, a*9.  */
3102   if ((t & 1) != 0)
3103     {
3104     do_alg_add_t2_m:
3105       q = t - 1;
3106       m = ctz_hwi (q);
3107       if (q && m < maxm)
3108         {
3109           op_cost = shiftadd_cost (speed, mode, m);
3110           new_limit.cost = best_cost.cost - op_cost;
3111           new_limit.latency = best_cost.latency - op_cost;
3112           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3113
3114           alg_in->cost.cost += op_cost;
3115           alg_in->cost.latency += op_cost;
3116           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3117             {
3118               best_cost = alg_in->cost;
3119               std::swap (alg_in, best_alg);
3120               best_alg->log[best_alg->ops] = m;
3121               best_alg->op[best_alg->ops] = alg_add_t2_m;
3122             }
3123         }
3124       if (cache_hit)
3125         goto done;
3126
3127     do_alg_sub_t2_m:
3128       q = t + 1;
3129       m = ctz_hwi (q);
3130       if (q && m < maxm)
3131         {
3132           op_cost = shiftsub0_cost (speed, mode, m);
3133           new_limit.cost = best_cost.cost - op_cost;
3134           new_limit.latency = best_cost.latency - op_cost;
3135           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3136
3137           alg_in->cost.cost += op_cost;
3138           alg_in->cost.latency += op_cost;
3139           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3140             {
3141               best_cost = alg_in->cost;
3142               std::swap (alg_in, best_alg);
3143               best_alg->log[best_alg->ops] = m;
3144               best_alg->op[best_alg->ops] = alg_sub_t2_m;
3145             }
3146         }
3147       if (cache_hit)
3148         goto done;
3149     }
3150
3151  done:
3152   /* If best_cost has not decreased, we have not found any algorithm.  */
3153   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3154     {
3155       /* We failed to find an algorithm.  Record alg_impossible for
3156          this case (that is, <T, MODE, COST_LIMIT>) so that next time
3157          we are asked to find an algorithm for T within the same or
3158          lower COST_LIMIT, we can immediately return to the
3159          caller.  */
3160       entry_ptr->t = t;
3161       entry_ptr->mode = mode;
3162       entry_ptr->speed = speed;
3163       entry_ptr->alg = alg_impossible;
3164       entry_ptr->cost = *cost_limit;
3165       return;
3166     }
3167
3168   /* Cache the result.  */
3169   if (!cache_hit)
3170     {
3171       entry_ptr->t = t;
3172       entry_ptr->mode = mode;
3173       entry_ptr->speed = speed;
3174       entry_ptr->alg = best_alg->op[best_alg->ops];
3175       entry_ptr->cost.cost = best_cost.cost;
3176       entry_ptr->cost.latency = best_cost.latency;
3177     }
3178
3179   /* If we are getting a too long sequence for `struct algorithm'
3180      to record, make this search fail.  */
3181   if (best_alg->ops == MAX_BITS_PER_WORD)
3182     return;
3183
3184   /* Copy the algorithm from temporary space to the space at alg_out.
3185      We avoid using structure assignment because the majority of
3186      best_alg is normally undefined, and this is a critical function.  */
3187   alg_out->ops = best_alg->ops + 1;
3188   alg_out->cost = best_cost;
3189   memcpy (alg_out->op, best_alg->op,
3190           alg_out->ops * sizeof *alg_out->op);
3191   memcpy (alg_out->log, best_alg->log,
3192           alg_out->ops * sizeof *alg_out->log);
3193 }
3194 \f
3195 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3196    Try three variations:
3197
3198        - a shift/add sequence based on VAL itself
3199        - a shift/add sequence based on -VAL, followed by a negation
3200        - a shift/add sequence based on VAL - 1, followed by an addition.
3201
3202    Return true if the cheapest of these cost less than MULT_COST,
3203    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3204
3205 bool
3206 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3207                      struct algorithm *alg, enum mult_variant *variant,
3208                      int mult_cost)
3209 {
3210   struct algorithm alg2;
3211   struct mult_cost limit;
3212   int op_cost;
3213   bool speed = optimize_insn_for_speed_p ();
3214
3215   /* Fail quickly for impossible bounds.  */
3216   if (mult_cost < 0)
3217     return false;
3218
3219   /* Ensure that mult_cost provides a reasonable upper bound.
3220      Any constant multiplication can be performed with less
3221      than 2 * bits additions.  */
3222   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3223   if (mult_cost > op_cost)
3224     mult_cost = op_cost;
3225
3226   *variant = basic_variant;
3227   limit.cost = mult_cost;
3228   limit.latency = mult_cost;
3229   synth_mult (alg, val, &limit, mode);
3230
3231   /* This works only if the inverted value actually fits in an
3232      `unsigned int' */
3233   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3234     {
3235       op_cost = neg_cost (speed, mode);
3236       if (MULT_COST_LESS (&alg->cost, mult_cost))
3237         {
3238           limit.cost = alg->cost.cost - op_cost;
3239           limit.latency = alg->cost.latency - op_cost;
3240         }
3241       else
3242         {
3243           limit.cost = mult_cost - op_cost;
3244           limit.latency = mult_cost - op_cost;
3245         }
3246
3247       synth_mult (&alg2, -val, &limit, mode);
3248       alg2.cost.cost += op_cost;
3249       alg2.cost.latency += op_cost;
3250       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3251         *alg = alg2, *variant = negate_variant;
3252     }
3253
3254   /* This proves very useful for division-by-constant.  */
3255   op_cost = add_cost (speed, mode);
3256   if (MULT_COST_LESS (&alg->cost, mult_cost))
3257     {
3258       limit.cost = alg->cost.cost - op_cost;
3259       limit.latency = alg->cost.latency - op_cost;
3260     }
3261   else
3262     {
3263       limit.cost = mult_cost - op_cost;
3264       limit.latency = mult_cost - op_cost;
3265     }
3266
3267   synth_mult (&alg2, val - 1, &limit, mode);
3268   alg2.cost.cost += op_cost;
3269   alg2.cost.latency += op_cost;
3270   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3271     *alg = alg2, *variant = add_variant;
3272
3273   return MULT_COST_LESS (&alg->cost, mult_cost);
3274 }
3275
3276 /* A subroutine of expand_mult, used for constant multiplications.
3277    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3278    convenient.  Use the shift/add sequence described by ALG and apply
3279    the final fixup specified by VARIANT.  */
3280
3281 static rtx
3282 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3283                    rtx target, const struct algorithm *alg,
3284                    enum mult_variant variant)
3285 {
3286   unsigned HOST_WIDE_INT val_so_far;
3287   rtx_insn *insn;
3288   rtx accum, tem;
3289   int opno;
3290   machine_mode nmode;
3291
3292   /* Avoid referencing memory over and over and invalid sharing
3293      on SUBREGs.  */
3294   op0 = force_reg (mode, op0);
3295
3296   /* ACCUM starts out either as OP0 or as a zero, depending on
3297      the first operation.  */
3298
3299   if (alg->op[0] == alg_zero)
3300     {
3301       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3302       val_so_far = 0;
3303     }
3304   else if (alg->op[0] == alg_m)
3305     {
3306       accum = copy_to_mode_reg (mode, op0);
3307       val_so_far = 1;
3308     }
3309   else
3310     gcc_unreachable ();
3311
3312   for (opno = 1; opno < alg->ops; opno++)
3313     {
3314       int log = alg->log[opno];
3315       rtx shift_subtarget = optimize ? 0 : accum;
3316       rtx add_target
3317         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3318            && !optimize)
3319           ? target : 0;
3320       rtx accum_target = optimize ? 0 : accum;
3321       rtx accum_inner;
3322
3323       switch (alg->op[opno])
3324         {
3325         case alg_shift:
3326           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3327           /* REG_EQUAL note will be attached to the following insn.  */
3328           emit_move_insn (accum, tem);
3329           val_so_far <<= log;
3330           break;
3331
3332         case alg_add_t_m2:
3333           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3334           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3335                                  add_target ? add_target : accum_target);
3336           val_so_far += HOST_WIDE_INT_1U << log;
3337           break;
3338
3339         case alg_sub_t_m2:
3340           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3341           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3342                                  add_target ? add_target : accum_target);
3343           val_so_far -= HOST_WIDE_INT_1U << log;
3344           break;
3345
3346         case alg_add_t2_m:
3347           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3348                                 log, shift_subtarget, 0);
3349           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3350                                  add_target ? add_target : accum_target);
3351           val_so_far = (val_so_far << log) + 1;
3352           break;
3353
3354         case alg_sub_t2_m:
3355           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3356                                 log, shift_subtarget, 0);
3357           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3358                                  add_target ? add_target : accum_target);
3359           val_so_far = (val_so_far << log) - 1;
3360           break;
3361
3362         case alg_add_factor:
3363           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3364           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3365                                  add_target ? add_target : accum_target);
3366           val_so_far += val_so_far << log;
3367           break;
3368
3369         case alg_sub_factor:
3370           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3371           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3372                                  (add_target
3373                                   ? add_target : (optimize ? 0 : tem)));
3374           val_so_far = (val_so_far << log) - val_so_far;
3375           break;
3376
3377         default:
3378           gcc_unreachable ();
3379         }
3380
3381       if (SCALAR_INT_MODE_P (mode))
3382         {
3383           /* Write a REG_EQUAL note on the last insn so that we can cse
3384              multiplication sequences.  Note that if ACCUM is a SUBREG,
3385              we've set the inner register and must properly indicate that.  */
3386           tem = op0, nmode = mode;
3387           accum_inner = accum;
3388           if (GET_CODE (accum) == SUBREG)
3389             {
3390               accum_inner = SUBREG_REG (accum);
3391               nmode = GET_MODE (accum_inner);
3392               tem = gen_lowpart (nmode, op0);
3393             }
3394
3395           /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3396              In that case, only the low bits of accum would be guaranteed to
3397              be equal to the content of the REG_EQUAL note, the upper bits
3398              can be anything.  */
3399           if (!paradoxical_subreg_p (tem))
3400             {
3401               insn = get_last_insn ();
3402               wide_int wval_so_far
3403                 = wi::uhwi (val_so_far,
3404                             GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3405               rtx c = immed_wide_int_const (wval_so_far, nmode);
3406               set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3407                                 accum_inner);
3408             }
3409         }
3410     }
3411
3412   if (variant == negate_variant)
3413     {
3414       val_so_far = -val_so_far;
3415       accum = expand_unop (mode, neg_optab, accum, target, 0);
3416     }
3417   else if (variant == add_variant)
3418     {
3419       val_so_far = val_so_far + 1;
3420       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3421     }
3422
3423   /* Compare only the bits of val and val_so_far that are significant
3424      in the result mode, to avoid sign-/zero-extension confusion.  */
3425   nmode = GET_MODE_INNER (mode);
3426   val &= GET_MODE_MASK (nmode);
3427   val_so_far &= GET_MODE_MASK (nmode);
3428   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3429
3430   return accum;
3431 }
3432
3433 /* Perform a multiplication and return an rtx for the result.
3434    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3435    TARGET is a suggestion for where to store the result (an rtx).
3436
3437    We check specially for a constant integer as OP1.
3438    If you want this check for OP0 as well, then before calling
3439    you should swap the two operands if OP0 would be constant.  */
3440
3441 rtx
3442 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3443              int unsignedp, bool no_libcall)
3444 {
3445   enum mult_variant variant;
3446   struct algorithm algorithm;
3447   rtx scalar_op1;
3448   int max_cost;
3449   bool speed = optimize_insn_for_speed_p ();
3450   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3451
3452   if (CONSTANT_P (op0))
3453     std::swap (op0, op1);
3454
3455   /* For vectors, there are several simplifications that can be made if
3456      all elements of the vector constant are identical.  */
3457   scalar_op1 = unwrap_const_vec_duplicate (op1);
3458
3459   if (INTEGRAL_MODE_P (mode))
3460     {
3461       rtx fake_reg;
3462       HOST_WIDE_INT coeff;
3463       bool is_neg;
3464       int mode_bitsize;
3465
3466       if (op1 == CONST0_RTX (mode))
3467         return op1;
3468       if (op1 == CONST1_RTX (mode))
3469         return op0;
3470       if (op1 == CONSTM1_RTX (mode))
3471         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3472                             op0, target, 0);
3473
3474       if (do_trapv)
3475         goto skip_synth;
3476
3477       /* If mode is integer vector mode, check if the backend supports
3478          vector lshift (by scalar or vector) at all.  If not, we can't use
3479          synthetized multiply.  */
3480       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3481           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3482           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3483         goto skip_synth;
3484
3485       /* These are the operations that are potentially turned into
3486          a sequence of shifts and additions.  */
3487       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3488
3489       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3490          less than or equal in size to `unsigned int' this doesn't matter.
3491          If the mode is larger than `unsigned int', then synth_mult works
3492          only if the constant value exactly fits in an `unsigned int' without
3493          any truncation.  This means that multiplying by negative values does
3494          not work; results are off by 2^32 on a 32 bit machine.  */
3495       if (CONST_INT_P (scalar_op1))
3496         {
3497           coeff = INTVAL (scalar_op1);
3498           is_neg = coeff < 0;
3499         }
3500 #if TARGET_SUPPORTS_WIDE_INT
3501       else if (CONST_WIDE_INT_P (scalar_op1))
3502 #else
3503       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3504 #endif
3505         {
3506           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3507           /* Perfect power of 2 (other than 1, which is handled above).  */
3508           if (shift > 0)
3509             return expand_shift (LSHIFT_EXPR, mode, op0,
3510                                  shift, target, unsignedp);
3511           else
3512             goto skip_synth;
3513         }
3514       else
3515         goto skip_synth;
3516
3517       /* We used to test optimize here, on the grounds that it's better to
3518          produce a smaller program when -O is not used.  But this causes
3519          such a terrible slowdown sometimes that it seems better to always
3520          use synth_mult.  */
3521
3522       /* Special case powers of two.  */
3523       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3524           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3525         return expand_shift (LSHIFT_EXPR, mode, op0,
3526                              floor_log2 (coeff), target, unsignedp);
3527
3528       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3529
3530       /* Attempt to handle multiplication of DImode values by negative
3531          coefficients, by performing the multiplication by a positive
3532          multiplier and then inverting the result.  */
3533       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3534         {
3535           /* Its safe to use -coeff even for INT_MIN, as the
3536              result is interpreted as an unsigned coefficient.
3537              Exclude cost of op0 from max_cost to match the cost
3538              calculation of the synth_mult.  */
3539           coeff = -(unsigned HOST_WIDE_INT) coeff;
3540           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3541                                     mode, speed)
3542                       - neg_cost (speed, mode));
3543           if (max_cost <= 0)
3544             goto skip_synth;
3545
3546           /* Special case powers of two.  */
3547           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3548             {
3549               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3550                                        floor_log2 (coeff), target, unsignedp);
3551               return expand_unop (mode, neg_optab, temp, target, 0);
3552             }
3553
3554           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3555                                    max_cost))
3556             {
3557               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3558                                             &algorithm, variant);
3559               return expand_unop (mode, neg_optab, temp, target, 0);
3560             }
3561           goto skip_synth;
3562         }
3563
3564       /* Exclude cost of op0 from max_cost to match the cost
3565          calculation of the synth_mult.  */
3566       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3567       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3568         return expand_mult_const (mode, op0, coeff, target,
3569                                   &algorithm, variant);
3570     }
3571  skip_synth:
3572
3573   /* Expand x*2.0 as x+x.  */
3574   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3575       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3576     {
3577       op0 = force_reg (GET_MODE (op0), op0);
3578       return expand_binop (mode, add_optab, op0, op0,
3579                            target, unsignedp,
3580                            no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3581     }
3582
3583   /* This used to use umul_optab if unsigned, but for non-widening multiply
3584      there is no difference between signed and unsigned.  */
3585   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3586                       op0, op1, target, unsignedp,
3587                       no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3588   gcc_assert (op0 || no_libcall);
3589   return op0;
3590 }
3591
3592 /* Return a cost estimate for multiplying a register by the given
3593    COEFFicient in the given MODE and SPEED.  */
3594
3595 int
3596 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3597 {
3598   int max_cost;
3599   struct algorithm algorithm;
3600   enum mult_variant variant;
3601
3602   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3603   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3604                            mode, speed);
3605   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3606     return algorithm.cost.cost;
3607   else
3608     return max_cost;
3609 }
3610
3611 /* Perform a widening multiplication and return an rtx for the result.
3612    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3613    TARGET is a suggestion for where to store the result (an rtx).
3614    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3615    or smul_widen_optab.
3616
3617    We check specially for a constant integer as OP1, comparing the
3618    cost of a widening multiply against the cost of a sequence of shifts
3619    and adds.  */
3620
3621 rtx
3622 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3623                       int unsignedp, optab this_optab)
3624 {
3625   bool speed = optimize_insn_for_speed_p ();
3626   rtx cop1;
3627
3628   if (CONST_INT_P (op1)
3629       && GET_MODE (op0) != VOIDmode
3630       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3631                                 this_optab == umul_widen_optab))
3632       && CONST_INT_P (cop1)
3633       && (INTVAL (cop1) >= 0
3634           || HWI_COMPUTABLE_MODE_P (mode)))
3635     {
3636       HOST_WIDE_INT coeff = INTVAL (cop1);
3637       int max_cost;
3638       enum mult_variant variant;
3639       struct algorithm algorithm;
3640
3641       if (coeff == 0)
3642         return CONST0_RTX (mode);
3643
3644       /* Special case powers of two.  */
3645       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3646         {
3647           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3648           return expand_shift (LSHIFT_EXPR, mode, op0,
3649                                floor_log2 (coeff), target, unsignedp);
3650         }
3651
3652       /* Exclude cost of op0 from max_cost to match the cost
3653          calculation of the synth_mult.  */
3654       max_cost = mul_widen_cost (speed, mode);
3655       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3656                                max_cost))
3657         {
3658           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3659           return expand_mult_const (mode, op0, coeff, target,
3660                                     &algorithm, variant);
3661         }
3662     }
3663   return expand_binop (mode, this_optab, op0, op1, target,
3664                        unsignedp, OPTAB_LIB_WIDEN);
3665 }
3666 \f
3667 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3668    replace division by D, and put the least significant N bits of the result
3669    in *MULTIPLIER_PTR and return the most significant bit.
3670
3671    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3672    needed precision is in PRECISION (should be <= N).
3673
3674    PRECISION should be as small as possible so this function can choose
3675    multiplier more freely.
3676
3677    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3678    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3679
3680    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3681    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3682
3683 unsigned HOST_WIDE_INT
3684 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3685                    unsigned HOST_WIDE_INT *multiplier_ptr,
3686                    int *post_shift_ptr, int *lgup_ptr)
3687 {
3688   int lgup, post_shift;
3689   int pow, pow2;
3690
3691   /* lgup = ceil(log2(divisor)); */
3692   lgup = ceil_log2 (d);
3693
3694   gcc_assert (lgup <= n);
3695
3696   pow = n + lgup;
3697   pow2 = n + lgup - precision;
3698
3699   /* mlow = 2^(N + lgup)/d */
3700   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3701   wide_int mlow = wi::udiv_trunc (val, d);
3702
3703   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3704   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3705   wide_int mhigh = wi::udiv_trunc (val, d);
3706
3707   /* If precision == N, then mlow, mhigh exceed 2^N
3708      (but they do not exceed 2^(N+1)).  */
3709
3710   /* Reduce to lowest terms.  */
3711   for (post_shift = lgup; post_shift > 0; post_shift--)
3712     {
3713       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3714                                                        HOST_BITS_PER_WIDE_INT);
3715       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3716                                                        HOST_BITS_PER_WIDE_INT);
3717       if (ml_lo >= mh_lo)
3718         break;
3719
3720       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3721       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3722     }
3723
3724   *post_shift_ptr = post_shift;
3725   *lgup_ptr = lgup;
3726   if (n < HOST_BITS_PER_WIDE_INT)
3727     {
3728       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3729       *multiplier_ptr = mhigh.to_uhwi () & mask;
3730       return mhigh.to_uhwi () > mask;
3731     }
3732   else
3733     {
3734       *multiplier_ptr = mhigh.to_uhwi ();
3735       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3736     }
3737 }
3738
3739 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3740    congruent to 1 (mod 2**N).  */
3741
3742 static unsigned HOST_WIDE_INT
3743 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3744 {
3745   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3746
3747   /* The algorithm notes that the choice y = x satisfies
3748      x*y == 1 mod 2^3, since x is assumed odd.
3749      Each iteration doubles the number of bits of significance in y.  */
3750
3751   unsigned HOST_WIDE_INT mask;
3752   unsigned HOST_WIDE_INT y = x;
3753   int nbit = 3;
3754
3755   mask = (n == HOST_BITS_PER_WIDE_INT
3756           ? HOST_WIDE_INT_M1U
3757           : (HOST_WIDE_INT_1U << n) - 1);
3758
3759   while (nbit < n)
3760     {
3761       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3762       nbit *= 2;
3763     }
3764   return y;
3765 }
3766
3767 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3768    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3769    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3770    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3771    become signed.
3772
3773    The result is put in TARGET if that is convenient.
3774
3775    MODE is the mode of operation.  */
3776
3777 rtx
3778 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3779                              rtx op1, rtx target, int unsignedp)
3780 {
3781   rtx tem;
3782   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3783
3784   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3785                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3786   tem = expand_and (mode, tem, op1, NULL_RTX);
3787   adj_operand
3788     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3789                      adj_operand);
3790
3791   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3792                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3793   tem = expand_and (mode, tem, op0, NULL_RTX);
3794   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3795                           target);
3796
3797   return target;
3798 }
3799
3800 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3801
3802 static rtx
3803 extract_high_half (scalar_int_mode mode, rtx op)
3804 {
3805   if (mode == word_mode)
3806     return gen_highpart (mode, op);
3807
3808   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3809
3810   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3811                      GET_MODE_BITSIZE (mode), 0, 1);
3812   return convert_modes (mode, wider_mode, op, 0);
3813 }
3814
3815 /* Like expmed_mult_highpart, but only consider using a multiplication
3816    optab.  OP1 is an rtx for the constant operand.  */
3817
3818 static rtx
3819 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3820                             rtx target, int unsignedp, int max_cost)
3821 {
3822   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3823   optab moptab;
3824   rtx tem;
3825   int size;
3826   bool speed = optimize_insn_for_speed_p ();
3827
3828   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3829
3830   size = GET_MODE_BITSIZE (mode);
3831
3832   /* Firstly, try using a multiplication insn that only generates the needed
3833      high part of the product, and in the sign flavor of unsignedp.  */
3834   if (mul_highpart_cost (speed, mode) < max_cost)
3835     {
3836       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3837       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3838                           unsignedp, OPTAB_DIRECT);
3839       if (tem)
3840         return tem;
3841     }
3842
3843   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3844      Need to adjust the result after the multiplication.  */
3845   if (size - 1 < BITS_PER_WORD
3846       && (mul_highpart_cost (speed, mode)
3847           + 2 * shift_cost (speed, mode, size-1)
3848           + 4 * add_cost (speed, mode) < max_cost))
3849     {
3850       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3851       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3852                           unsignedp, OPTAB_DIRECT);
3853       if (tem)
3854         /* We used the wrong signedness.  Adjust the result.  */
3855         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3856                                             tem, unsignedp);
3857     }
3858
3859   /* Try widening multiplication.  */
3860   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3861   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3862       && mul_widen_cost (speed, wider_mode) < max_cost)
3863     {
3864       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3865                           unsignedp, OPTAB_WIDEN);
3866       if (tem)
3867         return extract_high_half (mode, tem);
3868     }
3869
3870   /* Try widening the mode and perform a non-widening multiplication.  */
3871   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3872       && size - 1 < BITS_PER_WORD
3873       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3874           < max_cost))
3875     {
3876       rtx_insn *insns;
3877       rtx wop0, wop1;
3878
3879       /* We need to widen the operands, for example to ensure the
3880          constant multiplier is correctly sign or zero extended.
3881          Use a sequence to clean-up any instructions emitted by
3882          the conversions if things don't work out.  */
3883       start_sequence ();
3884       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3885       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3886       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3887                           unsignedp, OPTAB_WIDEN);
3888       insns = get_insns ();
3889       end_sequence ();
3890
3891       if (tem)
3892         {
3893           emit_insn (insns);
3894           return extract_high_half (mode, tem);
3895         }
3896     }
3897
3898   /* Try widening multiplication of opposite signedness, and adjust.  */
3899   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3900   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3901       && size - 1 < BITS_PER_WORD
3902       && (mul_widen_cost (speed, wider_mode)
3903           + 2 * shift_cost (speed, mode, size-1)
3904           + 4 * add_cost (speed, mode) < max_cost))
3905     {
3906       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3907                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3908       if (tem != 0)
3909         {
3910           tem = extract_high_half (mode, tem);
3911           /* We used the wrong signedness.  Adjust the result.  */
3912           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3913                                               target, unsignedp);
3914         }
3915     }
3916
3917   return 0;
3918 }
3919
3920 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3921    putting the high half of the result in TARGET if that is convenient,
3922    and return where the result is.  If the operation cannot be performed,
3923    0 is returned.
3924
3925    MODE is the mode of operation and result.
3926
3927    UNSIGNEDP nonzero means unsigned multiply.
3928
3929    MAX_COST is the total allowed cost for the expanded RTL.  */
3930
3931 static rtx
3932 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3933                       rtx target, int unsignedp, int max_cost)
3934 {
3935   unsigned HOST_WIDE_INT cnst1;
3936   int extra_cost;
3937   bool sign_adjust = false;
3938   enum mult_variant variant;
3939   struct algorithm alg;
3940   rtx tem;
3941   bool speed = optimize_insn_for_speed_p ();
3942
3943   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3944   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3945
3946   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3947
3948   /* We can't optimize modes wider than BITS_PER_WORD.
3949      ??? We might be able to perform double-word arithmetic if
3950      mode == word_mode, however all the cost calculations in
3951      synth_mult etc. assume single-word operations.  */
3952   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3953   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3954     return expmed_mult_highpart_optab (mode, op0, op1, target,
3955                                        unsignedp, max_cost);
3956
3957   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3958
3959   /* Check whether we try to multiply by a negative constant.  */
3960   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3961     {
3962       sign_adjust = true;
3963       extra_cost += add_cost (speed, mode);
3964     }
3965
3966   /* See whether shift/add multiplication is cheap enough.  */
3967   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3968                            max_cost - extra_cost))
3969     {
3970       /* See whether the specialized multiplication optabs are
3971          cheaper than the shift/add version.  */
3972       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3973                                         alg.cost.cost + extra_cost);
3974       if (tem)
3975         return tem;
3976
3977       tem = convert_to_mode (wider_mode, op0, unsignedp);
3978       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3979       tem = extract_high_half (mode, tem);
3980
3981       /* Adjust result for signedness.  */
3982       if (sign_adjust)
3983         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3984
3985       return tem;
3986     }
3987   return expmed_mult_highpart_optab (mode, op0, op1, target,
3988                                      unsignedp, max_cost);
3989 }
3990
3991
3992 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3993
3994 static rtx
3995 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3996 {
3997   rtx result, temp, shift;
3998   rtx_code_label *label;
3999   int logd;
4000   int prec = GET_MODE_PRECISION (mode);
4001
4002   logd = floor_log2 (d);
4003   result = gen_reg_rtx (mode);
4004
4005   /* Avoid conditional branches when they're expensive.  */
4006   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4007       && optimize_insn_for_speed_p ())
4008     {
4009       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4010                                       mode, 0, -1);
4011       if (signmask)
4012         {
4013           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4014           signmask = force_reg (mode, signmask);
4015           shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4016
4017           /* Use the rtx_cost of a LSHIFTRT instruction to determine
4018              which instruction sequence to use.  If logical right shifts
4019              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
4020              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
4021
4022           temp = gen_rtx_LSHIFTRT (mode, result, shift);
4023           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4024               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4025                   > COSTS_N_INSNS (2)))
4026             {
4027               temp = expand_binop (mode, xor_optab, op0, signmask,
4028                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4029               temp = expand_binop (mode, sub_optab, temp, signmask,
4030                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4031               temp = expand_binop (mode, and_optab, temp,
4032                                    gen_int_mode (masklow, mode),
4033                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4034               temp = expand_binop (mode, xor_optab, temp, signmask,
4035                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4036               temp = expand_binop (mode, sub_optab, temp, signmask,
4037                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4038             }
4039           else
4040             {
4041               signmask = expand_binop (mode, lshr_optab, signmask, shift,
4042                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
4043               signmask = force_reg (mode, signmask);
4044
4045               temp = expand_binop (mode, add_optab, op0, signmask,
4046                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4047               temp = expand_binop (mode, and_optab, temp,
4048                                    gen_int_mode (masklow, mode),
4049                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4050               temp = expand_binop (mode, sub_optab, temp, signmask,
4051                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4052             }
4053           return temp;
4054         }
4055     }
4056
4057   /* Mask contains the mode's signbit and the significant bits of the
4058      modulus.  By including the signbit in the operation, many targets
4059      can avoid an explicit compare operation in the following comparison
4060      against zero.  */
4061   wide_int mask = wi::mask (logd, false, prec);
4062   mask = wi::set_bit (mask, prec - 1);
4063
4064   temp = expand_binop (mode, and_optab, op0,
4065                        immed_wide_int_const (mask, mode),
4066                        result, 1, OPTAB_LIB_WIDEN);
4067   if (temp != result)
4068     emit_move_insn (result, temp);
4069
4070   label = gen_label_rtx ();
4071   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4072
4073   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4074                        0, OPTAB_LIB_WIDEN);
4075
4076   mask = wi::mask (logd, true, prec);
4077   temp = expand_binop (mode, ior_optab, temp,
4078                        immed_wide_int_const (mask, mode),
4079                        result, 1, OPTAB_LIB_WIDEN);
4080   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4081                        0, OPTAB_LIB_WIDEN);
4082   if (temp != result)
4083     emit_move_insn (result, temp);
4084   emit_label (label);
4085   return result;
4086 }
4087
4088 /* Expand signed division of OP0 by a power of two D in mode MODE.
4089    This routine is only called for positive values of D.  */
4090
4091 static rtx
4092 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4093 {
4094   rtx temp;
4095   rtx_code_label *label;
4096   int logd;
4097
4098   logd = floor_log2 (d);
4099
4100   if (d == 2
4101       && BRANCH_COST (optimize_insn_for_speed_p (),
4102                       false) >= 1)
4103     {
4104       temp = gen_reg_rtx (mode);
4105       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4106       if (temp != NULL_RTX)
4107         {
4108           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4109                                0, OPTAB_LIB_WIDEN);
4110           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4111         }
4112     }
4113
4114   if (HAVE_conditional_move
4115       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4116     {
4117       rtx temp2;
4118
4119       start_sequence ();
4120       temp2 = copy_to_mode_reg (mode, op0);
4121       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4122                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
4123       temp = force_reg (mode, temp);
4124
4125       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4126       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4127                                      mode, temp, temp2, mode, 0);
4128       if (temp2)
4129         {
4130           rtx_insn *seq = get_insns ();
4131           end_sequence ();
4132           emit_insn (seq);
4133           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4134         }
4135       end_sequence ();
4136     }
4137
4138   if (BRANCH_COST (optimize_insn_for_speed_p (),
4139                    false) >= 2)
4140     {
4141       int ushift = GET_MODE_BITSIZE (mode) - logd;
4142
4143       temp = gen_reg_rtx (mode);
4144       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4145       if (temp != NULL_RTX)
4146         {
4147           if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4148               || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4149               > COSTS_N_INSNS (1))
4150             temp = expand_binop (mode, and_optab, temp,
4151                                  gen_int_mode (d - 1, mode),
4152                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
4153           else
4154             temp = expand_shift (RSHIFT_EXPR, mode, temp,
4155                                  ushift, NULL_RTX, 1);
4156           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4157                                0, OPTAB_LIB_WIDEN);
4158           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4159         }
4160     }
4161
4162   label = gen_label_rtx ();
4163   temp = copy_to_mode_reg (mode, op0);
4164   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4165   expand_inc (temp, gen_int_mode (d - 1, mode));
4166   emit_label (label);
4167   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4168 }
4169 \f
4170 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4171    if that is convenient, and returning where the result is.
4172    You may request either the quotient or the remainder as the result;
4173    specify REM_FLAG nonzero to get the remainder.
4174
4175    CODE is the expression code for which kind of division this is;
4176    it controls how rounding is done.  MODE is the machine mode to use.
4177    UNSIGNEDP nonzero means do unsigned division.  */
4178
4179 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4180    and then correct it by or'ing in missing high bits
4181    if result of ANDI is nonzero.
4182    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4183    This could optimize to a bfexts instruction.
4184    But C doesn't use these operations, so their optimizations are
4185    left for later.  */
4186 /* ??? For modulo, we don't actually need the highpart of the first product,
4187    the low part will do nicely.  And for small divisors, the second multiply
4188    can also be a low-part only multiply or even be completely left out.
4189    E.g. to calculate the remainder of a division by 3 with a 32 bit
4190    multiply, multiply with 0x55555556 and extract the upper two bits;
4191    the result is exact for inputs up to 0x1fffffff.
4192    The input range can be reduced by using cross-sum rules.
4193    For odd divisors >= 3, the following table gives right shift counts
4194    so that if a number is shifted by an integer multiple of the given
4195    amount, the remainder stays the same:
4196    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4197    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4198    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4199    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4200    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4201
4202    Cross-sum rules for even numbers can be derived by leaving as many bits
4203    to the right alone as the divisor has zeros to the right.
4204    E.g. if x is an unsigned 32 bit number:
4205    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4206    */
4207
4208 rtx
4209 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4210                rtx op0, rtx op1, rtx target, int unsignedp,
4211                enum optab_methods methods)
4212 {
4213   machine_mode compute_mode;
4214   rtx tquotient;
4215   rtx quotient = 0, remainder = 0;
4216   rtx_insn *last;
4217   rtx_insn *insn;
4218   optab optab1, optab2;
4219   int op1_is_constant, op1_is_pow2 = 0;
4220   int max_cost, extra_cost;
4221   static HOST_WIDE_INT last_div_const = 0;
4222   bool speed = optimize_insn_for_speed_p ();
4223
4224   op1_is_constant = CONST_INT_P (op1);
4225   if (op1_is_constant)
4226     {
4227       wide_int ext_op1 = rtx_mode_t (op1, mode);
4228       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4229                      || (! unsignedp
4230                          && wi::popcount (wi::neg (ext_op1)) == 1));
4231     }
4232
4233   /*
4234      This is the structure of expand_divmod:
4235
4236      First comes code to fix up the operands so we can perform the operations
4237      correctly and efficiently.
4238
4239      Second comes a switch statement with code specific for each rounding mode.
4240      For some special operands this code emits all RTL for the desired
4241      operation, for other cases, it generates only a quotient and stores it in
4242      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4243      to indicate that it has not done anything.
4244
4245      Last comes code that finishes the operation.  If QUOTIENT is set and
4246      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4247      QUOTIENT is not set, it is computed using trunc rounding.
4248
4249      We try to generate special code for division and remainder when OP1 is a
4250      constant.  If |OP1| = 2**n we can use shifts and some other fast
4251      operations.  For other values of OP1, we compute a carefully selected
4252      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4253      by m.
4254
4255      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4256      half of the product.  Different strategies for generating the product are
4257      implemented in expmed_mult_highpart.
4258
4259      If what we actually want is the remainder, we generate that by another
4260      by-constant multiplication and a subtraction.  */
4261
4262   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4263      code below will malfunction if we are, so check here and handle
4264      the special case if so.  */
4265   if (op1 == const1_rtx)
4266     return rem_flag ? const0_rtx : op0;
4267
4268     /* When dividing by -1, we could get an overflow.
4269      negv_optab can handle overflows.  */
4270   if (! unsignedp && op1 == constm1_rtx)
4271     {
4272       if (rem_flag)
4273         return const0_rtx;
4274       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4275                           ? negv_optab : neg_optab, op0, target, 0);
4276     }
4277
4278   if (target
4279       /* Don't use the function value register as a target
4280          since we have to read it as well as write it,
4281          and function-inlining gets confused by this.  */
4282       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4283           /* Don't clobber an operand while doing a multi-step calculation.  */
4284           || ((rem_flag || op1_is_constant)
4285               && (reg_mentioned_p (target, op0)
4286                   || (MEM_P (op0) && MEM_P (target))))
4287           || reg_mentioned_p (target, op1)
4288           || (MEM_P (op1) && MEM_P (target))))
4289     target = 0;
4290
4291   /* Get the mode in which to perform this computation.  Normally it will
4292      be MODE, but sometimes we can't do the desired operation in MODE.
4293      If so, pick a wider mode in which we can do the operation.  Convert
4294      to that mode at the start to avoid repeated conversions.
4295
4296      First see what operations we need.  These depend on the expression
4297      we are evaluating.  (We assume that divxx3 insns exist under the
4298      same conditions that modxx3 insns and that these insns don't normally
4299      fail.  If these assumptions are not correct, we may generate less
4300      efficient code in some cases.)
4301
4302      Then see if we find a mode in which we can open-code that operation
4303      (either a division, modulus, or shift).  Finally, check for the smallest
4304      mode for which we can do the operation with a library call.  */
4305
4306   /* We might want to refine this now that we have division-by-constant
4307      optimization.  Since expmed_mult_highpart tries so many variants, it is
4308      not straightforward to generalize this.  Maybe we should make an array
4309      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4310
4311   optab1 = (op1_is_pow2
4312             ? (unsignedp ? lshr_optab : ashr_optab)
4313             : (unsignedp ? udiv_optab : sdiv_optab));
4314   optab2 = (op1_is_pow2 ? optab1
4315             : (unsignedp ? udivmod_optab : sdivmod_optab));
4316
4317   if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4318     {
4319       FOR_EACH_MODE_FROM (compute_mode, mode)
4320       if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4321           || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4322         break;
4323
4324       if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4325         FOR_EACH_MODE_FROM (compute_mode, mode)
4326           if (optab_libfunc (optab1, compute_mode)
4327               || optab_libfunc (optab2, compute_mode))
4328             break;
4329     }
4330   else
4331     compute_mode = mode;
4332
4333   /* If we still couldn't find a mode, use MODE, but expand_binop will
4334      probably die.  */
4335   if (compute_mode == VOIDmode)
4336     compute_mode = mode;
4337
4338   if (target && GET_MODE (target) == compute_mode)
4339     tquotient = target;
4340   else
4341     tquotient = gen_reg_rtx (compute_mode);
4342
4343 #if 0
4344   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4345      (mode), and thereby get better code when OP1 is a constant.  Do that
4346      later.  It will require going over all usages of SIZE below.  */
4347   size = GET_MODE_BITSIZE (mode);
4348 #endif
4349
4350   /* Only deduct something for a REM if the last divide done was
4351      for a different constant.   Then set the constant of the last
4352      divide.  */
4353   max_cost = (unsignedp
4354               ? udiv_cost (speed, compute_mode)
4355               : sdiv_cost (speed, compute_mode));
4356   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4357                      && INTVAL (op1) == last_div_const))
4358     max_cost -= (mul_cost (speed, compute_mode)
4359                  + add_cost (speed, compute_mode));
4360
4361   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4362
4363   /* Now convert to the best mode to use.  */
4364   if (compute_mode != mode)
4365     {
4366       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4367       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4368
4369       /* convert_modes may have placed op1 into a register, so we
4370          must recompute the following.  */
4371       op1_is_constant = CONST_INT_P (op1);
4372       if (op1_is_constant)
4373         {
4374           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4375           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4376                          || (! unsignedp
4377                              && wi::popcount (wi::neg (ext_op1)) == 1));
4378         }
4379       else
4380         op1_is_pow2 = 0;
4381     }
4382
4383   /* If one of the operands is a volatile MEM, copy it into a register.  */
4384
4385   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4386     op0 = force_reg (compute_mode, op0);
4387   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4388     op1 = force_reg (compute_mode, op1);
4389
4390   /* If we need the remainder or if OP1 is constant, we need to
4391      put OP0 in a register in case it has any queued subexpressions.  */
4392   if (rem_flag || op1_is_constant)
4393     op0 = force_reg (compute_mode, op0);
4394
4395   last = get_last_insn ();
4396
4397   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4398   if (unsignedp)
4399     {
4400       if (code == FLOOR_DIV_EXPR)
4401         code = TRUNC_DIV_EXPR;
4402       if (code == FLOOR_MOD_EXPR)
4403         code = TRUNC_MOD_EXPR;
4404       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4405         code = TRUNC_DIV_EXPR;
4406     }
4407
4408   if (op1 != const0_rtx)
4409     switch (code)
4410       {
4411       case TRUNC_MOD_EXPR:
4412       case TRUNC_DIV_EXPR:
4413         if (op1_is_constant)
4414           {
4415             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4416             int size = GET_MODE_BITSIZE (int_mode);
4417             if (unsignedp)
4418               {
4419                 unsigned HOST_WIDE_INT mh, ml;
4420                 int pre_shift, post_shift;
4421                 int dummy;
4422                 wide_int wd = rtx_mode_t (op1, int_mode);
4423                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4424
4425                 if (wi::popcount (wd) == 1)
4426                   {
4427                     pre_shift = floor_log2 (d);
4428                     if (rem_flag)
4429                       {
4430                         unsigned HOST_WIDE_INT mask
4431                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4432                         remainder
4433                           = expand_binop (int_mode, and_optab, op0,
4434                                           gen_int_mode (mask, int_mode),
4435                                           remainder, 1, methods);
4436                         if (remainder)
4437                           return gen_lowpart (mode, remainder);
4438                       }
4439                     quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4440                                              pre_shift, tquotient, 1);
4441                   }
4442                 else if (size <= HOST_BITS_PER_WIDE_INT)
4443                   {
4444                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4445                       {
4446                         /* Most significant bit of divisor is set; emit an scc
4447                            insn.  */
4448                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4449                                                           int_mode, 1, 1);
4450                       }
4451                     else
4452                       {
4453                         /* Find a suitable multiplier and right shift count
4454                            instead of multiplying with D.  */
4455
4456                         mh = choose_multiplier (d, size, size,
4457                                                 &ml, &post_shift, &dummy);
4458
4459                         /* If the suggested multiplier is more than SIZE bits,
4460                            we can do better for even divisors, using an
4461                            initial right shift.  */
4462                         if (mh != 0 && (d & 1) == 0)
4463                           {
4464                             pre_shift = ctz_or_zero (d);
4465                             mh = choose_multiplier (d >> pre_shift, size,
4466                                                     size - pre_shift,
4467                                                     &ml, &post_shift, &dummy);
4468                             gcc_assert (!mh);
4469                           }
4470                         else
4471                           pre_shift = 0;
4472
4473                         if (mh != 0)
4474                           {
4475                             rtx t1, t2, t3, t4;
4476
4477                             if (post_shift - 1 >= BITS_PER_WORD)
4478                               goto fail1;
4479
4480                             extra_cost
4481                               = (shift_cost (speed, int_mode, post_shift - 1)
4482                                  + shift_cost (speed, int_mode, 1)
4483                                  + 2 * add_cost (speed, int_mode));
4484                             t1 = expmed_mult_highpart
4485                               (int_mode, op0, gen_int_mode (ml, int_mode),
4486                                NULL_RTX, 1, max_cost - extra_cost);
4487                             if (t1 == 0)
4488                               goto fail1;
4489                             t2 = force_operand (gen_rtx_MINUS (int_mode,
4490                                                                op0, t1),
4491                                                 NULL_RTX);
4492                             t3 = expand_shift (RSHIFT_EXPR, int_mode,
4493                                                t2, 1, NULL_RTX, 1);
4494                             t4 = force_operand (gen_rtx_PLUS (int_mode,
4495                                                               t1, t3),
4496                                                 NULL_RTX);
4497                             quotient = expand_shift
4498                               (RSHIFT_EXPR, int_mode, t4,
4499                                post_shift - 1, tquotient, 1);
4500                           }
4501                         else
4502                           {
4503                             rtx t1, t2;
4504
4505                             if (pre_shift >= BITS_PER_WORD
4506                                 || post_shift >= BITS_PER_WORD)
4507                               goto fail1;
4508
4509                             t1 = expand_shift
4510                               (RSHIFT_EXPR, int_mode, op0,
4511                                pre_shift, NULL_RTX, 1);
4512                             extra_cost
4513                               = (shift_cost (speed, int_mode, pre_shift)
4514                                  + shift_cost (speed, int_mode, post_shift));
4515                             t2 = expmed_mult_highpart
4516                               (int_mode, t1,
4517                                gen_int_mode (ml, int_mode),
4518                                NULL_RTX, 1, max_cost - extra_cost);
4519                             if (t2 == 0)
4520                               goto fail1;
4521                             quotient = expand_shift
4522                               (RSHIFT_EXPR, int_mode, t2,
4523                                post_shift, tquotient, 1);
4524                           }
4525                       }
4526                   }
4527                 else            /* Too wide mode to use tricky code */
4528                   break;
4529
4530                 insn = get_last_insn ();
4531                 if (insn != last)
4532                   set_dst_reg_note (insn, REG_EQUAL,
4533                                     gen_rtx_UDIV (int_mode, op0, op1),
4534                                     quotient);
4535               }
4536             else                /* TRUNC_DIV, signed */
4537               {
4538                 unsigned HOST_WIDE_INT ml;
4539                 int lgup, post_shift;
4540                 rtx mlr;
4541                 HOST_WIDE_INT d = INTVAL (op1);
4542                 unsigned HOST_WIDE_INT abs_d;
4543
4544                 /* Not prepared to handle division/remainder by
4545                    0xffffffffffffffff8000000000000000 etc.  */
4546                 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4547                   break;
4548
4549                 /* Since d might be INT_MIN, we have to cast to
4550                    unsigned HOST_WIDE_INT before negating to avoid
4551                    undefined signed overflow.  */
4552                 abs_d = (d >= 0
4553                          ? (unsigned HOST_WIDE_INT) d
4554                          : - (unsigned HOST_WIDE_INT) d);
4555
4556                 /* n rem d = n rem -d */
4557                 if (rem_flag && d < 0)
4558                   {
4559                     d = abs_d;
4560                     op1 = gen_int_mode (abs_d, int_mode);
4561                   }
4562
4563                 if (d == 1)
4564                   quotient = op0;
4565                 else if (d == -1)
4566                   quotient = expand_unop (int_mode, neg_optab, op0,
4567                                           tquotient, 0);
4568                 else if (size <= HOST_BITS_PER_WIDE_INT
4569                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4570                   {
4571                     /* This case is not handled correctly below.  */
4572                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4573                                                 int_mode, 1, 1);
4574                     if (quotient == 0)
4575                       goto fail1;
4576                   }
4577                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4578                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4579                          && (rem_flag
4580                              ? smod_pow2_cheap (speed, int_mode)
4581                              : sdiv_pow2_cheap (speed, int_mode))
4582                          /* We assume that cheap metric is true if the
4583                             optab has an expander for this mode.  */
4584                          && ((optab_handler ((rem_flag ? smod_optab
4585                                               : sdiv_optab),
4586                                              int_mode)
4587                               != CODE_FOR_nothing)
4588                              || (optab_handler (sdivmod_optab, int_mode)
4589                                  != CODE_FOR_nothing)))
4590                   ;
4591                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4592                   {
4593                     if (rem_flag)
4594                       {
4595                         remainder = expand_smod_pow2 (int_mode, op0, d);
4596                         if (remainder)
4597                           return gen_lowpart (mode, remainder);
4598                       }
4599
4600                     if (sdiv_pow2_cheap (speed, int_mode)
4601                         && ((optab_handler (sdiv_optab, int_mode)
4602                              != CODE_FOR_nothing)
4603                             || (optab_handler (sdivmod_optab, int_mode)
4604                                 != CODE_FOR_nothing)))
4605                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4606                                                 int_mode, op0,
4607                                                 gen_int_mode (abs_d,
4608                                                               int_mode),
4609                                                 NULL_RTX, 0);
4610                     else
4611                       quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4612
4613                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4614                        negate the quotient.  */
4615                     if (d < 0)
4616                       {
4617                         insn = get_last_insn ();
4618                         if (insn != last
4619                             && abs_d < (HOST_WIDE_INT_1U
4620                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4621                           set_dst_reg_note (insn, REG_EQUAL,
4622                                             gen_rtx_DIV (int_mode, op0,
4623                                                          gen_int_mode
4624                                                            (abs_d,
4625                                                             int_mode)),
4626                                             quotient);
4627
4628                         quotient = expand_unop (int_mode, neg_optab,
4629                                                 quotient, quotient, 0);
4630                       }
4631                   }
4632                 else if (size <= HOST_BITS_PER_WIDE_INT)
4633                   {
4634                     choose_multiplier (abs_d, size, size - 1,
4635                                        &ml, &post_shift, &lgup);
4636                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4637                       {
4638                         rtx t1, t2, t3;
4639
4640                         if (post_shift >= BITS_PER_WORD
4641                             || size - 1 >= BITS_PER_WORD)
4642                           goto fail1;
4643
4644                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4645                                       + shift_cost (speed, int_mode, size - 1)
4646                                       + add_cost (speed, int_mode));
4647                         t1 = expmed_mult_highpart
4648                           (int_mode, op0, gen_int_mode (ml, int_mode),
4649                            NULL_RTX, 0, max_cost - extra_cost);
4650                         if (t1 == 0)
4651                           goto fail1;
4652                         t2 = expand_shift
4653                           (RSHIFT_EXPR, int_mode, t1,
4654                            post_shift, NULL_RTX, 0);
4655                         t3 = expand_shift
4656                           (RSHIFT_EXPR, int_mode, op0,
4657                            size - 1, NULL_RTX, 0);
4658                         if (d < 0)
4659                           quotient
4660                             = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4661                                              tquotient);
4662                         else
4663                           quotient
4664                             = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4665                                              tquotient);
4666                       }
4667                     else
4668                       {
4669                         rtx t1, t2, t3, t4;
4670
4671                         if (post_shift >= BITS_PER_WORD
4672                             || size - 1 >= BITS_PER_WORD)
4673                           goto fail1;
4674
4675                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4676                         mlr = gen_int_mode (ml, int_mode);
4677                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4678                                       + shift_cost (speed, int_mode, size - 1)
4679                                       + 2 * add_cost (speed, int_mode));
4680                         t1 = expmed_mult_highpart (int_mode, op0, mlr,
4681                                                    NULL_RTX, 0,
4682                                                    max_cost - extra_cost);
4683                         if (t1 == 0)
4684                           goto fail1;
4685                         t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4686                                             NULL_RTX);
4687                         t3 = expand_shift
4688                           (RSHIFT_EXPR, int_mode, t2,
4689                            post_shift, NULL_RTX, 0);
4690                         t4 = expand_shift
4691                           (RSHIFT_EXPR, int_mode, op0,
4692                            size - 1, NULL_RTX, 0);
4693                         if (d < 0)
4694                           quotient
4695                             = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4696                                              tquotient);
4697                         else
4698                           quotient
4699                             = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4700                                              tquotient);
4701                       }
4702                   }
4703                 else            /* Too wide mode to use tricky code */
4704                   break;
4705
4706                 insn = get_last_insn ();
4707                 if (insn != last)
4708                   set_dst_reg_note (insn, REG_EQUAL,
4709                                     gen_rtx_DIV (int_mode, op0, op1),
4710                                     quotient);
4711               }
4712             break;
4713           }
4714       fail1:
4715         delete_insns_since (last);
4716         break;
4717
4718       case FLOOR_DIV_EXPR:
4719       case FLOOR_MOD_EXPR:
4720       /* We will come here only for signed operations.  */
4721         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4722           {
4723             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4724             int size = GET_MODE_BITSIZE (int_mode);
4725             unsigned HOST_WIDE_INT mh, ml;
4726             int pre_shift, lgup, post_shift;
4727             HOST_WIDE_INT d = INTVAL (op1);
4728
4729             if (d > 0)
4730               {
4731                 /* We could just as easily deal with negative constants here,
4732                    but it does not seem worth the trouble for GCC 2.6.  */
4733                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4734                   {
4735                     pre_shift = floor_log2 (d);
4736                     if (rem_flag)
4737                       {
4738                         unsigned HOST_WIDE_INT mask
4739                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4740                         remainder = expand_binop
4741                           (int_mode, and_optab, op0,
4742                            gen_int_mode (mask, int_mode),
4743                            remainder, 0, methods);
4744                         if (remainder)
4745                           return gen_lowpart (mode, remainder);
4746                       }
4747                     quotient = expand_shift
4748                       (RSHIFT_EXPR, int_mode, op0,
4749                        pre_shift, tquotient, 0);
4750                   }
4751                 else
4752                   {
4753                     rtx t1, t2, t3, t4;
4754
4755                     mh = choose_multiplier (d, size, size - 1,
4756                                             &ml, &post_shift, &lgup);
4757                     gcc_assert (!mh);
4758
4759                     if (post_shift < BITS_PER_WORD
4760                         && size - 1 < BITS_PER_WORD)
4761                       {
4762                         t1 = expand_shift
4763                           (RSHIFT_EXPR, int_mode, op0,
4764                            size - 1, NULL_RTX, 0);
4765                         t2 = expand_binop (int_mode, xor_optab, op0, t1,
4766                                            NULL_RTX, 0, OPTAB_WIDEN);
4767                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4768                                       + shift_cost (speed, int_mode, size - 1)
4769                                       + 2 * add_cost (speed, int_mode));
4770                         t3 = expmed_mult_highpart
4771                           (int_mode, t2, gen_int_mode (ml, int_mode),
4772                            NULL_RTX, 1, max_cost - extra_cost);
4773                         if (t3 != 0)
4774                           {
4775                             t4 = expand_shift
4776                               (RSHIFT_EXPR, int_mode, t3,
4777                                post_shift, NULL_RTX, 1);
4778                             quotient = expand_binop (int_mode, xor_optab,
4779                                                      t4, t1, tquotient, 0,
4780                                                      OPTAB_WIDEN);
4781                           }
4782                       }
4783                   }
4784               }
4785             else
4786               {
4787                 rtx nsign, t1, t2, t3, t4;
4788                 t1 = force_operand (gen_rtx_PLUS (int_mode,
4789                                                   op0, constm1_rtx), NULL_RTX);
4790                 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4791                                    0, OPTAB_WIDEN);
4792                 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4793                                       size - 1, NULL_RTX, 0);
4794                 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4795                                     NULL_RTX);
4796                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4797                                     NULL_RTX, 0);
4798                 if (t4)
4799                   {
4800                     rtx t5;
4801                     t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4802                                       NULL_RTX, 0);
4803                     quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4804                                               tquotient);
4805                   }
4806               }
4807           }
4808
4809         if (quotient != 0)
4810           break;
4811         delete_insns_since (last);
4812
4813         /* Try using an instruction that produces both the quotient and
4814            remainder, using truncation.  We can easily compensate the quotient
4815            or remainder to get floor rounding, once we have the remainder.
4816            Notice that we compute also the final remainder value here,
4817            and return the result right away.  */
4818         if (target == 0 || GET_MODE (target) != compute_mode)
4819           target = gen_reg_rtx (compute_mode);
4820
4821         if (rem_flag)
4822           {
4823             remainder
4824               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4825             quotient = gen_reg_rtx (compute_mode);
4826           }
4827         else
4828           {
4829             quotient
4830               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4831             remainder = gen_reg_rtx (compute_mode);
4832           }
4833
4834         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4835                                  quotient, remainder, 0))
4836           {
4837             /* This could be computed with a branch-less sequence.
4838                Save that for later.  */
4839             rtx tem;
4840             rtx_code_label *label = gen_label_rtx ();
4841             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4842             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4843                                 NULL_RTX, 0, OPTAB_WIDEN);
4844             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4845             expand_dec (quotient, const1_rtx);
4846             expand_inc (remainder, op1);
4847             emit_label (label);
4848             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4849           }
4850
4851         /* No luck with division elimination or divmod.  Have to do it
4852            by conditionally adjusting op0 *and* the result.  */
4853         {
4854           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4855           rtx adjusted_op0;
4856           rtx tem;
4857
4858           quotient = gen_reg_rtx (compute_mode);
4859           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4860           label1 = gen_label_rtx ();
4861           label2 = gen_label_rtx ();
4862           label3 = gen_label_rtx ();
4863           label4 = gen_label_rtx ();
4864           label5 = gen_label_rtx ();
4865           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4866           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4867           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4868                               quotient, 0, methods);
4869           if (tem != quotient)
4870             emit_move_insn (quotient, tem);
4871           emit_jump_insn (targetm.gen_jump (label5));
4872           emit_barrier ();
4873           emit_label (label1);
4874           expand_inc (adjusted_op0, const1_rtx);
4875           emit_jump_insn (targetm.gen_jump (label4));
4876           emit_barrier ();
4877           emit_label (label2);
4878           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4879           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4880                               quotient, 0, methods);
4881           if (tem != quotient)
4882             emit_move_insn (quotient, tem);
4883           emit_jump_insn (targetm.gen_jump (label5));
4884           emit_barrier ();
4885           emit_label (label3);
4886           expand_dec (adjusted_op0, const1_rtx);
4887           emit_label (label4);
4888           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4889                               quotient, 0, methods);
4890           if (tem != quotient)
4891             emit_move_insn (quotient, tem);
4892           expand_dec (quotient, const1_rtx);
4893           emit_label (label5);
4894         }
4895         break;
4896
4897       case CEIL_DIV_EXPR:
4898       case CEIL_MOD_EXPR:
4899         if (unsignedp)
4900           {
4901             if (op1_is_constant
4902                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4903                 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4904                     || INTVAL (op1) >= 0))
4905               {
4906                 scalar_int_mode int_mode
4907                   = as_a <scalar_int_mode> (compute_mode);
4908                 rtx t1, t2, t3;
4909                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4910                 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4911                                    floor_log2 (d), tquotient, 1);
4912                 t2 = expand_binop (int_mode, and_optab, op0,
4913                                    gen_int_mode (d - 1, int_mode),
4914                                    NULL_RTX, 1, methods);
4915                 t3 = gen_reg_rtx (int_mode);
4916                 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4917                 if (t3 == 0)
4918                   {
4919                     rtx_code_label *lab;
4920                     lab = gen_label_rtx ();
4921                     do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4922                     expand_inc (t1, const1_rtx);
4923                     emit_label (lab);
4924                     quotient = t1;
4925                   }
4926                 else
4927                   quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4928                                             tquotient);
4929                 break;
4930               }
4931
4932             /* Try using an instruction that produces both the quotient and
4933                remainder, using truncation.  We can easily compensate the
4934                quotient or remainder to get ceiling rounding, once we have the
4935                remainder.  Notice that we compute also the final remainder
4936                value here, and return the result right away.  */
4937             if (target == 0 || GET_MODE (target) != compute_mode)
4938               target = gen_reg_rtx (compute_mode);
4939
4940             if (rem_flag)
4941               {
4942                 remainder = (REG_P (target)
4943                              ? target : gen_reg_rtx (compute_mode));
4944                 quotient = gen_reg_rtx (compute_mode);
4945               }
4946             else
4947               {
4948                 quotient = (REG_P (target)
4949                             ? target : gen_reg_rtx (compute_mode));
4950                 remainder = gen_reg_rtx (compute_mode);
4951               }
4952
4953             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4954                                      remainder, 1))
4955               {
4956                 /* This could be computed with a branch-less sequence.
4957                    Save that for later.  */
4958                 rtx_code_label *label = gen_label_rtx ();
4959                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4960                                  compute_mode, label);
4961                 expand_inc (quotient, const1_rtx);
4962                 expand_dec (remainder, op1);
4963                 emit_label (label);
4964                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4965               }
4966
4967             /* No luck with division elimination or divmod.  Have to do it
4968                by conditionally adjusting op0 *and* the result.  */
4969             {
4970               rtx_code_label *label1, *label2;
4971               rtx adjusted_op0, tem;
4972
4973               quotient = gen_reg_rtx (compute_mode);
4974               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4975               label1 = gen_label_rtx ();
4976               label2 = gen_label_rtx ();
4977               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4978                                compute_mode, label1);
4979               emit_move_insn  (quotient, const0_rtx);
4980               emit_jump_insn (targetm.gen_jump (label2));
4981               emit_barrier ();
4982               emit_label (label1);
4983               expand_dec (adjusted_op0, const1_rtx);
4984               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4985                                   quotient, 1, methods);
4986               if (tem != quotient)
4987                 emit_move_insn (quotient, tem);
4988               expand_inc (quotient, const1_rtx);
4989               emit_label (label2);
4990             }
4991           }
4992         else /* signed */
4993           {
4994             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4995                 && INTVAL (op1) >= 0)
4996               {
4997                 /* This is extremely similar to the code for the unsigned case
4998                    above.  For 2.7 we should merge these variants, but for
4999                    2.6.1 I don't want to touch the code for unsigned since that
5000                    get used in C.  The signed case will only be used by other
5001                    languages (Ada).  */
5002
5003                 rtx t1, t2, t3;
5004                 unsigned HOST_WIDE_INT d = INTVAL (op1);
5005                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5006                                    floor_log2 (d), tquotient, 0);
5007                 t2 = expand_binop (compute_mode, and_optab, op0,
5008                                    gen_int_mode (d - 1, compute_mode),
5009                                    NULL_RTX, 1, methods);
5010                 t3 = gen_reg_rtx (compute_mode);
5011                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5012                                       compute_mode, 1, 1);
5013                 if (t3 == 0)
5014                   {
5015                     rtx_code_label *lab;
5016                     lab = gen_label_rtx ();
5017                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5018                     expand_inc (t1, const1_rtx);
5019                     emit_label (lab);
5020                     quotient = t1;
5021                   }
5022                 else
5023                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
5024                                                           t1, t3),
5025                                             tquotient);
5026                 break;
5027               }
5028
5029             /* Try using an instruction that produces both the quotient and
5030                remainder, using truncation.  We can easily compensate the
5031                quotient or remainder to get ceiling rounding, once we have the
5032                remainder.  Notice that we compute also the final remainder
5033                value here, and return the result right away.  */
5034             if (target == 0 || GET_MODE (target) != compute_mode)
5035               target = gen_reg_rtx (compute_mode);
5036             if (rem_flag)
5037               {
5038                 remainder= (REG_P (target)
5039                             ? target : gen_reg_rtx (compute_mode));
5040                 quotient = gen_reg_rtx (compute_mode);
5041               }
5042             else
5043               {
5044                 quotient = (REG_P (target)
5045                             ? target : gen_reg_rtx (compute_mode));
5046                 remainder = gen_reg_rtx (compute_mode);
5047               }
5048
5049             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5050                                      remainder, 0))
5051               {
5052                 /* This could be computed with a branch-less sequence.
5053                    Save that for later.  */
5054                 rtx tem;
5055                 rtx_code_label *label = gen_label_rtx ();
5056                 do_cmp_and_jump (remainder, const0_rtx, EQ,
5057                                  compute_mode, label);
5058                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5059                                     NULL_RTX, 0, OPTAB_WIDEN);
5060                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5061                 expand_inc (quotient, const1_rtx);
5062                 expand_dec (remainder, op1);
5063                 emit_label (label);
5064                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5065               }
5066
5067             /* No luck with division elimination or divmod.  Have to do it
5068                by conditionally adjusting op0 *and* the result.  */
5069             {
5070               rtx_code_label *label1, *label2, *label3, *label4, *label5;
5071               rtx adjusted_op0;
5072               rtx tem;
5073
5074               quotient = gen_reg_rtx (compute_mode);
5075               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5076               label1 = gen_label_rtx ();
5077               label2 = gen_label_rtx ();
5078               label3 = gen_label_rtx ();
5079               label4 = gen_label_rtx ();
5080               label5 = gen_label_rtx ();
5081               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5082               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5083                                compute_mode, label1);
5084               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5085                                   quotient, 0, methods);
5086               if (tem != quotient)
5087                 emit_move_insn (quotient, tem);
5088               emit_jump_insn (targetm.gen_jump (label5));
5089               emit_barrier ();
5090               emit_label (label1);
5091               expand_dec (adjusted_op0, const1_rtx);
5092               emit_jump_insn (targetm.gen_jump (label4));
5093               emit_barrier ();
5094               emit_label (label2);
5095               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5096                                compute_mode, label3);
5097               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5098                                   quotient, 0, methods);
5099               if (tem != quotient)
5100                 emit_move_insn (quotient, tem);
5101               emit_jump_insn (targetm.gen_jump (label5));
5102               emit_barrier ();
5103               emit_label (label3);
5104               expand_inc (adjusted_op0, const1_rtx);
5105               emit_label (label4);
5106               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5107                                   quotient, 0, methods);
5108               if (tem != quotient)
5109                 emit_move_insn (quotient, tem);
5110               expand_inc (quotient, const1_rtx);
5111               emit_label (label5);
5112             }
5113           }
5114         break;
5115
5116       case EXACT_DIV_EXPR:
5117         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5118           {
5119             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5120             int size = GET_MODE_BITSIZE (int_mode);
5121             HOST_WIDE_INT d = INTVAL (op1);
5122             unsigned HOST_WIDE_INT ml;
5123             int pre_shift;
5124             rtx t1;
5125
5126             pre_shift = ctz_or_zero (d);
5127             ml = invert_mod2n (d >> pre_shift, size);
5128             t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5129                                pre_shift, NULL_RTX, unsignedp);
5130             quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5131                                     NULL_RTX, 1);
5132
5133             insn = get_last_insn ();
5134             set_dst_reg_note (insn, REG_EQUAL,
5135                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5136                                               int_mode, op0, op1),
5137                               quotient);
5138           }
5139         break;
5140
5141       case ROUND_DIV_EXPR:
5142       case ROUND_MOD_EXPR:
5143         if (unsignedp)
5144           {
5145             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5146             rtx tem;
5147             rtx_code_label *label;
5148             label = gen_label_rtx ();
5149             quotient = gen_reg_rtx (int_mode);
5150             remainder = gen_reg_rtx (int_mode);
5151             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5152               {
5153                 rtx tem;
5154                 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5155                                          quotient, 1, methods);
5156                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5157                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5158                                           remainder, 1, methods);
5159               }
5160             tem = plus_constant (int_mode, op1, -1);
5161             tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5162             do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5163             expand_inc (quotient, const1_rtx);
5164             expand_dec (remainder, op1);
5165             emit_label (label);
5166           }
5167         else
5168           {
5169             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5170             int size = GET_MODE_BITSIZE (int_mode);
5171             rtx abs_rem, abs_op1, tem, mask;
5172             rtx_code_label *label;
5173             label = gen_label_rtx ();
5174             quotient = gen_reg_rtx (int_mode);
5175             remainder = gen_reg_rtx (int_mode);
5176             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5177               {
5178                 rtx tem;
5179                 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5180                                          quotient, 0, methods);
5181                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5182                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5183                                           remainder, 0, methods);
5184               }
5185             abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5186             abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5187             tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5188                                 1, NULL_RTX, 1);
5189             do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5190             tem = expand_binop (int_mode, xor_optab, op0, op1,
5191                                 NULL_RTX, 0, OPTAB_WIDEN);
5192             mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5193                                  size - 1, NULL_RTX, 0);
5194             tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5195                                 NULL_RTX, 0, OPTAB_WIDEN);
5196             tem = expand_binop (int_mode, sub_optab, tem, mask,
5197                                 NULL_RTX, 0, OPTAB_WIDEN);
5198             expand_inc (quotient, tem);
5199             tem = expand_binop (int_mode, xor_optab, mask, op1,
5200                                 NULL_RTX, 0, OPTAB_WIDEN);
5201             tem = expand_binop (int_mode, sub_optab, tem, mask,
5202                                 NULL_RTX, 0, OPTAB_WIDEN);
5203             expand_dec (remainder, tem);
5204             emit_label (label);
5205           }
5206         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5207
5208       default:
5209         gcc_unreachable ();
5210       }
5211
5212   if (quotient == 0)
5213     {
5214       if (target && GET_MODE (target) != compute_mode)
5215         target = 0;
5216
5217       if (rem_flag)
5218         {
5219           /* Try to produce the remainder without producing the quotient.
5220              If we seem to have a divmod pattern that does not require widening,
5221              don't try widening here.  We should really have a WIDEN argument
5222              to expand_twoval_binop, since what we'd really like to do here is
5223              1) try a mod insn in compute_mode
5224              2) try a divmod insn in compute_mode
5225              3) try a div insn in compute_mode and multiply-subtract to get
5226                 remainder
5227              4) try the same things with widening allowed.  */
5228           remainder
5229             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5230                                  op0, op1, target,
5231                                  unsignedp,
5232                                  ((optab_handler (optab2, compute_mode)
5233                                    != CODE_FOR_nothing)
5234                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5235           if (remainder == 0)
5236             {
5237               /* No luck there.  Can we do remainder and divide at once
5238                  without a library call?  */
5239               remainder = gen_reg_rtx (compute_mode);
5240               if (! expand_twoval_binop ((unsignedp
5241                                           ? udivmod_optab
5242                                           : sdivmod_optab),
5243                                          op0, op1,
5244                                          NULL_RTX, remainder, unsignedp))
5245                 remainder = 0;
5246             }
5247
5248           if (remainder)
5249             return gen_lowpart (mode, remainder);
5250         }
5251
5252       /* Produce the quotient.  Try a quotient insn, but not a library call.
5253          If we have a divmod in this mode, use it in preference to widening
5254          the div (for this test we assume it will not fail). Note that optab2
5255          is set to the one of the two optabs that the call below will use.  */
5256       quotient
5257         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5258                              op0, op1, rem_flag ? NULL_RTX : target,
5259                              unsignedp,
5260                              ((optab_handler (optab2, compute_mode)
5261                                != CODE_FOR_nothing)
5262                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5263
5264       if (quotient == 0)
5265         {
5266           /* No luck there.  Try a quotient-and-remainder insn,
5267              keeping the quotient alone.  */
5268           quotient = gen_reg_rtx (compute_mode);
5269           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5270                                      op0, op1,
5271                                      quotient, NULL_RTX, unsignedp))
5272             {
5273               quotient = 0;
5274               if (! rem_flag)
5275                 /* Still no luck.  If we are not computing the remainder,
5276                    use a library call for the quotient.  */
5277                 quotient = sign_expand_binop (compute_mode,
5278                                               udiv_optab, sdiv_optab,
5279                                               op0, op1, target,
5280                                               unsignedp, methods);
5281             }
5282         }
5283     }
5284
5285   if (rem_flag)
5286     {
5287       if (target && GET_MODE (target) != compute_mode)
5288         target = 0;
5289
5290       if (quotient == 0)
5291         {
5292           /* No divide instruction either.  Use library for remainder.  */
5293           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5294                                          op0, op1, target,
5295                                          unsignedp, methods);
5296           /* No remainder function.  Try a quotient-and-remainder
5297              function, keeping the remainder.  */
5298           if (!remainder
5299               && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5300             {
5301               remainder = gen_reg_rtx (compute_mode);
5302               if (!expand_twoval_binop_libfunc
5303                   (unsignedp ? udivmod_optab : sdivmod_optab,
5304                    op0, op1,
5305                    NULL_RTX, remainder,
5306                    unsignedp ? UMOD : MOD))
5307                 remainder = NULL_RTX;
5308             }
5309         }
5310       else
5311         {
5312           /* We divided.  Now finish doing X - Y * (X / Y).  */
5313           remainder = expand_mult (compute_mode, quotient, op1,
5314                                    NULL_RTX, unsignedp);
5315           remainder = expand_binop (compute_mode, sub_optab, op0,
5316                                     remainder, target, unsignedp,
5317                                     methods);
5318         }
5319     }
5320
5321   if (methods != OPTAB_LIB_WIDEN
5322       && (rem_flag ? remainder : quotient) == NULL_RTX)
5323     return NULL_RTX;
5324
5325   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5326 }
5327 \f
5328 /* Return a tree node with data type TYPE, describing the value of X.
5329    Usually this is an VAR_DECL, if there is no obvious better choice.
5330    X may be an expression, however we only support those expressions
5331    generated by loop.c.  */
5332
5333 tree
5334 make_tree (tree type, rtx x)
5335 {
5336   tree t;
5337
5338   switch (GET_CODE (x))
5339     {
5340     case CONST_INT:
5341     case CONST_WIDE_INT:
5342       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5343       return t;
5344
5345     case CONST_DOUBLE:
5346       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5347       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5348         t = wide_int_to_tree (type,
5349                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5350                                                     HOST_BITS_PER_WIDE_INT * 2));
5351       else
5352         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5353
5354       return t;
5355
5356     case CONST_VECTOR:
5357       {
5358         unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5359         unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5360         tree itype = TREE_TYPE (type);
5361
5362         /* Build a tree with vector elements.  */
5363         tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5364         unsigned int count = elts.encoded_nelts ();
5365         for (unsigned int i = 0; i < count; ++i)
5366           {
5367             rtx elt = CONST_VECTOR_ELT (x, i);
5368             elts.quick_push (make_tree (itype, elt));
5369           }
5370
5371         return elts.build ();
5372       }
5373
5374     case PLUS:
5375       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5376                           make_tree (type, XEXP (x, 1)));
5377
5378     case MINUS:
5379       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5380                           make_tree (type, XEXP (x, 1)));
5381
5382     case NEG:
5383       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5384
5385     case MULT:
5386       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5387                           make_tree (type, XEXP (x, 1)));
5388
5389     case ASHIFT:
5390       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5391                           make_tree (type, XEXP (x, 1)));
5392
5393     case LSHIFTRT:
5394       t = unsigned_type_for (type);
5395       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5396                                          make_tree (t, XEXP (x, 0)),
5397                                          make_tree (type, XEXP (x, 1))));
5398
5399     case ASHIFTRT:
5400       t = signed_type_for (type);
5401       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5402                                          make_tree (t, XEXP (x, 0)),
5403                                          make_tree (type, XEXP (x, 1))));
5404
5405     case DIV:
5406       if (TREE_CODE (type) != REAL_TYPE)
5407         t = signed_type_for (type);
5408       else
5409         t = type;
5410
5411       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5412                                          make_tree (t, XEXP (x, 0)),
5413                                          make_tree (t, XEXP (x, 1))));
5414     case UDIV:
5415       t = unsigned_type_for (type);
5416       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5417                                          make_tree (t, XEXP (x, 0)),
5418                                          make_tree (t, XEXP (x, 1))));
5419
5420     case SIGN_EXTEND:
5421     case ZERO_EXTEND:
5422       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5423                                           GET_CODE (x) == ZERO_EXTEND);
5424       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5425
5426     case CONST:
5427       return make_tree (type, XEXP (x, 0));
5428
5429     case SYMBOL_REF:
5430       t = SYMBOL_REF_DECL (x);
5431       if (t)
5432         return fold_convert (type, build_fold_addr_expr (t));
5433       /* fall through.  */
5434
5435     default:
5436       if (CONST_POLY_INT_P (x))
5437         return wide_int_to_tree (t, const_poly_int_value (x));
5438
5439       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5440
5441       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5442          address mode to pointer mode.  */
5443       if (POINTER_TYPE_P (type))
5444         x = convert_memory_address_addr_space
5445           (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5446
5447       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5448          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5449       t->decl_with_rtl.rtl = x;
5450
5451       return t;
5452     }
5453 }
5454 \f
5455 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5456    and returning TARGET.
5457
5458    If TARGET is 0, a pseudo-register or constant is returned.  */
5459
5460 rtx
5461 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5462 {
5463   rtx tem = 0;
5464
5465   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5466     tem = simplify_binary_operation (AND, mode, op0, op1);
5467   if (tem == 0)
5468     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5469
5470   if (target == 0)
5471     target = tem;
5472   else if (tem != target)
5473     emit_move_insn (target, tem);
5474   return target;
5475 }
5476
5477 /* Helper function for emit_store_flag.  */
5478 rtx
5479 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5480              machine_mode mode, machine_mode compare_mode,
5481              int unsignedp, rtx x, rtx y, int normalizep,
5482              machine_mode target_mode)
5483 {
5484   class expand_operand ops[4];
5485   rtx op0, comparison, subtarget;
5486   rtx_insn *last;
5487   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5488   scalar_int_mode int_target_mode;
5489
5490   last = get_last_insn ();
5491   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5492   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5493   if (!x || !y)
5494     {
5495       delete_insns_since (last);
5496       return NULL_RTX;
5497     }
5498
5499   if (target_mode == VOIDmode)
5500     int_target_mode = result_mode;
5501   else
5502     int_target_mode = as_a <scalar_int_mode> (target_mode);
5503   if (!target)
5504     target = gen_reg_rtx (int_target_mode);
5505
5506   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5507
5508   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5509   create_fixed_operand (&ops[1], comparison);
5510   create_fixed_operand (&ops[2], x);
5511   create_fixed_operand (&ops[3], y);
5512   if (!maybe_expand_insn (icode, 4, ops))
5513     {
5514       delete_insns_since (last);
5515       return NULL_RTX;
5516     }
5517   subtarget = ops[0].value;
5518
5519   /* If we are converting to a wider mode, first convert to
5520      INT_TARGET_MODE, then normalize.  This produces better combining
5521      opportunities on machines that have a SIGN_EXTRACT when we are
5522      testing a single bit.  This mostly benefits the 68k.
5523
5524      If STORE_FLAG_VALUE does not have the sign bit set when
5525      interpreted in MODE, we can do this conversion as unsigned, which
5526      is usually more efficient.  */
5527   if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5528     {
5529       gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5530                   || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5531
5532       bool unsignedp = (STORE_FLAG_VALUE >= 0);
5533       convert_move (target, subtarget, unsignedp);
5534
5535       op0 = target;
5536       result_mode = int_target_mode;
5537     }
5538   else
5539     op0 = subtarget;
5540
5541   /* If we want to keep subexpressions around, don't reuse our last
5542      target.  */
5543   if (optimize)
5544     subtarget = 0;
5545
5546   /* Now normalize to the proper value in MODE.  Sometimes we don't
5547      have to do anything.  */
5548   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5549     ;
5550   /* STORE_FLAG_VALUE might be the most negative number, so write
5551      the comparison this way to avoid a compiler-time warning.  */
5552   else if (- normalizep == STORE_FLAG_VALUE)
5553     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5554
5555   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5556      it hard to use a value of just the sign bit due to ANSI integer
5557      constant typing rules.  */
5558   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5559     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5560                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5561                         normalizep == 1);
5562   else
5563     {
5564       gcc_assert (STORE_FLAG_VALUE & 1);
5565
5566       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5567       if (normalizep == -1)
5568         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5569     }
5570
5571   /* If we were converting to a smaller mode, do the conversion now.  */
5572   if (int_target_mode != result_mode)
5573     {
5574       convert_move (target, op0, 0);
5575       return target;
5576     }
5577   else
5578     return op0;
5579 }
5580
5581
5582 /* A subroutine of emit_store_flag only including "tricks" that do not
5583    need a recursive call.  These are kept separate to avoid infinite
5584    loops.  */
5585
5586 static rtx
5587 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5588                    machine_mode mode, int unsignedp, int normalizep,
5589                    machine_mode target_mode)
5590 {
5591   rtx subtarget;
5592   enum insn_code icode;
5593   machine_mode compare_mode;
5594   enum mode_class mclass;
5595   enum rtx_code scode;
5596
5597   if (unsignedp)
5598     code = unsigned_condition (code);
5599   scode = swap_condition (code);
5600
5601   /* If one operand is constant, make it the second one.  Only do this
5602      if the other operand is not constant as well.  */
5603
5604   if (swap_commutative_operands_p (op0, op1))
5605     {
5606       std::swap (op0, op1);
5607       code = swap_condition (code);
5608     }
5609
5610   if (mode == VOIDmode)
5611     mode = GET_MODE (op0);
5612
5613   if (CONST_SCALAR_INT_P (op1))
5614     canonicalize_comparison (mode, &code, &op1);
5615
5616   /* For some comparisons with 1 and -1, we can convert this to
5617      comparisons with zero.  This will often produce more opportunities for
5618      store-flag insns.  */
5619
5620   switch (code)
5621     {
5622     case LT:
5623       if (op1 == const1_rtx)
5624         op1 = const0_rtx, code = LE;
5625       break;
5626     case LE:
5627       if (op1 == constm1_rtx)
5628         op1 = const0_rtx, code = LT;
5629       break;
5630     case GE:
5631       if (op1 == const1_rtx)
5632         op1 = const0_rtx, code = GT;
5633       break;
5634     case GT:
5635       if (op1 == constm1_rtx)
5636         op1 = const0_rtx, code = GE;
5637       break;
5638     case GEU:
5639       if (op1 == const1_rtx)
5640         op1 = const0_rtx, code = NE;
5641       break;
5642     case LTU:
5643       if (op1 == const1_rtx)
5644         op1 = const0_rtx, code = EQ;
5645       break;
5646     default:
5647       break;
5648     }
5649
5650   /* If we are comparing a double-word integer with zero or -1, we can
5651      convert the comparison into one involving a single word.  */
5652   scalar_int_mode int_mode;
5653   if (is_int_mode (mode, &int_mode)
5654       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5655       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5656     {
5657       rtx tem;
5658       if ((code == EQ || code == NE)
5659           && (op1 == const0_rtx || op1 == constm1_rtx))
5660         {
5661           rtx op00, op01;
5662
5663           /* Do a logical OR or AND of the two words and compare the
5664              result.  */
5665           op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5666           op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5667           tem = expand_binop (word_mode,
5668                               op1 == const0_rtx ? ior_optab : and_optab,
5669                               op00, op01, NULL_RTX, unsignedp,
5670                               OPTAB_DIRECT);
5671
5672           if (tem != 0)
5673             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5674                                    unsignedp, normalizep);
5675         }
5676       else if ((code == LT || code == GE) && op1 == const0_rtx)
5677         {
5678           rtx op0h;
5679
5680           /* If testing the sign bit, can just test on high word.  */
5681           op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5682                                       subreg_highpart_offset (word_mode,
5683                                                               int_mode));
5684           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5685                                  unsignedp, normalizep);
5686         }
5687       else
5688         tem = NULL_RTX;
5689
5690       if (tem)
5691         {
5692           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5693             return tem;
5694           if (!target)
5695             target = gen_reg_rtx (target_mode);
5696
5697           convert_move (target, tem,
5698                         !val_signbit_known_set_p (word_mode,
5699                                                   (normalizep ? normalizep
5700                                                    : STORE_FLAG_VALUE)));
5701           return target;
5702         }
5703     }
5704
5705   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5706      complement of A (for GE) and shifting the sign bit to the low bit.  */
5707   if (op1 == const0_rtx && (code == LT || code == GE)
5708       && is_int_mode (mode, &int_mode)
5709       && (normalizep || STORE_FLAG_VALUE == 1
5710           || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5711     {
5712       scalar_int_mode int_target_mode;
5713       subtarget = target;
5714
5715       if (!target)
5716         int_target_mode = int_mode;
5717       else
5718         {
5719           /* If the result is to be wider than OP0, it is best to convert it
5720              first.  If it is to be narrower, it is *incorrect* to convert it
5721              first.  */
5722           int_target_mode = as_a <scalar_int_mode> (target_mode);
5723           if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5724             {
5725               op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5726               int_mode = int_target_mode;
5727             }
5728         }
5729
5730       if (int_target_mode != int_mode)
5731         subtarget = 0;
5732
5733       if (code == GE)
5734         op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5735                            ((STORE_FLAG_VALUE == 1 || normalizep)
5736                             ? 0 : subtarget), 0);
5737
5738       if (STORE_FLAG_VALUE == 1 || normalizep)
5739         /* If we are supposed to produce a 0/1 value, we want to do
5740            a logical shift from the sign bit to the low-order bit; for
5741            a -1/0 value, we do an arithmetic shift.  */
5742         op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5743                             GET_MODE_BITSIZE (int_mode) - 1,
5744                             subtarget, normalizep != -1);
5745
5746       if (int_mode != int_target_mode)
5747         op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5748
5749       return op0;
5750     }
5751
5752   mclass = GET_MODE_CLASS (mode);
5753   FOR_EACH_MODE_FROM (compare_mode, mode)
5754     {
5755      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5756      icode = optab_handler (cstore_optab, optab_mode);
5757      if (icode != CODE_FOR_nothing)
5758         {
5759           do_pending_stack_adjust ();
5760           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5761                                  unsignedp, op0, op1, normalizep, target_mode);
5762           if (tem)
5763             return tem;
5764
5765           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5766             {
5767               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5768                                  unsignedp, op1, op0, normalizep, target_mode);
5769               if (tem)
5770                 return tem;
5771             }
5772           break;
5773         }
5774     }
5775
5776   return 0;
5777 }
5778
5779 /* Subroutine of emit_store_flag that handles cases in which the operands
5780    are scalar integers.  SUBTARGET is the target to use for temporary
5781    operations and TRUEVAL is the value to store when the condition is
5782    true.  All other arguments are as for emit_store_flag.  */
5783
5784 rtx
5785 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5786                      rtx op1, scalar_int_mode mode, int unsignedp,
5787                      int normalizep, rtx trueval)
5788 {
5789   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5790   rtx_insn *last = get_last_insn ();
5791
5792   /* If this is an equality comparison of integers, we can try to exclusive-or
5793      (or subtract) the two operands and use a recursive call to try the
5794      comparison with zero.  Don't do any of these cases if branches are
5795      very cheap.  */
5796
5797   if ((code == EQ || code == NE) && op1 != const0_rtx)
5798     {
5799       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5800                               OPTAB_WIDEN);
5801
5802       if (tem == 0)
5803         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5804                             OPTAB_WIDEN);
5805       if (tem != 0)
5806         tem = emit_store_flag (target, code, tem, const0_rtx,
5807                                mode, unsignedp, normalizep);
5808       if (tem != 0)
5809         return tem;
5810
5811       delete_insns_since (last);
5812     }
5813
5814   /* For integer comparisons, try the reverse comparison.  However, for
5815      small X and if we'd have anyway to extend, implementing "X != 0"
5816      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5817   rtx_code rcode = reverse_condition (code);
5818   if (can_compare_p (rcode, mode, ccp_store_flag)
5819       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5820             && code == NE
5821             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5822             && op1 == const0_rtx))
5823     {
5824       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5825                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5826
5827       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5828       if (want_add
5829           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5830                        optimize_insn_for_speed_p ()) == 0)
5831         {
5832           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5833                                        STORE_FLAG_VALUE, target_mode);
5834           if (tem != 0)
5835             tem = expand_binop (target_mode, add_optab, tem,
5836                                 gen_int_mode (normalizep, target_mode),
5837                                 target, 0, OPTAB_WIDEN);
5838           if (tem != 0)
5839             return tem;
5840         }
5841       else if (!want_add
5842                && rtx_cost (trueval, mode, XOR, 1,
5843                             optimize_insn_for_speed_p ()) == 0)
5844         {
5845           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5846                                        normalizep, target_mode);
5847           if (tem != 0)
5848             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5849                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5850           if (tem != 0)
5851             return tem;
5852         }
5853
5854       delete_insns_since (last);
5855     }
5856
5857   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5858      the constant zero.  Reject all other comparisons at this point.  Only
5859      do LE and GT if branches are expensive since they are expensive on
5860      2-operand machines.  */
5861
5862   if (op1 != const0_rtx
5863       || (code != EQ && code != NE
5864           && (BRANCH_COST (optimize_insn_for_speed_p (),
5865                            false) <= 1 || (code != LE && code != GT))))
5866     return 0;
5867
5868   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5869      do the necessary operation below.  */
5870
5871   rtx tem = 0;
5872
5873   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5874      the sign bit set.  */
5875
5876   if (code == LE)
5877     {
5878       /* This is destructive, so SUBTARGET can't be OP0.  */
5879       if (rtx_equal_p (subtarget, op0))
5880         subtarget = 0;
5881
5882       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5883                           OPTAB_WIDEN);
5884       if (tem)
5885         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5886                             OPTAB_WIDEN);
5887     }
5888
5889   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5890      number of bits in the mode of OP0, minus one.  */
5891
5892   if (code == GT)
5893     {
5894       if (rtx_equal_p (subtarget, op0))
5895         subtarget = 0;
5896
5897       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5898                                 GET_MODE_BITSIZE (mode) - 1,
5899                                 subtarget, 0);
5900       if (tem)
5901         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5902                             OPTAB_WIDEN);
5903     }
5904
5905   if (code == EQ || code == NE)
5906     {
5907       /* For EQ or NE, one way to do the comparison is to apply an operation
5908          that converts the operand into a positive number if it is nonzero
5909          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5910          for NE we negate.  This puts the result in the sign bit.  Then we
5911          normalize with a shift, if needed.
5912
5913          Two operations that can do the above actions are ABS and FFS, so try
5914          them.  If that doesn't work, and MODE is smaller than a full word,
5915          we can use zero-extension to the wider mode (an unsigned conversion)
5916          as the operation.  */
5917
5918       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5919          that is compensated by the subsequent overflow when subtracting
5920          one / negating.  */
5921
5922       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5923         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5924       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5925         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5926       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5927         {
5928           tem = convert_modes (word_mode, mode, op0, 1);
5929           mode = word_mode;
5930         }
5931
5932       if (tem != 0)
5933         {
5934           if (code == EQ)
5935             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5936                                 0, OPTAB_WIDEN);
5937           else
5938             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5939         }
5940
5941       /* If we couldn't do it that way, for NE we can "or" the two's complement
5942          of the value with itself.  For EQ, we take the one's complement of
5943          that "or", which is an extra insn, so we only handle EQ if branches
5944          are expensive.  */
5945
5946       if (tem == 0
5947           && (code == NE
5948               || BRANCH_COST (optimize_insn_for_speed_p (),
5949                               false) > 1))
5950         {
5951           if (rtx_equal_p (subtarget, op0))
5952             subtarget = 0;
5953
5954           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5955           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5956                               OPTAB_WIDEN);
5957
5958           if (tem && code == EQ)
5959             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5960         }
5961     }
5962
5963   if (tem && normalizep)
5964     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5965                               GET_MODE_BITSIZE (mode) - 1,
5966                               subtarget, normalizep == 1);
5967
5968   if (tem)
5969     {
5970       if (!target)
5971         ;
5972       else if (GET_MODE (tem) != target_mode)
5973         {
5974           convert_move (target, tem, 0);
5975           tem = target;
5976         }
5977       else if (!subtarget)
5978         {
5979           emit_move_insn (target, tem);
5980           tem = target;
5981         }
5982     }
5983   else
5984     delete_insns_since (last);
5985
5986   return tem;
5987 }
5988
5989 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5990    and storing in TARGET.  Normally return TARGET.
5991    Return 0 if that cannot be done.
5992
5993    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5994    it is VOIDmode, they cannot both be CONST_INT.
5995
5996    UNSIGNEDP is for the case where we have to widen the operands
5997    to perform the operation.  It says to use zero-extension.
5998
5999    NORMALIZEP is 1 if we should convert the result to be either zero
6000    or one.  Normalize is -1 if we should convert the result to be
6001    either zero or -1.  If NORMALIZEP is zero, the result will be left
6002    "raw" out of the scc insn.  */
6003
6004 rtx
6005 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6006                  machine_mode mode, int unsignedp, int normalizep)
6007 {
6008   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6009   enum rtx_code rcode;
6010   rtx subtarget;
6011   rtx tem, trueval;
6012   rtx_insn *last;
6013
6014   /* If we compare constants, we shouldn't use a store-flag operation,
6015      but a constant load.  We can get there via the vanilla route that
6016      usually generates a compare-branch sequence, but will in this case
6017      fold the comparison to a constant, and thus elide the branch.  */
6018   if (CONSTANT_P (op0) && CONSTANT_P (op1))
6019     return NULL_RTX;
6020
6021   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6022                            target_mode);
6023   if (tem)
6024     return tem;
6025
6026   /* If we reached here, we can't do this with a scc insn, however there
6027      are some comparisons that can be done in other ways.  Don't do any
6028      of these cases if branches are very cheap.  */
6029   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6030     return 0;
6031
6032   /* See what we need to return.  We can only return a 1, -1, or the
6033      sign bit.  */
6034
6035   if (normalizep == 0)
6036     {
6037       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6038         normalizep = STORE_FLAG_VALUE;
6039
6040       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6041         ;
6042       else
6043         return 0;
6044     }
6045
6046   last = get_last_insn ();
6047
6048   /* If optimizing, use different pseudo registers for each insn, instead
6049      of reusing the same pseudo.  This leads to better CSE, but slows
6050      down the compiler, since there are more pseudos.  */
6051   subtarget = (!optimize
6052                && (target_mode == mode)) ? target : NULL_RTX;
6053   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6054
6055   /* For floating-point comparisons, try the reverse comparison or try
6056      changing the "orderedness" of the comparison.  */
6057   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6058     {
6059       enum rtx_code first_code;
6060       bool and_them;
6061
6062       rcode = reverse_condition_maybe_unordered (code);
6063       if (can_compare_p (rcode, mode, ccp_store_flag)
6064           && (code == ORDERED || code == UNORDERED
6065               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6066               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6067         {
6068           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6069                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6070
6071           /* For the reverse comparison, use either an addition or a XOR.  */
6072           if (want_add
6073               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6074                            optimize_insn_for_speed_p ()) == 0)
6075             {
6076               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6077                                        STORE_FLAG_VALUE, target_mode);
6078               if (tem)
6079                 return expand_binop (target_mode, add_optab, tem,
6080                                      gen_int_mode (normalizep, target_mode),
6081                                      target, 0, OPTAB_WIDEN);
6082             }
6083           else if (!want_add
6084                    && rtx_cost (trueval, mode, XOR, 1,
6085                                 optimize_insn_for_speed_p ()) == 0)
6086             {
6087               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6088                                        normalizep, target_mode);
6089               if (tem)
6090                 return expand_binop (target_mode, xor_optab, tem, trueval,
6091                                      target, INTVAL (trueval) >= 0,
6092                                      OPTAB_WIDEN);
6093             }
6094         }
6095
6096       delete_insns_since (last);
6097
6098       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6099       if (code == ORDERED || code == UNORDERED)
6100         return 0;
6101
6102       and_them = split_comparison (code, mode, &first_code, &code);
6103
6104       /* If there are no NaNs, the first comparison should always fall through.
6105          Effectively change the comparison to the other one.  */
6106       if (!HONOR_NANS (mode))
6107         {
6108           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6109           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6110                                     target_mode);
6111         }
6112
6113       if (!HAVE_conditional_move)
6114         return 0;
6115
6116       /* Do not turn a trapping comparison into a non-trapping one.  */
6117       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6118           && flag_trapping_math)
6119         return 0;
6120
6121       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6122          conditional move.  */
6123       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6124                                normalizep, target_mode);
6125       if (tem == 0)
6126         return 0;
6127
6128       if (and_them)
6129         tem = emit_conditional_move (target, code, op0, op1, mode,
6130                                      tem, const0_rtx, GET_MODE (tem), 0);
6131       else
6132         tem = emit_conditional_move (target, code, op0, op1, mode,
6133                                      trueval, tem, GET_MODE (tem), 0);
6134
6135       if (tem == 0)
6136         delete_insns_since (last);
6137       return tem;
6138     }
6139
6140   /* The remaining tricks only apply to integer comparisons.  */
6141
6142   scalar_int_mode int_mode;
6143   if (is_int_mode (mode, &int_mode))
6144     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6145                                 unsignedp, normalizep, trueval);
6146
6147   return 0;
6148 }
6149
6150 /* Like emit_store_flag, but always succeeds.  */
6151
6152 rtx
6153 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6154                        machine_mode mode, int unsignedp, int normalizep)
6155 {
6156   rtx tem;
6157   rtx_code_label *label;
6158   rtx trueval, falseval;
6159
6160   /* First see if emit_store_flag can do the job.  */
6161   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6162   if (tem != 0)
6163     return tem;
6164
6165   /* If one operand is constant, make it the second one.  Only do this
6166      if the other operand is not constant as well.  */
6167   if (swap_commutative_operands_p (op0, op1))
6168     {
6169       std::swap (op0, op1);
6170       code = swap_condition (code);
6171     }
6172
6173   if (mode == VOIDmode)
6174     mode = GET_MODE (op0);
6175
6176   if (!target)
6177     target = gen_reg_rtx (word_mode);
6178
6179   /* If this failed, we have to do this with set/compare/jump/set code.
6180      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6181   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6182   if (code == NE
6183       && GET_MODE_CLASS (mode) == MODE_INT
6184       && REG_P (target)
6185       && op0 == target
6186       && op1 == const0_rtx)
6187     {
6188       label = gen_label_rtx ();
6189       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6190                                NULL_RTX, NULL, label,
6191                                profile_probability::uninitialized ());
6192       emit_move_insn (target, trueval);
6193       emit_label (label);
6194       return target;
6195     }
6196
6197   if (!REG_P (target)
6198       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6199     target = gen_reg_rtx (GET_MODE (target));
6200
6201   /* Jump in the right direction if the target cannot implement CODE
6202      but can jump on its reverse condition.  */
6203   falseval = const0_rtx;
6204   if (! can_compare_p (code, mode, ccp_jump)
6205       && (! FLOAT_MODE_P (mode)
6206           || code == ORDERED || code == UNORDERED
6207           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6208           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6209     {
6210       enum rtx_code rcode;
6211       if (FLOAT_MODE_P (mode))
6212         rcode = reverse_condition_maybe_unordered (code);
6213       else
6214         rcode = reverse_condition (code);
6215
6216       /* Canonicalize to UNORDERED for the libcall.  */
6217       if (can_compare_p (rcode, mode, ccp_jump)
6218           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6219         {
6220           falseval = trueval;
6221           trueval = const0_rtx;
6222           code = rcode;
6223         }
6224     }
6225
6226   emit_move_insn (target, trueval);
6227   label = gen_label_rtx ();
6228   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6229                            label, profile_probability::uninitialized ());
6230
6231   emit_move_insn (target, falseval);
6232   emit_label (label);
6233
6234   return target;
6235 }
6236
6237 /* Helper function for canonicalize_cmp_for_target.  Swap between inclusive
6238    and exclusive ranges in order to create an equivalent comparison.  See
6239    canonicalize_cmp_for_target for the possible cases.  */
6240
6241 static enum rtx_code
6242 equivalent_cmp_code (enum rtx_code code)
6243 {
6244   switch (code)
6245     {
6246     case GT:
6247       return GE;
6248     case GE:
6249       return GT;
6250     case LT:
6251       return LE;
6252     case LE:
6253       return LT;
6254     case GTU:
6255       return GEU;
6256     case GEU:
6257       return GTU;
6258     case LTU:
6259       return LEU;
6260     case LEU:
6261       return LTU;
6262
6263     default:
6264       return code;
6265     }
6266 }
6267
6268 /* Choose the more appropiate immediate in scalar integer comparisons.  The
6269    purpose of this is to end up with an immediate which can be loaded into a
6270    register in fewer moves, if possible.
6271
6272    For each integer comparison there exists an equivalent choice:
6273      i)   a >  b or a >= b + 1
6274      ii)  a <= b or a <  b + 1
6275      iii) a >= b or a >  b - 1
6276      iv)  a <  b or a <= b - 1
6277
6278    MODE is the mode of the first operand.
6279    CODE points to the comparison code.
6280    IMM points to the rtx containing the immediate.  *IMM must satisfy
6281    CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6282    on exit.  */
6283
6284 void
6285 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6286 {
6287   if (!SCALAR_INT_MODE_P (mode))
6288     return;
6289
6290   int to_add = 0;
6291   enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6292
6293   /* Extract the immediate value from the rtx.  */
6294   wide_int imm_val = rtx_mode_t (*imm, mode);
6295
6296   if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6297     to_add = 1;
6298   else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6299     to_add = -1;
6300   else
6301     return;
6302
6303   /* Check for overflow/underflow in the case of signed values and
6304      wrapping around in the case of unsigned values.  If any occur
6305      cancel the optimization.  */
6306   wi::overflow_type overflow = wi::OVF_NONE;
6307   wide_int imm_modif;
6308
6309   if (to_add == 1)
6310     imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6311   else
6312     imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6313
6314   if (overflow)
6315     return;
6316
6317   /* The following creates a pseudo; if we cannot do that, bail out.  */
6318   if (!can_create_pseudo_p ())
6319     return;
6320
6321   rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6322   rtx new_imm = immed_wide_int_const (imm_modif, mode);
6323
6324   rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6325   rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6326
6327   /* Update the immediate and the code.  */
6328   if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6329     {
6330       *code = equivalent_cmp_code (*code);
6331       *imm = new_imm;
6332     }
6333 }
6334
6335
6336 \f
6337 /* Perform possibly multi-word comparison and conditional jump to LABEL
6338    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6339    now a thin wrapper around do_compare_rtx_and_jump.  */
6340
6341 static void
6342 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6343                  rtx_code_label *label)
6344 {
6345   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6346   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6347                            NULL, label, profile_probability::uninitialized ());
6348 }