gcc/expmed.cc
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2022 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "optabs.h"
35 #include "expmed.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 poly_uint64, poly_uint64,
56 machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 poly_uint64, poly_uint64,
61 rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67 unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT,
69 poly_uint64, poly_uint64,
70 rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, rtx,
74 machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76 unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79 unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83 unsigned HOST_WIDE_INT,
84 unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91 The mask is truncated if necessary to the width of mode MODE. The
92 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
94 static inline rtx
95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
97 return immed_wide_int_const
98 (wi::shifted_mask (bitpos, bitsize, complement,
99 GET_MODE_PRECISION (mode)), mode);
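
/* Illustrative sketch, not part of expmed.cc: the value mask_rtx builds,
   computed with plain 64-bit arithmetic for a mode of PREC bits, assuming
   BITSIZE and PREC are both below 64.  For example, bitpos = 4, bitsize = 8,
   complement = false and prec = 32 give 0x00000ff0.  The helper name and the
   use of unsigned long long instead of wide_int are made up for this
   example.  */

static unsigned long long
example_shifted_mask (int bitpos, int bitsize, bool complement, int prec)
{
  /* BITSIZE ones sitting just above BITPOS zeros.  */
  unsigned long long mask = ((1ULL << bitsize) - 1) << bitpos;
  if (complement)
    mask = ~mask;
  /* Truncate to the PREC-bit precision of the mode.  */
  return mask & ((1ULL << prec) - 1);
}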
 102 /* Test whether a value is zero or a power of two. */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
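
/* Illustrative sketch, not part of expmed.cc: the same zero-or-power-of-two
   test as a standalone function.  Clearing the lowest set bit with
   x & (x - 1) leaves zero exactly when at most one bit was set, so
   0 -> true, 8 -> true, 12 -> false.  The function name is made up for this
   example.  */

static inline bool
example_zero_or_pow2_p (unsigned long long x)
{
  return (x & (x - 1)) == 0;
}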
106 struct init_expmed_rtl
108 rtx reg;
109 rtx plus;
110 rtx neg;
111 rtx mult;
112 rtx sdiv;
113 rtx udiv;
114 rtx sdiv_32;
115 rtx smod_32;
116 rtx wide_mult;
117 rtx wide_lshr;
118 rtx wide_trunc;
119 rtx shift;
120 rtx shift_mult;
121 rtx shift_add;
122 rtx shift_sub0;
123 rtx shift_sub1;
124 rtx zext;
125 rtx trunc;
127 rtx pow2[MAX_BITS_PER_WORD];
128 rtx cint[MAX_BITS_PER_WORD];
131 static void
132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133 scalar_int_mode from_mode, bool speed)
135 int to_size, from_size;
136 rtx which;
138 to_size = GET_MODE_PRECISION (to_mode);
139 from_size = GET_MODE_PRECISION (from_mode);
 141 /* Most partial integers have a precision less than the "full"
 142 integer required for storage.  If one doesn't, reduce the bit
 143 size by one here for comparison purposes. */
145 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146 && pow2p_hwi (to_size))
147 to_size --;
148 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149 && pow2p_hwi (from_size))
150 from_size --;
152 /* Assume cost of zero-extend and sign-extend is the same. */
153 which = (to_size < from_size ? all->trunc : all->zext);
155 PUT_MODE (all->reg, from_mode);
156 set_convert_cost (to_mode, from_mode, speed,
157 set_src_cost (which, to_mode, speed));
158 /* Restore all->reg's mode. */
159 PUT_MODE (all->reg, to_mode);
162 static void
163 init_expmed_one_mode (struct init_expmed_rtl *all,
164 machine_mode mode, int speed)
166 int m, n, mode_bitsize;
167 machine_mode mode_from;
169 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
171 PUT_MODE (all->reg, mode);
172 PUT_MODE (all->plus, mode);
173 PUT_MODE (all->neg, mode);
174 PUT_MODE (all->mult, mode);
175 PUT_MODE (all->sdiv, mode);
176 PUT_MODE (all->udiv, mode);
177 PUT_MODE (all->sdiv_32, mode);
178 PUT_MODE (all->smod_32, mode);
179 PUT_MODE (all->wide_trunc, mode);
180 PUT_MODE (all->shift, mode);
181 PUT_MODE (all->shift_mult, mode);
182 PUT_MODE (all->shift_add, mode);
183 PUT_MODE (all->shift_sub0, mode);
184 PUT_MODE (all->shift_sub1, mode);
185 PUT_MODE (all->zext, mode);
186 PUT_MODE (all->trunc, mode);
188 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
189 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
190 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
191 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
192 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
194 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
195 <= 2 * add_cost (speed, mode)));
196 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
197 <= 4 * add_cost (speed, mode)));
199 set_shift_cost (speed, mode, 0, 0);
201 int cost = add_cost (speed, mode);
202 set_shiftadd_cost (speed, mode, 0, cost);
203 set_shiftsub0_cost (speed, mode, 0, cost);
204 set_shiftsub1_cost (speed, mode, 0, cost);
207 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
208 for (m = 1; m < n; m++)
210 XEXP (all->shift, 1) = all->cint[m];
211 XEXP (all->shift_mult, 1) = all->pow2[m];
213 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
214 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
215 speed));
216 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
217 speed));
218 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
219 speed));
222 scalar_int_mode int_mode_to;
223 if (is_a <scalar_int_mode> (mode, &int_mode_to))
225 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
226 mode_from = (machine_mode)(mode_from + 1))
227 init_expmed_one_conv (all, int_mode_to,
228 as_a <scalar_int_mode> (mode_from), speed);
230 scalar_int_mode wider_mode;
231 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
232 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
234 PUT_MODE (all->reg, mode);
235 PUT_MODE (all->zext, wider_mode);
236 PUT_MODE (all->wide_mult, wider_mode);
237 PUT_MODE (all->wide_lshr, wider_mode);
238 XEXP (all->wide_lshr, 1)
239 = gen_int_shift_amount (wider_mode, mode_bitsize);
241 set_mul_widen_cost (speed, wider_mode,
242 set_src_cost (all->wide_mult, wider_mode, speed));
243 set_mul_highpart_cost (speed, int_mode_to,
244 set_src_cost (all->wide_trunc,
245 int_mode_to, speed));
250 void
251 init_expmed (void)
253 struct init_expmed_rtl all;
254 machine_mode mode = QImode;
255 int m, speed;
257 memset (&all, 0, sizeof all);
258 for (m = 1; m < MAX_BITS_PER_WORD; m++)
260 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
261 all.cint[m] = GEN_INT (m);
264 /* Avoid using hard regs in ways which may be unsupported. */
265 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
266 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
267 all.neg = gen_rtx_NEG (mode, all.reg);
268 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
269 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
270 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
271 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
272 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
273 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
274 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
275 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
276 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
277 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
278 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
279 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
280 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
281 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
282 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
284 for (speed = 0; speed < 2; speed++)
286 crtl->maybe_hot_insn_p = speed;
287 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
289 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
290 mode = (machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
293 if (MIN_MODE_PARTIAL_INT != VOIDmode)
294 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
295 mode = (machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
298 if (MIN_MODE_VECTOR_INT != VOIDmode)
299 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
300 mode = (machine_mode)(mode + 1))
301 init_expmed_one_mode (&all, mode, speed);
304 if (alg_hash_used_p ())
306 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
307 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
309 else
310 set_alg_hash_used_p (true);
311 default_rtl_profile ();
313 ggc_free (all.trunc);
314 ggc_free (all.shift_sub1);
315 ggc_free (all.shift_sub0);
316 ggc_free (all.shift_add);
317 ggc_free (all.shift_mult);
318 ggc_free (all.shift);
319 ggc_free (all.wide_trunc);
320 ggc_free (all.wide_lshr);
321 ggc_free (all.wide_mult);
322 ggc_free (all.zext);
323 ggc_free (all.smod_32);
324 ggc_free (all.sdiv_32);
325 ggc_free (all.udiv);
326 ggc_free (all.sdiv);
327 ggc_free (all.mult);
328 ggc_free (all.neg);
329 ggc_free (all.plus);
330 ggc_free (all.reg);
333 /* Return an rtx representing minus the value of X.
334 MODE is the intended mode of the result,
335 useful if X is a CONST_INT. */
338 negate_rtx (machine_mode mode, rtx x)
340 rtx result = simplify_unary_operation (NEG, mode, x, mode);
342 if (result == 0)
343 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
345 return result;
348 /* Whether reverse storage order is supported on the target. */
349 static int reverse_storage_order_supported = -1;
351 /* Check whether reverse storage order is supported on the target. */
353 static void
354 check_reverse_storage_order_support (void)
356 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
358 reverse_storage_order_supported = 0;
359 sorry ("reverse scalar storage order");
361 else
362 reverse_storage_order_supported = 1;
365 /* Whether reverse FP storage order is supported on the target. */
366 static int reverse_float_storage_order_supported = -1;
368 /* Check whether reverse FP storage order is supported on the target. */
370 static void
371 check_reverse_float_storage_order_support (void)
373 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
375 reverse_float_storage_order_supported = 0;
376 sorry ("reverse floating-point scalar storage order");
378 else
379 reverse_float_storage_order_supported = 1;
382 /* Return an rtx representing value of X with reverse storage order.
383 MODE is the intended mode of the result,
384 useful if X is a CONST_INT. */
387 flip_storage_order (machine_mode mode, rtx x)
389 scalar_int_mode int_mode;
390 rtx result;
392 if (mode == QImode)
393 return x;
395 if (COMPLEX_MODE_P (mode))
397 rtx real = read_complex_part (x, false);
398 rtx imag = read_complex_part (x, true);
400 real = flip_storage_order (GET_MODE_INNER (mode), real);
401 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
403 return gen_rtx_CONCAT (mode, real, imag);
406 if (UNLIKELY (reverse_storage_order_supported < 0))
407 check_reverse_storage_order_support ();
409 if (!is_a <scalar_int_mode> (mode, &int_mode))
411 if (FLOAT_MODE_P (mode)
412 && UNLIKELY (reverse_float_storage_order_supported < 0))
413 check_reverse_float_storage_order_support ();
415 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
416 || !targetm.scalar_mode_supported_p (int_mode))
418 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
419 return x;
421 x = gen_lowpart (int_mode, x);
424 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
425 if (result == 0)
426 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
428 if (int_mode != mode)
429 result = gen_lowpart (mode, result);
431 return result;
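
/* Illustrative sketch, not part of expmed.cc: at its core, flipping the
   scalar storage order of an integer value is a byte swap; the code above
   goes through BSWAP / bswap_optab for the general case.  A 32-bit version
   in plain C, assuming 8-bit bytes; the function name is made up for this
   example.  */

static unsigned int
example_flip_storage_order_32 (unsigned int x)
{
  return ((x & 0x000000ffu) << 24)
	 | ((x & 0x0000ff00u) << 8)
	 | ((x & 0x00ff0000u) >> 8)
	 | ((x & 0xff000000u) >> 24);
}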
434 /* If MODE is set, adjust bitfield memory MEM so that it points to the
435 first unit of mode MODE that contains a bitfield of size BITSIZE at
436 bit position BITNUM. If MODE is not set, return a BLKmode reference
437 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
438 of the field within the new memory. */
440 static rtx
441 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
442 unsigned HOST_WIDE_INT bitsize,
443 unsigned HOST_WIDE_INT bitnum,
444 unsigned HOST_WIDE_INT *new_bitnum)
446 scalar_int_mode imode;
447 if (mode.exists (&imode))
449 unsigned int unit = GET_MODE_BITSIZE (imode);
450 *new_bitnum = bitnum % unit;
451 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
452 return adjust_bitfield_address (mem, imode, offset);
454 else
456 *new_bitnum = bitnum % BITS_PER_UNIT;
457 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
458 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
459 / BITS_PER_UNIT);
460 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
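
/* Illustrative sketch, not part of expmed.cc: the offset arithmetic that
   narrow_bit_field_mem performs, in plain integers and assuming 8-bit
   storage units.  With a 32-bit access unit and bitnum = 75, the narrowed
   reference starts (75 - 75 % 32) / 8 = 8 bytes into the object and the
   field begins at bit 75 % 32 = 11 within that unit.  The struct and
   helper names are made up for this example.  */

struct example_narrowed_ref
{
  unsigned long byte_offset;	/* First storage unit containing the field.  */
  unsigned long bit_in_unit;	/* Bit position of the field within it.  */
};

static struct example_narrowed_ref
example_narrow_bit_field (unsigned long bitnum, unsigned int unit_bits)
{
  struct example_narrowed_ref r;
  r.bit_in_unit = bitnum % unit_bits;
  r.byte_offset = (bitnum - r.bit_in_unit) / 8;
  return r;
}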
464 /* The caller wants to perform insertion or extraction PATTERN on a
465 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
466 BITREGION_START and BITREGION_END are as for store_bit_field
467 and FIELDMODE is the natural mode of the field.
469 Search for a mode that is compatible with the memory access
470 restrictions and (where applicable) with a register insertion or
471 extraction. Return the new memory on success, storing the adjusted
472 bit position in *NEW_BITNUM. Return null otherwise. */
474 static rtx
475 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
476 rtx op0, HOST_WIDE_INT bitsize,
477 HOST_WIDE_INT bitnum,
478 poly_uint64 bitregion_start,
479 poly_uint64 bitregion_end,
480 machine_mode fieldmode,
481 unsigned HOST_WIDE_INT *new_bitnum)
483 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
484 bitregion_end, MEM_ALIGN (op0),
485 MEM_VOLATILE_P (op0));
486 scalar_int_mode best_mode;
487 if (iter.next_mode (&best_mode))
489 /* We can use a memory in BEST_MODE. See whether this is true for
490 any wider modes. All other things being equal, we prefer to
491 use the widest mode possible because it tends to expose more
492 CSE opportunities. */
493 if (!iter.prefer_smaller_modes ())
495 /* Limit the search to the mode required by the corresponding
496 register insertion or extraction instruction, if any. */
497 scalar_int_mode limit_mode = word_mode;
498 extraction_insn insn;
499 if (get_best_reg_extraction_insn (&insn, pattern,
500 GET_MODE_BITSIZE (best_mode),
501 fieldmode))
502 limit_mode = insn.field_mode;
504 scalar_int_mode wider_mode;
505 while (iter.next_mode (&wider_mode)
506 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
507 best_mode = wider_mode;
509 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
510 new_bitnum);
512 return NULL_RTX;
515 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
516 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
517 offset is then BITNUM / BITS_PER_UNIT. */
519 static bool
520 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
521 machine_mode struct_mode)
523 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
524 if (BYTES_BIG_ENDIAN)
525 return (multiple_p (bitnum, BITS_PER_UNIT)
526 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
527 || multiple_p (bitnum + bitsize,
528 regsize * BITS_PER_UNIT)));
529 else
530 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
533 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
534 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
535 Return false if the access would touch memory outside the range
536 BITREGION_START to BITREGION_END for conformance to the C++ memory
537 model. */
539 static bool
540 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
541 unsigned HOST_WIDE_INT bitnum,
542 scalar_int_mode fieldmode,
543 poly_uint64 bitregion_start,
544 poly_uint64 bitregion_end)
546 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
548 /* -fstrict-volatile-bitfields must be enabled and we must have a
549 volatile MEM. */
550 if (!MEM_P (op0)
551 || !MEM_VOLATILE_P (op0)
552 || flag_strict_volatile_bitfields <= 0)
553 return false;
555 /* The bit size must not be larger than the field mode, and
556 the field mode must not be larger than a word. */
557 if (bitsize > modesize || modesize > BITS_PER_WORD)
558 return false;
560 /* Check for cases of unaligned fields that must be split. */
561 if (bitnum % modesize + bitsize > modesize)
562 return false;
564 /* The memory must be sufficiently aligned for a MODESIZE access.
 565 This condition guarantees that the memory access will not
566 touch anything after the end of the structure. */
567 if (MEM_ALIGN (op0) < modesize)
568 return false;
570 /* Check for cases where the C++ memory model applies. */
571 if (maybe_ne (bitregion_end, 0U)
572 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
573 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
574 bitregion_end)))
575 return false;
577 return true;
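
/* Illustrative sketch, not part of expmed.cc: the "unaligned field must be
   split" test above with concrete numbers.  For a 32-bit field mode, an
   8-bit field starting at bit 28 fails the check because
   28 % 32 + 8 = 36 > 32: the field crosses a unit boundary and a single
   MODESIZE access cannot cover it.  The helper name is made up for this
   example.  */

static bool
example_field_fits_in_one_unit (unsigned long bitnum, unsigned long bitsize,
				unsigned int modesize)
{
  return bitnum % modesize + bitsize <= modesize;
}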
580 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
581 bit number BITNUM can be treated as a simple value of mode MODE.
582 Store the byte offset in *BYTENUM if so. */
584 static bool
585 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
586 machine_mode mode, poly_uint64 *bytenum)
588 return (MEM_P (op0)
589 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
590 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
591 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
592 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
593 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
596 /* Try to use instruction INSV to store VALUE into a field of OP0.
597 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
598 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
599 are as for store_bit_field. */
601 static bool
602 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
603 opt_scalar_int_mode op0_mode,
604 unsigned HOST_WIDE_INT bitsize,
605 unsigned HOST_WIDE_INT bitnum,
606 rtx value, scalar_int_mode value_mode)
608 class expand_operand ops[4];
609 rtx value1;
610 rtx xop0 = op0;
611 rtx_insn *last = get_last_insn ();
612 bool copy_back = false;
614 scalar_int_mode op_mode = insv->field_mode;
615 unsigned int unit = GET_MODE_BITSIZE (op_mode);
616 if (bitsize == 0 || bitsize > unit)
617 return false;
619 if (MEM_P (xop0))
620 /* Get a reference to the first byte of the field. */
621 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
622 &bitnum);
623 else
625 /* Convert from counting within OP0 to counting in OP_MODE. */
626 if (BYTES_BIG_ENDIAN)
627 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
629 /* If xop0 is a register, we need it in OP_MODE
630 to make it acceptable to the format of insv. */
631 if (GET_CODE (xop0) == SUBREG)
633 /* If such a SUBREG can't be created, give up. */
634 if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
635 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
636 return false;
637 /* We can't just change the mode, because this might clobber op0,
638 and we will need the original value of op0 if insv fails. */
639 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
640 SUBREG_BYTE (xop0));
642 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
643 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
646 /* If the destination is a paradoxical subreg such that we need a
647 truncate to the inner mode, perform the insertion on a temporary and
648 truncate the result to the original destination. Note that we can't
649 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
650 X) 0)) is (reg:N X). */
651 if (GET_CODE (xop0) == SUBREG
652 && REG_P (SUBREG_REG (xop0))
653 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
654 op_mode))
656 rtx tem = gen_reg_rtx (op_mode);
657 emit_move_insn (tem, xop0);
658 xop0 = tem;
659 copy_back = true;
 662 /* There is a similar overflow check at the start of store_bit_field_1,
 663 but it only covers the case where the field lies completely outside
 664 the register.  The field can also lie partially in the register, and
 665 in that case BITSIZE must be adjusted for the partial overflow.
 666 Without this fix, pr48335-2.c would break on big-endian targets that
 667 have a bit insert instruction, such as arm and aarch64. */
669 if (bitsize + bitnum > unit && bitnum < unit)
671 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
672 "destination object, data truncated into %wu-bit",
673 bitsize, unit - bitnum);
674 bitsize = unit - bitnum;
677 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
678 "backwards" from the size of the unit we are inserting into.
679 Otherwise, we count bits from the most significant on a
680 BYTES/BITS_BIG_ENDIAN machine. */
682 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
683 bitnum = unit - bitsize - bitnum;
685 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
686 value1 = value;
687 if (value_mode != op_mode)
689 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
691 rtx tmp;
692 /* Optimization: Don't bother really extending VALUE
693 if it has all the bits we will actually use. However,
694 if we must narrow it, be sure we do it correctly. */
696 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
698 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
699 if (! tmp)
700 tmp = simplify_gen_subreg (op_mode,
701 force_reg (value_mode, value1),
702 value_mode, 0);
704 else
706 tmp = gen_lowpart_if_possible (op_mode, value1);
707 if (! tmp)
708 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
710 value1 = tmp;
712 else if (CONST_INT_P (value))
713 value1 = gen_int_mode (INTVAL (value), op_mode);
714 else
 715 /* The parse phase is supposed to make VALUE's data type
716 match that of the component reference, which is a type
717 at least as wide as the field; so VALUE should have
718 a mode that corresponds to that type. */
719 gcc_assert (CONSTANT_P (value));
722 create_fixed_operand (&ops[0], xop0);
723 create_integer_operand (&ops[1], bitsize);
724 create_integer_operand (&ops[2], bitnum);
725 create_input_operand (&ops[3], value1, op_mode);
726 if (maybe_expand_insn (insv->icode, 4, ops))
728 if (copy_back)
729 convert_move (op0, xop0, true);
730 return true;
732 delete_insns_since (last);
733 return false;
736 /* A subroutine of store_bit_field, with the same arguments. Return true
737 if the operation could be implemented.
739 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
740 no other way of implementing the operation. If FALLBACK_P is false,
741 return false instead. */
743 static bool
744 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
745 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
746 machine_mode fieldmode,
747 rtx value, bool reverse, bool fallback_p)
749 rtx op0 = str_rtx;
751 while (GET_CODE (op0) == SUBREG)
753 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
754 op0 = SUBREG_REG (op0);
757 /* No action is needed if the target is a register and if the field
758 lies completely outside that register. This can occur if the source
759 code contains an out-of-bounds access to a small array. */
760 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
761 return true;
763 /* Use vec_set patterns for inserting parts of vectors whenever
764 available. */
765 machine_mode outermode = GET_MODE (op0);
766 scalar_mode innermode = GET_MODE_INNER (outermode);
767 poly_uint64 pos;
768 if (VECTOR_MODE_P (outermode)
769 && !MEM_P (op0)
770 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
771 && fieldmode == innermode
772 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
773 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
775 class expand_operand ops[3];
776 enum insn_code icode = optab_handler (vec_set_optab, outermode);
778 create_fixed_operand (&ops[0], op0);
779 create_input_operand (&ops[1], value, innermode);
780 create_integer_operand (&ops[2], pos);
781 if (maybe_expand_insn (icode, 3, ops))
782 return true;
785 /* If the target is a register, overwriting the entire object, or storing
786 a full-word or multi-word field can be done with just a SUBREG. */
787 if (!MEM_P (op0)
788 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
790 /* Use the subreg machinery either to narrow OP0 to the required
791 words or to cope with mode punning between equal-sized modes.
792 In the latter case, use subreg on the rhs side, not lhs. */
793 rtx sub;
794 HOST_WIDE_INT regnum;
795 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
796 if (known_eq (bitnum, 0U)
797 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
799 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
800 if (sub)
802 if (reverse)
803 sub = flip_storage_order (GET_MODE (op0), sub);
804 emit_move_insn (op0, sub);
805 return true;
808 else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
809 && multiple_p (bitsize, regsize * BITS_PER_UNIT)
810 && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
812 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
813 regnum * regsize);
814 if (sub)
816 if (reverse)
817 value = flip_storage_order (fieldmode, value);
818 emit_move_insn (sub, value);
819 return true;
824 /* If the target is memory, storing any naturally aligned field can be
825 done with a simple store. For targets that support fast unaligned
826 memory, any naturally sized, unit aligned field can be done directly. */
827 poly_uint64 bytenum;
828 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
830 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
831 if (reverse)
832 value = flip_storage_order (fieldmode, value);
833 emit_move_insn (op0, value);
834 return true;
837 /* It's possible we'll need to handle other cases here for
838 polynomial bitnum and bitsize. */
840 /* From here on we need to be looking at a fixed-size insertion. */
841 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
842 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
844 /* Make sure we are playing with integral modes. Pun with subregs
845 if we aren't. This must come after the entire register case above,
846 since that case is valid for any mode. The following cases are only
847 valid for integral modes. */
848 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
849 scalar_int_mode imode;
850 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
852 if (MEM_P (op0))
853 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
854 0, MEM_SIZE (op0));
855 else if (!op0_mode.exists ())
857 if (ibitnum == 0
858 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
859 && MEM_P (value)
860 && !reverse)
862 value = adjust_address (value, GET_MODE (op0), 0);
863 emit_move_insn (op0, value);
864 return true;
866 if (!fallback_p)
867 return false;
868 rtx temp = assign_stack_temp (GET_MODE (op0),
869 GET_MODE_SIZE (GET_MODE (op0)));
870 emit_move_insn (temp, op0);
871 store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
872 reverse, fallback_p);
873 emit_move_insn (op0, temp);
874 return true;
876 else
877 op0 = gen_lowpart (op0_mode.require (), op0);
880 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
881 bitregion_start, bitregion_end,
882 fieldmode, value, reverse, fallback_p);
885 /* Subroutine of store_bit_field_1, with the same arguments, except
886 that BITSIZE and BITNUM are constant. Handle cases specific to
887 integral modes. If OP0_MODE is defined, it is the mode of OP0,
888 otherwise OP0 is a BLKmode MEM. */
890 static bool
891 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
892 unsigned HOST_WIDE_INT bitsize,
893 unsigned HOST_WIDE_INT bitnum,
894 poly_uint64 bitregion_start,
895 poly_uint64 bitregion_end,
896 machine_mode fieldmode,
897 rtx value, bool reverse, bool fallback_p)
899 /* Storing an lsb-aligned field in a register
900 can be done with a movstrict instruction. */
902 if (!MEM_P (op0)
903 && !reverse
904 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
905 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
906 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
908 class expand_operand ops[2];
909 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
910 rtx arg0 = op0;
911 unsigned HOST_WIDE_INT subreg_off;
913 if (GET_CODE (arg0) == SUBREG)
915 /* Else we've got some float mode source being extracted into
916 a different float mode destination -- this combination of
917 subregs results in Severe Tire Damage. */
918 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
919 || GET_MODE_CLASS (fieldmode) == MODE_INT
920 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
921 arg0 = SUBREG_REG (arg0);
924 subreg_off = bitnum / BITS_PER_UNIT;
925 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
926 /* STRICT_LOW_PART must have a non-paradoxical subreg as
927 operand. */
928 && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
930 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
932 create_fixed_operand (&ops[0], arg0);
933 /* Shrink the source operand to FIELDMODE. */
934 create_convert_operand_to (&ops[1], value, fieldmode, false);
935 if (maybe_expand_insn (icode, 2, ops))
936 return true;
940 /* Handle fields bigger than a word. */
942 if (bitsize > BITS_PER_WORD)
944 /* Here we transfer the words of the field
945 in the order least significant first.
946 This is because the most significant word is the one which may
947 be less than full.
948 However, only do that if the value is not BLKmode. */
950 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
951 const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
952 rtx_insn *last;
954 /* This is the mode we must force value to, so that there will be enough
955 subwords to extract. Note that fieldmode will often (always?) be
956 VOIDmode, because that is what store_field uses to indicate that this
957 is a bit field, but passing VOIDmode to operand_subword_force
958 is not allowed.
960 The mode must be fixed-size, since insertions into variable-sized
961 objects are meant to be handled before calling this function. */
962 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
963 if (value_mode == VOIDmode)
964 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
966 last = get_last_insn ();
967 for (int i = 0; i < nwords; i++)
969 /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
970 except maybe for the last iteration. */
971 const unsigned HOST_WIDE_INT new_bitsize
972 = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
973 /* Bit offset from the starting bit number in the target. */
974 const unsigned int bit_offset
975 = backwards ^ reverse
976 ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
977 : i * BITS_PER_WORD;
978 /* Starting word number in the value. */
979 const unsigned int wordnum
980 = backwards
981 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
982 : i;
983 /* The chunk of the value in word_mode. We use bit-field extraction
984 in BLKmode to handle unaligned memory references and to shift the
985 last chunk right on big-endian machines if need be. */
986 rtx value_word
987 = fieldmode == BLKmode
988 ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
989 1, NULL_RTX, word_mode, word_mode, false,
990 NULL)
991 : operand_subword_force (value, wordnum, value_mode);
993 if (!store_bit_field_1 (op0, new_bitsize,
994 bitnum + bit_offset,
995 bitregion_start, bitregion_end,
996 word_mode,
997 value_word, reverse, fallback_p))
999 delete_insns_since (last);
1000 return false;
1003 return true;
1006 /* If VALUE has a floating-point or complex mode, access it as an
1007 integer of the corresponding size. This can occur on a machine
1008 with 64 bit registers that uses SFmode for float. It can also
1009 occur for unaligned float or complex fields. */
1010 rtx orig_value = value;
1011 scalar_int_mode value_mode;
1012 if (GET_MODE (value) == VOIDmode)
1013 /* By this point we've dealt with values that are bigger than a word,
1014 so word_mode is a conservatively correct choice. */
1015 value_mode = word_mode;
1016 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1018 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1019 value = gen_reg_rtx (value_mode);
1020 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1023 /* If OP0 is a multi-word register, narrow it to the affected word.
1024 If the region spans two words, defer to store_split_bit_field.
1025 Don't do this if op0 is a single hard register wider than word
1026 such as a float or vector register. */
1027 if (!MEM_P (op0)
1028 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1029 && (!REG_P (op0)
1030 || !HARD_REGISTER_P (op0)
1031 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1033 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1035 if (!fallback_p)
1036 return false;
1038 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1039 bitregion_start, bitregion_end,
1040 value, value_mode, reverse);
1041 return true;
1043 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1044 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1045 gcc_assert (op0);
1046 op0_mode = word_mode;
1047 bitnum %= BITS_PER_WORD;
1050 /* From here on we can assume that the field to be stored in fits
1051 within a word. If the destination is a register, it too fits
1052 in a word. */
1054 extraction_insn insv;
1055 if (!MEM_P (op0)
1056 && !reverse
1057 && get_best_reg_extraction_insn (&insv, EP_insv,
1058 GET_MODE_BITSIZE (op0_mode.require ()),
1059 fieldmode)
1060 && store_bit_field_using_insv (&insv, op0, op0_mode,
1061 bitsize, bitnum, value, value_mode))
1062 return true;
1064 /* If OP0 is a memory, try copying it to a register and seeing if a
1065 cheap register alternative is available. */
1066 if (MEM_P (op0) && !reverse)
1068 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1069 fieldmode)
1070 && store_bit_field_using_insv (&insv, op0, op0_mode,
1071 bitsize, bitnum, value, value_mode))
1072 return true;
1074 rtx_insn *last = get_last_insn ();
1076 /* Try loading part of OP0 into a register, inserting the bitfield
1077 into that, and then copying the result back to OP0. */
1078 unsigned HOST_WIDE_INT bitpos;
1079 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1080 bitregion_start, bitregion_end,
1081 fieldmode, &bitpos);
1082 if (xop0)
1084 rtx tempreg = copy_to_reg (xop0);
1085 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1086 bitregion_start, bitregion_end,
1087 fieldmode, orig_value, reverse, false))
1089 emit_move_insn (xop0, tempreg);
1090 return true;
1092 delete_insns_since (last);
1096 if (!fallback_p)
1097 return false;
1099 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1100 bitregion_end, value, value_mode, reverse);
1101 return true;
1104 /* Generate code to store value from rtx VALUE
1105 into a bit-field within structure STR_RTX
1106 containing BITSIZE bits starting at bit BITNUM.
1108 BITREGION_START is bitpos of the first bitfield in this region.
1109 BITREGION_END is the bitpos of the ending bitfield in this region.
1110 These two fields are 0, if the C++ memory model does not apply,
1111 or we are not interested in keeping track of bitfield regions.
1113 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1115 If REVERSE is true, the store is to be done in reverse order. */
1117 void
1118 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1119 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1120 machine_mode fieldmode,
1121 rtx value, bool reverse)
1123 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1124 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1125 scalar_int_mode int_mode;
1126 if (bitsize.is_constant (&ibitsize)
1127 && bitnum.is_constant (&ibitnum)
1128 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1129 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1130 bitregion_start, bitregion_end))
 1132 /* Storing a full word can be done with a simple store.
1133 We know here that the field can be accessed with one single
1134 instruction. For targets that support unaligned memory,
1135 an unaligned access may be necessary. */
1136 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1138 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1139 ibitnum / BITS_PER_UNIT);
1140 if (reverse)
1141 value = flip_storage_order (int_mode, value);
1142 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1143 emit_move_insn (str_rtx, value);
1145 else
1147 rtx temp;
1149 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1150 ibitnum, &ibitnum);
1151 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1152 temp = copy_to_reg (str_rtx);
1153 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1154 int_mode, value, reverse, true))
1155 gcc_unreachable ();
1157 emit_move_insn (str_rtx, temp);
1160 return;
1163 /* Under the C++0x memory model, we must not touch bits outside the
1164 bit region. Adjust the address to start at the beginning of the
1165 bit region. */
1166 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1168 scalar_int_mode best_mode;
1169 machine_mode addr_mode = VOIDmode;
1171 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1172 bitnum -= bitregion_start;
1173 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1174 bitregion_end -= bitregion_start;
1175 bitregion_start = 0;
1176 if (bitsize.is_constant (&ibitsize)
1177 && bitnum.is_constant (&ibitnum)
1178 && get_best_mode (ibitsize, ibitnum,
1179 bitregion_start, bitregion_end,
1180 MEM_ALIGN (str_rtx), INT_MAX,
1181 MEM_VOLATILE_P (str_rtx), &best_mode))
1182 addr_mode = best_mode;
1183 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1184 offset, size);
1187 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1188 bitregion_start, bitregion_end,
1189 fieldmode, value, reverse, true))
1190 gcc_unreachable ();
1193 /* Use shifts and boolean operations to store VALUE into a bit field of
1194 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1195 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1196 the mode of VALUE.
1198 If REVERSE is true, the store is to be done in reverse order. */
1200 static void
1201 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1202 unsigned HOST_WIDE_INT bitsize,
1203 unsigned HOST_WIDE_INT bitnum,
1204 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1205 rtx value, scalar_int_mode value_mode, bool reverse)
1207 /* There is a case not handled here:
1208 a structure with a known alignment of just a halfword
1209 and a field split across two aligned halfwords within the structure.
1210 Or likewise a structure with a known alignment of just a byte
1211 and a field split across two bytes.
1212 Such cases are not supposed to be able to occur. */
1214 scalar_int_mode best_mode;
1215 if (MEM_P (op0))
1217 unsigned int max_bitsize = BITS_PER_WORD;
1218 scalar_int_mode imode;
1219 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1220 max_bitsize = GET_MODE_BITSIZE (imode);
1222 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1223 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1224 &best_mode))
1226 /* The only way this should occur is if the field spans word
1227 boundaries. */
1228 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1229 bitregion_start, bitregion_end,
1230 value, value_mode, reverse);
1231 return;
1234 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1236 else
1237 best_mode = op0_mode.require ();
1239 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1240 value, value_mode, reverse);
1243 /* Helper function for store_fixed_bit_field, stores
1244 the bit field always using MODE, which is the mode of OP0. The other
1245 arguments are as for store_fixed_bit_field. */
1247 static void
1248 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1249 unsigned HOST_WIDE_INT bitsize,
1250 unsigned HOST_WIDE_INT bitnum,
1251 rtx value, scalar_int_mode value_mode, bool reverse)
1253 rtx temp;
1254 int all_zero = 0;
1255 int all_one = 0;
1257 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1258 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1260 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1261 /* BITNUM is the distance between our msb
1262 and that of the containing datum.
1263 Convert it to the distance from the lsb. */
1264 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1266 /* Now BITNUM is always the distance between our lsb
1267 and that of OP0. */
1269 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1270 we must first convert its mode to MODE. */
1272 if (CONST_INT_P (value))
1274 unsigned HOST_WIDE_INT v = UINTVAL (value);
1276 if (bitsize < HOST_BITS_PER_WIDE_INT)
1277 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1279 if (v == 0)
1280 all_zero = 1;
1281 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1282 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1283 || (bitsize == HOST_BITS_PER_WIDE_INT
1284 && v == HOST_WIDE_INT_M1U))
1285 all_one = 1;
1287 value = lshift_value (mode, v, bitnum);
1289 else
1291 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1292 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1294 if (value_mode != mode)
1295 value = convert_to_mode (mode, value, 1);
1297 if (must_and)
1298 value = expand_binop (mode, and_optab, value,
1299 mask_rtx (mode, 0, bitsize, 0),
1300 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1301 if (bitnum > 0)
1302 value = expand_shift (LSHIFT_EXPR, mode, value,
1303 bitnum, NULL_RTX, 1);
1306 if (reverse)
1307 value = flip_storage_order (mode, value);
1309 /* Now clear the chosen bits in OP0,
1310 except that if VALUE is -1 we need not bother. */
1311 /* We keep the intermediates in registers to allow CSE to combine
1312 consecutive bitfield assignments. */
1314 temp = force_reg (mode, op0);
1316 if (! all_one)
1318 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1319 if (reverse)
1320 mask = flip_storage_order (mode, mask);
1321 temp = expand_binop (mode, and_optab, temp, mask,
1322 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1323 temp = force_reg (mode, temp);
1326 /* Now logical-or VALUE into OP0, unless it is zero. */
1328 if (! all_zero)
1330 temp = expand_binop (mode, ior_optab, temp, value,
1331 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1332 temp = force_reg (mode, temp);
1335 if (op0 != temp)
1337 op0 = copy_rtx (op0);
1338 emit_move_insn (op0, temp);
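
/* Illustrative sketch, not part of expmed.cc: the read-modify-write that
   store_fixed_bit_field_1 expands, written out for a 32-bit word with
   BITNUM counting from the least significant bit.  The real code above
   additionally handles constant values, reverse storage order and the
   all-zero / all-one shortcuts.  The helper name is made up for this
   example.  */

static unsigned int
example_store_fixed_bit_field (unsigned int word, unsigned int value,
			       unsigned int bitsize, unsigned int bitnum)
{
  unsigned int field_mask = bitsize < 32 ? (1u << bitsize) - 1 : ~0u;
  /* Clear the destination bits...  */
  word &= ~(field_mask << bitnum);
  /* ...then OR in the masked and shifted new value.  */
  word |= (value & field_mask) << bitnum;
  return word;
}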
1342 /* Store a bit field that is split across multiple accessible memory objects.
1344 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1345 BITSIZE is the field width; BITPOS the position of its first bit
1346 (within the word).
1347 VALUE is the value to store, which has mode VALUE_MODE.
1348 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1349 a BLKmode MEM.
1351 If REVERSE is true, the store is to be done in reverse order.
1353 This does not yet handle fields wider than BITS_PER_WORD. */
1355 static void
1356 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1357 unsigned HOST_WIDE_INT bitsize,
1358 unsigned HOST_WIDE_INT bitpos,
1359 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1360 rtx value, scalar_int_mode value_mode, bool reverse)
1362 unsigned int unit, total_bits, bitsdone = 0;
 1364 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1365 much at a time. */
1366 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1367 unit = BITS_PER_WORD;
1368 else
1369 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1371 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1372 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1373 again, and we will mutually recurse forever. */
1374 if (MEM_P (op0) && op0_mode.exists ())
1375 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1377 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1378 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1379 that VALUE might be a floating-point constant. */
1380 if (CONSTANT_P (value) && !CONST_INT_P (value))
1382 rtx word = gen_lowpart_common (word_mode, value);
1384 if (word && (value != word))
1385 value = word;
1386 else
1387 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1388 value_mode = word_mode;
1391 total_bits = GET_MODE_BITSIZE (value_mode);
1393 while (bitsdone < bitsize)
1395 unsigned HOST_WIDE_INT thissize;
1396 unsigned HOST_WIDE_INT thispos;
1397 unsigned HOST_WIDE_INT offset;
1398 rtx part;
1400 offset = (bitpos + bitsdone) / unit;
1401 thispos = (bitpos + bitsdone) % unit;
 1403 /* When the region of bytes we can touch is restricted, decrease
1404 UNIT close to the end of the region as needed. If op0 is a REG
1405 or SUBREG of REG, don't do this, as there can't be data races
1406 on a register and we can expand shorter code in some cases. */
1407 if (maybe_ne (bitregion_end, 0U)
1408 && unit > BITS_PER_UNIT
1409 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1410 && !REG_P (op0)
1411 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1413 unit = unit / 2;
1414 continue;
1417 /* THISSIZE must not overrun a word boundary. Otherwise,
1418 store_fixed_bit_field will call us again, and we will mutually
1419 recurse forever. */
1420 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1421 thissize = MIN (thissize, unit - thispos);
1423 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1425 /* Fetch successively less significant portions. */
1426 if (CONST_INT_P (value))
1427 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1428 >> (bitsize - bitsdone - thissize))
1429 & ((HOST_WIDE_INT_1 << thissize) - 1));
1430 /* Likewise, but the source is little-endian. */
1431 else if (reverse)
1432 part = extract_fixed_bit_field (word_mode, value, value_mode,
1433 thissize,
1434 bitsize - bitsdone - thissize,
1435 NULL_RTX, 1, false);
1436 else
1437 /* The args are chosen so that the last part includes the
1438 lsb. Give extract_bit_field the value it needs (with
1439 endianness compensation) to fetch the piece we want. */
1440 part = extract_fixed_bit_field (word_mode, value, value_mode,
1441 thissize,
1442 total_bits - bitsize + bitsdone,
1443 NULL_RTX, 1, false);
1445 else
1447 /* Fetch successively more significant portions. */
1448 if (CONST_INT_P (value))
1449 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1450 >> bitsdone)
1451 & ((HOST_WIDE_INT_1 << thissize) - 1));
1452 /* Likewise, but the source is big-endian. */
1453 else if (reverse)
1454 part = extract_fixed_bit_field (word_mode, value, value_mode,
1455 thissize,
1456 total_bits - bitsdone - thissize,
1457 NULL_RTX, 1, false);
1458 else
1459 part = extract_fixed_bit_field (word_mode, value, value_mode,
1460 thissize, bitsdone, NULL_RTX,
1461 1, false);
1464 /* If OP0 is a register, then handle OFFSET here. */
1465 rtx op0_piece = op0;
1466 opt_scalar_int_mode op0_piece_mode = op0_mode;
1467 if (SUBREG_P (op0) || REG_P (op0))
1469 scalar_int_mode imode;
1470 if (op0_mode.exists (&imode)
1471 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1473 if (offset)
1474 op0_piece = const0_rtx;
1476 else
1478 op0_piece = operand_subword_force (op0,
1479 offset * unit / BITS_PER_WORD,
1480 GET_MODE (op0));
1481 op0_piece_mode = word_mode;
1483 offset &= BITS_PER_WORD / unit - 1;
 1486 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1487 it is just an out-of-bounds access. Ignore it. */
1488 if (op0_piece != const0_rtx)
1489 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1490 offset * unit + thispos, bitregion_start,
1491 bitregion_end, part, word_mode, reverse);
1492 bitsdone += thissize;
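
/* Illustrative sketch, not part of expmed.cc: splitting a store the way
   store_split_bit_field does, for the simple case of an array of 32-bit
   words, no restricted bit region and BITPOS counting from the least
   significant bit.  A 12-bit field at bit position 26 is stored as 6 bits
   into word 0 and 6 bits into word 1.  The helper name is made up and it
   reuses the example_store_fixed_bit_field sketch above.  */

static void
example_store_split_bit_field (unsigned int *words, unsigned int value,
			       unsigned int bitsize, unsigned int bitpos)
{
  unsigned int bitsdone = 0;
  while (bitsdone < bitsize)
    {
      unsigned int offset = (bitpos + bitsdone) / 32;
      unsigned int thispos = (bitpos + bitsdone) % 32;
      /* Never let one chunk cross a word boundary.  */
      unsigned int thissize = bitsize - bitsdone;
      if (thissize > 32 - thispos)
	thissize = 32 - thispos;
      /* Fetch successively more significant portions of VALUE.  */
      unsigned int part = (value >> bitsdone)
			  & (thissize < 32 ? (1u << thissize) - 1 : ~0u);
      words[offset] = example_store_fixed_bit_field (words[offset], part,
						     thissize, thispos);
      bitsdone += thissize;
    }
}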
1496 /* A subroutine of extract_bit_field_1 that converts return value X
1497 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1498 to extract_bit_field. */
1500 static rtx
1501 convert_extracted_bit_field (rtx x, machine_mode mode,
1502 machine_mode tmode, bool unsignedp)
1504 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1505 return x;
1507 /* If the x mode is not a scalar integral, first convert to the
1508 integer mode of that size and then access it as a floating-point
1509 value via a SUBREG. */
1510 if (!SCALAR_INT_MODE_P (tmode))
1512 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1513 x = convert_to_mode (int_mode, x, unsignedp);
1514 x = force_reg (int_mode, x);
1515 return gen_lowpart (tmode, x);
1518 return convert_to_mode (tmode, x, unsignedp);
1521 /* Try to use an ext(z)v pattern to extract a field from OP0.
1522 Return the extracted value on success, otherwise return null.
1523 EXTV describes the extraction instruction to use. If OP0_MODE
1524 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1525 The other arguments are as for extract_bit_field. */
1527 static rtx
1528 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1529 opt_scalar_int_mode op0_mode,
1530 unsigned HOST_WIDE_INT bitsize,
1531 unsigned HOST_WIDE_INT bitnum,
1532 int unsignedp, rtx target,
1533 machine_mode mode, machine_mode tmode)
1535 class expand_operand ops[4];
1536 rtx spec_target = target;
1537 rtx spec_target_subreg = 0;
1538 scalar_int_mode ext_mode = extv->field_mode;
1539 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1541 if (bitsize == 0 || unit < bitsize)
1542 return NULL_RTX;
1544 if (MEM_P (op0))
1545 /* Get a reference to the first byte of the field. */
1546 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1547 &bitnum);
1548 else
1550 /* Convert from counting within OP0 to counting in EXT_MODE. */
1551 if (BYTES_BIG_ENDIAN)
1552 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1554 /* If op0 is a register, we need it in EXT_MODE to make it
1555 acceptable to the format of ext(z)v. */
1556 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1557 return NULL_RTX;
1558 if (REG_P (op0) && op0_mode.require () != ext_mode)
1559 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1562 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1563 "backwards" from the size of the unit we are extracting from.
1564 Otherwise, we count bits from the most significant on a
1565 BYTES/BITS_BIG_ENDIAN machine. */
1567 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1568 bitnum = unit - bitsize - bitnum;
1570 if (target == 0)
1571 target = spec_target = gen_reg_rtx (tmode);
1573 if (GET_MODE (target) != ext_mode)
1575 rtx temp;
1576 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1577 between the mode of the extraction (word_mode) and the target
1578 mode. Instead, create a temporary and use convert_move to set
1579 the target. */
1580 if (REG_P (target)
1581 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1582 && (temp = gen_lowpart_if_possible (ext_mode, target)))
1584 target = temp;
1585 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1586 spec_target_subreg = target;
1588 else
1589 target = gen_reg_rtx (ext_mode);
1592 create_output_operand (&ops[0], target, ext_mode);
1593 create_fixed_operand (&ops[1], op0);
1594 create_integer_operand (&ops[2], bitsize);
1595 create_integer_operand (&ops[3], bitnum);
1596 if (maybe_expand_insn (extv->icode, 4, ops))
1598 target = ops[0].value;
1599 if (target == spec_target)
1600 return target;
1601 if (target == spec_target_subreg)
1602 return spec_target;
1603 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1605 return NULL_RTX;
1608 /* See whether it would be valid to extract the part of OP0 with
1609 mode OP0_MODE described by BITNUM and BITSIZE into a value of
1610 mode MODE using a subreg operation.
1611 Return the subreg if so, otherwise return null. */
1613 static rtx
1614 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1615 machine_mode op0_mode,
1616 poly_uint64 bitsize, poly_uint64 bitnum)
1618 poly_uint64 bytenum;
1619 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1620 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1621 && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1622 && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1623 return simplify_gen_subreg (mode, op0, op0_mode, bytenum);
1624 return NULL_RTX;
1627 /* A subroutine of extract_bit_field, with the same arguments.
1628 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1629 if we can find no other means of implementing the operation.
 1630 If FALLBACK_P is false, return NULL instead. */
1632 static rtx
1633 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1634 int unsignedp, rtx target, machine_mode mode,
1635 machine_mode tmode, bool reverse, bool fallback_p,
1636 rtx *alt_rtl)
1638 rtx op0 = str_rtx;
1639 machine_mode mode1;
1641 if (tmode == VOIDmode)
1642 tmode = mode;
1644 while (GET_CODE (op0) == SUBREG)
1646 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1647 op0 = SUBREG_REG (op0);
1650 /* If we have an out-of-bounds access to a register, just return an
1651 uninitialized register of the required mode. This can occur if the
1652 source code contains an out-of-bounds access to a small array. */
1653 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1654 return gen_reg_rtx (tmode);
1656 if (REG_P (op0)
1657 && mode == GET_MODE (op0)
1658 && known_eq (bitnum, 0U)
1659 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1661 if (reverse)
1662 op0 = flip_storage_order (mode, op0);
1663 /* We're trying to extract a full register from itself. */
1664 return op0;
1667 /* First try to check for vector from vector extractions. */
1668 if (VECTOR_MODE_P (GET_MODE (op0))
1669 && !MEM_P (op0)
1670 && VECTOR_MODE_P (tmode)
1671 && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1672 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1674 machine_mode new_mode = GET_MODE (op0);
1675 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1677 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1678 poly_uint64 nunits;
1679 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1680 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1681 || !related_vector_mode (tmode, inner_mode,
1682 nunits).exists (&new_mode)
1683 || maybe_ne (GET_MODE_SIZE (new_mode),
1684 GET_MODE_SIZE (GET_MODE (op0))))
1685 new_mode = VOIDmode;
1687 poly_uint64 pos;
1688 if (new_mode != VOIDmode
1689 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1690 != CODE_FOR_nothing)
1691 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1693 class expand_operand ops[3];
1694 machine_mode outermode = new_mode;
1695 machine_mode innermode = tmode;
1696 enum insn_code icode
1697 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1699 if (new_mode != GET_MODE (op0))
1700 op0 = gen_lowpart (new_mode, op0);
1701 create_output_operand (&ops[0], target, innermode);
1702 ops[0].target = 1;
1703 create_input_operand (&ops[1], op0, outermode);
1704 create_integer_operand (&ops[2], pos);
1705 if (maybe_expand_insn (icode, 3, ops))
1707 if (alt_rtl && ops[0].target)
1708 *alt_rtl = target;
1709 target = ops[0].value;
1710 if (GET_MODE (target) != mode)
1711 return gen_lowpart (tmode, target);
1712 return target;
1717 /* See if we can get a better vector mode before extracting. */
1718 if (VECTOR_MODE_P (GET_MODE (op0))
1719 && !MEM_P (op0)
1720 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1722 machine_mode new_mode;
1724 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1725 new_mode = MIN_MODE_VECTOR_FLOAT;
1726 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1727 new_mode = MIN_MODE_VECTOR_FRACT;
1728 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1729 new_mode = MIN_MODE_VECTOR_UFRACT;
1730 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1731 new_mode = MIN_MODE_VECTOR_ACCUM;
1732 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1733 new_mode = MIN_MODE_VECTOR_UACCUM;
1734 else
1735 new_mode = MIN_MODE_VECTOR_INT;
1737 FOR_EACH_MODE_FROM (new_mode, new_mode)
1738 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1739 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1740 && targetm.vector_mode_supported_p (new_mode)
1741 && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1742 break;
1743 if (new_mode != VOIDmode)
1744 op0 = gen_lowpart (new_mode, op0);
1747 /* Use vec_extract patterns for extracting parts of vectors whenever
1748 available. If that fails, see whether the current modes and bitregion
1749 give a natural subreg. */
1750 machine_mode outermode = GET_MODE (op0);
1751 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1753 scalar_mode innermode = GET_MODE_INNER (outermode);
1754 enum insn_code icode
1755 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1756 poly_uint64 pos;
1757 if (icode != CODE_FOR_nothing
1758 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1759 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1761 class expand_operand ops[3];
1763 create_output_operand (&ops[0], target, innermode);
1764 ops[0].target = 1;
1765 create_input_operand (&ops[1], op0, outermode);
1766 create_integer_operand (&ops[2], pos);
1767 if (maybe_expand_insn (icode, 3, ops))
1769 if (alt_rtl && ops[0].target)
1770 *alt_rtl = target;
1771 target = ops[0].value;
1772 if (GET_MODE (target) != mode)
1773 return gen_lowpart (tmode, target);
1774 return target;
1777 /* Using subregs is useful if we're extracting one register vector
1778 from a multi-register vector. extract_bit_field_as_subreg checks
1779 for valid bitsize and bitnum, so we don't need to do that here. */
1780 if (VECTOR_MODE_P (mode))
1782 rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
1783 bitsize, bitnum);
1784 if (sub)
1785 return sub;
1789 /* Make sure we are playing with integral modes. Pun with subregs
1790 if we aren't. */
1791 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1792 scalar_int_mode imode;
1793 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1795 if (MEM_P (op0))
1796 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1797 0, MEM_SIZE (op0));
1798 else if (op0_mode.exists (&imode))
1800 op0 = gen_lowpart (imode, op0);
1802 /* If we got a SUBREG, force it into a register since we
1803 aren't going to be able to do another SUBREG on it. */
1804 if (GET_CODE (op0) == SUBREG)
1805 op0 = force_reg (imode, op0);
1807 else
1809 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1810 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1811 emit_move_insn (mem, op0);
1812 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1816 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1817 If that's wrong, the solution is to test for it and set TARGET to 0
1818 if needed. */
1820 /* Get the mode of the field to use for atomic access or subreg
1821 conversion. */
1822 if (!SCALAR_INT_MODE_P (tmode)
1823 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1824 mode1 = mode;
1825 gcc_assert (mode1 != BLKmode);
1827 /* Extraction of a full MODE1 value can be done with a subreg as long
1828 as the least significant bit of the value is the least significant
1829 bit of either OP0 or a word of OP0. */
1830 if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
1832 rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
1833 bitsize, bitnum);
1834 if (sub)
1835 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1838 /* Extraction of a full MODE1 value can be done with a load as long as
1839 the field is on a byte boundary and is sufficiently aligned. */
1840 poly_uint64 bytenum;
1841 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1843 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1844 if (reverse)
1845 op0 = flip_storage_order (mode1, op0);
1846 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1849 /* If we have a memory source and a non-constant bit offset, restrict
1850 the memory to the referenced bytes. This is a worst-case fallback
1851 but is useful for things like vector booleans. */
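/* (A non-constant BITNUM arises, for example, when the field position depends
   on a runtime vector length, as with elements of variable-length vector
   masks.)  */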
1852 if (MEM_P (op0) && !bitnum.is_constant ())
1854 bytenum = bits_to_bytes_round_down (bitnum);
1855 bitnum = num_trailing_bits (bitnum);
1856 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1857 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1858 op0_mode = opt_scalar_int_mode ();
1861 /* It's possible we'll need to handle other cases here for
1862 polynomial bitnum and bitsize. */
1864 /* From here on we need to be looking at a fixed-size extraction. */
1865 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1866 bitnum.to_constant (), unsignedp,
1867 target, mode, tmode, reverse, fallback_p);
1870 /* Subroutine of extract_bit_field_1, with the same arguments, except
1871 that BITSIZE and BITNUM are constant. Handle cases specific to
1872 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1873 otherwise OP0 is a BLKmode MEM. */
1875 static rtx
1876 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1877 unsigned HOST_WIDE_INT bitsize,
1878 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1879 rtx target, machine_mode mode, machine_mode tmode,
1880 bool reverse, bool fallback_p)
1882 /* Handle fields bigger than a word. */
1884 if (bitsize > BITS_PER_WORD)
1886 /* Here we transfer the words of the field
1887 in the order least significant first.
1888 This is because the most significant word is the one which may
1889 be less than full. */
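/* For example, with 32-bit words, a 72-bit field is extracted as three
   word-sized pieces of 32, 32 and 8 bits; only the last (most significant)
   piece is partial.  */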
1891 const bool backwards = WORDS_BIG_ENDIAN;
1892 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1893 unsigned int i;
1894 rtx_insn *last;
1896 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1897 target = gen_reg_rtx (mode);
1899 /* In case we're about to clobber a base register or something
1900 (see gcc.c-torture/execute/20040625-1.c). */
1901 if (reg_mentioned_p (target, op0))
1902 target = gen_reg_rtx (mode);
1904 /* Indicate for flow that the entire target reg is being set. */
1905 emit_clobber (target);
1907 /* The mode must be fixed-size, since extract_bit_field_1 handles
1908 extractions from variable-sized objects before calling this
1909 function. */
1910 unsigned int target_size
1911 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1912 last = get_last_insn ();
1913 for (i = 0; i < nwords; i++)
1915 /* If I is 0, use the low-order word in both field and target;
1916 if I is 1, use the next to lowest word; and so on. */
1917 /* Word number in TARGET to use. */
1918 unsigned int wordnum
1919 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1920 /* Offset from start of field in OP0. */
1921 unsigned int bit_offset = (backwards ^ reverse
1922 ? MAX ((int) bitsize - ((int) i + 1)
1923 * BITS_PER_WORD,
1925 : (int) i * BITS_PER_WORD);
1926 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1927 rtx result_part
1928 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1929 bitsize - i * BITS_PER_WORD),
1930 bitnum + bit_offset, 1, target_part,
1931 mode, word_mode, reverse, fallback_p, NULL);
1933 gcc_assert (target_part);
1934 if (!result_part)
1936 delete_insns_since (last);
1937 return NULL;
1940 if (result_part != target_part)
1941 emit_move_insn (target_part, result_part);
1944 if (unsignedp)
1946 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1947 need to be zeroed out. */
1948 if (target_size > nwords * UNITS_PER_WORD)
1950 unsigned int i, total_words;
1952 total_words = target_size / UNITS_PER_WORD;
1953 for (i = nwords; i < total_words; i++)
1954 emit_move_insn
1955 (operand_subword (target,
1956 backwards ? total_words - i - 1 : i,
1957 1, VOIDmode),
1958 const0_rtx);
1960 return target;
1963 /* Signed bit field: sign-extend with two arithmetic shifts. */
1964 target = expand_shift (LSHIFT_EXPR, mode, target,
1965 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1966 return expand_shift (RSHIFT_EXPR, mode, target,
1967 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1970 /* If OP0 is a multi-word register, narrow it to the affected word.
1971 If the region spans two words, defer to extract_split_bit_field. */
1972 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1974 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1976 if (!fallback_p)
1977 return NULL_RTX;
1978 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1979 unsignedp, reverse);
1980 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1982 /* If OP0 is a hard register, copy it to a pseudo before calling
1983 simplify_gen_subreg. */
1984 if (REG_P (op0) && HARD_REGISTER_P (op0))
1985 op0 = copy_to_reg (op0);
1986 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1987 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1988 op0_mode = word_mode;
1989 bitnum %= BITS_PER_WORD;
1992 /* From here on we know the desired field is smaller than a word.
1993 If OP0 is a register, it too fits within a word. */
1994 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1995 extraction_insn extv;
1996 if (!MEM_P (op0)
1997 && !reverse
1998 /* ??? We could limit the structure size to the part of OP0 that
1999 contains the field, with appropriate checks for endianness
2000 and TARGET_TRULY_NOOP_TRUNCATION. */
2001 && get_best_reg_extraction_insn (&extv, pattern,
2002 GET_MODE_BITSIZE (op0_mode.require ()),
2003 tmode))
2005 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2006 bitsize, bitnum,
2007 unsignedp, target, mode,
2008 tmode);
2009 if (result)
2010 return result;
2013 /* If OP0 is a memory, try copying it to a register and seeing if a
2014 cheap register alternative is available. */
2015 if (MEM_P (op0) && !reverse)
2017 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2018 tmode))
2020 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2021 bitsize, bitnum,
2022 unsignedp, target, mode,
2023 tmode);
2024 if (result)
2025 return result;
2028 rtx_insn *last = get_last_insn ();
2030 /* Try loading part of OP0 into a register and extracting the
2031 bitfield from that. */
2032 unsigned HOST_WIDE_INT bitpos;
2033 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2034 0, 0, tmode, &bitpos);
2035 if (xop0)
2037 xop0 = copy_to_reg (xop0);
2038 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2039 unsignedp, target,
2040 mode, tmode, reverse, false, NULL);
2041 if (result)
2042 return result;
2043 delete_insns_since (last);
2047 if (!fallback_p)
2048 return NULL;
2050 /* Find a correspondingly-sized integer field, so we can apply
2051 shifts and masks to it. */
2052 scalar_int_mode int_mode;
2053 if (!int_mode_for_mode (tmode).exists (&int_mode))
2054 /* If this fails, we should probably push op0 out to memory and then
2055 do a load. */
2056 int_mode = int_mode_for_mode (mode).require ();
2058 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2059 bitnum, target, unsignedp, reverse);
2061 /* Complex values must be reversed piecewise, so we need to undo the global
2062 reversal, convert to the complex mode and reverse again. */
2063 if (reverse && COMPLEX_MODE_P (tmode))
2065 target = flip_storage_order (int_mode, target);
2066 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2067 target = flip_storage_order (tmode, target);
2069 else
2070 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2072 return target;
2075 /* Generate code to extract a byte-field from STR_RTX
2076 containing BITSIZE bits, starting at BITNUM,
2077 and put it in TARGET if possible (if TARGET is nonzero).
2078 Regardless of TARGET, we return the rtx for where the value is placed.
2080 STR_RTX is the structure containing the byte (a REG or MEM).
2081 UNSIGNEDP is nonzero if this is an unsigned bit field.
2082 MODE is the natural mode of the field value once extracted.
2083 TMODE is the mode the caller would like the value to have;
2084 but the value may be returned with type MODE instead.
2086 If REVERSE is true, the extraction is to be done in reverse order.
2088 If a TARGET is specified and we can store in it at no extra cost,
2089 we do so, and return TARGET.
2090 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2091 if they are equally easy.
2093 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2094 then *ALT_RTL is set to TARGET (before legitimization). */
2097 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2098 int unsignedp, rtx target, machine_mode mode,
2099 machine_mode tmode, bool reverse, rtx *alt_rtl)
2101 machine_mode mode1;
2103 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2104 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2105 mode1 = GET_MODE (str_rtx);
2106 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2107 mode1 = GET_MODE (target);
2108 else
2109 mode1 = tmode;
2111 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2112 scalar_int_mode int_mode;
2113 if (bitsize.is_constant (&ibitsize)
2114 && bitnum.is_constant (&ibitnum)
2115 && is_a <scalar_int_mode> (mode1, &int_mode)
2116 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2117 int_mode, 0, 0))
2119 /* Extraction of a full INT_MODE value can be done with a simple load.
2120 We know here that the field can be accessed with one single
2121 instruction. For targets that support unaligned memory,
2122 an unaligned access may be necessary. */
2123 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2125 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2126 ibitnum / BITS_PER_UNIT);
2127 if (reverse)
2128 result = flip_storage_order (int_mode, result);
2129 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2130 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2133 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2134 &ibitnum);
2135 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2136 str_rtx = copy_to_reg (str_rtx);
2137 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2138 target, mode, tmode, reverse, true, alt_rtl);
2141 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2142 target, mode, tmode, reverse, true, alt_rtl);
2145 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2146 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2147 otherwise OP0 is a BLKmode MEM.
2149 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2150 If REVERSE is true, the extraction is to be done in reverse order.
2152 If TARGET is nonzero, attempts to store the value there
2153 and return TARGET, but this is not guaranteed.
2154 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2156 static rtx
2157 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2158 opt_scalar_int_mode op0_mode,
2159 unsigned HOST_WIDE_INT bitsize,
2160 unsigned HOST_WIDE_INT bitnum, rtx target,
2161 int unsignedp, bool reverse)
2163 scalar_int_mode mode;
2164 if (MEM_P (op0))
2166 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2167 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2168 /* The only way this should occur is if the field spans word
2169 boundaries. */
2170 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2171 unsignedp, reverse);
2173 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2175 else
2176 mode = op0_mode.require ();
2178 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2179 target, unsignedp, reverse);
2182 /* Helper function for extract_fixed_bit_field, extracts
2183 the bit field always using MODE, which is the mode of OP0.
2184 The other arguments are as for extract_fixed_bit_field. */
2186 static rtx
2187 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2188 unsigned HOST_WIDE_INT bitsize,
2189 unsigned HOST_WIDE_INT bitnum, rtx target,
2190 int unsignedp, bool reverse)
2192 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2193 for invalid input, such as extract equivalent of f5 from
2194 gcc.dg/pr48335-2.c. */
2196 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2197 /* BITNUM is the distance between our msb and that of OP0.
2198 Convert it to the distance from the lsb. */
2199 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2201 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2202 We have reduced the big-endian case to the little-endian case. */
2203 if (reverse)
2204 op0 = flip_storage_order (mode, op0);
2206 if (unsignedp)
2208 if (bitnum)
2210 /* If the field does not already start at the lsb,
2211 shift it so it does. */
2212 /* Maybe propagate the target for the shift. */
2213 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2214 if (tmode != mode)
2215 subtarget = 0;
2216 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2218 /* Convert the value to the desired mode. TMODE must also be a
2219 scalar integer for this conversion to make sense, since we
2220 shouldn't reinterpret the bits. */
2221 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2222 if (mode != new_mode)
2223 op0 = convert_to_mode (new_mode, op0, 1);
2225 /* Unless the msb of the field used to be the msb when we shifted,
2226 mask out the upper bits. */
2228 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2229 return expand_binop (new_mode, and_optab, op0,
2230 mask_rtx (new_mode, 0, bitsize, 0),
2231 target, 1, OPTAB_LIB_WIDEN);
2232 return op0;
2235 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2236 then arithmetic-shift its lsb to the lsb of the word. */
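/* For example, a signed 5-bit field whose lsb is bit 2 of a QImode value is
   extracted by shifting left by 1 (8 - 7) and then arithmetically right by
   3 (8 - 5), which replicates the field's sign bit into the upper bits.  */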
2237 op0 = force_reg (mode, op0);
2239 /* Find the narrowest integer mode that contains the field. */
2241 opt_scalar_int_mode mode_iter;
2242 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2243 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2244 break;
2246 mode = mode_iter.require ();
2247 op0 = convert_to_mode (mode, op0, 0);
2249 if (mode != tmode)
2250 target = 0;
2252 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2254 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2255 /* Maybe propagate the target for the shift. */
2256 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2257 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2260 return expand_shift (RSHIFT_EXPR, mode, op0,
2261 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2264 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2265 VALUE << BITPOS. */
2267 static rtx
2268 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2269 int bitpos)
2271 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2274 /* Extract a bit field that is split across two words
2275 and return an RTX for the result.
2277 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2278 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2279 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2280 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2281 a BLKmode MEM.
2283 If REVERSE is true, the extraction is to be done in reverse order. */
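/* For example, with 32-bit words, a 16-bit field starting at bit 24 is read
   as two 8-bit pieces, one from each word; the pieces are shifted into place
   and combined with IOR below.  */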
2285 static rtx
2286 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2287 unsigned HOST_WIDE_INT bitsize,
2288 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2289 bool reverse)
2291 unsigned int unit;
2292 unsigned int bitsdone = 0;
2293 rtx result = NULL_RTX;
2294 int first = 1;
2296 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2297 much at a time. */
2298 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2299 unit = BITS_PER_WORD;
2300 else
2301 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2303 while (bitsdone < bitsize)
2305 unsigned HOST_WIDE_INT thissize;
2306 rtx part;
2307 unsigned HOST_WIDE_INT thispos;
2308 unsigned HOST_WIDE_INT offset;
2310 offset = (bitpos + bitsdone) / unit;
2311 thispos = (bitpos + bitsdone) % unit;
2313 /* THISSIZE must not overrun a word boundary. Otherwise,
2314 extract_fixed_bit_field will call us again, and we will mutually
2315 recurse forever. */
2316 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2317 thissize = MIN (thissize, unit - thispos);
2319 /* If OP0 is a register, then handle OFFSET here. */
2320 rtx op0_piece = op0;
2321 opt_scalar_int_mode op0_piece_mode = op0_mode;
2322 if (SUBREG_P (op0) || REG_P (op0))
2324 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2325 op0_piece_mode = word_mode;
2326 offset = 0;
2329 /* Extract the parts in bit-counting order,
2330 whose meaning is determined by BYTES_PER_UNIT.
2331 OFFSET is in UNITs, and UNIT is in bits. */
2332 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2333 thissize, offset * unit + thispos,
2334 0, 1, reverse);
2335 bitsdone += thissize;
2337 /* Shift this part into place for the result. */
2338 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2340 if (bitsize != bitsdone)
2341 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2342 bitsize - bitsdone, 0, 1);
2344 else
2346 if (bitsdone != thissize)
2347 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2348 bitsdone - thissize, 0, 1);
2351 if (first)
2352 result = part;
2353 else
2354 /* Combine the parts with bitwise or. This works
2355 because we extracted each part as an unsigned bit field. */
2356 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2357 OPTAB_LIB_WIDEN);
2359 first = 0;
2362 /* Unsigned bit field: we are done. */
2363 if (unsignedp)
2364 return result;
2365 /* Signed bit field: sign-extend with two arithmetic shifts. */
2366 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2367 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2368 return expand_shift (RSHIFT_EXPR, word_mode, result,
2369 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2372 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2373 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2374 MODE, fill the upper bits with zeros. Fail if the layout of either
2375 mode is unknown (as for CC modes) or if the extraction would involve
2376 unprofitable mode punning. Return the value on success, otherwise
2377 return null.
2379 This is different from gen_lowpart* in these respects:
2381 - the returned value must always be considered an rvalue
2383 - when MODE is wider than SRC_MODE, the extraction involves
2384 a zero extension
2386 - when MODE is smaller than SRC_MODE, the extraction involves
2387 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2389 In other words, this routine performs a computation, whereas the
2390 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2391 operations. */
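/* For example, reading an SFmode register as SImode normally succeeds through
   gen_lowpart_common (same size, tieable modes), while reading the low part
   of a DImode value as SFmode goes through the integer conversion path below;
   any request involving a CC mode fails.  */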
2394 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2396 scalar_int_mode int_mode, src_int_mode;
2398 if (mode == src_mode)
2399 return src;
2401 if (CONSTANT_P (src))
2403 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2404 fails, it will happily create (subreg (symbol_ref)) or similar
2405 invalid SUBREGs. */
2406 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2407 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2408 if (ret)
2409 return ret;
2411 if (GET_MODE (src) == VOIDmode
2412 || !validate_subreg (mode, src_mode, src, byte))
2413 return NULL_RTX;
2415 src = force_reg (GET_MODE (src), src);
2416 return gen_rtx_SUBREG (mode, src, byte);
2419 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2420 return NULL_RTX;
2422 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2423 && targetm.modes_tieable_p (mode, src_mode))
2425 rtx x = gen_lowpart_common (mode, src);
2426 if (x)
2427 return x;
2430 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2431 || !int_mode_for_mode (mode).exists (&int_mode))
2432 return NULL_RTX;
2434 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2435 return NULL_RTX;
2436 if (!targetm.modes_tieable_p (int_mode, mode))
2437 return NULL_RTX;
2439 src = gen_lowpart (src_int_mode, src);
2440 if (!validate_subreg (int_mode, src_int_mode, src,
2441 subreg_lowpart_offset (int_mode, src_int_mode)))
2442 return NULL_RTX;
2444 src = convert_modes (int_mode, src_int_mode, src, true);
2445 src = gen_lowpart (mode, src);
2446 return src;
2449 /* Add INC into TARGET. */
2451 void
2452 expand_inc (rtx target, rtx inc)
2454 rtx value = expand_binop (GET_MODE (target), add_optab,
2455 target, inc,
2456 target, 0, OPTAB_LIB_WIDEN);
2457 if (value != target)
2458 emit_move_insn (target, value);
2461 /* Subtract DEC from TARGET. */
2463 void
2464 expand_dec (rtx target, rtx dec)
2466 rtx value = expand_binop (GET_MODE (target), sub_optab,
2467 target, dec,
2468 target, 0, OPTAB_LIB_WIDEN);
2469 if (value != target)
2470 emit_move_insn (target, value);
2473 /* Output a shift instruction for expression code CODE,
2474 with SHIFTED being the rtx for the value to shift,
2475 and AMOUNT the rtx for the amount to shift by.
2476 Store the result in the rtx TARGET, if that is convenient.
2477 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2478 Return the rtx for where the value is.
2479 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2480 in which case 0 is returned. */
2482 static rtx
2483 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2484 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2486 rtx op1, temp = 0;
2487 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2488 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2489 optab lshift_optab = ashl_optab;
2490 optab rshift_arith_optab = ashr_optab;
2491 optab rshift_uns_optab = lshr_optab;
2492 optab lrotate_optab = rotl_optab;
2493 optab rrotate_optab = rotr_optab;
2494 machine_mode op1_mode;
2495 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2496 int attempt;
2497 bool speed = optimize_insn_for_speed_p ();
2499 op1 = amount;
2500 op1_mode = GET_MODE (op1);
2502 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2503 shift amount is a vector, use the vector/vector shift patterns. */
2504 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2506 lshift_optab = vashl_optab;
2507 rshift_arith_optab = vashr_optab;
2508 rshift_uns_optab = vlshr_optab;
2509 lrotate_optab = vrotl_optab;
2510 rrotate_optab = vrotr_optab;
2513 /* We used to detect shift counts computed by NEGATE_EXPR
2514 and shift in the other direction, but that does not work
2515 on all machines. */
2517 if (SHIFT_COUNT_TRUNCATED)
2519 if (CONST_INT_P (op1)
2520 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2521 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2522 op1 = gen_int_shift_amount (mode,
2523 (unsigned HOST_WIDE_INT) INTVAL (op1)
2524 % GET_MODE_BITSIZE (scalar_mode));
2525 else if (GET_CODE (op1) == SUBREG
2526 && subreg_lowpart_p (op1)
2527 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2528 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2529 op1 = SUBREG_REG (op1);
2532 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2533 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2534 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2535 amount instead. */
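/* For example, in SImode a rotate left by 20 becomes a rotate right by 12,
   and a rotate right by 16 becomes a rotate left by 16.  */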
2536 if (rotate
2537 && CONST_INT_P (op1)
2538 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2539 GET_MODE_BITSIZE (scalar_mode) - 1))
2541 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2542 - INTVAL (op1)));
2543 left = !left;
2544 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2547 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2548 Note that this is not the case for bigger values. For instance a rotation
2549 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2550 0x04030201 (bswapsi). */
2551 if (rotate
2552 && CONST_INT_P (op1)
2553 && INTVAL (op1) == BITS_PER_UNIT
2554 && GET_MODE_SIZE (scalar_mode) == 2
2555 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2556 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2558 if (op1 == const0_rtx)
2559 return shifted;
2561 /* Check whether it's cheaper to implement a left shift by a constant
2562 bit count by a sequence of additions. */
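/* For example, a shift left by 2 would be emitted as two self-additions:
   t = x + x; t = t + t.  */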
2563 if (code == LSHIFT_EXPR
2564 && CONST_INT_P (op1)
2565 && INTVAL (op1) > 0
2566 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2567 && INTVAL (op1) < MAX_BITS_PER_WORD
2568 && (shift_cost (speed, mode, INTVAL (op1))
2569 > INTVAL (op1) * add_cost (speed, mode))
2570 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2572 int i;
2573 for (i = 0; i < INTVAL (op1); i++)
2575 temp = force_reg (mode, shifted);
2576 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2577 unsignedp, OPTAB_LIB_WIDEN);
2579 return shifted;
2582 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2584 enum optab_methods methods;
2586 if (attempt == 0)
2587 methods = OPTAB_DIRECT;
2588 else if (attempt == 1)
2589 methods = OPTAB_WIDEN;
2590 else
2591 methods = OPTAB_LIB_WIDEN;
2593 if (rotate)
2595 /* Widening does not work for rotation. */
2596 if (methods == OPTAB_WIDEN)
2597 continue;
2598 else if (methods == OPTAB_LIB_WIDEN)
2600 /* If we have been unable to open-code this by a rotation,
2601 do it as the IOR of two shifts. I.e., to rotate A
2602 by N bits, compute
2603 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2604 where C is the bitsize of A.
2606 It is theoretically possible that the target machine might
2607 not be able to perform either shift and hence we would
2608 be making two libcalls rather than just the one for the
2609 shift (similarly if IOR could not be done). We will allow
2610 this extremely unlikely lossage to avoid complicating the
2611 code below. */
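/* Note that the (-N) & (C - 1) form keeps the right-shift amount in range
   when N is a variable that may be zero at run time: both shifts then
   degenerate to A and the IOR still yields A, whereas a shift by C - N
   would be out of range.  */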
2613 rtx subtarget = target == shifted ? 0 : target;
2614 rtx new_amount, other_amount;
2615 rtx temp1;
2617 new_amount = op1;
2618 if (op1 == const0_rtx)
2619 return shifted;
2620 else if (CONST_INT_P (op1))
2621 other_amount = gen_int_shift_amount
2622 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2623 else
2625 other_amount
2626 = simplify_gen_unary (NEG, GET_MODE (op1),
2627 op1, GET_MODE (op1));
2628 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2629 other_amount
2630 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2631 gen_int_mode (mask, GET_MODE (op1)));
2634 shifted = force_reg (mode, shifted);
2636 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2637 mode, shifted, new_amount, 0, 1);
2638 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2639 mode, shifted, other_amount,
2640 subtarget, 1);
2641 return expand_binop (mode, ior_optab, temp, temp1, target,
2642 unsignedp, methods);
2645 temp = expand_binop (mode,
2646 left ? lrotate_optab : rrotate_optab,
2647 shifted, op1, target, unsignedp, methods);
2649 else if (unsignedp)
2650 temp = expand_binop (mode,
2651 left ? lshift_optab : rshift_uns_optab,
2652 shifted, op1, target, unsignedp, methods);
2654 /* Do arithmetic shifts.
2655 Also, if we are going to widen the operand, we can just as well
2656 use an arithmetic right-shift instead of a logical one. */
2657 if (temp == 0 && ! rotate
2658 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2660 enum optab_methods methods1 = methods;
2662 /* If trying to widen a log shift to an arithmetic shift,
2663 don't accept an arithmetic shift of the same size. */
2664 if (unsignedp)
2665 methods1 = OPTAB_MUST_WIDEN;
2667 /* Arithmetic shift */
2669 temp = expand_binop (mode,
2670 left ? lshift_optab : rshift_arith_optab,
2671 shifted, op1, target, unsignedp, methods1);
2674 /* We used to try extzv here for logical right shifts, but that was
2675 only useful for one machine, the VAX, and caused poor code
2676 generation there for lshrdi3, so the code was deleted and a
2677 define_expand for lshrsi3 was added to vax.md. */
2680 gcc_assert (temp != NULL_RTX || may_fail);
2681 return temp;
2684 /* Output a shift instruction for expression code CODE,
2685 with SHIFTED being the rtx for the value to shift,
2686 and AMOUNT the amount to shift by.
2687 Store the result in the rtx TARGET, if that is convenient.
2688 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2689 Return the rtx for where the value is. */
2692 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2693 poly_int64 amount, rtx target, int unsignedp)
2695 return expand_shift_1 (code, mode, shifted,
2696 gen_int_shift_amount (mode, amount),
2697 target, unsignedp);
2700 /* Likewise, but return 0 if that cannot be done. */
2702 static rtx
2703 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2704 int amount, rtx target, int unsignedp)
2706 return expand_shift_1 (code, mode,
2707 shifted, GEN_INT (amount), target, unsignedp, true);
2710 /* Output a shift instruction for expression code CODE,
2711 with SHIFTED being the rtx for the value to shift,
2712 and AMOUNT the tree for the amount to shift by.
2713 Store the result in the rtx TARGET, if that is convenient.
2714 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2715 Return the rtx for where the value is. */
2718 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2719 tree amount, rtx target, int unsignedp)
2721 return expand_shift_1 (code, mode,
2722 shifted, expand_normal (amount), target, unsignedp);
2726 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2727 const struct mult_cost *, machine_mode mode);
2728 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2729 const struct algorithm *, enum mult_variant);
2730 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2731 static rtx extract_high_half (scalar_int_mode, rtx);
2732 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2733 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2734 int, int);
2735 /* Compute and return the best algorithm for multiplying by T.
2736 The algorithm must cost less than COST_LIMIT.
2737 If retval.cost >= COST_LIMIT, no algorithm was found and all
2738 other fields of the returned struct are undefined.
2739 MODE is the machine mode of the multiplication. */
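/* For example, for T == 10 a typical result is x*10 computed as
   ((x << 2) + x) << 1: a shift-and-add step followed by a final shift,
   found by the recursive search below.  */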
2741 static void
2742 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2743 const struct mult_cost *cost_limit, machine_mode mode)
2745 int m;
2746 struct algorithm *alg_in, *best_alg;
2747 struct mult_cost best_cost;
2748 struct mult_cost new_limit;
2749 int op_cost, op_latency;
2750 unsigned HOST_WIDE_INT orig_t = t;
2751 unsigned HOST_WIDE_INT q;
2752 int maxm, hash_index;
2753 bool cache_hit = false;
2754 enum alg_code cache_alg = alg_zero;
2755 bool speed = optimize_insn_for_speed_p ();
2756 scalar_int_mode imode;
2757 struct alg_hash_entry *entry_ptr;
2759 /* Indicate that no algorithm is yet found. If no algorithm
2760 is found, this value will be returned and indicate failure. */
2761 alg_out->cost.cost = cost_limit->cost + 1;
2762 alg_out->cost.latency = cost_limit->latency + 1;
2764 if (cost_limit->cost < 0
2765 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2766 return;
2768 /* Be prepared for vector modes. */
2769 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2771 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2773 /* Restrict the bits of "t" to the multiplication's mode. */
2774 t &= GET_MODE_MASK (imode);
2776 /* t == 1 can be done in zero cost. */
2777 if (t == 1)
2779 alg_out->ops = 1;
2780 alg_out->cost.cost = 0;
2781 alg_out->cost.latency = 0;
2782 alg_out->op[0] = alg_m;
2783 return;
2786 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2787 fail now. */
2788 if (t == 0)
2790 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2791 return;
2792 else
2794 alg_out->ops = 1;
2795 alg_out->cost.cost = zero_cost (speed);
2796 alg_out->cost.latency = zero_cost (speed);
2797 alg_out->op[0] = alg_zero;
2798 return;
2802 /* We'll be needing a couple extra algorithm structures now. */
2804 alg_in = XALLOCA (struct algorithm);
2805 best_alg = XALLOCA (struct algorithm);
2806 best_cost = *cost_limit;
2808 /* Compute the hash index. */
2809 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2811 /* See if we already know what to do for T. */
2812 entry_ptr = alg_hash_entry_ptr (hash_index);
2813 if (entry_ptr->t == t
2814 && entry_ptr->mode == mode
2815 && entry_ptr->speed == speed
2816 && entry_ptr->alg != alg_unknown)
2818 cache_alg = entry_ptr->alg;
2820 if (cache_alg == alg_impossible)
2822 /* The cache tells us that it's impossible to synthesize
2823 multiplication by T within entry_ptr->cost. */
2824 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2825 /* COST_LIMIT is at least as restrictive as the one
2826 recorded in the hash table, in which case we have no
2827 hope of synthesizing a multiplication. Just
2828 return. */
2829 return;
2831 /* If we get here, COST_LIMIT is less restrictive than the
2832 one recorded in the hash table, so we may be able to
2833 synthesize a multiplication. Proceed as if we didn't
2834 have the cache entry. */
2836 else
2838 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2839 /* The cached algorithm shows that this multiplication
2840 requires more cost than COST_LIMIT. Just return. This
2841 way, we don't clobber this cache entry with
2842 alg_impossible but retain useful information. */
2843 return;
2845 cache_hit = true;
2847 switch (cache_alg)
2849 case alg_shift:
2850 goto do_alg_shift;
2852 case alg_add_t_m2:
2853 case alg_sub_t_m2:
2854 goto do_alg_addsub_t_m2;
2856 case alg_add_factor:
2857 case alg_sub_factor:
2858 goto do_alg_addsub_factor;
2860 case alg_add_t2_m:
2861 goto do_alg_add_t2_m;
2863 case alg_sub_t2_m:
2864 goto do_alg_sub_t2_m;
2866 default:
2867 gcc_unreachable ();
2872 /* If we have a group of zero bits at the low-order part of T, try
2873 multiplying by the remaining bits and then doing a shift. */
2875 if ((t & 1) == 0)
2877 do_alg_shift:
2878 m = ctz_or_zero (t); /* m = number of low zero bits */
2879 if (m < maxm)
2881 q = t >> m;
2882 /* The function expand_shift will choose between a shift and
2883 a sequence of additions, so the observed cost is given as
2884 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2885 op_cost = m * add_cost (speed, mode);
2886 if (shift_cost (speed, mode, m) < op_cost)
2887 op_cost = shift_cost (speed, mode, m);
2888 new_limit.cost = best_cost.cost - op_cost;
2889 new_limit.latency = best_cost.latency - op_cost;
2890 synth_mult (alg_in, q, &new_limit, mode);
2892 alg_in->cost.cost += op_cost;
2893 alg_in->cost.latency += op_cost;
2894 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2896 best_cost = alg_in->cost;
2897 std::swap (alg_in, best_alg);
2898 best_alg->log[best_alg->ops] = m;
2899 best_alg->op[best_alg->ops] = alg_shift;
2902 /* See if treating ORIG_T as a signed number yields a better
2903 sequence. Try this sequence only for a negative ORIG_T
2904 as it would be useless for a non-negative ORIG_T. */
2905 if ((HOST_WIDE_INT) orig_t < 0)
2907 /* Shift ORIG_T as follows because a right shift of a
2908 negative-valued signed type is implementation
2909 defined. */
2910 q = ~(~orig_t >> m);
2911 /* The function expand_shift will choose between a shift
2912 and a sequence of additions, so the observed cost is
2913 given as MIN (m * add_cost(speed, mode),
2914 shift_cost(speed, mode, m)). */
2915 op_cost = m * add_cost (speed, mode);
2916 if (shift_cost (speed, mode, m) < op_cost)
2917 op_cost = shift_cost (speed, mode, m);
2918 new_limit.cost = best_cost.cost - op_cost;
2919 new_limit.latency = best_cost.latency - op_cost;
2920 synth_mult (alg_in, q, &new_limit, mode);
2922 alg_in->cost.cost += op_cost;
2923 alg_in->cost.latency += op_cost;
2924 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2926 best_cost = alg_in->cost;
2927 std::swap (alg_in, best_alg);
2928 best_alg->log[best_alg->ops] = m;
2929 best_alg->op[best_alg->ops] = alg_shift;
2933 if (cache_hit)
2934 goto done;
2937 /* If we have an odd number, add or subtract one. */
2938 if ((t & 1) != 0)
2940 unsigned HOST_WIDE_INT w;
2942 do_alg_addsub_t_m2:
2943 for (w = 1; (w & t) != 0; w <<= 1)
2945 /* If T was -1, then W will be zero after the loop. This is another
2946 case where T ends with ...111. Handling it as (T + 1) followed by
2947 subtracting 1 produces slightly better code and results in algorithm
2948 selection much faster than treating it like the ...0111 case
2949 below. */
2950 if (w == 0
2951 || (w > 2
2952 /* Reject the case where t is 3.
2953 Thus we prefer addition in that case. */
2954 && t != 3))
2956 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
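/* For instance, T == 7 is handled here: synth_mult is called for T + 1 == 8
   (a single shift) and the final subtraction gives x*8 - x.  */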
2958 op_cost = add_cost (speed, mode);
2959 new_limit.cost = best_cost.cost - op_cost;
2960 new_limit.latency = best_cost.latency - op_cost;
2961 synth_mult (alg_in, t + 1, &new_limit, mode);
2963 alg_in->cost.cost += op_cost;
2964 alg_in->cost.latency += op_cost;
2965 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2967 best_cost = alg_in->cost;
2968 std::swap (alg_in, best_alg);
2969 best_alg->log[best_alg->ops] = 0;
2970 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2973 else
2975 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2977 op_cost = add_cost (speed, mode);
2978 new_limit.cost = best_cost.cost - op_cost;
2979 new_limit.latency = best_cost.latency - op_cost;
2980 synth_mult (alg_in, t - 1, &new_limit, mode);
2982 alg_in->cost.cost += op_cost;
2983 alg_in->cost.latency += op_cost;
2984 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2986 best_cost = alg_in->cost;
2987 std::swap (alg_in, best_alg);
2988 best_alg->log[best_alg->ops] = 0;
2989 best_alg->op[best_alg->ops] = alg_add_t_m2;
2993 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2994 quickly with a - a * n for some appropriate constant n. */
2995 m = exact_log2 (-orig_t + 1);
2996 if (m >= 0 && m < maxm)
2998 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2999 /* If the target has a cheap shift-and-subtract insn, use
3000 that in preference to a shift insn followed by a sub insn.
3001 Assume that the shift-and-sub is "atomic" with a latency
3002 equal to its cost; otherwise assume that on superscalar
3003 hardware the shift may be executed concurrently with the
3004 earlier steps in the algorithm. */
3005 if (shiftsub1_cost (speed, mode, m) <= op_cost)
3007 op_cost = shiftsub1_cost (speed, mode, m);
3008 op_latency = op_cost;
3010 else
3011 op_latency = add_cost (speed, mode);
3013 new_limit.cost = best_cost.cost - op_cost;
3014 new_limit.latency = best_cost.latency - op_latency;
3015 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3016 &new_limit, mode);
3018 alg_in->cost.cost += op_cost;
3019 alg_in->cost.latency += op_latency;
3020 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3022 best_cost = alg_in->cost;
3023 std::swap (alg_in, best_alg);
3024 best_alg->log[best_alg->ops] = m;
3025 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3029 if (cache_hit)
3030 goto done;
3033 /* Look for factors of t of the form
3034 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3035 If we find such a factor, we can multiply by t using an algorithm that
3036 multiplies by q, shifts the result left by m and adds or subtracts the unshifted value.
3038 We search for large factors first and loop down, even if large factors
3039 are less probable than small; if we find a large factor we will find a
3040 good sequence quickly, and therefore be able to prune (by decreasing
3041 COST_LIMIT) the search. */
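/* For example, T == 45 factors as 3 * (2**4 - 1), so it can be synthesized
   as y = x*3 followed by (y << 4) - y.  */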
3043 do_alg_addsub_factor:
3044 for (m = floor_log2 (t - 1); m >= 2; m--)
3046 unsigned HOST_WIDE_INT d;
3048 d = (HOST_WIDE_INT_1U << m) + 1;
3049 if (t % d == 0 && t > d && m < maxm
3050 && (!cache_hit || cache_alg == alg_add_factor))
3052 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3053 if (shiftadd_cost (speed, mode, m) <= op_cost)
3054 op_cost = shiftadd_cost (speed, mode, m);
3056 op_latency = op_cost;
3059 new_limit.cost = best_cost.cost - op_cost;
3060 new_limit.latency = best_cost.latency - op_latency;
3061 synth_mult (alg_in, t / d, &new_limit, mode);
3063 alg_in->cost.cost += op_cost;
3064 alg_in->cost.latency += op_latency;
3065 if (alg_in->cost.latency < op_cost)
3066 alg_in->cost.latency = op_cost;
3067 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3069 best_cost = alg_in->cost;
3070 std::swap (alg_in, best_alg);
3071 best_alg->log[best_alg->ops] = m;
3072 best_alg->op[best_alg->ops] = alg_add_factor;
3074 /* Other factors will have been taken care of in the recursion. */
3075 break;
3078 d = (HOST_WIDE_INT_1U << m) - 1;
3079 if (t % d == 0 && t > d && m < maxm
3080 && (!cache_hit || cache_alg == alg_sub_factor))
3082 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3083 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3084 op_cost = shiftsub0_cost (speed, mode, m);
3086 op_latency = op_cost;
3088 new_limit.cost = best_cost.cost - op_cost;
3089 new_limit.latency = best_cost.latency - op_latency;
3090 synth_mult (alg_in, t / d, &new_limit, mode);
3092 alg_in->cost.cost += op_cost;
3093 alg_in->cost.latency += op_latency;
3094 if (alg_in->cost.latency < op_cost)
3095 alg_in->cost.latency = op_cost;
3096 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3098 best_cost = alg_in->cost;
3099 std::swap (alg_in, best_alg);
3100 best_alg->log[best_alg->ops] = m;
3101 best_alg->op[best_alg->ops] = alg_sub_factor;
3103 break;
3106 if (cache_hit)
3107 goto done;
3109 /* Try shift-and-add (load effective address) instructions,
3110 i.e. do a*3, a*5, a*9. */
3111 if ((t & 1) != 0)
3113 do_alg_add_t2_m:
3114 q = t - 1;
3115 m = ctz_hwi (q);
3116 if (q && m < maxm)
3118 op_cost = shiftadd_cost (speed, mode, m);
3119 new_limit.cost = best_cost.cost - op_cost;
3120 new_limit.latency = best_cost.latency - op_cost;
3121 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3123 alg_in->cost.cost += op_cost;
3124 alg_in->cost.latency += op_cost;
3125 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3127 best_cost = alg_in->cost;
3128 std::swap (alg_in, best_alg);
3129 best_alg->log[best_alg->ops] = m;
3130 best_alg->op[best_alg->ops] = alg_add_t2_m;
3133 if (cache_hit)
3134 goto done;
3136 do_alg_sub_t2_m:
3137 q = t + 1;
3138 m = ctz_hwi (q);
3139 if (q && m < maxm)
3141 op_cost = shiftsub0_cost (speed, mode, m);
3142 new_limit.cost = best_cost.cost - op_cost;
3143 new_limit.latency = best_cost.latency - op_cost;
3144 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3146 alg_in->cost.cost += op_cost;
3147 alg_in->cost.latency += op_cost;
3148 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3150 best_cost = alg_in->cost;
3151 std::swap (alg_in, best_alg);
3152 best_alg->log[best_alg->ops] = m;
3153 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3156 if (cache_hit)
3157 goto done;
3160 done:
3161 /* If best_cost has not decreased, we have not found any algorithm. */
3162 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3164 /* We failed to find an algorithm. Record alg_impossible for
3165 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3166 we are asked to find an algorithm for T within the same or
3167 lower COST_LIMIT, we can immediately return to the
3168 caller. */
3169 entry_ptr->t = t;
3170 entry_ptr->mode = mode;
3171 entry_ptr->speed = speed;
3172 entry_ptr->alg = alg_impossible;
3173 entry_ptr->cost = *cost_limit;
3174 return;
3177 /* Cache the result. */
3178 if (!cache_hit)
3180 entry_ptr->t = t;
3181 entry_ptr->mode = mode;
3182 entry_ptr->speed = speed;
3183 entry_ptr->alg = best_alg->op[best_alg->ops];
3184 entry_ptr->cost.cost = best_cost.cost;
3185 entry_ptr->cost.latency = best_cost.latency;
3188 /* If we are getting too long a sequence for `struct algorithm'
3189 to record, make this search fail. */
3190 if (best_alg->ops == MAX_BITS_PER_WORD)
3191 return;
3193 /* Copy the algorithm from temporary space to the space at alg_out.
3194 We avoid using structure assignment because the majority of
3195 best_alg is normally undefined, and this is a critical function. */
3196 alg_out->ops = best_alg->ops + 1;
3197 alg_out->cost = best_cost;
3198 memcpy (alg_out->op, best_alg->op,
3199 alg_out->ops * sizeof *alg_out->op);
3200 memcpy (alg_out->log, best_alg->log,
3201 alg_out->ops * sizeof *alg_out->log);
3204 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3205 Try three variations:
3207 - a shift/add sequence based on VAL itself
3208 - a shift/add sequence based on -VAL, followed by a negation
3209 - a shift/add sequence based on VAL - 1, followed by an addition.
3211 Return true if the cheapest of these cost less than MULT_COST,
3212 describing the algorithm in *ALG and final fixup in *VARIANT. */
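/* For example, a multiplication by -5 may come out as the VAL == 5 sequence
   (x << 2) + x followed by a negation, i.e. negate_variant.  */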
3214 bool
3215 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3216 struct algorithm *alg, enum mult_variant *variant,
3217 int mult_cost)
3219 struct algorithm alg2;
3220 struct mult_cost limit;
3221 int op_cost;
3222 bool speed = optimize_insn_for_speed_p ();
3224 /* Fail quickly for impossible bounds. */
3225 if (mult_cost < 0)
3226 return false;
3228 /* Ensure that mult_cost provides a reasonable upper bound.
3229 Any constant multiplication can be performed with less
3230 than 2 * bits additions. */
3231 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3232 if (mult_cost > op_cost)
3233 mult_cost = op_cost;
3235 *variant = basic_variant;
3236 limit.cost = mult_cost;
3237 limit.latency = mult_cost;
3238 synth_mult (alg, val, &limit, mode);
3240 /* This works only if the inverted value actually fits in an
3241 `unsigned int'. */
3242 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3244 op_cost = neg_cost (speed, mode);
3245 if (MULT_COST_LESS (&alg->cost, mult_cost))
3247 limit.cost = alg->cost.cost - op_cost;
3248 limit.latency = alg->cost.latency - op_cost;
3250 else
3252 limit.cost = mult_cost - op_cost;
3253 limit.latency = mult_cost - op_cost;
3256 synth_mult (&alg2, -val, &limit, mode);
3257 alg2.cost.cost += op_cost;
3258 alg2.cost.latency += op_cost;
3259 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3260 *alg = alg2, *variant = negate_variant;
3263 /* This proves very useful for division-by-constant. */
3264 op_cost = add_cost (speed, mode);
3265 if (MULT_COST_LESS (&alg->cost, mult_cost))
3267 limit.cost = alg->cost.cost - op_cost;
3268 limit.latency = alg->cost.latency - op_cost;
3270 else
3272 limit.cost = mult_cost - op_cost;
3273 limit.latency = mult_cost - op_cost;
3276 synth_mult (&alg2, val - 1, &limit, mode);
3277 alg2.cost.cost += op_cost;
3278 alg2.cost.latency += op_cost;
3279 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3280 *alg = alg2, *variant = add_variant;
3282 return MULT_COST_LESS (&alg->cost, mult_cost);
3285 /* A subroutine of expand_mult, used for constant multiplications.
3286 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3287 convenient. Use the shift/add sequence described by ALG and apply
3288 the final fixup specified by VARIANT. */
3290 static rtx
3291 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3292 rtx target, const struct algorithm *alg,
3293 enum mult_variant variant)
3295 unsigned HOST_WIDE_INT val_so_far;
3296 rtx_insn *insn;
3297 rtx accum, tem;
3298 int opno;
3299 machine_mode nmode;
3301 /* Avoid referencing memory over and over and invalid sharing
3302 on SUBREGs. */
3303 op0 = force_reg (mode, op0);
3305 /* ACCUM starts out either as OP0 or as a zero, depending on
3306 the first operation. */
3308 if (alg->op[0] == alg_zero)
3310 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3311 val_so_far = 0;
3313 else if (alg->op[0] == alg_m)
3315 accum = copy_to_mode_reg (mode, op0);
3316 val_so_far = 1;
3318 else
3319 gcc_unreachable ();
3321 for (opno = 1; opno < alg->ops; opno++)
3323 int log = alg->log[opno];
3324 rtx shift_subtarget = optimize ? 0 : accum;
3325 rtx add_target
3326 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3327 && !optimize)
3328 ? target : 0;
3329 rtx accum_target = optimize ? 0 : accum;
3330 rtx accum_inner;
3332 switch (alg->op[opno])
3334 case alg_shift:
3335 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3336 /* REG_EQUAL note will be attached to the following insn. */
3337 emit_move_insn (accum, tem);
3338 val_so_far <<= log;
3339 break;
3341 case alg_add_t_m2:
3342 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3343 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3344 add_target ? add_target : accum_target);
3345 val_so_far += HOST_WIDE_INT_1U << log;
3346 break;
3348 case alg_sub_t_m2:
3349 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3350 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3351 add_target ? add_target : accum_target);
3352 val_so_far -= HOST_WIDE_INT_1U << log;
3353 break;
3355 case alg_add_t2_m:
3356 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3357 log, shift_subtarget, 0);
3358 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3359 add_target ? add_target : accum_target);
3360 val_so_far = (val_so_far << log) + 1;
3361 break;
3363 case alg_sub_t2_m:
3364 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3365 log, shift_subtarget, 0);
3366 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3367 add_target ? add_target : accum_target);
3368 val_so_far = (val_so_far << log) - 1;
3369 break;
3371 case alg_add_factor:
3372 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3373 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3374 add_target ? add_target : accum_target);
3375 val_so_far += val_so_far << log;
3376 break;
3378 case alg_sub_factor:
3379 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3380 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3381 (add_target
3382 ? add_target : (optimize ? 0 : tem)));
3383 val_so_far = (val_so_far << log) - val_so_far;
3384 break;
3386 default:
3387 gcc_unreachable ();
3390 if (SCALAR_INT_MODE_P (mode))
3392 /* Write a REG_EQUAL note on the last insn so that we can cse
3393 multiplication sequences. Note that if ACCUM is a SUBREG,
3394 we've set the inner register and must properly indicate that. */
3395 tem = op0, nmode = mode;
3396 accum_inner = accum;
3397 if (GET_CODE (accum) == SUBREG)
3399 accum_inner = SUBREG_REG (accum);
3400 nmode = GET_MODE (accum_inner);
3401 tem = gen_lowpart (nmode, op0);
3404 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3405 In that case, only the low bits of accum would be guaranteed to
3406 be equal to the content of the REG_EQUAL note, the upper bits
3407 can be anything. */
3408 if (!paradoxical_subreg_p (tem))
3410 insn = get_last_insn ();
3411 wide_int wval_so_far
3412 = wi::uhwi (val_so_far,
3413 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3414 rtx c = immed_wide_int_const (wval_so_far, nmode);
3415 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3416 accum_inner);
3421 if (variant == negate_variant)
3423 val_so_far = -val_so_far;
3424 accum = expand_unop (mode, neg_optab, accum, target, 0);
3426 else if (variant == add_variant)
3428 val_so_far = val_so_far + 1;
3429 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3432 /* Compare only the bits of val and val_so_far that are significant
3433 in the result mode, to avoid sign-/zero-extension confusion. */
3434 nmode = GET_MODE_INNER (mode);
3435 val &= GET_MODE_MASK (nmode);
3436 val_so_far &= GET_MODE_MASK (nmode);
3437 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3439 return accum;
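/* As an illustrative sketch (one possible decomposition, not necessarily the
   one synth_mult picks), a multiplication by 10 can be realized by the loop
   above as:

     accum = op0;                               val_so_far = 1
     alg_shift,    log = 1:  accum <<= 1;       val_so_far = 2
     alg_add_t_m2, log = 3:  accum += op0 << 3; val_so_far = 2 + 8 = 10

   The assertion that VAL == VAL_SO_FAR checks that the recorded shift/add
   steps really reproduce the requested constant.  */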
3442 /* Perform a multiplication and return an rtx for the result.
3443 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3444 TARGET is a suggestion for where to store the result (an rtx).
3446 We check specially for a constant integer as OP1.
3447 If you want this check for OP0 as well, then before calling
3448 you should swap the two operands if OP0 would be constant. */
3451 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3452 int unsignedp, bool no_libcall)
3454 enum mult_variant variant;
3455 struct algorithm algorithm;
3456 rtx scalar_op1;
3457 int max_cost;
3458 bool speed = optimize_insn_for_speed_p ();
3459 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3461 if (CONSTANT_P (op0))
3462 std::swap (op0, op1);
3464 /* For vectors, there are several simplifications that can be made if
3465 all elements of the vector constant are identical. */
3466 scalar_op1 = unwrap_const_vec_duplicate (op1);
3468 if (INTEGRAL_MODE_P (mode))
3470 rtx fake_reg;
3471 HOST_WIDE_INT coeff;
3472 bool is_neg;
3473 int mode_bitsize;
3475 if (op1 == CONST0_RTX (mode))
3476 return op1;
3477 if (op1 == CONST1_RTX (mode))
3478 return op0;
3479 if (op1 == CONSTM1_RTX (mode))
3480 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3481 op0, target, 0);
3483 if (do_trapv)
3484 goto skip_synth;
3486 /* If mode is an integer vector mode, check if the backend supports
3487 vector lshift (by scalar or vector) at all. If not, we can't use a
3488 synthesized multiply. */
3489 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3490 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3491 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3492 goto skip_synth;
3494 /* These are the operations that are potentially turned into
3495 a sequence of shifts and additions. */
3496 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3498 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3499 less than or equal in size to `unsigned int' this doesn't matter.
3500 If the mode is larger than `unsigned int', then synth_mult works
3501 only if the constant value exactly fits in an `unsigned int' without
3502 any truncation. This means that multiplying by negative values does
3503 not work; results are off by 2^32 on a 32 bit machine. */
3504 if (CONST_INT_P (scalar_op1))
3506 coeff = INTVAL (scalar_op1);
3507 is_neg = coeff < 0;
3509 #if TARGET_SUPPORTS_WIDE_INT
3510 else if (CONST_WIDE_INT_P (scalar_op1))
3511 #else
3512 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3513 #endif
3515 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3516 /* Perfect power of 2 (other than 1, which is handled above). */
3517 if (shift > 0)
3518 return expand_shift (LSHIFT_EXPR, mode, op0,
3519 shift, target, unsignedp);
3520 else
3521 goto skip_synth;
3523 else
3524 goto skip_synth;
3526 /* We used to test optimize here, on the grounds that it's better to
3527 produce a smaller program when -O is not used. But this causes
3528 such a terrible slowdown sometimes that it seems better to always
3529 use synth_mult. */
3531 /* Special case powers of two. */
3532 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3533 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3534 return expand_shift (LSHIFT_EXPR, mode, op0,
3535 floor_log2 (coeff), target, unsignedp);
3537 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3539 /* Attempt to handle multiplication of DImode values by negative
3540 coefficients, by performing the multiplication by a positive
3541 multiplier and then inverting the result. */
3542 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3544 /* It's safe to use -coeff even for INT_MIN, as the
3545 result is interpreted as an unsigned coefficient.
3546 Exclude cost of op0 from max_cost to match the cost
3547 calculation of the synth_mult. */
3548 coeff = -(unsigned HOST_WIDE_INT) coeff;
3549 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3550 mode, speed)
3551 - neg_cost (speed, mode));
3552 if (max_cost <= 0)
3553 goto skip_synth;
3555 /* Special case powers of two. */
3556 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3558 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3559 floor_log2 (coeff), target, unsignedp);
3560 return expand_unop (mode, neg_optab, temp, target, 0);
3563 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3564 max_cost))
3566 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3567 &algorithm, variant);
3568 return expand_unop (mode, neg_optab, temp, target, 0);
3570 goto skip_synth;
3573 /* Exclude cost of op0 from max_cost to match the cost
3574 calculation of the synth_mult. */
3575 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3576 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3577 return expand_mult_const (mode, op0, coeff, target,
3578 &algorithm, variant);
3580 skip_synth:
3582 /* Expand x*2.0 as x+x. */
3583 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3584 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3586 op0 = force_reg (GET_MODE (op0), op0);
3587 return expand_binop (mode, add_optab, op0, op0,
3588 target, unsignedp,
3589 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3592 /* This used to use umul_optab if unsigned, but for non-widening multiply
3593 there is no difference between signed and unsigned. */
3594 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3595 op0, op1, target, unsignedp,
3596 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3597 gcc_assert (op0 || no_libcall);
3598 return op0;
3601 /* Return a cost estimate for multiplying a register by the given
3602 COEFFicient in the given MODE and SPEED. */
3605 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3607 int max_cost;
3608 struct algorithm algorithm;
3609 enum mult_variant variant;
3611 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3612 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3613 mode, speed);
3614 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3615 return algorithm.cost.cost;
3616 else
3617 return max_cost;
3620 /* Perform a widening multiplication and return an rtx for the result.
3621 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3622 TARGET is a suggestion for where to store the result (an rtx).
3623 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3624 or smul_widen_optab.
3626 We check specially for a constant integer as OP1, comparing the
3627 cost of a widening multiply against the cost of a sequence of shifts
3628 and adds. */
3631 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3632 int unsignedp, optab this_optab)
3634 bool speed = optimize_insn_for_speed_p ();
3635 rtx cop1;
3637 if (CONST_INT_P (op1)
3638 && GET_MODE (op0) != VOIDmode
3639 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3640 this_optab == umul_widen_optab))
3641 && CONST_INT_P (cop1)
3642 && (INTVAL (cop1) >= 0
3643 || HWI_COMPUTABLE_MODE_P (mode)))
3645 HOST_WIDE_INT coeff = INTVAL (cop1);
3646 int max_cost;
3647 enum mult_variant variant;
3648 struct algorithm algorithm;
3650 if (coeff == 0)
3651 return CONST0_RTX (mode);
3653 /* Special case powers of two. */
3654 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3656 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3657 return expand_shift (LSHIFT_EXPR, mode, op0,
3658 floor_log2 (coeff), target, unsignedp);
3661 /* Exclude cost of op0 from max_cost to match the cost
3662 calculation of the synth_mult. */
3663 max_cost = mul_widen_cost (speed, mode);
3664 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3665 max_cost))
3667 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3668 return expand_mult_const (mode, op0, coeff, target,
3669 &algorithm, variant);
3672 return expand_binop (mode, this_optab, op0, op1, target,
3673 unsignedp, OPTAB_LIB_WIDEN);
3676 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3677 replace division by D, and put the least significant N bits of the result
3678 in *MULTIPLIER_PTR and return the most significant bit.
3680 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3681 needed precision is in PRECISION (should be <= N).
3683 PRECISION should be as small as possible so this function can choose
3684 the multiplier more freely.
3686 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3687 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3689 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3690 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3692 unsigned HOST_WIDE_INT
3693 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3694 unsigned HOST_WIDE_INT *multiplier_ptr,
3695 int *post_shift_ptr, int *lgup_ptr)
3697 int lgup, post_shift;
3698 int pow, pow2;
3700 /* lgup = ceil(log2(divisor)); */
3701 lgup = ceil_log2 (d);
3703 gcc_assert (lgup <= n);
3705 pow = n + lgup;
3706 pow2 = n + lgup - precision;
3708 /* mlow = 2^(N + lgup)/d */
3709 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3710 wide_int mlow = wi::udiv_trunc (val, d);
3712 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3713 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3714 wide_int mhigh = wi::udiv_trunc (val, d);
3716 /* If precision == N, then mlow, mhigh exceed 2^N
3717 (but they do not exceed 2^(N+1)). */
3719 /* Reduce to lowest terms. */
3720 for (post_shift = lgup; post_shift > 0; post_shift--)
3722 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3723 HOST_BITS_PER_WIDE_INT);
3724 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3725 HOST_BITS_PER_WIDE_INT);
3726 if (ml_lo >= mh_lo)
3727 break;
3729 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3730 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3733 *post_shift_ptr = post_shift;
3734 *lgup_ptr = lgup;
3735 if (n < HOST_BITS_PER_WIDE_INT)
3737 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3738 *multiplier_ptr = mhigh.to_uhwi () & mask;
3739 return mhigh.to_uhwi () > mask;
3741 else
3743 *multiplier_ptr = mhigh.to_uhwi ();
3744 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
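/* Worked example (illustrative only): for D = 7 with N = PRECISION = 32,
   lgup = 3 and mhigh = (2^35 + 2^3) / 7 = 0x124924925, which cannot be
   reduced, so *POST_SHIFT_PTR = 3, *MULTIPLIER_PTR = 0x24924925 and the
   return value is 1 (the full multiplier needs N + 1 bits).  A caller can
   then compute x / 7 as

     t1 = high 32 bits of (x * 0x24924925);
     t2 = (x - t1) >> 1;
     q  = (t1 + t2) >> 2;

   which matches the sequence built for such divisors in expand_divmod
   below.  */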
3748 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3749 congruent to 1 (mod 2**N). */
3751 static unsigned HOST_WIDE_INT
3752 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3754 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3756 /* The algorithm notes that the choice y = x satisfies
3757 x*y == 1 mod 2^3, since x is assumed odd.
3758 Each iteration doubles the number of bits of significance in y. */
3760 unsigned HOST_WIDE_INT mask;
3761 unsigned HOST_WIDE_INT y = x;
3762 int nbit = 3;
3764 mask = (n == HOST_BITS_PER_WIDE_INT
3765 ? HOST_WIDE_INT_M1U
3766 : (HOST_WIDE_INT_1U << n) - 1);
3768 while (nbit < n)
3770 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3771 nbit *= 2;
3773 return y;
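/* For instance, with X = 7 and N = 8: y starts at 7 (7 * 7 = 49 == 1 mod 8),
   and one iteration gives y = 7 * (2 - 7 * 7) mod 256 = 183, which already
   satisfies 7 * 183 = 1281 == 1 (mod 256).  Since the number of correct
   low-order bits at least doubles on each step, the loop runs O (log N)
   times.  */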
3776 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3777 performed in the wrong signedness flavor. ADJ_OPERAND is already the high
3778 half of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed
3779 product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
3780 product to become signed.
3782 The result is put in TARGET if that is convenient.
3784 MODE is the mode of operation. */
3787 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3788 rtx op1, rtx target, int unsignedp)
3790 rtx tem;
3791 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3793 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3794 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3795 tem = expand_and (mode, tem, op1, NULL_RTX);
3796 adj_operand
3797 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3798 adj_operand);
3800 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3801 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3802 tem = expand_and (mode, tem, op0, NULL_RTX);
3803 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3804 target);
3806 return target;
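/* The code above relies on the identity (all arithmetic modulo 2^N, shown
   here for UNSIGNEDP nonzero, i.e. recovering the unsigned high part from a
   signed one):

     highpart_u (a, b) = highpart_s (a, b)
                         + (a < 0 ? b : 0) + (b < 0 ? a : 0)

   For example, in an 8-bit mode with a = 0xff (-1 signed) and b = 2, the
   signed high part is -1 and adding b gives 1, the high byte of the
   unsigned product 255 * 2 = 0x1fe.  For UNSIGNEDP zero the same two terms
   are subtracted instead.  */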
3809 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3811 static rtx
3812 extract_high_half (scalar_int_mode mode, rtx op)
3814 if (mode == word_mode)
3815 return gen_highpart (mode, op);
3817 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3819 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3820 GET_MODE_BITSIZE (mode), 0, 1);
3821 return convert_modes (mode, wider_mode, op, 0);
3824 /* Like expmed_mult_highpart, but only consider using a multiplication
3825 optab. OP1 is an rtx for the constant operand. */
3827 static rtx
3828 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3829 rtx target, int unsignedp, int max_cost)
3831 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3832 optab moptab;
3833 rtx tem;
3834 int size;
3835 bool speed = optimize_insn_for_speed_p ();
3837 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3839 size = GET_MODE_BITSIZE (mode);
3841 /* Firstly, try using a multiplication insn that generates only the needed
3842 high part of the product, in the sign flavor of unsignedp. */
3843 if (mul_highpart_cost (speed, mode) < max_cost)
3845 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3846 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3847 unsignedp, OPTAB_DIRECT);
3848 if (tem)
3849 return tem;
3852 /* Secondly, the same as above, but using the sign flavor opposite of unsignedp.
3853 The result then needs adjusting after the multiplication. */
3854 if (size - 1 < BITS_PER_WORD
3855 && (mul_highpart_cost (speed, mode)
3856 + 2 * shift_cost (speed, mode, size-1)
3857 + 4 * add_cost (speed, mode) < max_cost))
3859 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3860 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3861 unsignedp, OPTAB_DIRECT);
3862 if (tem)
3863 /* We used the wrong signedness. Adjust the result. */
3864 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3865 tem, unsignedp);
3868 /* Try widening multiplication. */
3869 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3870 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3871 && mul_widen_cost (speed, wider_mode) < max_cost)
3873 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3874 unsignedp, OPTAB_WIDEN);
3875 if (tem)
3876 return extract_high_half (mode, tem);
3879 /* Try widening the mode and perform a non-widening multiplication. */
3880 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3881 && size - 1 < BITS_PER_WORD
3882 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3883 < max_cost))
3885 rtx_insn *insns;
3886 rtx wop0, wop1;
3888 /* We need to widen the operands, for example to ensure the
3889 constant multiplier is correctly sign or zero extended.
3890 Use a sequence to clean up any instructions emitted by
3891 the conversions if things don't work out. */
3892 start_sequence ();
3893 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3894 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3895 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3896 unsignedp, OPTAB_WIDEN);
3897 insns = get_insns ();
3898 end_sequence ();
3900 if (tem)
3902 emit_insn (insns);
3903 return extract_high_half (mode, tem);
3907 /* Try widening multiplication of opposite signedness, and adjust. */
3908 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3909 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3910 && size - 1 < BITS_PER_WORD
3911 && (mul_widen_cost (speed, wider_mode)
3912 + 2 * shift_cost (speed, mode, size-1)
3913 + 4 * add_cost (speed, mode) < max_cost))
3915 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3916 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3917 if (tem != 0)
3919 tem = extract_high_half (mode, tem);
3920 /* We used the wrong signedness. Adjust the result. */
3921 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3922 target, unsignedp);
3926 return 0;
3929 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3930 putting the high half of the result in TARGET if that is convenient,
3931 and return where the result is. If the operation cannot be performed,
3932 0 is returned.
3934 MODE is the mode of operation and result.
3936 UNSIGNEDP nonzero means unsigned multiply.
3938 MAX_COST is the total allowed cost for the expanded RTL. */
3940 static rtx
3941 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3942 rtx target, int unsignedp, int max_cost)
3944 unsigned HOST_WIDE_INT cnst1;
3945 int extra_cost;
3946 bool sign_adjust = false;
3947 enum mult_variant variant;
3948 struct algorithm alg;
3949 rtx tem;
3950 bool speed = optimize_insn_for_speed_p ();
3952 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3953 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3955 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3957 /* We can't optimize modes wider than BITS_PER_WORD.
3958 ??? We might be able to perform double-word arithmetic if
3959 mode == word_mode, however all the cost calculations in
3960 synth_mult etc. assume single-word operations. */
3961 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3962 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3963 return expmed_mult_highpart_optab (mode, op0, op1, target,
3964 unsignedp, max_cost);
3966 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3968 /* Check whether we try to multiply by a negative constant. */
3969 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3971 sign_adjust = true;
3972 extra_cost += add_cost (speed, mode);
3975 /* See whether shift/add multiplication is cheap enough. */
3976 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3977 max_cost - extra_cost))
3979 /* See whether the specialized multiplication optabs are
3980 cheaper than the shift/add version. */
3981 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3982 alg.cost.cost + extra_cost);
3983 if (tem)
3984 return tem;
3986 tem = convert_to_mode (wider_mode, op0, unsignedp);
3987 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3988 tem = extract_high_half (mode, tem);
3990 /* Adjust result for signedness. */
3991 if (sign_adjust)
3992 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3994 return tem;
3996 return expmed_mult_highpart_optab (mode, op0, op1, target,
3997 unsignedp, max_cost);
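/* The SIGN_ADJUST step above accounts for CNST1 being the unsigned residue
   of the original constant: if C has its sign bit set then, modulo 2^BITS,
   X * C = X * (C & GET_MODE_MASK (mode)) - X * 2^BITS, so the signed high
   part is the unsigned-style high part minus X.  */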
4001 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
4003 static rtx
4004 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4006 rtx result, temp, shift;
4007 rtx_code_label *label;
4008 int logd;
4009 int prec = GET_MODE_PRECISION (mode);
4011 logd = floor_log2 (d);
4012 result = gen_reg_rtx (mode);
4014 /* Avoid conditional branches when they're expensive. */
4015 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4016 && optimize_insn_for_speed_p ())
4018 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4019 mode, 0, -1);
4020 if (signmask)
4022 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4023 signmask = force_reg (mode, signmask);
4024 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4026 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4027 which instruction sequence to use. If logical right shifts
4028 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4029 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
4031 temp = gen_rtx_LSHIFTRT (mode, result, shift);
4032 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4033 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4034 > COSTS_N_INSNS (2)))
4036 temp = expand_binop (mode, xor_optab, op0, signmask,
4037 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4038 temp = expand_binop (mode, sub_optab, temp, signmask,
4039 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4040 temp = expand_binop (mode, and_optab, temp,
4041 gen_int_mode (masklow, mode),
4042 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4043 temp = expand_binop (mode, xor_optab, temp, signmask,
4044 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4045 temp = expand_binop (mode, sub_optab, temp, signmask,
4046 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4048 else
4050 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4051 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4052 signmask = force_reg (mode, signmask);
4054 temp = expand_binop (mode, add_optab, op0, signmask,
4055 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4056 temp = expand_binop (mode, and_optab, temp,
4057 gen_int_mode (masklow, mode),
4058 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4059 temp = expand_binop (mode, sub_optab, temp, signmask,
4060 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4062 return temp;
4066 /* Mask contains the mode's signbit and the significant bits of the
4067 modulus. By including the signbit in the operation, many targets
4068 can avoid an explicit compare operation in the following comparison
4069 against zero. */
4070 wide_int mask = wi::mask (logd, false, prec);
4071 mask = wi::set_bit (mask, prec - 1);
4073 temp = expand_binop (mode, and_optab, op0,
4074 immed_wide_int_const (mask, mode),
4075 result, 1, OPTAB_LIB_WIDEN);
4076 if (temp != result)
4077 emit_move_insn (result, temp);
4079 label = gen_label_rtx ();
4080 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4082 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4083 0, OPTAB_LIB_WIDEN);
4085 mask = wi::mask (logd, true, prec);
4086 temp = expand_binop (mode, ior_optab, temp,
4087 immed_wide_int_const (mask, mode),
4088 result, 1, OPTAB_LIB_WIDEN);
4089 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4090 0, OPTAB_LIB_WIDEN);
4091 if (temp != result)
4092 emit_move_insn (result, temp);
4093 emit_label (label);
4094 return result;
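/* As a concrete illustration of the branchless path: for D = 4 (LOGD = 2)
   and OP0 = -5, SIGNMASK is -1, so the XOR/SUB sequence computes

     ((-5 ^ -1) - -1) & 3  =  5 & 3   =  1
     (1 ^ -1) - -1         =  -2 + 1  = -1

   matching the C truncating semantics -5 % 4 == -1.  The mask-and-branch
   path at the end of the function produces the same value using one
   conditional jump.  */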
4097 /* Expand signed division of OP0 by a power of two D in mode MODE.
4098 This routine is only called for positive values of D. */
4100 static rtx
4101 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4103 rtx temp;
4104 rtx_code_label *label;
4105 int logd;
4107 logd = floor_log2 (d);
4109 if (d == 2
4110 && BRANCH_COST (optimize_insn_for_speed_p (),
4111 false) >= 1)
4113 temp = gen_reg_rtx (mode);
4114 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4115 if (temp != NULL_RTX)
4117 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4118 0, OPTAB_LIB_WIDEN);
4119 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4123 if (HAVE_conditional_move
4124 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4126 rtx temp2;
4128 start_sequence ();
4129 temp2 = copy_to_mode_reg (mode, op0);
4130 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4131 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4132 temp = force_reg (mode, temp);
4134 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4135 temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
4136 temp, temp2, mode, 0);
4137 if (temp2)
4139 rtx_insn *seq = get_insns ();
4140 end_sequence ();
4141 emit_insn (seq);
4142 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4144 end_sequence ();
4147 if (BRANCH_COST (optimize_insn_for_speed_p (),
4148 false) >= 2)
4150 int ushift = GET_MODE_BITSIZE (mode) - logd;
4152 temp = gen_reg_rtx (mode);
4153 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4154 if (temp != NULL_RTX)
4156 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4157 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4158 > COSTS_N_INSNS (1))
4159 temp = expand_binop (mode, and_optab, temp,
4160 gen_int_mode (d - 1, mode),
4161 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4162 else
4163 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4164 ushift, NULL_RTX, 1);
4165 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4166 0, OPTAB_LIB_WIDEN);
4167 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4171 label = gen_label_rtx ();
4172 temp = copy_to_mode_reg (mode, op0);
4173 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4174 expand_inc (temp, gen_int_mode (d - 1, mode));
4175 emit_label (label);
4176 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
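/* All of the variants above use the same rewrite: a truncating signed
   division by D = 2^LOGD equals (OP0 + (OP0 < 0 ? D - 1 : 0)) >> LOGD
   (arithmetic shift).  For example, with D = 4 and OP0 = -7 the bias gives
   -7 + 3 = -4, and -4 >> 2 = -1, which is trunc (-7 / 4).  */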
4179 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4180 if that is convenient, and returning where the result is.
4181 You may request either the quotient or the remainder as the result;
4182 specify REM_FLAG nonzero to get the remainder.
4184 CODE is the expression code for which kind of division this is;
4185 it controls how rounding is done. MODE is the machine mode to use.
4186 UNSIGNEDP nonzero means do unsigned division. */
4188 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4189 and then correct it by or'ing in missing high bits
4190 if result of ANDI is nonzero.
4191 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4192 This could optimize to a bfexts instruction.
4193 But C doesn't use these operations, so their optimizations are
4194 left for later. */
4195 /* ??? For modulo, we don't actually need the highpart of the first product,
4196 the low part will do nicely. And for small divisors, the second multiply
4197 can also be a low-part only multiply or even be completely left out.
4198 E.g. to calculate the remainder of a division by 3 with a 32 bit
4199 multiply, multiply with 0x55555556 and extract the upper two bits;
4200 the result is exact for inputs up to 0x1fffffff.
4201 The input range can be reduced by using cross-sum rules.
4202 For odd divisors >= 3, the following table gives right shift counts
4203 so that if a number is shifted by an integer multiple of the given
4204 amount, the remainder stays the same:
4205 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4206 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4207 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4208 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4209 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4211 Cross-sum rules for even numbers can be derived by leaving as many bits
4212 to the right alone as the divisor has zeros to the right.
4213 E.g. if x is an unsigned 32 bit number:
4214 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
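/* A worked instance of the remainder-by-multiplication remark above (not
   implemented here): for x = 7 the low 32 bits of 7 * 0x55555556 are
   0x5555555a, whose top two bits are 01, i.e. 7 mod 3 == 1; for x = 5 they
   are 0xaaaaaaae, top bits 10, i.e. 5 mod 3 == 2.  For non-power-of-two
   constant divisors the code below instead computes the remainder from the
   quotient by a multiplication and a subtraction, as described in the
   structure comment further down.  */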
4218 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4219 rtx op0, rtx op1, rtx target, int unsignedp,
4220 enum optab_methods methods)
4222 machine_mode compute_mode;
4223 rtx tquotient;
4224 rtx quotient = 0, remainder = 0;
4225 rtx_insn *last;
4226 rtx_insn *insn;
4227 optab optab1, optab2;
4228 int op1_is_constant, op1_is_pow2 = 0;
4229 int max_cost, extra_cost;
4230 static HOST_WIDE_INT last_div_const = 0;
4231 bool speed = optimize_insn_for_speed_p ();
4233 op1_is_constant = CONST_INT_P (op1);
4234 if (op1_is_constant)
4236 wide_int ext_op1 = rtx_mode_t (op1, mode);
4237 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4238 || (! unsignedp
4239 && wi::popcount (wi::neg (ext_op1)) == 1));
4243 This is the structure of expand_divmod:
4245 First comes code to fix up the operands so we can perform the operations
4246 correctly and efficiently.
4248 Second comes a switch statement with code specific for each rounding mode.
4249 For some special operands this code emits all RTL for the desired
4250 operation, for other cases, it generates only a quotient and stores it in
4251 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4252 to indicate that it has not done anything.
4254 Last comes code that finishes the operation. If QUOTIENT is set and
4255 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4256 QUOTIENT is not set, it is computed using trunc rounding.
4258 We try to generate special code for division and remainder when OP1 is a
4259 constant. If |OP1| = 2**n we can use shifts and some other fast
4260 operations. For other values of OP1, we compute a carefully selected
4261 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4262 by m.
4264 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4265 half of the product. Different strategies for generating the product are
4266 implemented in expmed_mult_highpart.
4268 If what we actually want is the remainder, we generate that by another
4269 by-constant multiplication and a subtraction. */
4271 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4272 code below will malfunction if we are, so check here and handle
4273 the special case if so. */
4274 if (op1 == const1_rtx)
4275 return rem_flag ? const0_rtx : op0;
4277 /* When dividing by -1, we could get an overflow.
4278 negv_optab can handle overflows. */
4279 if (! unsignedp && op1 == constm1_rtx)
4281 if (rem_flag)
4282 return const0_rtx;
4283 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4284 ? negv_optab : neg_optab, op0, target, 0);
4287 if (target
4288 /* Don't use the function value register as a target
4289 since we have to read it as well as write it,
4290 and function-inlining gets confused by this. */
4291 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4292 /* Don't clobber an operand while doing a multi-step calculation. */
4293 || ((rem_flag || op1_is_constant)
4294 && (reg_mentioned_p (target, op0)
4295 || (MEM_P (op0) && MEM_P (target))))
4296 || reg_mentioned_p (target, op1)
4297 || (MEM_P (op1) && MEM_P (target))))
4298 target = 0;
4300 /* Get the mode in which to perform this computation. Normally it will
4301 be MODE, but sometimes we can't do the desired operation in MODE.
4302 If so, pick a wider mode in which we can do the operation. Convert
4303 to that mode at the start to avoid repeated conversions.
4305 First see what operations we need. These depend on the expression
4306 we are evaluating. (We assume that divxx3 insns exist under the
4307 same conditions as modxx3 insns, and that these insns don't normally
4308 fail. If these assumptions are not correct, we may generate less
4309 efficient code in some cases.)
4311 Then see if we find a mode in which we can open-code that operation
4312 (either a division, modulus, or shift). Finally, check for the smallest
4313 mode for which we can do the operation with a library call. */
4315 /* We might want to refine this now that we have division-by-constant
4316 optimization. Since expmed_mult_highpart tries so many variants, it is
4317 not straightforward to generalize this. Maybe we should make an array
4318 of possible modes in init_expmed? Save this for GCC 2.7. */
4320 optab1 = (op1_is_pow2
4321 ? (unsignedp ? lshr_optab : ashr_optab)
4322 : (unsignedp ? udiv_optab : sdiv_optab));
4323 optab2 = (op1_is_pow2 ? optab1
4324 : (unsignedp ? udivmod_optab : sdivmod_optab));
4326 if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4328 FOR_EACH_MODE_FROM (compute_mode, mode)
4329 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4330 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4331 break;
4333 if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4334 FOR_EACH_MODE_FROM (compute_mode, mode)
4335 if (optab_libfunc (optab1, compute_mode)
4336 || optab_libfunc (optab2, compute_mode))
4337 break;
4339 else
4340 compute_mode = mode;
4342 /* If we still couldn't find a mode, use MODE, but expand_binop will
4343 probably die. */
4344 if (compute_mode == VOIDmode)
4345 compute_mode = mode;
4347 if (target && GET_MODE (target) == compute_mode)
4348 tquotient = target;
4349 else
4350 tquotient = gen_reg_rtx (compute_mode);
4352 #if 0
4353 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4354 (mode), and thereby get better code when OP1 is a constant. Do that
4355 later. It will require going over all usages of SIZE below. */
4356 size = GET_MODE_BITSIZE (mode);
4357 #endif
4359 /* Only deduct something for a REM if the last divide done was
4360 for a different constant. Then set the constant of the last
4361 divide. */
4362 max_cost = (unsignedp
4363 ? udiv_cost (speed, compute_mode)
4364 : sdiv_cost (speed, compute_mode));
4365 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4366 && INTVAL (op1) == last_div_const))
4367 max_cost -= (mul_cost (speed, compute_mode)
4368 + add_cost (speed, compute_mode));
4370 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4372 /* Now convert to the best mode to use. */
4373 if (compute_mode != mode)
4375 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4376 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4378 /* convert_modes may have placed op1 into a register, so we
4379 must recompute the following. */
4380 op1_is_constant = CONST_INT_P (op1);
4381 if (op1_is_constant)
4383 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4384 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4385 || (! unsignedp
4386 && wi::popcount (wi::neg (ext_op1)) == 1));
4388 else
4389 op1_is_pow2 = 0;
4392 /* If one of the operands is a volatile MEM, copy it into a register. */
4394 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4395 op0 = force_reg (compute_mode, op0);
4396 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4397 op1 = force_reg (compute_mode, op1);
4399 /* If we need the remainder or if OP1 is constant, we need to
4400 put OP0 in a register in case it has any queued subexpressions. */
4401 if (rem_flag || op1_is_constant)
4402 op0 = force_reg (compute_mode, op0);
4404 last = get_last_insn ();
4406 /* Promote floor rounding to trunc rounding for unsigned operations. */
4407 if (unsignedp)
4409 if (code == FLOOR_DIV_EXPR)
4410 code = TRUNC_DIV_EXPR;
4411 if (code == FLOOR_MOD_EXPR)
4412 code = TRUNC_MOD_EXPR;
4413 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4414 code = TRUNC_DIV_EXPR;
4417 if (op1 != const0_rtx)
4418 switch (code)
4420 case TRUNC_MOD_EXPR:
4421 case TRUNC_DIV_EXPR:
4422 if (op1_is_constant)
4424 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4425 int size = GET_MODE_BITSIZE (int_mode);
4426 if (unsignedp)
4428 unsigned HOST_WIDE_INT mh, ml;
4429 int pre_shift, post_shift;
4430 int dummy;
4431 wide_int wd = rtx_mode_t (op1, int_mode);
4432 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4434 if (wi::popcount (wd) == 1)
4436 pre_shift = floor_log2 (d);
4437 if (rem_flag)
4439 unsigned HOST_WIDE_INT mask
4440 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4441 remainder
4442 = expand_binop (int_mode, and_optab, op0,
4443 gen_int_mode (mask, int_mode),
4444 remainder, 1, methods);
4445 if (remainder)
4446 return gen_lowpart (mode, remainder);
4448 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4449 pre_shift, tquotient, 1);
4451 else if (size <= HOST_BITS_PER_WIDE_INT)
4453 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4455 /* Most significant bit of divisor is set; emit an scc
4456 insn. */
4457 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4458 int_mode, 1, 1);
4460 else
4462 /* Find a suitable multiplier and right shift count
4463 instead of multiplying with D. */
4465 mh = choose_multiplier (d, size, size,
4466 &ml, &post_shift, &dummy);
4468 /* If the suggested multiplier is more than SIZE bits,
4469 we can do better for even divisors, using an
4470 initial right shift. */
4471 if (mh != 0 && (d & 1) == 0)
4473 pre_shift = ctz_or_zero (d);
4474 mh = choose_multiplier (d >> pre_shift, size,
4475 size - pre_shift,
4476 &ml, &post_shift, &dummy);
4477 gcc_assert (!mh);
4479 else
4480 pre_shift = 0;
4482 if (mh != 0)
4484 rtx t1, t2, t3, t4;
4486 if (post_shift - 1 >= BITS_PER_WORD)
4487 goto fail1;
4489 extra_cost
4490 = (shift_cost (speed, int_mode, post_shift - 1)
4491 + shift_cost (speed, int_mode, 1)
4492 + 2 * add_cost (speed, int_mode));
4493 t1 = expmed_mult_highpart
4494 (int_mode, op0, gen_int_mode (ml, int_mode),
4495 NULL_RTX, 1, max_cost - extra_cost);
4496 if (t1 == 0)
4497 goto fail1;
4498 t2 = force_operand (gen_rtx_MINUS (int_mode,
4499 op0, t1),
4500 NULL_RTX);
4501 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4502 t2, 1, NULL_RTX, 1);
4503 t4 = force_operand (gen_rtx_PLUS (int_mode,
4504 t1, t3),
4505 NULL_RTX);
4506 quotient = expand_shift
4507 (RSHIFT_EXPR, int_mode, t4,
4508 post_shift - 1, tquotient, 1);
4510 else
4512 rtx t1, t2;
4514 if (pre_shift >= BITS_PER_WORD
4515 || post_shift >= BITS_PER_WORD)
4516 goto fail1;
4518 t1 = expand_shift
4519 (RSHIFT_EXPR, int_mode, op0,
4520 pre_shift, NULL_RTX, 1);
4521 extra_cost
4522 = (shift_cost (speed, int_mode, pre_shift)
4523 + shift_cost (speed, int_mode, post_shift));
4524 t2 = expmed_mult_highpart
4525 (int_mode, t1,
4526 gen_int_mode (ml, int_mode),
4527 NULL_RTX, 1, max_cost - extra_cost);
4528 if (t2 == 0)
4529 goto fail1;
4530 quotient = expand_shift
4531 (RSHIFT_EXPR, int_mode, t2,
4532 post_shift, tquotient, 1);
4536 else /* Too wide mode to use tricky code */
4537 break;
4539 insn = get_last_insn ();
4540 if (insn != last)
4541 set_dst_reg_note (insn, REG_EQUAL,
4542 gen_rtx_UDIV (int_mode, op0, op1),
4543 quotient);
4545 else /* TRUNC_DIV, signed */
4547 unsigned HOST_WIDE_INT ml;
4548 int lgup, post_shift;
4549 rtx mlr;
4550 HOST_WIDE_INT d = INTVAL (op1);
4551 unsigned HOST_WIDE_INT abs_d;
4553 /* Not prepared to handle division/remainder by
4554 0xffffffffffffffff8000000000000000 etc. */
4555 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4556 break;
4558 /* Since d might be INT_MIN, we have to cast to
4559 unsigned HOST_WIDE_INT before negating to avoid
4560 undefined signed overflow. */
4561 abs_d = (d >= 0
4562 ? (unsigned HOST_WIDE_INT) d
4563 : - (unsigned HOST_WIDE_INT) d);
4565 /* n rem d = n rem -d */
4566 if (rem_flag && d < 0)
4568 d = abs_d;
4569 op1 = gen_int_mode (abs_d, int_mode);
4572 if (d == 1)
4573 quotient = op0;
4574 else if (d == -1)
4575 quotient = expand_unop (int_mode, neg_optab, op0,
4576 tquotient, 0);
4577 else if (size <= HOST_BITS_PER_WIDE_INT
4578 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4580 /* This case is not handled correctly below. */
4581 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4582 int_mode, 1, 1);
4583 if (quotient == 0)
4584 goto fail1;
4586 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4587 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4588 && (rem_flag
4589 ? smod_pow2_cheap (speed, int_mode)
4590 : sdiv_pow2_cheap (speed, int_mode))
4591 /* We assume that the cheap metric is true if the
4592 optab has an expander for this mode. */
4593 && ((optab_handler ((rem_flag ? smod_optab
4594 : sdiv_optab),
4595 int_mode)
4596 != CODE_FOR_nothing)
4597 || (optab_handler (sdivmod_optab, int_mode)
4598 != CODE_FOR_nothing)))
4600 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4602 if (rem_flag)
4604 remainder = expand_smod_pow2 (int_mode, op0, d);
4605 if (remainder)
4606 return gen_lowpart (mode, remainder);
4609 if (sdiv_pow2_cheap (speed, int_mode)
4610 && ((optab_handler (sdiv_optab, int_mode)
4611 != CODE_FOR_nothing)
4612 || (optab_handler (sdivmod_optab, int_mode)
4613 != CODE_FOR_nothing)))
4614 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4615 int_mode, op0,
4616 gen_int_mode (abs_d,
4617 int_mode),
4618 NULL_RTX, 0);
4619 else
4620 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4622 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4623 negate the quotient. */
4624 if (d < 0)
4626 insn = get_last_insn ();
4627 if (insn != last
4628 && abs_d < (HOST_WIDE_INT_1U
4629 << (HOST_BITS_PER_WIDE_INT - 1)))
4630 set_dst_reg_note (insn, REG_EQUAL,
4631 gen_rtx_DIV (int_mode, op0,
4632 gen_int_mode
4633 (abs_d,
4634 int_mode)),
4635 quotient);
4637 quotient = expand_unop (int_mode, neg_optab,
4638 quotient, quotient, 0);
4641 else if (size <= HOST_BITS_PER_WIDE_INT)
4643 choose_multiplier (abs_d, size, size - 1,
4644 &ml, &post_shift, &lgup);
4645 if (ml < HOST_WIDE_INT_1U << (size - 1))
4647 rtx t1, t2, t3;
4649 if (post_shift >= BITS_PER_WORD
4650 || size - 1 >= BITS_PER_WORD)
4651 goto fail1;
4653 extra_cost = (shift_cost (speed, int_mode, post_shift)
4654 + shift_cost (speed, int_mode, size - 1)
4655 + add_cost (speed, int_mode));
4656 t1 = expmed_mult_highpart
4657 (int_mode, op0, gen_int_mode (ml, int_mode),
4658 NULL_RTX, 0, max_cost - extra_cost);
4659 if (t1 == 0)
4660 goto fail1;
4661 t2 = expand_shift
4662 (RSHIFT_EXPR, int_mode, t1,
4663 post_shift, NULL_RTX, 0);
4664 t3 = expand_shift
4665 (RSHIFT_EXPR, int_mode, op0,
4666 size - 1, NULL_RTX, 0);
4667 if (d < 0)
4668 quotient
4669 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4670 tquotient);
4671 else
4672 quotient
4673 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4674 tquotient);
4676 else
4678 rtx t1, t2, t3, t4;
4680 if (post_shift >= BITS_PER_WORD
4681 || size - 1 >= BITS_PER_WORD)
4682 goto fail1;
4684 ml |= HOST_WIDE_INT_M1U << (size - 1);
4685 mlr = gen_int_mode (ml, int_mode);
4686 extra_cost = (shift_cost (speed, int_mode, post_shift)
4687 + shift_cost (speed, int_mode, size - 1)
4688 + 2 * add_cost (speed, int_mode));
4689 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4690 NULL_RTX, 0,
4691 max_cost - extra_cost);
4692 if (t1 == 0)
4693 goto fail1;
4694 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4695 NULL_RTX);
4696 t3 = expand_shift
4697 (RSHIFT_EXPR, int_mode, t2,
4698 post_shift, NULL_RTX, 0);
4699 t4 = expand_shift
4700 (RSHIFT_EXPR, int_mode, op0,
4701 size - 1, NULL_RTX, 0);
4702 if (d < 0)
4703 quotient
4704 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4705 tquotient);
4706 else
4707 quotient
4708 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4709 tquotient);
4712 else /* Too wide mode to use tricky code */
4713 break;
4715 insn = get_last_insn ();
4716 if (insn != last)
4717 set_dst_reg_note (insn, REG_EQUAL,
4718 gen_rtx_DIV (int_mode, op0, op1),
4719 quotient);
4721 break;
4723 fail1:
4724 delete_insns_since (last);
4725 break;
4727 case FLOOR_DIV_EXPR:
4728 case FLOOR_MOD_EXPR:
4729 /* We will come here only for signed operations. */
4730 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4732 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4733 int size = GET_MODE_BITSIZE (int_mode);
4734 unsigned HOST_WIDE_INT mh, ml;
4735 int pre_shift, lgup, post_shift;
4736 HOST_WIDE_INT d = INTVAL (op1);
4738 if (d > 0)
4740 /* We could just as easily deal with negative constants here,
4741 but it does not seem worth the trouble for GCC 2.6. */
4742 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4744 pre_shift = floor_log2 (d);
4745 if (rem_flag)
4747 unsigned HOST_WIDE_INT mask
4748 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4749 remainder = expand_binop
4750 (int_mode, and_optab, op0,
4751 gen_int_mode (mask, int_mode),
4752 remainder, 0, methods);
4753 if (remainder)
4754 return gen_lowpart (mode, remainder);
4756 quotient = expand_shift
4757 (RSHIFT_EXPR, int_mode, op0,
4758 pre_shift, tquotient, 0);
4760 else
4762 rtx t1, t2, t3, t4;
4764 mh = choose_multiplier (d, size, size - 1,
4765 &ml, &post_shift, &lgup);
4766 gcc_assert (!mh);
4768 if (post_shift < BITS_PER_WORD
4769 && size - 1 < BITS_PER_WORD)
4771 t1 = expand_shift
4772 (RSHIFT_EXPR, int_mode, op0,
4773 size - 1, NULL_RTX, 0);
4774 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4775 NULL_RTX, 0, OPTAB_WIDEN);
4776 extra_cost = (shift_cost (speed, int_mode, post_shift)
4777 + shift_cost (speed, int_mode, size - 1)
4778 + 2 * add_cost (speed, int_mode));
4779 t3 = expmed_mult_highpart
4780 (int_mode, t2, gen_int_mode (ml, int_mode),
4781 NULL_RTX, 1, max_cost - extra_cost);
4782 if (t3 != 0)
4784 t4 = expand_shift
4785 (RSHIFT_EXPR, int_mode, t3,
4786 post_shift, NULL_RTX, 1);
4787 quotient = expand_binop (int_mode, xor_optab,
4788 t4, t1, tquotient, 0,
4789 OPTAB_WIDEN);
4794 else
4796 rtx nsign, t1, t2, t3, t4;
4797 t1 = force_operand (gen_rtx_PLUS (int_mode,
4798 op0, constm1_rtx), NULL_RTX);
4799 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4800 0, OPTAB_WIDEN);
4801 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4802 size - 1, NULL_RTX, 0);
4803 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4804 NULL_RTX);
4805 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4806 NULL_RTX, 0);
4807 if (t4)
4809 rtx t5;
4810 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4811 NULL_RTX, 0);
4812 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4813 tquotient);
4818 if (quotient != 0)
4819 break;
4820 delete_insns_since (last);
4822 /* Try using an instruction that produces both the quotient and
4823 remainder, using truncation. We can easily compensate the quotient
4824 or remainder to get floor rounding, once we have the remainder.
4825 Notice that we also compute the final remainder value here,
4826 and return the result right away. */
4827 if (target == 0 || GET_MODE (target) != compute_mode)
4828 target = gen_reg_rtx (compute_mode);
4830 if (rem_flag)
4832 remainder
4833 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4834 quotient = gen_reg_rtx (compute_mode);
4836 else
4838 quotient
4839 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4840 remainder = gen_reg_rtx (compute_mode);
4843 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4844 quotient, remainder, 0))
4846 /* This could be computed with a branch-less sequence.
4847 Save that for later. */
4848 rtx tem;
4849 rtx_code_label *label = gen_label_rtx ();
4850 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4851 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4852 NULL_RTX, 0, OPTAB_WIDEN);
4853 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4854 expand_dec (quotient, const1_rtx);
4855 expand_inc (remainder, op1);
4856 emit_label (label);
4857 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4860 /* No luck with division elimination or divmod. Have to do it
4861 by conditionally adjusting op0 *and* the result. */
4863 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4864 rtx adjusted_op0;
4865 rtx tem;
4867 quotient = gen_reg_rtx (compute_mode);
4868 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4869 label1 = gen_label_rtx ();
4870 label2 = gen_label_rtx ();
4871 label3 = gen_label_rtx ();
4872 label4 = gen_label_rtx ();
4873 label5 = gen_label_rtx ();
4874 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4875 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4876 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4877 quotient, 0, methods);
4878 if (tem != quotient)
4879 emit_move_insn (quotient, tem);
4880 emit_jump_insn (targetm.gen_jump (label5));
4881 emit_barrier ();
4882 emit_label (label1);
4883 expand_inc (adjusted_op0, const1_rtx);
4884 emit_jump_insn (targetm.gen_jump (label4));
4885 emit_barrier ();
4886 emit_label (label2);
4887 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4888 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4889 quotient, 0, methods);
4890 if (tem != quotient)
4891 emit_move_insn (quotient, tem);
4892 emit_jump_insn (targetm.gen_jump (label5));
4893 emit_barrier ();
4894 emit_label (label3);
4895 expand_dec (adjusted_op0, const1_rtx);
4896 emit_label (label4);
4897 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4898 quotient, 0, methods);
4899 if (tem != quotient)
4900 emit_move_insn (quotient, tem);
4901 expand_dec (quotient, const1_rtx);
4902 emit_label (label5);
4904 break;
4906 case CEIL_DIV_EXPR:
4907 case CEIL_MOD_EXPR:
4908 if (unsignedp)
4910 if (op1_is_constant
4911 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4912 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4913 || INTVAL (op1) >= 0))
4915 scalar_int_mode int_mode
4916 = as_a <scalar_int_mode> (compute_mode);
4917 rtx t1, t2, t3;
4918 unsigned HOST_WIDE_INT d = INTVAL (op1);
4919 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4920 floor_log2 (d), tquotient, 1);
4921 t2 = expand_binop (int_mode, and_optab, op0,
4922 gen_int_mode (d - 1, int_mode),
4923 NULL_RTX, 1, methods);
4924 t3 = gen_reg_rtx (int_mode);
4925 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4926 if (t3 == 0)
4928 rtx_code_label *lab;
4929 lab = gen_label_rtx ();
4930 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4931 expand_inc (t1, const1_rtx);
4932 emit_label (lab);
4933 quotient = t1;
4935 else
4936 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4937 tquotient);
4938 break;
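/* In other words, for an unsigned power-of-two divisor the ceiling quotient
   computed above is (OP0 >> floor_log2 (D)) + ((OP0 & (D - 1)) != 0);
   e.g. 13 CEIL_DIV 4 gives (13 >> 2) + 1 = 4.  */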
4941 /* Try using an instruction that produces both the quotient and
4942 remainder, using truncation. We can easily compensate the
4943 quotient or remainder to get ceiling rounding, once we have the
4944 remainder. Notice that we also compute the final remainder
4945 value here, and return the result right away. */
4946 if (target == 0 || GET_MODE (target) != compute_mode)
4947 target = gen_reg_rtx (compute_mode);
4949 if (rem_flag)
4951 remainder = (REG_P (target)
4952 ? target : gen_reg_rtx (compute_mode));
4953 quotient = gen_reg_rtx (compute_mode);
4955 else
4957 quotient = (REG_P (target)
4958 ? target : gen_reg_rtx (compute_mode));
4959 remainder = gen_reg_rtx (compute_mode);
4962 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4963 remainder, 1))
4965 /* This could be computed with a branch-less sequence.
4966 Save that for later. */
4967 rtx_code_label *label = gen_label_rtx ();
4968 do_cmp_and_jump (remainder, const0_rtx, EQ,
4969 compute_mode, label);
4970 expand_inc (quotient, const1_rtx);
4971 expand_dec (remainder, op1);
4972 emit_label (label);
4973 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4976 /* No luck with division elimination or divmod. Have to do it
4977 by conditionally adjusting op0 *and* the result. */
4979 rtx_code_label *label1, *label2;
4980 rtx adjusted_op0, tem;
4982 quotient = gen_reg_rtx (compute_mode);
4983 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4984 label1 = gen_label_rtx ();
4985 label2 = gen_label_rtx ();
4986 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4987 compute_mode, label1);
4988 emit_move_insn (quotient, const0_rtx);
4989 emit_jump_insn (targetm.gen_jump (label2));
4990 emit_barrier ();
4991 emit_label (label1);
4992 expand_dec (adjusted_op0, const1_rtx);
4993 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4994 quotient, 1, methods);
4995 if (tem != quotient)
4996 emit_move_insn (quotient, tem);
4997 expand_inc (quotient, const1_rtx);
4998 emit_label (label2);
5001 else /* signed */
5003 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
5004 && INTVAL (op1) >= 0)
5006 /* This is extremely similar to the code for the unsigned case
5007 above. For 2.7 we should merge these variants, but for
5008 2.6.1 I don't want to touch the code for unsigned since that
5009 gets used in C. The signed case will only be used by other
5010 languages (Ada). */
5012 rtx t1, t2, t3;
5013 unsigned HOST_WIDE_INT d = INTVAL (op1);
5014 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5015 floor_log2 (d), tquotient, 0);
5016 t2 = expand_binop (compute_mode, and_optab, op0,
5017 gen_int_mode (d - 1, compute_mode),
5018 NULL_RTX, 1, methods);
5019 t3 = gen_reg_rtx (compute_mode);
5020 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5021 compute_mode, 1, 1);
5022 if (t3 == 0)
5024 rtx_code_label *lab;
5025 lab = gen_label_rtx ();
5026 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5027 expand_inc (t1, const1_rtx);
5028 emit_label (lab);
5029 quotient = t1;
5031 else
5032 quotient = force_operand (gen_rtx_PLUS (compute_mode,
5033 t1, t3),
5034 tquotient);
5035 break;
5038 /* Try using an instruction that produces both the quotient and
5039 remainder, using truncation. We can easily compensate the
5040 quotient or remainder to get ceiling rounding, once we have the
5041 remainder. Notice that we also compute the final remainder
5042 value here, and return the result right away. */
5043 if (target == 0 || GET_MODE (target) != compute_mode)
5044 target = gen_reg_rtx (compute_mode);
5045 if (rem_flag)
5047 remainder = (REG_P (target)
5048 ? target : gen_reg_rtx (compute_mode));
5049 quotient = gen_reg_rtx (compute_mode);
5051 else
5053 quotient = (REG_P (target)
5054 ? target : gen_reg_rtx (compute_mode));
5055 remainder = gen_reg_rtx (compute_mode);
5058 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5059 remainder, 0))
5061 /* This could be computed with a branch-less sequence.
5062 Save that for later. */
5063 rtx tem;
5064 rtx_code_label *label = gen_label_rtx ();
5065 do_cmp_and_jump (remainder, const0_rtx, EQ,
5066 compute_mode, label);
5067 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5068 NULL_RTX, 0, OPTAB_WIDEN);
5069 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5070 expand_inc (quotient, const1_rtx);
5071 expand_dec (remainder, op1);
5072 emit_label (label);
5073 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5076 /* No luck with division elimination or divmod. Have to do it
5077 by conditionally adjusting op0 *and* the result. */
5079 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5080 rtx adjusted_op0;
5081 rtx tem;
5083 quotient = gen_reg_rtx (compute_mode);
5084 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5085 label1 = gen_label_rtx ();
5086 label2 = gen_label_rtx ();
5087 label3 = gen_label_rtx ();
5088 label4 = gen_label_rtx ();
5089 label5 = gen_label_rtx ();
5090 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5091 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5092 compute_mode, label1);
5093 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5094 quotient, 0, methods);
5095 if (tem != quotient)
5096 emit_move_insn (quotient, tem);
5097 emit_jump_insn (targetm.gen_jump (label5));
5098 emit_barrier ();
5099 emit_label (label1);
5100 expand_dec (adjusted_op0, const1_rtx);
5101 emit_jump_insn (targetm.gen_jump (label4));
5102 emit_barrier ();
5103 emit_label (label2);
5104 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5105 compute_mode, label3);
5106 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5107 quotient, 0, methods);
5108 if (tem != quotient)
5109 emit_move_insn (quotient, tem);
5110 emit_jump_insn (targetm.gen_jump (label5));
5111 emit_barrier ();
5112 emit_label (label3);
5113 expand_inc (adjusted_op0, const1_rtx);
5114 emit_label (label4);
5115 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5116 quotient, 0, methods);
5117 if (tem != quotient)
5118 emit_move_insn (quotient, tem);
5119 expand_inc (quotient, const1_rtx);
5120 emit_label (label5);
5123 break;
5125 case EXACT_DIV_EXPR:
5126 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5128 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5129 int size = GET_MODE_BITSIZE (int_mode);
5130 HOST_WIDE_INT d = INTVAL (op1);
5131 unsigned HOST_WIDE_INT ml;
5132 int pre_shift;
5133 rtx t1;
5135 pre_shift = ctz_or_zero (d);
5136 ml = invert_mod2n (d >> pre_shift, size);
5137 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5138 pre_shift, NULL_RTX, unsignedp);
5139 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5140 NULL_RTX, 1);
5142 insn = get_last_insn ();
5143 set_dst_reg_note (insn, REG_EQUAL,
5144 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5145 int_mode, op0, op1),
5146 quotient);
5148 break;
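/* Sketch of the EXACT_DIV_EXPR case above: since OP0 is known to be a
   multiple of OP1, dividing by the odd part of OP1 reduces to multiplying
   by its inverse modulo 2^SIZE; any power-of-two factor is removed first by
   the PRE_SHIFT.  E.g. for OP1 = 3 in a 32-bit mode invert_mod2n yields
   0xaaaaaaab, and 21 * 0xaaaaaaab mod 2^32 = 7 = 21 / 3.  */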
5150 case ROUND_DIV_EXPR:
5151 case ROUND_MOD_EXPR:
5152 if (unsignedp)
5154 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5155 rtx tem;
5156 rtx_code_label *label;
5157 label = gen_label_rtx ();
5158 quotient = gen_reg_rtx (int_mode);
5159 remainder = gen_reg_rtx (int_mode);
5160 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5162 rtx tem;
5163 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5164 quotient, 1, methods);
5165 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5166 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5167 remainder, 1, methods);
5169 tem = plus_constant (int_mode, op1, -1);
5170 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5171 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5172 expand_inc (quotient, const1_rtx);
5173 expand_dec (remainder, op1);
5174 emit_label (label);
5176 else
5178 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5179 int size = GET_MODE_BITSIZE (int_mode);
5180 rtx abs_rem, abs_op1, tem, mask;
5181 rtx_code_label *label;
5182 label = gen_label_rtx ();
5183 quotient = gen_reg_rtx (int_mode);
5184 remainder = gen_reg_rtx (int_mode);
5185 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5187 rtx tem;
5188 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5189 quotient, 0, methods);
5190 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5191 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5192 remainder, 0, methods);
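/* Signed round-to-nearest adjustment: skip it when 2 * |remainder| < |OP1|.
   Otherwise MASK is 0 if OP0 and OP1 have the same sign and -1 if they
   differ, so (MASK ^ 1) - MASK is +1 or -1 and (MASK ^ OP1) - MASK is OP1
   or -OP1; adding the former to the quotient and subtracting the latter
   from the remainder adjusts away from zero without branching.  */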
5194 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5195 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5196 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5197 1, NULL_RTX, 1);
5198 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5199 tem = expand_binop (int_mode, xor_optab, op0, op1,
5200 NULL_RTX, 0, OPTAB_WIDEN);
5201 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5202 size - 1, NULL_RTX, 0);
5203 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5204 NULL_RTX, 0, OPTAB_WIDEN);
5205 tem = expand_binop (int_mode, sub_optab, tem, mask,
5206 NULL_RTX, 0, OPTAB_WIDEN);
5207 expand_inc (quotient, tem);
5208 tem = expand_binop (int_mode, xor_optab, mask, op1,
5209 NULL_RTX, 0, OPTAB_WIDEN);
5210 tem = expand_binop (int_mode, sub_optab, tem, mask,
5211 NULL_RTX, 0, OPTAB_WIDEN);
5212 expand_dec (remainder, tem);
5213 emit_label (label);
5215 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5217 default:
5218 gcc_unreachable ();
5221 if (quotient == 0)
5223 if (target && GET_MODE (target) != compute_mode)
5224 target = 0;
5226 if (rem_flag)
5228 /* Try to produce the remainder without producing the quotient.
5229 If we seem to have a divmod pattern that does not require widening,
5230 don't try widening here. We should really have a WIDEN argument
5231 to expand_twoval_binop, since what we'd really like to do here is
5232 1) try a mod insn in compute_mode
5233 2) try a divmod insn in compute_mode
5234 3) try a div insn in compute_mode and multiply-subtract to get
5235 remainder
5236 4) try the same things with widening allowed. */
5237 remainder
5238 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5239 op0, op1, target,
5240 unsignedp,
5241 ((optab_handler (optab2, compute_mode)
5242 != CODE_FOR_nothing)
5243 ? OPTAB_DIRECT : OPTAB_WIDEN));
5244 if (remainder == 0)
5246 /* No luck there. Can we do remainder and divide at once
5247 without a library call? */
5248 remainder = gen_reg_rtx (compute_mode);
5249 if (! expand_twoval_binop ((unsignedp
5250 ? udivmod_optab
5251 : sdivmod_optab),
5252 op0, op1,
5253 NULL_RTX, remainder, unsignedp))
5254 remainder = 0;
5257 if (remainder)
5258 return gen_lowpart (mode, remainder);
5261 /* Produce the quotient. Try a quotient insn, but not a library call.
5262 If we have a divmod in this mode, use it in preference to widening
5263 the div (for this test we assume it will not fail). Note that optab2
5264 is set to the one of the two optabs that the call below will use. */
5265 quotient
5266 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5267 op0, op1, rem_flag ? NULL_RTX : target,
5268 unsignedp,
5269 ((optab_handler (optab2, compute_mode)
5270 != CODE_FOR_nothing)
5271 ? OPTAB_DIRECT : OPTAB_WIDEN));
5273 if (quotient == 0)
5275 /* No luck there. Try a quotient-and-remainder insn,
5276 keeping the quotient alone. */
5277 quotient = gen_reg_rtx (compute_mode);
5278 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5279 op0, op1,
5280 quotient, NULL_RTX, unsignedp))
5282 quotient = 0;
5283 if (! rem_flag)
5284 /* Still no luck. If we are not computing the remainder,
5285 use a library call for the quotient. */
5286 quotient = sign_expand_binop (compute_mode,
5287 udiv_optab, sdiv_optab,
5288 op0, op1, target,
5289 unsignedp, methods);
5294 if (rem_flag)
5296 if (target && GET_MODE (target) != compute_mode)
5297 target = 0;
5299 if (quotient == 0)
5301 /* No divide instruction either. Use library for remainder. */
5302 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5303 op0, op1, target,
5304 unsignedp, methods);
5305 /* No remainder function. Try a quotient-and-remainder
5306 function, keeping the remainder. */
5307 if (!remainder
5308 && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5310 remainder = gen_reg_rtx (compute_mode);
5311 if (!expand_twoval_binop_libfunc
5312 (unsignedp ? udivmod_optab : sdivmod_optab,
5313 op0, op1,
5314 NULL_RTX, remainder,
5315 unsignedp ? UMOD : MOD))
5316 remainder = NULL_RTX;
5319 else
5321 /* We divided. Now finish doing X - Y * (X / Y). */
5322 remainder = expand_mult (compute_mode, quotient, op1,
5323 NULL_RTX, unsignedp);
5324 remainder = expand_binop (compute_mode, sub_optab, op0,
5325 remainder, target, unsignedp,
5326 methods);
5330 if (methods != OPTAB_LIB_WIDEN
5331 && (rem_flag ? remainder : quotient) == NULL_RTX)
5332 return NULL_RTX;
5334 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5337 /* Return a tree node with data type TYPE, describing the value of X.
5338 Usually this is a VAR_DECL, if there is no obvious better choice.
5339 X may be an expression, however we only support those expressions
5340 generated by loop.c. */
5342 tree
5343 make_tree (tree type, rtx x)
5345 tree t;
5347 switch (GET_CODE (x))
5349 case CONST_INT:
5350 case CONST_WIDE_INT:
5351 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5352 return t;
5354 case CONST_DOUBLE:
5355 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5356 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5357 t = wide_int_to_tree (type,
5358 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5359 HOST_BITS_PER_WIDE_INT * 2));
5360 else
5361 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5363 return t;
5365 case CONST_VECTOR:
5367 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5368 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5369 tree itype = TREE_TYPE (type);
5371 /* Build a tree with vector elements. */
5372 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5373 unsigned int count = elts.encoded_nelts ();
5374 for (unsigned int i = 0; i < count; ++i)
5376 rtx elt = CONST_VECTOR_ELT (x, i);
5377 elts.quick_push (make_tree (itype, elt));
5380 return elts.build ();
5383 case PLUS:
5384 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5385 make_tree (type, XEXP (x, 1)));
5387 case MINUS:
5388 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5389 make_tree (type, XEXP (x, 1)));
5391 case NEG:
5392 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5394 case MULT:
5395 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5396 make_tree (type, XEXP (x, 1)));
5398 case ASHIFT:
5399 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5400 make_tree (type, XEXP (x, 1)));
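/* There is only one tree code for right shifts; whether it is logical or
   arithmetic follows the signedness of the type.  So a logical shift is
   built on the unsigned variant of TYPE, an arithmetic shift on the signed
   variant, and the result converted back to TYPE.  */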
5402 case LSHIFTRT:
5403 t = unsigned_type_for (type);
5404 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5405 make_tree (t, XEXP (x, 0)),
5406 make_tree (type, XEXP (x, 1))));
5408 case ASHIFTRT:
5409 t = signed_type_for (type);
5410 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5411 make_tree (t, XEXP (x, 0)),
5412 make_tree (type, XEXP (x, 1))));
5414 case DIV:
5415 if (TREE_CODE (type) != REAL_TYPE)
5416 t = signed_type_for (type);
5417 else
5418 t = type;
5420 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5421 make_tree (t, XEXP (x, 0)),
5422 make_tree (t, XEXP (x, 1))));
5423 case UDIV:
5424 t = unsigned_type_for (type);
5425 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5426 make_tree (t, XEXP (x, 0)),
5427 make_tree (t, XEXP (x, 1))));
5429 case SIGN_EXTEND:
5430 case ZERO_EXTEND:
5431 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5432 GET_CODE (x) == ZERO_EXTEND);
5433 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5435 case CONST:
5436 return make_tree (type, XEXP (x, 0));
5438 case SYMBOL_REF:
5439 t = SYMBOL_REF_DECL (x);
5440 if (t)
5441 return fold_convert (type, build_fold_addr_expr (t));
5442 /* fall through. */
5444 default:
5445 if (CONST_POLY_INT_P (x))
5446 return wide_int_to_tree (type, const_poly_int_value (x));
5448 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5450 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5451 address mode to pointer mode. */
5452 if (POINTER_TYPE_P (type))
5453 x = convert_memory_address_addr_space
5454 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5456 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5457 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5458 t->decl_with_rtl.rtl = x;
5460 return t;
5464 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5465 and returning TARGET.
5467 If TARGET is 0, a pseudo-register or constant is returned. */
5469 rtx
5470 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5472 rtx tem = 0;
5474 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5475 tem = simplify_binary_operation (AND, mode, op0, op1);
5476 if (tem == 0)
5477 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5479 if (target == 0)
5480 target = tem;
5481 else if (tem != target)
5482 emit_move_insn (target, tem);
5483 return target;
5486 /* Helper function for emit_store_flag. */
5487 static rtx
5488 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5489 machine_mode mode, machine_mode compare_mode,
5490 int unsignedp, rtx x, rtx y, int normalizep,
5491 machine_mode target_mode)
5493 class expand_operand ops[4];
5494 rtx op0, comparison, subtarget;
5495 rtx_insn *last;
5496 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5497 scalar_int_mode int_target_mode;
5499 last = get_last_insn ();
5500 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5501 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5502 if (!x || !y)
5504 delete_insns_since (last);
5505 return NULL_RTX;
5508 if (target_mode == VOIDmode)
5509 int_target_mode = result_mode;
5510 else
5511 int_target_mode = as_a <scalar_int_mode> (target_mode);
5512 if (!target)
5513 target = gen_reg_rtx (int_target_mode);
5515 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5517 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5518 create_fixed_operand (&ops[1], comparison);
5519 create_fixed_operand (&ops[2], x);
5520 create_fixed_operand (&ops[3], y);
5521 if (!maybe_expand_insn (icode, 4, ops))
5523 delete_insns_since (last);
5524 return NULL_RTX;
5526 subtarget = ops[0].value;
5528 /* If we are converting to a wider mode, first convert to
5529 INT_TARGET_MODE, then normalize. This produces better combining
5530 opportunities on machines that have a SIGN_EXTRACT when we are
5531 testing a single bit. This mostly benefits the 68k.
5533 If STORE_FLAG_VALUE does not have the sign bit set when
5534 interpreted in MODE, we can do this conversion as unsigned, which
5535 is usually more efficient. */
5536 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5538 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5539 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5541 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5542 convert_move (target, subtarget, unsignedp);
5544 op0 = target;
5545 result_mode = int_target_mode;
5547 else
5548 op0 = subtarget;
5550 /* If we want to keep subexpressions around, don't reuse our last
5551 target. */
5552 if (optimize)
5553 subtarget = 0;
5555 /* Now normalize to the proper value in MODE. Sometimes we don't
5556 have to do anything. */
5557 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5559 /* STORE_FLAG_VALUE might be the most negative number, so write
5560 the comparison this way to avoid a compile-time warning. */
5561 else if (- normalizep == STORE_FLAG_VALUE)
5562 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5564 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5565 it hard to use a value of just the sign bit due to ANSI integer
5566 constant typing rules. */
5567 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5568 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5569 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5570 normalizep == 1);
5571 else
5573 gcc_assert (STORE_FLAG_VALUE & 1);
5575 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5576 if (normalizep == -1)
5577 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5580 /* If we were converting to a smaller mode, do the conversion now. */
5581 if (int_target_mode != result_mode)
5583 convert_move (target, op0, 0);
5584 return target;
5586 else
5587 return op0;
5591 /* A subroutine of emit_store_flag only including "tricks" that do not
5592 need a recursive call. These are kept separate to avoid infinite
5593 loops. */
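/* The tricks tried here include: folding comparisons against 1 and -1 into
   comparisons against zero, reducing a double-word comparison against zero
   or -1 to a single-word one, computing A < 0 or A >= 0 with a sign-bit
   shift, and finally the target's cstore patterns themselves.  */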
5595 static rtx
5596 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5597 machine_mode mode, int unsignedp, int normalizep,
5598 machine_mode target_mode)
5600 rtx subtarget;
5601 enum insn_code icode;
5602 machine_mode compare_mode;
5603 enum mode_class mclass;
5604 enum rtx_code scode;
5606 if (unsignedp)
5607 code = unsigned_condition (code);
5608 scode = swap_condition (code);
5610 /* If one operand is constant, make it the second one. Only do this
5611 if the other operand is not constant as well. */
5613 if (swap_commutative_operands_p (op0, op1))
5615 std::swap (op0, op1);
5616 code = swap_condition (code);
5619 if (mode == VOIDmode)
5620 mode = GET_MODE (op0);
5622 if (CONST_SCALAR_INT_P (op1))
5623 canonicalize_comparison (mode, &code, &op1);
5625 /* For some comparisons with 1 and -1, we can convert this to
5626 comparisons with zero. This will often produce more opportunities for
5627 store-flag insns. */
5629 switch (code)
5631 case LT:
5632 if (op1 == const1_rtx)
5633 op1 = const0_rtx, code = LE;
5634 break;
5635 case LE:
5636 if (op1 == constm1_rtx)
5637 op1 = const0_rtx, code = LT;
5638 break;
5639 case GE:
5640 if (op1 == const1_rtx)
5641 op1 = const0_rtx, code = GT;
5642 break;
5643 case GT:
5644 if (op1 == constm1_rtx)
5645 op1 = const0_rtx, code = GE;
5646 break;
5647 case GEU:
5648 if (op1 == const1_rtx)
5649 op1 = const0_rtx, code = NE;
5650 break;
5651 case LTU:
5652 if (op1 == const1_rtx)
5653 op1 = const0_rtx, code = EQ;
5654 break;
5655 default:
5656 break;
5659 /* If we are comparing a double-word integer with zero or -1, we can
5660 convert the comparison into one involving a single word. */
5661 scalar_int_mode int_mode;
5662 if (is_int_mode (mode, &int_mode)
5663 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5664 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5666 rtx tem;
5667 if ((code == EQ || code == NE)
5668 && (op1 == const0_rtx || op1 == constm1_rtx))
5670 rtx op00, op01;
5672 /* Do a logical OR or AND of the two words and compare the
5673 result. */
5674 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5675 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5676 tem = expand_binop (word_mode,
5677 op1 == const0_rtx ? ior_optab : and_optab,
5678 op00, op01, NULL_RTX, unsignedp,
5679 OPTAB_DIRECT);
5681 if (tem != 0)
5682 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5683 unsignedp, normalizep);
5685 else if ((code == LT || code == GE) && op1 == const0_rtx)
5687 rtx op0h;
5689 /* If testing the sign bit, can just test on high word. */
5690 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5691 subreg_highpart_offset (word_mode,
5692 int_mode));
5693 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5694 unsignedp, normalizep);
5696 else
5697 tem = NULL_RTX;
5699 if (tem)
5701 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5702 return tem;
5703 if (!target)
5704 target = gen_reg_rtx (target_mode);
5706 convert_move (target, tem,
5707 !val_signbit_known_set_p (word_mode,
5708 (normalizep ? normalizep
5709 : STORE_FLAG_VALUE)));
5710 return target;
5714 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5715 complement of A (for GE) and shifting the sign bit to the low bit. */
5716 if (op1 == const0_rtx && (code == LT || code == GE)
5717 && is_int_mode (mode, &int_mode)
5718 && (normalizep || STORE_FLAG_VALUE == 1
5719 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5721 scalar_int_mode int_target_mode;
5722 subtarget = target;
5724 if (!target)
5725 int_target_mode = int_mode;
5726 else
5728 /* If the result is to be wider than OP0, it is best to convert it
5729 first. If it is to be narrower, it is *incorrect* to convert it
5730 first. */
5731 int_target_mode = as_a <scalar_int_mode> (target_mode);
5732 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5734 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5735 int_mode = int_target_mode;
5739 if (int_target_mode != int_mode)
5740 subtarget = 0;
5742 if (code == GE)
5743 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5744 ((STORE_FLAG_VALUE == 1 || normalizep)
5745 ? 0 : subtarget), 0);
5747 if (STORE_FLAG_VALUE == 1 || normalizep)
5748 /* If we are supposed to produce a 0/1 value, we want to do
5749 a logical shift from the sign bit to the low-order bit; for
5750 a -1/0 value, we do an arithmetic shift. */
5751 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5752 GET_MODE_BITSIZE (int_mode) - 1,
5753 subtarget, normalizep != -1);
5755 if (int_mode != int_target_mode)
5756 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5758 return op0;
5761 mclass = GET_MODE_CLASS (mode);
5762 FOR_EACH_MODE_FROM (compare_mode, mode)
5764 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5765 icode = optab_handler (cstore_optab, optab_mode);
5766 if (icode != CODE_FOR_nothing)
5768 do_pending_stack_adjust ();
5769 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5770 unsignedp, op0, op1, normalizep, target_mode);
5771 if (tem)
5772 return tem;
5774 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5776 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5777 unsignedp, op1, op0, normalizep, target_mode);
5778 if (tem)
5779 return tem;
5781 break;
5785 return 0;
5788 /* Subroutine of emit_store_flag that handles cases in which the operands
5789 are scalar integers. SUBTARGET is the target to use for temporary
5790 operations and TRUEVAL is the value to store when the condition is
5791 true. All other arguments are as for emit_store_flag. */
5793 static rtx
5794 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5795 rtx op1, scalar_int_mode mode, int unsignedp,
5796 int normalizep, rtx trueval)
5798 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5799 rtx_insn *last = get_last_insn ();
5801 /* If this is an equality comparison of integers, we can try to exclusive-or
5802 (or subtract) the two operands and use a recursive call to try the
5803 comparison with zero. Don't do any of these cases if branches are
5804 very cheap. */
5806 if ((code == EQ || code == NE) && op1 != const0_rtx)
5808 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5809 OPTAB_WIDEN);
5811 if (tem == 0)
5812 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5813 OPTAB_WIDEN);
5814 if (tem != 0)
5815 tem = emit_store_flag (target, code, tem, const0_rtx,
5816 mode, unsignedp, normalizep);
5817 if (tem != 0)
5818 return tem;
5820 delete_insns_since (last);
5823 /* For integer comparisons, try the reverse comparison. However, for
5824 small X, if we would have to extend it anyway, implementing "X != 0"
5825 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5826 rtx_code rcode = reverse_condition (code);
5827 if (can_compare_p (rcode, mode, ccp_store_flag)
5828 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5829 && code == NE
5830 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5831 && op1 == const0_rtx))
5833 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5834 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5836 /* Again, for the reverse comparison, use either an addition or a XOR. */
5837 if (want_add
5838 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5839 optimize_insn_for_speed_p ()) == 0)
5841 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5842 STORE_FLAG_VALUE, target_mode);
5843 if (tem != 0)
5844 tem = expand_binop (target_mode, add_optab, tem,
5845 gen_int_mode (normalizep, target_mode),
5846 target, 0, OPTAB_WIDEN);
5847 if (tem != 0)
5848 return tem;
5850 else if (!want_add
5851 && rtx_cost (trueval, mode, XOR, 1,
5852 optimize_insn_for_speed_p ()) == 0)
5854 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5855 normalizep, target_mode);
5856 if (tem != 0)
5857 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5858 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5859 if (tem != 0)
5860 return tem;
5863 delete_insns_since (last);
5866 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5867 the constant zero. Reject all other comparisons at this point. Only
5868 do LE and GT if branches are expensive since they are expensive on
5869 2-operand machines. */
5871 if (op1 != const0_rtx
5872 || (code != EQ && code != NE
5873 && (BRANCH_COST (optimize_insn_for_speed_p (),
5874 false) <= 1 || (code != LE && code != GT))))
5875 return 0;
5877 /* Try to put the result of the comparison in the sign bit. Assume we can't
5878 do the necessary operation below. */
5880 rtx tem = 0;
5882 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5883 the sign bit set. */
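/* When A is positive, A and A - 1 are both non-negative, so the IOR leaves
   the sign bit clear; when A is zero, A - 1 is -1 and sets it; when A is
   negative, A itself supplies it.  */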
5885 if (code == LE)
5887 /* This is destructive, so SUBTARGET can't be OP0. */
5888 if (rtx_equal_p (subtarget, op0))
5889 subtarget = 0;
5891 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5892 OPTAB_WIDEN);
5893 if (tem)
5894 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5895 OPTAB_WIDEN);
5898 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5899 number of bits in the mode of OP0, minus one. */
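/* The arithmetic shift gives 0 for A >= 0 and -1 for A < 0, so the
   difference is -A or -A - 1 respectively; its sign bit is set exactly
   when A > 0.  */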
5901 if (code == GT)
5903 if (rtx_equal_p (subtarget, op0))
5904 subtarget = 0;
5906 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5907 GET_MODE_BITSIZE (mode) - 1,
5908 subtarget, 0);
5909 if (tem)
5910 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5911 OPTAB_WIDEN);
5914 if (code == EQ || code == NE)
5916 /* For EQ or NE, one way to do the comparison is to apply an operation
5917 that converts the operand into a positive number if it is nonzero
5918 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5919 for NE we negate. This puts the result in the sign bit. Then we
5920 normalize with a shift, if needed.
5922 Two operations that can do the above actions are ABS and FFS, so try
5923 them. If that doesn't work, and MODE is smaller than a full word,
5924 we can use zero-extension to the wider mode (an unsigned conversion)
5925 as the operation. */
5927 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5928 that is compensated by the subsequent overflow when subtracting
5929 one / negating. */
5931 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5932 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5933 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5934 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5935 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5937 tem = convert_modes (word_mode, mode, op0, 1);
5938 mode = word_mode;
5941 if (tem != 0)
5943 if (code == EQ)
5944 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5945 0, OPTAB_WIDEN);
5946 else
5947 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5950 /* If we couldn't do it that way, for NE we can "or" the two's complement
5951 of the value with itself. For EQ, we take the one's complement of
5952 that "or", which is an extra insn, so we only handle EQ if branches
5953 are expensive. */
5955 if (tem == 0
5956 && (code == NE
5957 || BRANCH_COST (optimize_insn_for_speed_p (),
5958 false) > 1))
5960 if (rtx_equal_p (subtarget, op0))
5961 subtarget = 0;
5963 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5964 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5965 OPTAB_WIDEN);
5967 if (tem && code == EQ)
5968 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5972 if (tem && normalizep)
5973 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5974 GET_MODE_BITSIZE (mode) - 1,
5975 subtarget, normalizep == 1);
5977 if (tem)
5979 if (!target)
5981 else if (GET_MODE (tem) != target_mode)
5983 convert_move (target, tem, 0);
5984 tem = target;
5986 else if (!subtarget)
5988 emit_move_insn (target, tem);
5989 tem = target;
5992 else
5993 delete_insns_since (last);
5995 return tem;
5998 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5999 and storing in TARGET. Normally return TARGET.
6000 Return 0 if that cannot be done.
6002 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
6003 it is VOIDmode, they cannot both be CONST_INT.
6005 UNSIGNEDP is for the case where we have to widen the operands
6006 to perform the operation. It says to use zero-extension.
6008 NORMALIZEP is 1 if we should convert the result to be either zero
6009 or one. NORMALIZEP is -1 if we should convert the result to be
6010 either zero or -1. If NORMALIZEP is zero, the result will be left
6011 "raw" out of the scc insn. */
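/* For example, emit_store_flag (target, EQ, x, const0_rtx, SImode, 0, 1)
   tries to set TARGET to 1 when X is zero and to 0 otherwise, and returns
   0 if no reasonable instruction sequence exists.  */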
6013 rtx
6014 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6015 machine_mode mode, int unsignedp, int normalizep)
6017 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6018 enum rtx_code rcode;
6019 rtx subtarget;
6020 rtx tem, trueval;
6021 rtx_insn *last;
6023 /* If we compare constants, we shouldn't use a store-flag operation,
6024 but a constant load. We can get there via the vanilla route that
6025 usually generates a compare-branch sequence, but will in this case
6026 fold the comparison to a constant, and thus elide the branch. */
6027 if (CONSTANT_P (op0) && CONSTANT_P (op1))
6028 return NULL_RTX;
6030 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6031 target_mode);
6032 if (tem)
6033 return tem;
6035 /* If we reached here, we can't do this with a scc insn, however there
6036 are some comparisons that can be done in other ways. Don't do any
6037 of these cases if branches are very cheap. */
6038 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6039 return 0;
6041 /* See what we need to return. We can only return a 1, -1, or the
6042 sign bit. */
6044 if (normalizep == 0)
6046 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6047 normalizep = STORE_FLAG_VALUE;
6049 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6051 else
6052 return 0;
6055 last = get_last_insn ();
6057 /* If optimizing, use different pseudo registers for each insn, instead
6058 of reusing the same pseudo. This leads to better CSE, but slows
6059 down the compiler, since there are more pseudos. */
6060 subtarget = (!optimize
6061 && (target_mode == mode)) ? target : NULL_RTX;
6062 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6064 /* For floating-point comparisons, try the reverse comparison or try
6065 changing the "orderedness" of the comparison. */
6066 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6068 enum rtx_code first_code;
6069 bool and_them;
6071 rcode = reverse_condition_maybe_unordered (code);
6072 if (can_compare_p (rcode, mode, ccp_store_flag)
6073 && (code == ORDERED || code == UNORDERED
6074 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6075 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6077 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6078 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6080 /* For the reverse comparison, use either an addition or a XOR. */
6081 if (want_add
6082 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6083 optimize_insn_for_speed_p ()) == 0)
6085 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6086 STORE_FLAG_VALUE, target_mode);
6087 if (tem)
6088 return expand_binop (target_mode, add_optab, tem,
6089 gen_int_mode (normalizep, target_mode),
6090 target, 0, OPTAB_WIDEN);
6092 else if (!want_add
6093 && rtx_cost (trueval, mode, XOR, 1,
6094 optimize_insn_for_speed_p ()) == 0)
6096 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6097 normalizep, target_mode);
6098 if (tem)
6099 return expand_binop (target_mode, xor_optab, tem, trueval,
6100 target, INTVAL (trueval) >= 0,
6101 OPTAB_WIDEN);
6105 delete_insns_since (last);
6107 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6108 if (code == ORDERED || code == UNORDERED)
6109 return 0;
6111 and_them = split_comparison (code, mode, &first_code, &code);
6113 /* If there are no NaNs, the first comparison should always fall through.
6114 Effectively change the comparison to the other one. */
6115 if (!HONOR_NANS (mode))
6117 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6118 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6119 target_mode);
6122 if (!HAVE_conditional_move)
6123 return 0;
6125 /* Do not turn a trapping comparison into a non-trapping one. */
6126 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6127 && flag_trapping_math)
6128 return 0;
6130 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6131 conditional move. */
6132 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6133 normalizep, target_mode);
6134 if (tem == 0)
6135 return 0;
6137 if (and_them)
6138 tem = emit_conditional_move (target, { code, op0, op1, mode },
6139 tem, const0_rtx, GET_MODE (tem), 0);
6140 else
6141 tem = emit_conditional_move (target, { code, op0, op1, mode },
6142 trueval, tem, GET_MODE (tem), 0);
6144 if (tem == 0)
6145 delete_insns_since (last);
6146 return tem;
6149 /* The remaining tricks only apply to integer comparisons. */
6151 scalar_int_mode int_mode;
6152 if (is_int_mode (mode, &int_mode))
6153 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6154 unsignedp, normalizep, trueval);
6156 return 0;
6159 /* Like emit_store_flag, but always succeeds. */
6161 rtx
6162 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6163 machine_mode mode, int unsignedp, int normalizep)
6165 rtx tem;
6166 rtx_code_label *label;
6167 rtx trueval, falseval;
6169 /* First see if emit_store_flag can do the job. */
6170 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6171 if (tem != 0)
6172 return tem;
6174 /* If one operand is constant, make it the second one. Only do this
6175 if the other operand is not constant as well. */
6176 if (swap_commutative_operands_p (op0, op1))
6178 std::swap (op0, op1);
6179 code = swap_condition (code);
6182 if (mode == VOIDmode)
6183 mode = GET_MODE (op0);
6185 if (!target)
6186 target = gen_reg_rtx (word_mode);
6188 /* If this failed, we have to do this with set/compare/jump/set code.
6189 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6190 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6191 if (code == NE
6192 && GET_MODE_CLASS (mode) == MODE_INT
6193 && REG_P (target)
6194 && op0 == target
6195 && op1 == const0_rtx)
6197 label = gen_label_rtx ();
6198 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6199 NULL_RTX, NULL, label,
6200 profile_probability::uninitialized ());
6201 emit_move_insn (target, trueval);
6202 emit_label (label);
6203 return target;
6206 if (!REG_P (target)
6207 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6208 target = gen_reg_rtx (GET_MODE (target));
6210 /* Jump in the right direction if the target cannot implement CODE
6211 but can jump on its reverse condition. */
6212 falseval = const0_rtx;
6213 if (! can_compare_p (code, mode, ccp_jump)
6214 && (! FLOAT_MODE_P (mode)
6215 || code == ORDERED || code == UNORDERED
6216 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6217 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6219 enum rtx_code rcode;
6220 if (FLOAT_MODE_P (mode))
6221 rcode = reverse_condition_maybe_unordered (code);
6222 else
6223 rcode = reverse_condition (code);
6225 /* Canonicalize to UNORDERED for the libcall. */
6226 if (can_compare_p (rcode, mode, ccp_jump)
6227 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6229 falseval = trueval;
6230 trueval = const0_rtx;
6231 code = rcode;
6235 emit_move_insn (target, trueval);
6236 label = gen_label_rtx ();
6237 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6238 label, profile_probability::uninitialized ());
6240 emit_move_insn (target, falseval);
6241 emit_label (label);
6243 return target;
6246 /* Helper function for canonicalize_comparison. Swap between inclusive
6247 and exclusive ranges in order to create an equivalent comparison. See
6248 canonicalize_comparison for the possible cases. */
6250 static enum rtx_code
6251 equivalent_cmp_code (enum rtx_code code)
6253 switch (code)
6255 case GT:
6256 return GE;
6257 case GE:
6258 return GT;
6259 case LT:
6260 return LE;
6261 case LE:
6262 return LT;
6263 case GTU:
6264 return GEU;
6265 case GEU:
6266 return GTU;
6267 case LTU:
6268 return LEU;
6269 case LEU:
6270 return LTU;
6272 default:
6273 return code;
6277 /* Choose the more appropriate immediate in scalar integer comparisons. The
6278 purpose of this is to end up with an immediate which can be loaded into a
6279 register in fewer moves, if possible.
6281 For each integer comparison there exists an equivalent choice:
6282 i) a > b or a >= b + 1
6283 ii) a <= b or a < b + 1
6284 iii) a >= b or a > b - 1
6285 iv) a < b or a <= b - 1
6287 MODE is the mode of the first operand.
6288 CODE points to the comparison code.
6289 IMM points to the rtx containing the immediate. *IMM must satisfy
6290 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6291 on exit. */
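/* For example, on a hypothetical target where 0x1000 can be materialized
   in one instruction but 0xfff cannot, "x > 0xfff" (case i above) would be
   rewritten as "x >= 0x1000"; the insn_cost comparison below decides.  */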
6293 void
6294 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6296 if (!SCALAR_INT_MODE_P (mode))
6297 return;
6299 int to_add = 0;
6300 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6302 /* Extract the immediate value from the rtx. */
6303 wide_int imm_val = rtx_mode_t (*imm, mode);
6305 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6306 to_add = 1;
6307 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6308 to_add = -1;
6309 else
6310 return;
6312 /* Check for overflow/underflow in the case of signed values and
6313 wrapping around in the case of unsigned values. If any occur,
6314 cancel the optimization. */
6315 wi::overflow_type overflow = wi::OVF_NONE;
6316 wide_int imm_modif;
6318 if (to_add == 1)
6319 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6320 else
6321 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6323 if (overflow)
6324 return;
6326 /* The following creates a pseudo; if we cannot do that, bail out. */
6327 if (!can_create_pseudo_p ())
6328 return;
6330 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6331 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6333 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6334 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6336 /* Update the immediate and the code. */
6337 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6339 *code = equivalent_cmp_code (*code);
6340 *imm = new_imm;
6346 /* Perform a possibly multi-word comparison and conditional jump to LABEL
6347 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6348 now a thin wrapper around do_compare_rtx_and_jump. */
6350 static void
6351 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6352 rtx_code_label *label)
6354 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6355 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6356 NULL, label, profile_probability::uninitialized ());