[official-gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2021 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "optabs.h"
35 #include "expmed.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 poly_uint64, poly_uint64,
56 machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 poly_uint64, poly_uint64,
61 rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67 unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT,
69 poly_uint64, poly_uint64,
70 rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, rtx,
74 machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76 unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79 unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83 unsigned HOST_WIDE_INT,
84 unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91 The mask is truncated if necessary to the width of mode MODE. The
92 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
94 static inline rtx
95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
97 return immed_wide_int_const
98 (wi::shifted_mask (bitpos, bitsize, complement,
99 GET_MODE_PRECISION (mode)), mode);
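/* For example, mask_rtx (SImode, 8, 4, false) yields the constant
   0x00000f00, and mask_rtx (SImode, 8, 4, true) yields 0xfffff0ff.  */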
102 /* Test whether a value is zero or a power of two. */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
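/* For example, 8 & 7 == 0 and 0 & -1 == 0, so 8 and 0 satisfy the test,
   while 6 & 5 == 4, so 6 does not.  */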
106 struct init_expmed_rtl
108 rtx reg;
109 rtx plus;
110 rtx neg;
111 rtx mult;
112 rtx sdiv;
113 rtx udiv;
114 rtx sdiv_32;
115 rtx smod_32;
116 rtx wide_mult;
117 rtx wide_lshr;
118 rtx wide_trunc;
119 rtx shift;
120 rtx shift_mult;
121 rtx shift_add;
122 rtx shift_sub0;
123 rtx shift_sub1;
124 rtx zext;
125 rtx trunc;
127 rtx pow2[MAX_BITS_PER_WORD];
128 rtx cint[MAX_BITS_PER_WORD];
131 static void
132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133 scalar_int_mode from_mode, bool speed)
135 int to_size, from_size;
136 rtx which;
138 to_size = GET_MODE_PRECISION (to_mode);
139 from_size = GET_MODE_PRECISION (from_mode);
141 /* Most partial integer modes have a precision less than that of the
142 "full" integer mode required for storage. If one doesn't, reduce
143 its bit size by one here so that the comparison below still sees
144 it as narrower than the full mode. */
145 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146 && pow2p_hwi (to_size))
147 to_size --;
148 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149 && pow2p_hwi (from_size))
150 from_size --;
152 /* Assume cost of zero-extend and sign-extend is the same. */
153 which = (to_size < from_size ? all->trunc : all->zext);
155 PUT_MODE (all->reg, from_mode);
156 set_convert_cost (to_mode, from_mode, speed,
157 set_src_cost (which, to_mode, speed));
158 /* Restore all->reg's mode. */
159 PUT_MODE (all->reg, to_mode);
162 static void
163 init_expmed_one_mode (struct init_expmed_rtl *all,
164 machine_mode mode, int speed)
166 int m, n, mode_bitsize;
167 machine_mode mode_from;
169 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
171 PUT_MODE (all->reg, mode);
172 PUT_MODE (all->plus, mode);
173 PUT_MODE (all->neg, mode);
174 PUT_MODE (all->mult, mode);
175 PUT_MODE (all->sdiv, mode);
176 PUT_MODE (all->udiv, mode);
177 PUT_MODE (all->sdiv_32, mode);
178 PUT_MODE (all->smod_32, mode);
179 PUT_MODE (all->wide_trunc, mode);
180 PUT_MODE (all->shift, mode);
181 PUT_MODE (all->shift_mult, mode);
182 PUT_MODE (all->shift_add, mode);
183 PUT_MODE (all->shift_sub0, mode);
184 PUT_MODE (all->shift_sub1, mode);
185 PUT_MODE (all->zext, mode);
186 PUT_MODE (all->trunc, mode);
188 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
189 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
190 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
191 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
192 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
194 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
195 <= 2 * add_cost (speed, mode)));
196 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
197 <= 4 * add_cost (speed, mode)));
199 set_shift_cost (speed, mode, 0, 0);
201 int cost = add_cost (speed, mode);
202 set_shiftadd_cost (speed, mode, 0, cost);
203 set_shiftsub0_cost (speed, mode, 0, cost);
204 set_shiftsub1_cost (speed, mode, 0, cost);
207 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
208 for (m = 1; m < n; m++)
210 XEXP (all->shift, 1) = all->cint[m];
211 XEXP (all->shift_mult, 1) = all->pow2[m];
213 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
214 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
215 speed));
216 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
217 speed));
218 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
219 speed));
222 scalar_int_mode int_mode_to;
223 if (is_a <scalar_int_mode> (mode, &int_mode_to))
225 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
226 mode_from = (machine_mode)(mode_from + 1))
227 init_expmed_one_conv (all, int_mode_to,
228 as_a <scalar_int_mode> (mode_from), speed);
230 scalar_int_mode wider_mode;
231 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
232 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
234 PUT_MODE (all->reg, mode);
235 PUT_MODE (all->zext, wider_mode);
236 PUT_MODE (all->wide_mult, wider_mode);
237 PUT_MODE (all->wide_lshr, wider_mode);
238 XEXP (all->wide_lshr, 1)
239 = gen_int_shift_amount (wider_mode, mode_bitsize);
241 set_mul_widen_cost (speed, wider_mode,
242 set_src_cost (all->wide_mult, wider_mode, speed));
243 set_mul_highpart_cost (speed, int_mode_to,
244 set_src_cost (all->wide_trunc,
245 int_mode_to, speed));
250 void
251 init_expmed (void)
253 struct init_expmed_rtl all;
254 machine_mode mode = QImode;
255 int m, speed;
257 memset (&all, 0, sizeof all);
258 for (m = 1; m < MAX_BITS_PER_WORD; m++)
260 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
261 all.cint[m] = GEN_INT (m);
264 /* Avoid using hard regs in ways which may be unsupported. */
265 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
266 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
267 all.neg = gen_rtx_NEG (mode, all.reg);
268 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
269 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
270 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
271 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
272 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
273 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
274 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
275 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
276 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
277 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
278 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
279 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
280 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
281 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
282 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
284 for (speed = 0; speed < 2; speed++)
286 crtl->maybe_hot_insn_p = speed;
287 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
289 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
290 mode = (machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
293 if (MIN_MODE_PARTIAL_INT != VOIDmode)
294 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
295 mode = (machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
298 if (MIN_MODE_VECTOR_INT != VOIDmode)
299 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
300 mode = (machine_mode)(mode + 1))
301 init_expmed_one_mode (&all, mode, speed);
304 if (alg_hash_used_p ())
306 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
307 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
309 else
310 set_alg_hash_used_p (true);
311 default_rtl_profile ();
313 ggc_free (all.trunc);
314 ggc_free (all.shift_sub1);
315 ggc_free (all.shift_sub0);
316 ggc_free (all.shift_add);
317 ggc_free (all.shift_mult);
318 ggc_free (all.shift);
319 ggc_free (all.wide_trunc);
320 ggc_free (all.wide_lshr);
321 ggc_free (all.wide_mult);
322 ggc_free (all.zext);
323 ggc_free (all.smod_32);
324 ggc_free (all.sdiv_32);
325 ggc_free (all.udiv);
326 ggc_free (all.sdiv);
327 ggc_free (all.mult);
328 ggc_free (all.neg);
329 ggc_free (all.plus);
330 ggc_free (all.reg);
333 /* Return an rtx representing minus the value of X.
334 MODE is the intended mode of the result,
335 useful if X is a CONST_INT. */
337 rtx
338 negate_rtx (machine_mode mode, rtx x)
340 rtx result = simplify_unary_operation (NEG, mode, x, mode);
342 if (result == 0)
343 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
345 return result;
348 /* Whether reverse storage order is supported on the target. */
349 static int reverse_storage_order_supported = -1;
351 /* Check whether reverse storage order is supported on the target. */
353 static void
354 check_reverse_storage_order_support (void)
356 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
358 reverse_storage_order_supported = 0;
359 sorry ("reverse scalar storage order");
361 else
362 reverse_storage_order_supported = 1;
365 /* Whether reverse FP storage order is supported on the target. */
366 static int reverse_float_storage_order_supported = -1;
368 /* Check whether reverse FP storage order is supported on the target. */
370 static void
371 check_reverse_float_storage_order_support (void)
373 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
375 reverse_float_storage_order_supported = 0;
376 sorry ("reverse floating-point scalar storage order");
378 else
379 reverse_float_storage_order_supported = 1;
382 /* Return an rtx representing value of X with reverse storage order.
383 MODE is the intended mode of the result,
384 useful if X is a CONST_INT. */
386 rtx
387 flip_storage_order (machine_mode mode, rtx x)
389 scalar_int_mode int_mode;
390 rtx result;
392 if (mode == QImode)
393 return x;
395 if (COMPLEX_MODE_P (mode))
397 rtx real = read_complex_part (x, false);
398 rtx imag = read_complex_part (x, true);
400 real = flip_storage_order (GET_MODE_INNER (mode), real);
401 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
403 return gen_rtx_CONCAT (mode, real, imag);
406 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
407 check_reverse_storage_order_support ();
409 if (!is_a <scalar_int_mode> (mode, &int_mode))
411 if (FLOAT_MODE_P (mode)
412 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
413 check_reverse_float_storage_order_support ();
415 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
416 || !targetm.scalar_mode_supported_p (int_mode))
418 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
419 return x;
421 x = gen_lowpart (int_mode, x);
424 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
425 if (result == 0)
426 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
428 if (int_mode != mode)
429 result = gen_lowpart (mode, result);
431 return result;
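/* For example, flipping the storage order of an SImode value amounts to
   a four-byte swap: 0x01020304 becomes 0x04030201.  */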
434 /* If MODE is set, adjust bitfield memory MEM so that it points to the
435 first unit of mode MODE that contains a bitfield of size BITSIZE at
436 bit position BITNUM. If MODE is not set, return a BLKmode reference
437 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
438 of the field within the new memory. */
440 static rtx
441 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
442 unsigned HOST_WIDE_INT bitsize,
443 unsigned HOST_WIDE_INT bitnum,
444 unsigned HOST_WIDE_INT *new_bitnum)
446 scalar_int_mode imode;
447 if (mode.exists (&imode))
449 unsigned int unit = GET_MODE_BITSIZE (imode);
450 *new_bitnum = bitnum % unit;
451 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
452 return adjust_bitfield_address (mem, imode, offset);
454 else
456 *new_bitnum = bitnum % BITS_PER_UNIT;
457 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
458 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
459 / BITS_PER_UNIT);
460 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
464 /* The caller wants to perform insertion or extraction PATTERN on a
465 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
466 BITREGION_START and BITREGION_END are as for store_bit_field
467 and FIELDMODE is the natural mode of the field.
469 Search for a mode that is compatible with the memory access
470 restrictions and (where applicable) with a register insertion or
471 extraction. Return the new memory on success, storing the adjusted
472 bit position in *NEW_BITNUM. Return null otherwise. */
474 static rtx
475 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
476 rtx op0, HOST_WIDE_INT bitsize,
477 HOST_WIDE_INT bitnum,
478 poly_uint64 bitregion_start,
479 poly_uint64 bitregion_end,
480 machine_mode fieldmode,
481 unsigned HOST_WIDE_INT *new_bitnum)
483 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
484 bitregion_end, MEM_ALIGN (op0),
485 MEM_VOLATILE_P (op0));
486 scalar_int_mode best_mode;
487 if (iter.next_mode (&best_mode))
489 /* We can use a memory in BEST_MODE. See whether this is true for
490 any wider modes. All other things being equal, we prefer to
491 use the widest mode possible because it tends to expose more
492 CSE opportunities. */
493 if (!iter.prefer_smaller_modes ())
495 /* Limit the search to the mode required by the corresponding
496 register insertion or extraction instruction, if any. */
497 scalar_int_mode limit_mode = word_mode;
498 extraction_insn insn;
499 if (get_best_reg_extraction_insn (&insn, pattern,
500 GET_MODE_BITSIZE (best_mode),
501 fieldmode))
502 limit_mode = insn.field_mode;
504 scalar_int_mode wider_mode;
505 while (iter.next_mode (&wider_mode)
506 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
507 best_mode = wider_mode;
509 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
510 new_bitnum);
512 return NULL_RTX;
515 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
516 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
517 offset is then BITNUM / BITS_PER_UNIT. */
519 static bool
520 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
521 machine_mode struct_mode)
523 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
524 if (BYTES_BIG_ENDIAN)
525 return (multiple_p (bitnum, BITS_PER_UNIT)
526 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
527 || multiple_p (bitnum + bitsize,
528 regsize * BITS_PER_UNIT)));
529 else
530 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
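/* For example, on a little-endian target whose REGMODE_NATURAL_SIZE is
   8 bytes, bits 0..31 of a DImode value form a lowpart SImode subreg,
   while bits 32..63 do not.  */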
533 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
534 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
535 Return false if the access would touch memory outside the range
536 BITREGION_START to BITREGION_END for conformance to the C++ memory
537 model. */
539 static bool
540 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
541 unsigned HOST_WIDE_INT bitnum,
542 scalar_int_mode fieldmode,
543 poly_uint64 bitregion_start,
544 poly_uint64 bitregion_end)
546 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
548 /* -fstrict-volatile-bitfields must be enabled and we must have a
549 volatile MEM. */
550 if (!MEM_P (op0)
551 || !MEM_VOLATILE_P (op0)
552 || flag_strict_volatile_bitfields <= 0)
553 return false;
555 /* The bit size must not be larger than the field mode, and
556 the field mode must not be larger than a word. */
557 if (bitsize > modesize || modesize > BITS_PER_WORD)
558 return false;
560 /* Check for cases of unaligned fields that must be split. */
561 if (bitnum % modesize + bitsize > modesize)
562 return false;
564 /* The memory must be sufficiently aligned for a MODESIZE access.
565 This condition guarantees that the memory access will not
566 touch anything after the end of the structure. */
567 if (MEM_ALIGN (op0) < modesize)
568 return false;
570 /* Check for cases where the C++ memory model applies. */
571 if (maybe_ne (bitregion_end, 0U)
572 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
573 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
574 bitregion_end)))
575 return false;
577 return true;
580 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
581 bit number BITNUM can be treated as a simple value of mode MODE.
582 Store the byte offset in *BYTENUM if so. */
584 static bool
585 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
586 machine_mode mode, poly_uint64 *bytenum)
588 return (MEM_P (op0)
589 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
590 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
591 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
592 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
593 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
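/* For example, a 32-bit field that starts on a byte boundary of a
   sufficiently aligned MEM can be accessed as a single SImode value.  */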
596 /* Try to use instruction INSV to store VALUE into a field of OP0.
597 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
598 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
599 are as for store_bit_field. */
601 static bool
602 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
603 opt_scalar_int_mode op0_mode,
604 unsigned HOST_WIDE_INT bitsize,
605 unsigned HOST_WIDE_INT bitnum,
606 rtx value, scalar_int_mode value_mode)
608 class expand_operand ops[4];
609 rtx value1;
610 rtx xop0 = op0;
611 rtx_insn *last = get_last_insn ();
612 bool copy_back = false;
614 scalar_int_mode op_mode = insv->field_mode;
615 unsigned int unit = GET_MODE_BITSIZE (op_mode);
616 if (bitsize == 0 || bitsize > unit)
617 return false;
619 if (MEM_P (xop0))
620 /* Get a reference to the first byte of the field. */
621 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
622 &bitnum);
623 else
625 /* Convert from counting within OP0 to counting in OP_MODE. */
626 if (BYTES_BIG_ENDIAN)
627 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
629 /* If xop0 is a register, we need it in OP_MODE
630 to make it acceptable to the format of insv. */
631 if (GET_CODE (xop0) == SUBREG)
633 /* If such a SUBREG can't be created, give up. */
634 if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
635 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
636 return false;
637 /* We can't just change the mode, because this might clobber op0,
638 and we will need the original value of op0 if insv fails. */
639 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
640 SUBREG_BYTE (xop0));
642 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
643 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
646 /* If the destination is a paradoxical subreg such that we need a
647 truncate to the inner mode, perform the insertion on a temporary and
648 truncate the result to the original destination. Note that we can't
649 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
650 X) 0)) is (reg:N X). */
651 if (GET_CODE (xop0) == SUBREG
652 && REG_P (SUBREG_REG (xop0))
653 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
654 op_mode))
656 rtx tem = gen_reg_rtx (op_mode);
657 emit_move_insn (tem, xop0);
658 xop0 = tem;
659 copy_back = true;
662 /* There is a similar overflow check at the start of store_bit_field_1,
663 but it only handles the case where the field lies completely outside
664 the register. The field can also lie partially in the register, and
665 in that case we need to adjust BITSIZE for the partial overflow.
666 Without this fix, pr48335-2.c would be broken on big-endian targets
667 that have a bit-insert instruction, such as arm and aarch64. */
669 if (bitsize + bitnum > unit && bitnum < unit)
671 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
672 "destination object, data truncated into %wu-bit",
673 bitsize, unit - bitnum);
674 bitsize = unit - bitnum;
677 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
678 "backwards" from the size of the unit we are inserting into.
679 Otherwise, we count bits from the most significant on a
680 BYTES/BITS_BIG_ENDIAN machine. */
682 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
683 bitnum = unit - bitsize - bitnum;
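/* For example, with UNIT == 32, BITSIZE == 8 and BITNUM == 4, the
   flipped position is 32 - 8 - 4 == 20.  */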
685 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
686 value1 = value;
687 if (value_mode != op_mode)
689 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
691 rtx tmp;
692 /* Optimization: Don't bother really extending VALUE
693 if it has all the bits we will actually use. However,
694 if we must narrow it, be sure we do it correctly. */
696 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
698 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
699 if (! tmp)
700 tmp = simplify_gen_subreg (op_mode,
701 force_reg (value_mode, value1),
702 value_mode, 0);
704 else
706 tmp = gen_lowpart_if_possible (op_mode, value1);
707 if (! tmp)
708 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
710 value1 = tmp;
712 else if (CONST_INT_P (value))
713 value1 = gen_int_mode (INTVAL (value), op_mode);
714 else
715 /* The parse phase is supposed to make VALUE's data type
716 match that of the component reference, which is a type
717 at least as wide as the field; so VALUE should have
718 a mode that corresponds to that type. */
719 gcc_assert (CONSTANT_P (value));
722 create_fixed_operand (&ops[0], xop0);
723 create_integer_operand (&ops[1], bitsize);
724 create_integer_operand (&ops[2], bitnum);
725 create_input_operand (&ops[3], value1, op_mode);
726 if (maybe_expand_insn (insv->icode, 4, ops))
728 if (copy_back)
729 convert_move (op0, xop0, true);
730 return true;
732 delete_insns_since (last);
733 return false;
736 /* A subroutine of store_bit_field, with the same arguments. Return true
737 if the operation could be implemented.
739 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
740 no other way of implementing the operation. If FALLBACK_P is false,
741 return false instead. */
743 static bool
744 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
745 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
746 machine_mode fieldmode,
747 rtx value, bool reverse, bool fallback_p)
749 rtx op0 = str_rtx;
751 while (GET_CODE (op0) == SUBREG)
753 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
754 op0 = SUBREG_REG (op0);
757 /* No action is needed if the target is a register and if the field
758 lies completely outside that register. This can occur if the source
759 code contains an out-of-bounds access to a small array. */
760 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
761 return true;
763 /* Use vec_set patterns for inserting parts of vectors whenever
764 available. */
765 machine_mode outermode = GET_MODE (op0);
766 scalar_mode innermode = GET_MODE_INNER (outermode);
767 poly_uint64 pos;
768 if (VECTOR_MODE_P (outermode)
769 && !MEM_P (op0)
770 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
771 && fieldmode == innermode
772 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
773 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
775 class expand_operand ops[3];
776 enum insn_code icode = optab_handler (vec_set_optab, outermode);
778 create_fixed_operand (&ops[0], op0);
779 create_input_operand (&ops[1], value, innermode);
780 create_integer_operand (&ops[2], pos);
781 if (maybe_expand_insn (icode, 3, ops))
782 return true;
785 /* If the target is a register, overwriting the entire object or storing
786 a full-word or multi-word field can be done with just a SUBREG. */
787 if (!MEM_P (op0)
788 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
790 /* Use the subreg machinery either to narrow OP0 to the required
791 words or to cope with mode punning between equal-sized modes.
792 In the latter case, use subreg on the rhs side, not lhs. */
793 rtx sub;
794 HOST_WIDE_INT regnum;
795 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
796 if (known_eq (bitnum, 0U)
797 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
799 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
800 if (sub)
802 if (reverse)
803 sub = flip_storage_order (GET_MODE (op0), sub);
804 emit_move_insn (op0, sub);
805 return true;
808 else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
809 && multiple_p (bitsize, regsize * BITS_PER_UNIT))
811 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
812 regnum * regsize);
813 if (sub)
815 if (reverse)
816 value = flip_storage_order (fieldmode, value);
817 emit_move_insn (sub, value);
818 return true;
823 /* If the target is memory, storing any naturally aligned field can be
824 done with a simple store. For targets that support fast unaligned
825 memory, any naturally sized, unit aligned field can be done directly. */
826 poly_uint64 bytenum;
827 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
829 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
830 if (reverse)
831 value = flip_storage_order (fieldmode, value);
832 emit_move_insn (op0, value);
833 return true;
836 /* It's possible we'll need to handle other cases here for
837 polynomial bitnum and bitsize. */
839 /* From here on we need to be looking at a fixed-size insertion. */
840 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
841 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
843 /* Make sure we are playing with integral modes. Pun with subregs
844 if we aren't. This must come after the entire register case above,
845 since that case is valid for any mode. The following cases are only
846 valid for integral modes. */
847 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
848 scalar_int_mode imode;
849 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
851 if (MEM_P (op0))
852 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
853 0, MEM_SIZE (op0));
854 else if (!op0_mode.exists ())
856 if (ibitnum == 0
857 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
858 && MEM_P (value)
859 && !reverse)
861 value = adjust_address (value, GET_MODE (op0), 0);
862 emit_move_insn (op0, value);
863 return true;
865 if (!fallback_p)
866 return false;
867 rtx temp = assign_stack_temp (GET_MODE (op0),
868 GET_MODE_SIZE (GET_MODE (op0)));
869 emit_move_insn (temp, op0);
870 store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
871 reverse, fallback_p);
872 emit_move_insn (op0, temp);
873 return true;
875 else
876 op0 = gen_lowpart (op0_mode.require (), op0);
879 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
880 bitregion_start, bitregion_end,
881 fieldmode, value, reverse, fallback_p);
884 /* Subroutine of store_bit_field_1, with the same arguments, except
885 that BITSIZE and BITNUM are constant. Handle cases specific to
886 integral modes. If OP0_MODE is defined, it is the mode of OP0,
887 otherwise OP0 is a BLKmode MEM. */
889 static bool
890 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
891 unsigned HOST_WIDE_INT bitsize,
892 unsigned HOST_WIDE_INT bitnum,
893 poly_uint64 bitregion_start,
894 poly_uint64 bitregion_end,
895 machine_mode fieldmode,
896 rtx value, bool reverse, bool fallback_p)
898 /* Storing an lsb-aligned field in a register
899 can be done with a movstrict instruction. */
901 if (!MEM_P (op0)
902 && !reverse
903 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
904 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
905 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
907 class expand_operand ops[2];
908 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
909 rtx arg0 = op0;
910 unsigned HOST_WIDE_INT subreg_off;
912 if (GET_CODE (arg0) == SUBREG)
914 /* Else we've got some float mode source being extracted into
915 a different float mode destination -- this combination of
916 subregs results in Severe Tire Damage. */
917 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
918 || GET_MODE_CLASS (fieldmode) == MODE_INT
919 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
920 arg0 = SUBREG_REG (arg0);
923 subreg_off = bitnum / BITS_PER_UNIT;
924 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
925 /* STRICT_LOW_PART must have a non-paradoxical subreg as
926 operand. */
927 && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
929 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
931 create_fixed_operand (&ops[0], arg0);
932 /* Shrink the source operand to FIELDMODE. */
933 create_convert_operand_to (&ops[1], value, fieldmode, false);
934 if (maybe_expand_insn (icode, 2, ops))
935 return true;
939 /* Handle fields bigger than a word. */
941 if (bitsize > BITS_PER_WORD)
943 /* Here we transfer the words of the field
944 in the order least significant first.
945 This is because the most significant word is the one which may
946 be less than full.
947 However, only do that if the value is not BLKmode. */
949 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
950 const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
951 rtx_insn *last;
953 /* This is the mode we must force value to, so that there will be enough
954 subwords to extract. Note that fieldmode will often (always?) be
955 VOIDmode, because that is what store_field uses to indicate that this
956 is a bit field, but passing VOIDmode to operand_subword_force
957 is not allowed.
959 The mode must be fixed-size, since insertions into variable-sized
960 objects are meant to be handled before calling this function. */
961 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
962 if (value_mode == VOIDmode)
963 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
965 last = get_last_insn ();
966 for (int i = 0; i < nwords; i++)
968 /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
969 except maybe for the last iteration. */
970 const unsigned HOST_WIDE_INT new_bitsize
971 = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
972 /* Bit offset from the starting bit number in the target. */
973 const unsigned int bit_offset
974 = backwards ^ reverse
975 ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
976 : i * BITS_PER_WORD;
977 /* Starting word number in the value. */
978 const unsigned int wordnum
979 = backwards
980 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
981 : i;
982 /* The chunk of the value in word_mode. We use bit-field extraction
983 in BLKmode to handle unaligned memory references and to shift the
984 last chunk right on big-endian machines if need be. */
985 rtx value_word
986 = fieldmode == BLKmode
987 ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
988 1, NULL_RTX, word_mode, word_mode, false,
989 NULL)
990 : operand_subword_force (value, wordnum, value_mode);
992 if (!store_bit_field_1 (op0, new_bitsize,
993 bitnum + bit_offset,
994 bitregion_start, bitregion_end,
995 word_mode,
996 value_word, reverse, fallback_p))
998 delete_insns_since (last);
999 return false;
1002 return true;
1005 /* If VALUE has a floating-point or complex mode, access it as an
1006 integer of the corresponding size. This can occur on a machine
1007 with 64 bit registers that uses SFmode for float. It can also
1008 occur for unaligned float or complex fields. */
1009 rtx orig_value = value;
1010 scalar_int_mode value_mode;
1011 if (GET_MODE (value) == VOIDmode)
1012 /* By this point we've dealt with values that are bigger than a word,
1013 so word_mode is a conservatively correct choice. */
1014 value_mode = word_mode;
1015 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1017 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1018 value = gen_reg_rtx (value_mode);
1019 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1022 /* If OP0 is a multi-word register, narrow it to the affected word.
1023 If the region spans two words, defer to store_split_bit_field.
1024 Don't do this if op0 is a single hard register wider than word
1025 such as a float or vector register. */
1026 if (!MEM_P (op0)
1027 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1028 && (!REG_P (op0)
1029 || !HARD_REGISTER_P (op0)
1030 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1032 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1034 if (!fallback_p)
1035 return false;
1037 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1038 bitregion_start, bitregion_end,
1039 value, value_mode, reverse);
1040 return true;
1042 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1043 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1044 gcc_assert (op0);
1045 op0_mode = word_mode;
1046 bitnum %= BITS_PER_WORD;
1049 /* From here on we can assume that the field to be stored in fits
1050 within a word. If the destination is a register, it too fits
1051 in a word. */
1053 extraction_insn insv;
1054 if (!MEM_P (op0)
1055 && !reverse
1056 && get_best_reg_extraction_insn (&insv, EP_insv,
1057 GET_MODE_BITSIZE (op0_mode.require ()),
1058 fieldmode)
1059 && store_bit_field_using_insv (&insv, op0, op0_mode,
1060 bitsize, bitnum, value, value_mode))
1061 return true;
1063 /* If OP0 is a memory, try copying it to a register and seeing if a
1064 cheap register alternative is available. */
1065 if (MEM_P (op0) && !reverse)
1067 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1068 fieldmode)
1069 && store_bit_field_using_insv (&insv, op0, op0_mode,
1070 bitsize, bitnum, value, value_mode))
1071 return true;
1073 rtx_insn *last = get_last_insn ();
1075 /* Try loading part of OP0 into a register, inserting the bitfield
1076 into that, and then copying the result back to OP0. */
1077 unsigned HOST_WIDE_INT bitpos;
1078 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1079 bitregion_start, bitregion_end,
1080 fieldmode, &bitpos);
1081 if (xop0)
1083 rtx tempreg = copy_to_reg (xop0);
1084 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1085 bitregion_start, bitregion_end,
1086 fieldmode, orig_value, reverse, false))
1088 emit_move_insn (xop0, tempreg);
1089 return true;
1091 delete_insns_since (last);
1095 if (!fallback_p)
1096 return false;
1098 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1099 bitregion_end, value, value_mode, reverse);
1100 return true;
1103 /* Generate code to store value from rtx VALUE
1104 into a bit-field within structure STR_RTX
1105 containing BITSIZE bits starting at bit BITNUM.
1107 BITREGION_START is bitpos of the first bitfield in this region.
1108 BITREGION_END is the bitpos of the ending bitfield in this region.
1109 These two fields are 0 if the C++ memory model does not apply,
1110 or we are not interested in keeping track of bitfield regions.
1112 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1114 If REVERSE is true, the store is to be done in reverse order. */
1116 void
1117 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1118 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1119 machine_mode fieldmode,
1120 rtx value, bool reverse)
1122 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1123 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1124 scalar_int_mode int_mode;
1125 if (bitsize.is_constant (&ibitsize)
1126 && bitnum.is_constant (&ibitnum)
1127 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1128 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1129 bitregion_start, bitregion_end))
1131 /* Storing a full word can be done with a simple store.
1132 We know here that the field can be accessed with one single
1133 instruction. For targets that support unaligned memory,
1134 an unaligned access may be necessary. */
1135 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1137 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1138 ibitnum / BITS_PER_UNIT);
1139 if (reverse)
1140 value = flip_storage_order (int_mode, value);
1141 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1142 emit_move_insn (str_rtx, value);
1144 else
1146 rtx temp;
1148 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1149 ibitnum, &ibitnum);
1150 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1151 temp = copy_to_reg (str_rtx);
1152 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1153 int_mode, value, reverse, true))
1154 gcc_unreachable ();
1156 emit_move_insn (str_rtx, temp);
1159 return;
1162 /* Under the C++0x memory model, we must not touch bits outside the
1163 bit region. Adjust the address to start at the beginning of the
1164 bit region. */
1165 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1167 scalar_int_mode best_mode;
1168 machine_mode addr_mode = VOIDmode;
1170 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1171 bitnum -= bitregion_start;
1172 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1173 bitregion_end -= bitregion_start;
1174 bitregion_start = 0;
1175 if (bitsize.is_constant (&ibitsize)
1176 && bitnum.is_constant (&ibitnum)
1177 && get_best_mode (ibitsize, ibitnum,
1178 bitregion_start, bitregion_end,
1179 MEM_ALIGN (str_rtx), INT_MAX,
1180 MEM_VOLATILE_P (str_rtx), &best_mode))
1181 addr_mode = best_mode;
1182 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1183 offset, size);
1186 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1187 bitregion_start, bitregion_end,
1188 fieldmode, value, reverse, true))
1189 gcc_unreachable ();
1192 /* Use shifts and boolean operations to store VALUE into a bit field of
1193 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1194 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1195 the mode of VALUE.
1197 If REVERSE is true, the store is to be done in reverse order. */
1199 static void
1200 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1201 unsigned HOST_WIDE_INT bitsize,
1202 unsigned HOST_WIDE_INT bitnum,
1203 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1204 rtx value, scalar_int_mode value_mode, bool reverse)
1206 /* There is a case not handled here:
1207 a structure with a known alignment of just a halfword
1208 and a field split across two aligned halfwords within the structure.
1209 Or likewise a structure with a known alignment of just a byte
1210 and a field split across two bytes.
1211 Such cases are not supposed to be able to occur. */
1213 scalar_int_mode best_mode;
1214 if (MEM_P (op0))
1216 unsigned int max_bitsize = BITS_PER_WORD;
1217 scalar_int_mode imode;
1218 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1219 max_bitsize = GET_MODE_BITSIZE (imode);
1221 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1222 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1223 &best_mode))
1225 /* The only way this should occur is if the field spans word
1226 boundaries. */
1227 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1228 bitregion_start, bitregion_end,
1229 value, value_mode, reverse);
1230 return;
1233 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1235 else
1236 best_mode = op0_mode.require ();
1238 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1239 value, value_mode, reverse);
1242 /* Helper function for store_fixed_bit_field, stores
1243 the bit field always using MODE, which is the mode of OP0. The other
1244 arguments are as for store_fixed_bit_field. */
1246 static void
1247 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1248 unsigned HOST_WIDE_INT bitsize,
1249 unsigned HOST_WIDE_INT bitnum,
1250 rtx value, scalar_int_mode value_mode, bool reverse)
1252 rtx temp;
1253 int all_zero = 0;
1254 int all_one = 0;
1256 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1257 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1259 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1260 /* BITNUM is the distance between our msb
1261 and that of the containing datum.
1262 Convert it to the distance from the lsb. */
1263 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1265 /* Now BITNUM is always the distance between our lsb
1266 and that of OP0. */
1268 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1269 we must first convert its mode to MODE. */
1271 if (CONST_INT_P (value))
1273 unsigned HOST_WIDE_INT v = UINTVAL (value);
1275 if (bitsize < HOST_BITS_PER_WIDE_INT)
1276 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1278 if (v == 0)
1279 all_zero = 1;
1280 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1281 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1282 || (bitsize == HOST_BITS_PER_WIDE_INT
1283 && v == HOST_WIDE_INT_M1U))
1284 all_one = 1;
1286 value = lshift_value (mode, v, bitnum);
1288 else
1290 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1291 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1293 if (value_mode != mode)
1294 value = convert_to_mode (mode, value, 1);
1296 if (must_and)
1297 value = expand_binop (mode, and_optab, value,
1298 mask_rtx (mode, 0, bitsize, 0),
1299 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1300 if (bitnum > 0)
1301 value = expand_shift (LSHIFT_EXPR, mode, value,
1302 bitnum, NULL_RTX, 1);
1305 if (reverse)
1306 value = flip_storage_order (mode, value);
1308 /* Now clear the chosen bits in OP0,
1309 except that if VALUE is -1 we need not bother. */
1310 /* We keep the intermediates in registers to allow CSE to combine
1311 consecutive bitfield assignments. */
1313 temp = force_reg (mode, op0);
1315 if (! all_one)
1317 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1318 if (reverse)
1319 mask = flip_storage_order (mode, mask);
1320 temp = expand_binop (mode, and_optab, temp, mask,
1321 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1322 temp = force_reg (mode, temp);
1325 /* Now logical-or VALUE into OP0, unless it is zero. */
1327 if (! all_zero)
1329 temp = expand_binop (mode, ior_optab, temp, value,
1330 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1331 temp = force_reg (mode, temp);
1334 if (op0 != temp)
1336 op0 = copy_rtx (op0);
1337 emit_move_insn (op0, temp);
1341 /* Store a bit field that is split across multiple accessible memory objects.
1343 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1344 BITSIZE is the field width; BITPOS the position of its first bit
1345 (within the word).
1346 VALUE is the value to store, which has mode VALUE_MODE.
1347 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1348 a BLKmode MEM.
1350 If REVERSE is true, the store is to be done in reverse order.
1352 This does not yet handle fields wider than BITS_PER_WORD. */
1354 static void
1355 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1356 unsigned HOST_WIDE_INT bitsize,
1357 unsigned HOST_WIDE_INT bitpos,
1358 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1359 rtx value, scalar_int_mode value_mode, bool reverse)
1361 unsigned int unit, total_bits, bitsdone = 0;
1363 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1364 much at a time. */
1365 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1366 unit = BITS_PER_WORD;
1367 else
1368 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1370 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1371 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1372 again, and we will mutually recurse forever. */
1373 if (MEM_P (op0) && op0_mode.exists ())
1374 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1376 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1377 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1378 that VALUE might be a floating-point constant. */
1379 if (CONSTANT_P (value) && !CONST_INT_P (value))
1381 rtx word = gen_lowpart_common (word_mode, value);
1383 if (word && (value != word))
1384 value = word;
1385 else
1386 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1387 value_mode = word_mode;
1390 total_bits = GET_MODE_BITSIZE (value_mode);
1392 while (bitsdone < bitsize)
1394 unsigned HOST_WIDE_INT thissize;
1395 unsigned HOST_WIDE_INT thispos;
1396 unsigned HOST_WIDE_INT offset;
1397 rtx part;
1399 offset = (bitpos + bitsdone) / unit;
1400 thispos = (bitpos + bitsdone) % unit;
1402 /* When region of bytes we can touch is restricted, decrease
1403 UNIT close to the end of the region as needed. If op0 is a REG
1404 or SUBREG of REG, don't do this, as there can't be data races
1405 on a register and we can expand shorter code in some cases. */
1406 if (maybe_ne (bitregion_end, 0U)
1407 && unit > BITS_PER_UNIT
1408 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1409 && !REG_P (op0)
1410 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1412 unit = unit / 2;
1413 continue;
1416 /* THISSIZE must not overrun a word boundary. Otherwise,
1417 store_fixed_bit_field will call us again, and we will mutually
1418 recurse forever. */
1419 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1420 thissize = MIN (thissize, unit - thispos);
1422 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1424 /* Fetch successively less significant portions. */
1425 if (CONST_INT_P (value))
1426 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1427 >> (bitsize - bitsdone - thissize))
1428 & ((HOST_WIDE_INT_1 << thissize) - 1));
1429 /* Likewise, but the source is little-endian. */
1430 else if (reverse)
1431 part = extract_fixed_bit_field (word_mode, value, value_mode,
1432 thissize,
1433 bitsize - bitsdone - thissize,
1434 NULL_RTX, 1, false);
1435 else
1436 /* The args are chosen so that the last part includes the
1437 lsb. Give extract_bit_field the value it needs (with
1438 endianness compensation) to fetch the piece we want. */
1439 part = extract_fixed_bit_field (word_mode, value, value_mode,
1440 thissize,
1441 total_bits - bitsize + bitsdone,
1442 NULL_RTX, 1, false);
1444 else
1446 /* Fetch successively more significant portions. */
1447 if (CONST_INT_P (value))
1448 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1449 >> bitsdone)
1450 & ((HOST_WIDE_INT_1 << thissize) - 1));
1451 /* Likewise, but the source is big-endian. */
1452 else if (reverse)
1453 part = extract_fixed_bit_field (word_mode, value, value_mode,
1454 thissize,
1455 total_bits - bitsdone - thissize,
1456 NULL_RTX, 1, false);
1457 else
1458 part = extract_fixed_bit_field (word_mode, value, value_mode,
1459 thissize, bitsdone, NULL_RTX,
1460 1, false);
1463 /* If OP0 is a register, then handle OFFSET here. */
1464 rtx op0_piece = op0;
1465 opt_scalar_int_mode op0_piece_mode = op0_mode;
1466 if (SUBREG_P (op0) || REG_P (op0))
1468 scalar_int_mode imode;
1469 if (op0_mode.exists (&imode)
1470 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1472 if (offset)
1473 op0_piece = const0_rtx;
1475 else
1477 op0_piece = operand_subword_force (op0,
1478 offset * unit / BITS_PER_WORD,
1479 GET_MODE (op0));
1480 op0_piece_mode = word_mode;
1482 offset &= BITS_PER_WORD / unit - 1;
1485 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1486 it is just an out-of-bounds access. Ignore it. */
1487 if (op0_piece != const0_rtx)
1488 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1489 offset * unit + thispos, bitregion_start,
1490 bitregion_end, part, word_mode, reverse);
1491 bitsdone += thissize;
1495 /* A subroutine of extract_bit_field_1 that converts return value X
1496 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1497 to extract_bit_field. */
1499 static rtx
1500 convert_extracted_bit_field (rtx x, machine_mode mode,
1501 machine_mode tmode, bool unsignedp)
1503 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1504 return x;
1506 /* If the x mode is not a scalar integral, first convert to the
1507 integer mode of that size and then access it as a floating-point
1508 value via a SUBREG. */
1509 if (!SCALAR_INT_MODE_P (tmode))
1511 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1512 x = convert_to_mode (int_mode, x, unsignedp);
1513 x = force_reg (int_mode, x);
1514 return gen_lowpart (tmode, x);
1517 return convert_to_mode (tmode, x, unsignedp);
1520 /* Try to use an ext(z)v pattern to extract a field from OP0.
1521 Return the extracted value on success, otherwise return null.
1522 EXTV describes the extraction instruction to use. If OP0_MODE
1523 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1524 The other arguments are as for extract_bit_field. */
1526 static rtx
1527 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1528 opt_scalar_int_mode op0_mode,
1529 unsigned HOST_WIDE_INT bitsize,
1530 unsigned HOST_WIDE_INT bitnum,
1531 int unsignedp, rtx target,
1532 machine_mode mode, machine_mode tmode)
1534 class expand_operand ops[4];
1535 rtx spec_target = target;
1536 rtx spec_target_subreg = 0;
1537 scalar_int_mode ext_mode = extv->field_mode;
1538 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1540 if (bitsize == 0 || unit < bitsize)
1541 return NULL_RTX;
1543 if (MEM_P (op0))
1544 /* Get a reference to the first byte of the field. */
1545 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1546 &bitnum);
1547 else
1549 /* Convert from counting within OP0 to counting in EXT_MODE. */
1550 if (BYTES_BIG_ENDIAN)
1551 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1553 /* If op0 is a register, we need it in EXT_MODE to make it
1554 acceptable to the format of ext(z)v. */
1555 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1556 return NULL_RTX;
1557 if (REG_P (op0) && op0_mode.require () != ext_mode)
1558 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1561 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1562 "backwards" from the size of the unit we are extracting from.
1563 Otherwise, we count bits from the most significant on a
1564 BYTES/BITS_BIG_ENDIAN machine. */
1566 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1567 bitnum = unit - bitsize - bitnum;
1569 if (target == 0)
1570 target = spec_target = gen_reg_rtx (tmode);
1572 if (GET_MODE (target) != ext_mode)
1574 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1575 between the mode of the extraction (word_mode) and the target
1576 mode. Instead, create a temporary and use convert_move to set
1577 the target. */
1578 if (REG_P (target)
1579 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1581 target = gen_lowpart (ext_mode, target);
1582 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1583 spec_target_subreg = target;
1585 else
1586 target = gen_reg_rtx (ext_mode);
1589 create_output_operand (&ops[0], target, ext_mode);
1590 create_fixed_operand (&ops[1], op0);
1591 create_integer_operand (&ops[2], bitsize);
1592 create_integer_operand (&ops[3], bitnum);
1593 if (maybe_expand_insn (extv->icode, 4, ops))
1595 target = ops[0].value;
1596 if (target == spec_target)
1597 return target;
1598 if (target == spec_target_subreg)
1599 return spec_target;
1600 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1602 return NULL_RTX;
1605 /* See whether it would be valid to extract the part of OP0 described
1606 by BITNUM and BITSIZE into a value of mode MODE using a subreg
1607 operation. Return the subreg if so, otherwise return null. */
1609 static rtx
1610 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1611 poly_uint64 bitsize, poly_uint64 bitnum)
1613 poly_uint64 bytenum;
1614 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1615 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1616 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1617 && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1618 return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1619 return NULL_RTX;
1622 /* A subroutine of extract_bit_field, with the same arguments.
1623 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1624 if we can find no other means of implementing the operation.
1625 If FALLBACK_P is false, return NULL instead. */
1627 static rtx
1628 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1629 int unsignedp, rtx target, machine_mode mode,
1630 machine_mode tmode, bool reverse, bool fallback_p,
1631 rtx *alt_rtl)
1633 rtx op0 = str_rtx;
1634 machine_mode mode1;
1636 if (tmode == VOIDmode)
1637 tmode = mode;
1639 while (GET_CODE (op0) == SUBREG)
1641 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1642 op0 = SUBREG_REG (op0);
1645 /* If we have an out-of-bounds access to a register, just return an
1646 uninitialized register of the required mode. This can occur if the
1647 source code contains an out-of-bounds access to a small array. */
1648 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1649 return gen_reg_rtx (tmode);
1651 if (REG_P (op0)
1652 && mode == GET_MODE (op0)
1653 && known_eq (bitnum, 0U)
1654 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1656 if (reverse)
1657 op0 = flip_storage_order (mode, op0);
1658 /* We're trying to extract a full register from itself. */
1659 return op0;
1662 /* First try to check for vector from vector extractions. */
1663 if (VECTOR_MODE_P (GET_MODE (op0))
1664 && !MEM_P (op0)
1665 && VECTOR_MODE_P (tmode)
1666 && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1667 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1669 machine_mode new_mode = GET_MODE (op0);
1670 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1672 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1673 poly_uint64 nunits;
1674 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1675 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1676 || !related_vector_mode (tmode, inner_mode,
1677 nunits).exists (&new_mode)
1678 || maybe_ne (GET_MODE_SIZE (new_mode),
1679 GET_MODE_SIZE (GET_MODE (op0))))
1680 new_mode = VOIDmode;
1682 poly_uint64 pos;
1683 if (new_mode != VOIDmode
1684 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1685 != CODE_FOR_nothing)
1686 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1688 class expand_operand ops[3];
1689 machine_mode outermode = new_mode;
1690 machine_mode innermode = tmode;
1691 enum insn_code icode
1692 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1694 if (new_mode != GET_MODE (op0))
1695 op0 = gen_lowpart (new_mode, op0);
1696 create_output_operand (&ops[0], target, innermode);
1697 ops[0].target = 1;
1698 create_input_operand (&ops[1], op0, outermode);
1699 create_integer_operand (&ops[2], pos);
1700 if (maybe_expand_insn (icode, 3, ops))
1702 if (alt_rtl && ops[0].target)
1703 *alt_rtl = target;
1704 target = ops[0].value;
1705 if (GET_MODE (target) != mode)
1706 return gen_lowpart (tmode, target);
1707 return target;
1712 /* See if we can get a better vector mode before extracting. */
1713 if (VECTOR_MODE_P (GET_MODE (op0))
1714 && !MEM_P (op0)
1715 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1717 machine_mode new_mode;
1719 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1720 new_mode = MIN_MODE_VECTOR_FLOAT;
1721 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1722 new_mode = MIN_MODE_VECTOR_FRACT;
1723 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1724 new_mode = MIN_MODE_VECTOR_UFRACT;
1725 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1726 new_mode = MIN_MODE_VECTOR_ACCUM;
1727 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1728 new_mode = MIN_MODE_VECTOR_UACCUM;
1729 else
1730 new_mode = MIN_MODE_VECTOR_INT;
1732 FOR_EACH_MODE_FROM (new_mode, new_mode)
1733 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1734 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1735 && targetm.vector_mode_supported_p (new_mode))
1736 break;
1737 if (new_mode != VOIDmode)
1738 op0 = gen_lowpart (new_mode, op0);
1741 /* Use vec_extract patterns for extracting parts of vectors whenever
1742 available. If that fails, see whether the current modes and bitregion
1743 give a natural subreg. */
1744 machine_mode outermode = GET_MODE (op0);
1745 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1747 scalar_mode innermode = GET_MODE_INNER (outermode);
1748 enum insn_code icode
1749 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1750 poly_uint64 pos;
1751 if (icode != CODE_FOR_nothing
1752 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1753 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1755 class expand_operand ops[3];
1757 create_output_operand (&ops[0], target, innermode);
1758 ops[0].target = 1;
1759 create_input_operand (&ops[1], op0, outermode);
1760 create_integer_operand (&ops[2], pos);
1761 if (maybe_expand_insn (icode, 3, ops))
1763 if (alt_rtl && ops[0].target)
1764 *alt_rtl = target;
1765 target = ops[0].value;
1766 if (GET_MODE (target) != mode)
1767 return gen_lowpart (tmode, target);
1768 return target;
1771 /* Using subregs is useful if we're extracting one register vector
1772 from a multi-register vector. extract_bit_field_as_subreg checks
1773 for valid bitsize and bitnum, so we don't need to do that here. */
1774 if (VECTOR_MODE_P (mode))
1776 rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1777 if (sub)
1778 return sub;
1782 /* Make sure we are playing with integral modes. Pun with subregs
1783 if we aren't. */
1784 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1785 scalar_int_mode imode;
1786 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1788 if (MEM_P (op0))
1789 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1790 0, MEM_SIZE (op0));
1791 else if (op0_mode.exists (&imode))
1793 op0 = gen_lowpart (imode, op0);
1795 /* If we got a SUBREG, force it into a register since we
1796 aren't going to be able to do another SUBREG on it. */
1797 if (GET_CODE (op0) == SUBREG)
1798 op0 = force_reg (imode, op0);
1800 else
1802 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1803 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1804 emit_move_insn (mem, op0);
1805 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1809 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1810 If that's wrong, the solution is to test for it and set TARGET to 0
1811 if needed. */
1813 /* Get the mode of the field to use for atomic access or subreg
1814 conversion. */
1815 if (!SCALAR_INT_MODE_P (tmode)
1816 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1817 mode1 = mode;
1818 gcc_assert (mode1 != BLKmode);
1820 /* Extraction of a full MODE1 value can be done with a subreg as long
1821 as the least significant bit of the value is the least significant
1822 bit of either OP0 or a word of OP0. */
1823 if (!MEM_P (op0) && !reverse)
1825 rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1826 if (sub)
1827 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1830 /* Extraction of a full MODE1 value can be done with a load as long as
1831 the field is on a byte boundary and is sufficiently aligned. */
1832 poly_uint64 bytenum;
1833 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1835 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1836 if (reverse)
1837 op0 = flip_storage_order (mode1, op0);
1838 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1841 /* If we have a memory source and a non-constant bit offset, restrict
1842 the memory to the referenced bytes. This is a worst-case fallback
1843 but is useful for things like vector booleans. */
1844 if (MEM_P (op0) && !bitnum.is_constant ())
1846 bytenum = bits_to_bytes_round_down (bitnum);
1847 bitnum = num_trailing_bits (bitnum);
1848 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1849 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1850 op0_mode = opt_scalar_int_mode ();
1853 /* It's possible we'll need to handle other cases here for
1854 polynomial bitnum and bitsize. */
1856 /* From here on we need to be looking at a fixed-size extraction. */
1857 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1858 bitnum.to_constant (), unsignedp,
1859 target, mode, tmode, reverse, fallback_p);
1862 /* Subroutine of extract_bit_field_1, with the same arguments, except
1863 that BITSIZE and BITNUM are constant. Handle cases specific to
1864 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1865 otherwise OP0 is a BLKmode MEM. */
1867 static rtx
1868 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1869 unsigned HOST_WIDE_INT bitsize,
1870 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1871 rtx target, machine_mode mode, machine_mode tmode,
1872 bool reverse, bool fallback_p)
1874 /* Handle fields bigger than a word. */
1876 if (bitsize > BITS_PER_WORD)
1878 /* Here we transfer the words of the field
1879 in the order least significant first.
1880 This is because the most significant word is the one which may
1881 be less than full. */
1883 const bool backwards = WORDS_BIG_ENDIAN;
1884 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1885 unsigned int i;
1886 rtx_insn *last;
1888 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1889 target = gen_reg_rtx (mode);
1891 /* In case we're about to clobber a base register or something
1892 (see gcc.c-torture/execute/20040625-1.c). */
1893 if (reg_mentioned_p (target, op0))
1894 target = gen_reg_rtx (mode);
1896 /* Indicate for flow that the entire target reg is being set. */
1897 emit_clobber (target);
1899 /* The mode must be fixed-size, since extract_bit_field_1 handles
1900 extractions from variable-sized objects before calling this
1901 function. */
1902 unsigned int target_size
1903 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1904 last = get_last_insn ();
1905 for (i = 0; i < nwords; i++)
1907 /* If I is 0, use the low-order word in both field and target;
1908 if I is 1, use the next to lowest word; and so on. */
1909 /* Word number in TARGET to use. */
1910 unsigned int wordnum
1911 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1912 /* Offset from start of field in OP0. */
1913 unsigned int bit_offset = (backwards ^ reverse
1914 ? MAX ((int) bitsize - ((int) i + 1)
1915 * BITS_PER_WORD,
1916 0)
1917 : (int) i * BITS_PER_WORD);
1918 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1919 rtx result_part
1920 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1921 bitsize - i * BITS_PER_WORD),
1922 bitnum + bit_offset, 1, target_part,
1923 mode, word_mode, reverse, fallback_p, NULL);
1925 gcc_assert (target_part);
1926 if (!result_part)
1928 delete_insns_since (last);
1929 return NULL;
1932 if (result_part != target_part)
1933 emit_move_insn (target_part, result_part);
1936 if (unsignedp)
1938 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1939 need to be zero'd out. */
1940 if (target_size > nwords * UNITS_PER_WORD)
1942 unsigned int i, total_words;
1944 total_words = target_size / UNITS_PER_WORD;
1945 for (i = nwords; i < total_words; i++)
1946 emit_move_insn
1947 (operand_subword (target,
1948 backwards ? total_words - i - 1 : i,
1949 1, VOIDmode),
1950 const0_rtx);
1952 return target;
1955 /* Signed bit field: sign-extend with two arithmetic shifts. */
1956 target = expand_shift (LSHIFT_EXPR, mode, target,
1957 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1958 return expand_shift (RSHIFT_EXPR, mode, target,
1959 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
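/* Illustration of the two-shift sign extension above: for a 12-bit
   signed field already sitting in the low bits of a 32-bit mode, the
   sequence is a left shift by 20 followed by an arithmetic right shift
   by 20, which replicates bit 11 into bits 12..31.  */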
1962 /* If OP0 is a multi-word register, narrow it to the affected word.
1963 If the region spans two words, defer to extract_split_bit_field. */
1964 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1966 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1968 if (!fallback_p)
1969 return NULL_RTX;
1970 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1971 unsignedp, reverse);
1972 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1974 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1975 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1976 op0_mode = word_mode;
1977 bitnum %= BITS_PER_WORD;
1980 /* From here on we know the desired field is smaller than a word.
1981 If OP0 is a register, it too fits within a word. */
1982 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1983 extraction_insn extv;
1984 if (!MEM_P (op0)
1985 && !reverse
1986 /* ??? We could limit the structure size to the part of OP0 that
1987 contains the field, with appropriate checks for endianness
1988 and TARGET_TRULY_NOOP_TRUNCATION. */
1989 && get_best_reg_extraction_insn (&extv, pattern,
1990 GET_MODE_BITSIZE (op0_mode.require ()),
1991 tmode))
1993 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1994 bitsize, bitnum,
1995 unsignedp, target, mode,
1996 tmode);
1997 if (result)
1998 return result;
2001 /* If OP0 is a memory, try copying it to a register and seeing if a
2002 cheap register alternative is available. */
2003 if (MEM_P (op0) && !reverse)
2005 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2006 tmode))
2008 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2009 bitsize, bitnum,
2010 unsignedp, target, mode,
2011 tmode);
2012 if (result)
2013 return result;
2016 rtx_insn *last = get_last_insn ();
2018 /* Try loading part of OP0 into a register and extracting the
2019 bitfield from that. */
2020 unsigned HOST_WIDE_INT bitpos;
2021 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2022 0, 0, tmode, &bitpos);
2023 if (xop0)
2025 xop0 = copy_to_reg (xop0);
2026 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2027 unsignedp, target,
2028 mode, tmode, reverse, false, NULL);
2029 if (result)
2030 return result;
2031 delete_insns_since (last);
2035 if (!fallback_p)
2036 return NULL;
2038 /* Find a correspondingly-sized integer field, so we can apply
2039 shifts and masks to it. */
2040 scalar_int_mode int_mode;
2041 if (!int_mode_for_mode (tmode).exists (&int_mode))
2042 /* If this fails, we should probably push op0 out to memory and then
2043 do a load. */
2044 int_mode = int_mode_for_mode (mode).require ();
2046 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2047 bitnum, target, unsignedp, reverse);
2049 /* Complex values must be reversed piecewise, so we need to undo the global
2050 reversal, convert to the complex mode and reverse again. */
2051 if (reverse && COMPLEX_MODE_P (tmode))
2053 target = flip_storage_order (int_mode, target);
2054 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2055 target = flip_storage_order (tmode, target);
2057 else
2058 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2060 return target;
2063 /* Generate code to extract a bit-field from STR_RTX
2064 containing BITSIZE bits, starting at BITNUM,
2065 and put it in TARGET if possible (if TARGET is nonzero).
2066 Regardless of TARGET, we return the rtx for where the value is placed.
2068 STR_RTX is the structure containing the byte (a REG or MEM).
2069 UNSIGNEDP is nonzero if this is an unsigned bit field.
2070 MODE is the natural mode of the field value once extracted.
2071 TMODE is the mode the caller would like the value to have;
2072 but the value may be returned with type MODE instead.
2074 If REVERSE is true, the extraction is to be done in reverse order.
2076 If a TARGET is specified and we can store in it at no extra cost,
2077 we do so, and return TARGET.
2078 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2079 if they are equally easy.
2081 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2082 then *ALT_RTL is set to TARGET (before legitimization). */
2084 rtx
2085 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2086 int unsignedp, rtx target, machine_mode mode,
2087 machine_mode tmode, bool reverse, rtx *alt_rtl)
2089 machine_mode mode1;
2091 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2092 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2093 mode1 = GET_MODE (str_rtx);
2094 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2095 mode1 = GET_MODE (target);
2096 else
2097 mode1 = tmode;
2099 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2100 scalar_int_mode int_mode;
2101 if (bitsize.is_constant (&ibitsize)
2102 && bitnum.is_constant (&ibitnum)
2103 && is_a <scalar_int_mode> (mode1, &int_mode)
2104 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2105 int_mode, 0, 0))
2107 /* Extraction of a full INT_MODE value can be done with a simple load.
2108 We know here that the field can be accessed with one single
2109 instruction. For targets that support unaligned memory,
2110 an unaligned access may be necessary. */
2111 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2113 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2114 ibitnum / BITS_PER_UNIT);
2115 if (reverse)
2116 result = flip_storage_order (int_mode, result);
2117 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2118 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2121 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2122 &ibitnum);
2123 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2124 str_rtx = copy_to_reg (str_rtx);
2125 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2126 target, mode, tmode, reverse, true, alt_rtl);
2129 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2130 target, mode, tmode, reverse, true, alt_rtl);
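/* Example of the strict-volatile-bitfields path above: a volatile "int"
   bit-field is read with a single full-width SImode load (possibly
   unaligned), and the requested bits are then shifted and masked out of
   the loaded register rather than accessed with a narrower or wider
   memory reference.  */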
2133 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2134 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2135 otherwise OP0 is a BLKmode MEM.
2137 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2138 If REVERSE is true, the extraction is to be done in reverse order.
2140 If TARGET is nonzero, attempts to store the value there
2141 and return TARGET, but this is not guaranteed.
2142 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2144 static rtx
2145 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2146 opt_scalar_int_mode op0_mode,
2147 unsigned HOST_WIDE_INT bitsize,
2148 unsigned HOST_WIDE_INT bitnum, rtx target,
2149 int unsignedp, bool reverse)
2151 scalar_int_mode mode;
2152 if (MEM_P (op0))
2154 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2155 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2156 /* The only way this should occur is if the field spans word
2157 boundaries. */
2158 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2159 unsignedp, reverse);
2161 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2163 else
2164 mode = op0_mode.require ();
2166 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2167 target, unsignedp, reverse);
2170 /* Helper function for extract_fixed_bit_field, extracts
2171 the bit field always using MODE, which is the mode of OP0.
2172 The other arguments are as for extract_fixed_bit_field. */
2174 static rtx
2175 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2176 unsigned HOST_WIDE_INT bitsize,
2177 unsigned HOST_WIDE_INT bitnum, rtx target,
2178 int unsignedp, bool reverse)
2180 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2181 for invalid input, such as extract equivalent of f5 from
2182 gcc.dg/pr48335-2.c. */
2184 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2185 /* BITNUM is the distance between our msb and that of OP0.
2186 Convert it to the distance from the lsb. */
2187 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2189 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2190 We have reduced the big-endian case to the little-endian case. */
2191 if (reverse)
2192 op0 = flip_storage_order (mode, op0);
2194 if (unsignedp)
2196 if (bitnum)
2198 /* If the field does not already start at the lsb,
2199 shift it so it does. */
2200 /* Maybe propagate the target for the shift. */
2201 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2202 if (tmode != mode)
2203 subtarget = 0;
2204 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2206 /* Convert the value to the desired mode. TMODE must also be a
2207 scalar integer for this conversion to make sense, since we
2208 shouldn't reinterpret the bits. */
2209 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2210 if (mode != new_mode)
2211 op0 = convert_to_mode (new_mode, op0, 1);
2213 /* Unless the msb of the field used to be the msb when we shifted,
2214 mask out the upper bits. */
2216 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2217 return expand_binop (new_mode, and_optab, op0,
2218 mask_rtx (new_mode, 0, bitsize, 0),
2219 target, 1, OPTAB_LIB_WIDEN);
2220 return op0;
2223 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2224 then arithmetic-shift its lsb to the lsb of the word. */
2225 op0 = force_reg (mode, op0);
2227 /* Find the narrowest integer mode that contains the field. */
2229 opt_scalar_int_mode mode_iter;
2230 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2231 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2232 break;
2234 mode = mode_iter.require ();
2235 op0 = convert_to_mode (mode, op0, 0);
2237 if (mode != tmode)
2238 target = 0;
2240 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2242 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2243 /* Maybe propagate the target for the shift. */
2244 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2245 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2248 return expand_shift (RSHIFT_EXPR, mode, op0,
2249 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
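/* Worked example for the little-endian case above, with a 5-bit field
   at bit 3 of X: the unsigned path computes (X >> 3) & 0x1f.  The
   signed path first narrows X to the smallest integer mode covering
   bits 0..7 (QImode here), needs no left shift because the field's msb
   is already the mode's msb, and then arithmetic-shifts right by 3;
   in C terms, roughly ((int8_t) X) >> 3.  */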
2252 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2253 VALUE << BITPOS. */
2255 static rtx
2256 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2257 int bitpos)
2259 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2262 /* Extract a bit field that is split across two words
2263 and return an RTX for the result.
2265 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2266 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2267 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2268 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2269 a BLKmode MEM.
2271 If REVERSE is true, the extraction is to be done in reverse order. */
2273 static rtx
2274 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2275 unsigned HOST_WIDE_INT bitsize,
2276 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2277 bool reverse)
2279 unsigned int unit;
2280 unsigned int bitsdone = 0;
2281 rtx result = NULL_RTX;
2282 int first = 1;
2284 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2285 much at a time. */
2286 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2287 unit = BITS_PER_WORD;
2288 else
2289 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2291 while (bitsdone < bitsize)
2293 unsigned HOST_WIDE_INT thissize;
2294 rtx part;
2295 unsigned HOST_WIDE_INT thispos;
2296 unsigned HOST_WIDE_INT offset;
2298 offset = (bitpos + bitsdone) / unit;
2299 thispos = (bitpos + bitsdone) % unit;
2301 /* THISSIZE must not overrun a word boundary. Otherwise,
2302 extract_fixed_bit_field will call us again, and we will mutually
2303 recurse forever. */
2304 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2305 thissize = MIN (thissize, unit - thispos);
2307 /* If OP0 is a register, then handle OFFSET here. */
2308 rtx op0_piece = op0;
2309 opt_scalar_int_mode op0_piece_mode = op0_mode;
2310 if (SUBREG_P (op0) || REG_P (op0))
2312 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2313 op0_piece_mode = word_mode;
2314 offset = 0;
2317 /* Extract the parts in bit-counting order,
2318 whose meaning is determined by BYTES_PER_UNIT.
2319 OFFSET is in UNITs, and UNIT is in bits. */
2320 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2321 thissize, offset * unit + thispos,
2322 0, 1, reverse);
2323 bitsdone += thissize;
2325 /* Shift this part into place for the result. */
2326 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2328 if (bitsize != bitsdone)
2329 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2330 bitsize - bitsdone, 0, 1);
2332 else
2334 if (bitsdone != thissize)
2335 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2336 bitsdone - thissize, 0, 1);
2339 if (first)
2340 result = part;
2341 else
2342 /* Combine the parts with bitwise or. This works
2343 because we extracted each part as an unsigned bit field. */
2344 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2345 OPTAB_LIB_WIDEN);
2347 first = 0;
2350 /* Unsigned bit field: we are done. */
2351 if (unsignedp)
2352 return result;
2353 /* Signed bit field: sign-extend with two arithmetic shifts. */
2354 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2355 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2356 return expand_shift (RSHIFT_EXPR, word_mode, result,
2357 BITS_PER_WORD - bitsize, NULL_RTX, 0);
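/* Example of the loop above on a little-endian target with 32-bit
   words: a 16-bit field starting at bit 24 of a register pair is
   fetched as two 8-bit pieces -- bits 24..31 of word 0 and bits 0..7
   of word 1 -- and the second piece is shifted left by 8 before the
   pieces are IORed together.  */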
2360 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2361 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2362 MODE, fill the upper bits with zeros. Fail if the layout of either
2363 mode is unknown (as for CC modes) or if the extraction would involve
2364 unprofitable mode punning. Return the value on success, otherwise
2365 return null.
2367 This is different from gen_lowpart* in these respects:
2369 - the returned value must always be considered an rvalue
2371 - when MODE is wider than SRC_MODE, the extraction involves
2372 a zero extension
2374 - when MODE is smaller than SRC_MODE, the extraction involves
2375 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2377 In other words, this routine performs a computation, whereas the
2378 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2379 operations. */
2381 rtx
2382 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2384 scalar_int_mode int_mode, src_int_mode;
2386 if (mode == src_mode)
2387 return src;
2389 if (CONSTANT_P (src))
2391 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2392 fails, it will happily create (subreg (symbol_ref)) or similar
2393 invalid SUBREGs. */
2394 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2395 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2396 if (ret)
2397 return ret;
2399 if (GET_MODE (src) == VOIDmode
2400 || !validate_subreg (mode, src_mode, src, byte))
2401 return NULL_RTX;
2403 src = force_reg (GET_MODE (src), src);
2404 return gen_rtx_SUBREG (mode, src, byte);
2407 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2408 return NULL_RTX;
2410 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2411 && targetm.modes_tieable_p (mode, src_mode))
2413 rtx x = gen_lowpart_common (mode, src);
2414 if (x)
2415 return x;
2418 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2419 || !int_mode_for_mode (mode).exists (&int_mode))
2420 return NULL_RTX;
2422 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2423 return NULL_RTX;
2424 if (!targetm.modes_tieable_p (int_mode, mode))
2425 return NULL_RTX;
2427 src = gen_lowpart (src_int_mode, src);
2428 if (!validate_subreg (int_mode, src_int_mode, src,
2429 subreg_lowpart_offset (int_mode, src_int_mode)))
2430 return NULL_RTX;
2432 src = convert_modes (int_mode, src_int_mode, src, true);
2433 src = gen_lowpart (mode, src);
2434 return src;
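/* A typical use of extract_low_bits is bit-for-bit reinterpretation,
   e.g. reading an SFmode value as an SImode integer so its bit pattern
   can be manipulated with integer operations.  Unlike a paradoxical
   subreg, asking for a MODE wider than SRC_MODE yields defined (zero)
   upper bits.  */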
2437 /* Add INC into TARGET. */
2439 void
2440 expand_inc (rtx target, rtx inc)
2442 rtx value = expand_binop (GET_MODE (target), add_optab,
2443 target, inc,
2444 target, 0, OPTAB_LIB_WIDEN);
2445 if (value != target)
2446 emit_move_insn (target, value);
2449 /* Subtract DEC from TARGET. */
2451 void
2452 expand_dec (rtx target, rtx dec)
2454 rtx value = expand_binop (GET_MODE (target), sub_optab,
2455 target, dec,
2456 target, 0, OPTAB_LIB_WIDEN);
2457 if (value != target)
2458 emit_move_insn (target, value);
2461 /* Output a shift instruction for expression code CODE,
2462 with SHIFTED being the rtx for the value to shift,
2463 and AMOUNT the rtx for the amount to shift by.
2464 Store the result in the rtx TARGET, if that is convenient.
2465 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2466 Return the rtx for where the value is.
2467 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2468 in which case 0 is returned. */
2470 static rtx
2471 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2472 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2474 rtx op1, temp = 0;
2475 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2476 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2477 optab lshift_optab = ashl_optab;
2478 optab rshift_arith_optab = ashr_optab;
2479 optab rshift_uns_optab = lshr_optab;
2480 optab lrotate_optab = rotl_optab;
2481 optab rrotate_optab = rotr_optab;
2482 machine_mode op1_mode;
2483 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2484 int attempt;
2485 bool speed = optimize_insn_for_speed_p ();
2487 op1 = amount;
2488 op1_mode = GET_MODE (op1);
2490 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2491 shift amount is a vector, use the vector/vector shift patterns. */
2492 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2494 lshift_optab = vashl_optab;
2495 rshift_arith_optab = vashr_optab;
2496 rshift_uns_optab = vlshr_optab;
2497 lrotate_optab = vrotl_optab;
2498 rrotate_optab = vrotr_optab;
2501 /* Previously detected shift-counts computed by NEGATE_EXPR
2502 and shifted in the other direction; but that does not work
2503 on all machines. */
2505 if (SHIFT_COUNT_TRUNCATED)
2507 if (CONST_INT_P (op1)
2508 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2509 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2510 op1 = gen_int_shift_amount (mode,
2511 (unsigned HOST_WIDE_INT) INTVAL (op1)
2512 % GET_MODE_BITSIZE (scalar_mode));
2513 else if (GET_CODE (op1) == SUBREG
2514 && subreg_lowpart_p (op1)
2515 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2516 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2517 op1 = SUBREG_REG (op1);
2520 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2521 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2522 bitsize - 1, use the other direction of rotate with 1 .. bitsize / 2 - 1
2523 amount instead. */
2524 if (rotate
2525 && CONST_INT_P (op1)
2526 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2527 GET_MODE_BITSIZE (scalar_mode) - 1))
2529 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2530 - INTVAL (op1)));
2531 left = !left;
2532 code = left ? LROTATE_EXPR : RROTATE_EXPR;
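/* For instance, an SImode rotate right by 20 is canonicalized here into
   a rotate left by 12, and a rotate by exactly half the width (16) is
   expressed as a left rotate, so the expanders below only see rotate
   counts of at most half the mode width.  */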
2535 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2536 Note that this is not the case for bigger values. For instance a rotation
2537 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2538 0x04030201 (bswapsi). */
2539 if (rotate
2540 && CONST_INT_P (op1)
2541 && INTVAL (op1) == BITS_PER_UNIT
2542 && GET_MODE_SIZE (scalar_mode) == 2
2543 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2544 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2546 if (op1 == const0_rtx)
2547 return shifted;
2549 /* Check whether it's cheaper to implement a left shift by a constant
2550 bit count by a sequence of additions. */
2551 if (code == LSHIFT_EXPR
2552 && CONST_INT_P (op1)
2553 && INTVAL (op1) > 0
2554 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2555 && INTVAL (op1) < MAX_BITS_PER_WORD
2556 && (shift_cost (speed, mode, INTVAL (op1))
2557 > INTVAL (op1) * add_cost (speed, mode))
2558 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2560 int i;
2561 for (i = 0; i < INTVAL (op1); i++)
2563 temp = force_reg (mode, shifted);
2564 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2565 unsignedp, OPTAB_LIB_WIDEN);
2567 return shifted;
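/* The addition loop above implements the shift as repeated doubling:
   e.g. X << 3 becomes three additions, each computing
   shifted = shifted + shifted, which the cost test only allows when
   shift_cost for that count exceeds three times add_cost.  */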
2570 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2572 enum optab_methods methods;
2574 if (attempt == 0)
2575 methods = OPTAB_DIRECT;
2576 else if (attempt == 1)
2577 methods = OPTAB_WIDEN;
2578 else
2579 methods = OPTAB_LIB_WIDEN;
2581 if (rotate)
2583 /* Widening does not work for rotation. */
2584 if (methods == OPTAB_WIDEN)
2585 continue;
2586 else if (methods == OPTAB_LIB_WIDEN)
2588 /* If we have been unable to open-code this by a rotation,
2589 do it as the IOR of two shifts. I.e., to rotate A
2590 by N bits, compute
2591 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2592 where C is the bitsize of A.
2594 It is theoretically possible that the target machine might
2595 not be able to perform either shift and hence we would
2596 be making two libcalls rather than just the one for the
2597 shift (similarly if IOR could not be done). We will allow
2598 this extremely unlikely lossage to avoid complicating the
2599 code below. */
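/* Concretely, for C == 8 and N == 3 the formula above rotates
   A == 0xb1 as (0xb1 << 3) | (0xb1 >> 5) == 0x88 | 0x05 == 0x8d.
   The (-N) & (C - 1) form yields the complementary shift count (5 here)
   while staying in range even when N is zero or non-constant.  */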
2601 rtx subtarget = target == shifted ? 0 : target;
2602 rtx new_amount, other_amount;
2603 rtx temp1;
2605 new_amount = op1;
2606 if (op1 == const0_rtx)
2607 return shifted;
2608 else if (CONST_INT_P (op1))
2609 other_amount = gen_int_shift_amount
2610 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2611 else
2613 other_amount
2614 = simplify_gen_unary (NEG, GET_MODE (op1),
2615 op1, GET_MODE (op1));
2616 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2617 other_amount
2618 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2619 gen_int_mode (mask, GET_MODE (op1)));
2622 shifted = force_reg (mode, shifted);
2624 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2625 mode, shifted, new_amount, 0, 1);
2626 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2627 mode, shifted, other_amount,
2628 subtarget, 1);
2629 return expand_binop (mode, ior_optab, temp, temp1, target,
2630 unsignedp, methods);
2633 temp = expand_binop (mode,
2634 left ? lrotate_optab : rrotate_optab,
2635 shifted, op1, target, unsignedp, methods);
2637 else if (unsignedp)
2638 temp = expand_binop (mode,
2639 left ? lshift_optab : rshift_uns_optab,
2640 shifted, op1, target, unsignedp, methods);
2642 /* Do arithmetic shifts.
2643 Also, if we are going to widen the operand, we can just as well
2644 use an arithmetic right-shift instead of a logical one. */
2645 if (temp == 0 && ! rotate
2646 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2648 enum optab_methods methods1 = methods;
2650 /* If trying to widen a log shift to an arithmetic shift,
2651 don't accept an arithmetic shift of the same size. */
2652 if (unsignedp)
2653 methods1 = OPTAB_MUST_WIDEN;
2655 /* Arithmetic shift */
2657 temp = expand_binop (mode,
2658 left ? lshift_optab : rshift_arith_optab,
2659 shifted, op1, target, unsignedp, methods1);
2662 /* We used to try extzv here for logical right shifts, but that was
2663 only useful for one machine, the VAX, and caused poor code
2664 generation there for lshrdi3, so the code was deleted and a
2665 define_expand for lshrsi3 was added to vax.md. */
2668 gcc_assert (temp != NULL_RTX || may_fail);
2669 return temp;
2672 /* Output a shift instruction for expression code CODE,
2673 with SHIFTED being the rtx for the value to shift,
2674 and AMOUNT the amount to shift by.
2675 Store the result in the rtx TARGET, if that is convenient.
2676 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2677 Return the rtx for where the value is. */
2679 rtx
2680 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2681 poly_int64 amount, rtx target, int unsignedp)
2683 return expand_shift_1 (code, mode, shifted,
2684 gen_int_shift_amount (mode, amount),
2685 target, unsignedp);
2688 /* Likewise, but return 0 if that cannot be done. */
2690 static rtx
2691 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2692 int amount, rtx target, int unsignedp)
2694 return expand_shift_1 (code, mode,
2695 shifted, GEN_INT (amount), target, unsignedp, true);
2698 /* Output a shift instruction for expression code CODE,
2699 with SHIFTED being the rtx for the value to shift,
2700 and AMOUNT the tree for the amount to shift by.
2701 Store the result in the rtx TARGET, if that is convenient.
2702 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2703 Return the rtx for where the value is. */
2705 rtx
2706 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2707 tree amount, rtx target, int unsignedp)
2709 return expand_shift_1 (code, mode,
2710 shifted, expand_normal (amount), target, unsignedp);
2714 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2715 const struct mult_cost *, machine_mode mode);
2716 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2717 const struct algorithm *, enum mult_variant);
2718 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2719 static rtx extract_high_half (scalar_int_mode, rtx);
2720 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2721 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2722 int, int);
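/* As a concrete example of the searches below: depending on the costs
   recorded for the mode, multiplying by 7 typically comes out as
   (x << 3) - x, and multiplying by 30 as ((x << 4) - x) << 1, whenever
   such shift/add sequences are cheaper than a hardware multiply.  */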
2723 /* Compute and return the best algorithm for multiplying by T.
2724 The algorithm must cost less than COST_LIMIT.
2725 If retval.cost >= COST_LIMIT, no algorithm was found and all
2726 other fields of the returned struct are undefined.
2727 MODE is the machine mode of the multiplication. */
2729 static void
2730 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2731 const struct mult_cost *cost_limit, machine_mode mode)
2733 int m;
2734 struct algorithm *alg_in, *best_alg;
2735 struct mult_cost best_cost;
2736 struct mult_cost new_limit;
2737 int op_cost, op_latency;
2738 unsigned HOST_WIDE_INT orig_t = t;
2739 unsigned HOST_WIDE_INT q;
2740 int maxm, hash_index;
2741 bool cache_hit = false;
2742 enum alg_code cache_alg = alg_zero;
2743 bool speed = optimize_insn_for_speed_p ();
2744 scalar_int_mode imode;
2745 struct alg_hash_entry *entry_ptr;
2747 /* Indicate that no algorithm is yet found. If no algorithm
2748 is found, this value will be returned and indicate failure. */
2749 alg_out->cost.cost = cost_limit->cost + 1;
2750 alg_out->cost.latency = cost_limit->latency + 1;
2752 if (cost_limit->cost < 0
2753 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2754 return;
2756 /* Be prepared for vector modes. */
2757 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2759 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2761 /* Restrict the bits of "t" to the multiplication's mode. */
2762 t &= GET_MODE_MASK (imode);
2764 /* t == 1 can be done in zero cost. */
2765 if (t == 1)
2767 alg_out->ops = 1;
2768 alg_out->cost.cost = 0;
2769 alg_out->cost.latency = 0;
2770 alg_out->op[0] = alg_m;
2771 return;
2774 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2775 fail now. */
2776 if (t == 0)
2778 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2779 return;
2780 else
2782 alg_out->ops = 1;
2783 alg_out->cost.cost = zero_cost (speed);
2784 alg_out->cost.latency = zero_cost (speed);
2785 alg_out->op[0] = alg_zero;
2786 return;
2790 /* We'll be needing a couple extra algorithm structures now. */
2792 alg_in = XALLOCA (struct algorithm);
2793 best_alg = XALLOCA (struct algorithm);
2794 best_cost = *cost_limit;
2796 /* Compute the hash index. */
2797 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2799 /* See if we already know what to do for T. */
2800 entry_ptr = alg_hash_entry_ptr (hash_index);
2801 if (entry_ptr->t == t
2802 && entry_ptr->mode == mode
2803 && entry_ptr->speed == speed
2804 && entry_ptr->alg != alg_unknown)
2806 cache_alg = entry_ptr->alg;
2808 if (cache_alg == alg_impossible)
2810 /* The cache tells us that it's impossible to synthesize
2811 multiplication by T within entry_ptr->cost. */
2812 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2813 /* COST_LIMIT is at least as restrictive as the one
2814 recorded in the hash table, in which case we have no
2815 hope of synthesizing a multiplication. Just
2816 return. */
2817 return;
2819 /* If we get here, COST_LIMIT is less restrictive than the
2820 one recorded in the hash table, so we may be able to
2821 synthesize a multiplication. Proceed as if we didn't
2822 have the cache entry. */
2824 else
2826 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2827 /* The cached algorithm shows that this multiplication
2828 requires more cost than COST_LIMIT. Just return. This
2829 way, we don't clobber this cache entry with
2830 alg_impossible but retain useful information. */
2831 return;
2833 cache_hit = true;
2835 switch (cache_alg)
2837 case alg_shift:
2838 goto do_alg_shift;
2840 case alg_add_t_m2:
2841 case alg_sub_t_m2:
2842 goto do_alg_addsub_t_m2;
2844 case alg_add_factor:
2845 case alg_sub_factor:
2846 goto do_alg_addsub_factor;
2848 case alg_add_t2_m:
2849 goto do_alg_add_t2_m;
2851 case alg_sub_t2_m:
2852 goto do_alg_sub_t2_m;
2854 default:
2855 gcc_unreachable ();
2860 /* If we have a group of zero bits at the low-order part of T, try
2861 multiplying by the remaining bits and then doing a shift. */
2863 if ((t & 1) == 0)
2865 do_alg_shift:
2866 m = ctz_or_zero (t); /* m = number of low zero bits */
2867 if (m < maxm)
2869 q = t >> m;
2870 /* The function expand_shift will choose between a shift and
2871 a sequence of additions, so the observed cost is given as
2872 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2873 op_cost = m * add_cost (speed, mode);
2874 if (shift_cost (speed, mode, m) < op_cost)
2875 op_cost = shift_cost (speed, mode, m);
2876 new_limit.cost = best_cost.cost - op_cost;
2877 new_limit.latency = best_cost.latency - op_cost;
2878 synth_mult (alg_in, q, &new_limit, mode);
2880 alg_in->cost.cost += op_cost;
2881 alg_in->cost.latency += op_cost;
2882 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2884 best_cost = alg_in->cost;
2885 std::swap (alg_in, best_alg);
2886 best_alg->log[best_alg->ops] = m;
2887 best_alg->op[best_alg->ops] = alg_shift;
2890 /* See if treating ORIG_T as a signed number yields a better
2891 sequence. Try this sequence only for a negative ORIG_T
2892 as it would be useless for a non-negative ORIG_T. */
2893 if ((HOST_WIDE_INT) orig_t < 0)
2895 /* Shift ORIG_T as follows because a right shift of a
2896 negative-valued signed type is implementation
2897 defined. */
2898 q = ~(~orig_t >> m);
2899 /* The function expand_shift will choose between a shift
2900 and a sequence of additions, so the observed cost is
2901 given as MIN (m * add_cost(speed, mode),
2902 shift_cost(speed, mode, m)). */
2903 op_cost = m * add_cost (speed, mode);
2904 if (shift_cost (speed, mode, m) < op_cost)
2905 op_cost = shift_cost (speed, mode, m);
2906 new_limit.cost = best_cost.cost - op_cost;
2907 new_limit.latency = best_cost.latency - op_cost;
2908 synth_mult (alg_in, q, &new_limit, mode);
2910 alg_in->cost.cost += op_cost;
2911 alg_in->cost.latency += op_cost;
2912 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2914 best_cost = alg_in->cost;
2915 std::swap (alg_in, best_alg);
2916 best_alg->log[best_alg->ops] = m;
2917 best_alg->op[best_alg->ops] = alg_shift;
2921 if (cache_hit)
2922 goto done;
2925 /* If we have an odd number, add or subtract one. */
2926 if ((t & 1) != 0)
2928 unsigned HOST_WIDE_INT w;
2930 do_alg_addsub_t_m2:
2931 for (w = 1; (w & t) != 0; w <<= 1)
2933 /* If T was -1, then W will be zero after the loop. This is another
2934 case where T ends with ...111. Handling this with (T + 1) and
2935 subtract 1 produces slightly better code and results in algorithm
2936 selection much faster than treating it like the ...0111 case
2937 below. */
2938 if (w == 0
2939 || (w > 2
2940 /* Reject the case where t is 3.
2941 Thus we prefer addition in that case. */
2942 && t != 3))
2944 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2946 op_cost = add_cost (speed, mode);
2947 new_limit.cost = best_cost.cost - op_cost;
2948 new_limit.latency = best_cost.latency - op_cost;
2949 synth_mult (alg_in, t + 1, &new_limit, mode);
2951 alg_in->cost.cost += op_cost;
2952 alg_in->cost.latency += op_cost;
2953 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2955 best_cost = alg_in->cost;
2956 std::swap (alg_in, best_alg);
2957 best_alg->log[best_alg->ops] = 0;
2958 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2961 else
2963 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2965 op_cost = add_cost (speed, mode);
2966 new_limit.cost = best_cost.cost - op_cost;
2967 new_limit.latency = best_cost.latency - op_cost;
2968 synth_mult (alg_in, t - 1, &new_limit, mode);
2970 alg_in->cost.cost += op_cost;
2971 alg_in->cost.latency += op_cost;
2972 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2974 best_cost = alg_in->cost;
2975 std::swap (alg_in, best_alg);
2976 best_alg->log[best_alg->ops] = 0;
2977 best_alg->op[best_alg->ops] = alg_add_t_m2;
2981 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2982 quickly with a - a * n for some appropriate constant n. */
2983 m = exact_log2 (-orig_t + 1);
2984 if (m >= 0 && m < maxm)
2986 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2987 /* If the target has a cheap shift-and-subtract insn use
2988 that in preference to a shift insn followed by a sub insn.
2989 Assume that the shift-and-sub is "atomic" with a latency
2990 equal to its cost, otherwise assume that on superscalar
2991 hardware the shift may be executed concurrently with the
2992 earlier steps in the algorithm. */
2993 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2995 op_cost = shiftsub1_cost (speed, mode, m);
2996 op_latency = op_cost;
2998 else
2999 op_latency = add_cost (speed, mode);
3001 new_limit.cost = best_cost.cost - op_cost;
3002 new_limit.latency = best_cost.latency - op_latency;
3003 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3004 &new_limit, mode);
3006 alg_in->cost.cost += op_cost;
3007 alg_in->cost.latency += op_latency;
3008 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3010 best_cost = alg_in->cost;
3011 std::swap (alg_in, best_alg);
3012 best_alg->log[best_alg->ops] = m;
3013 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3017 if (cache_hit)
3018 goto done;
3021 /* Look for factors of t of the form
3022 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3023 If we find such a factor, we can multiply by t using an algorithm that
3024 multiplies by q, shifts the result by m, and adds/subtracts it to/from itself.
3026 We search for large factors first and loop down, even if large factors
3027 are less probable than small; if we find a large factor we will find a
3028 good sequence quickly, and therefore be able to prune (by decreasing
3029 COST_LIMIT) the search. */
3031 do_alg_addsub_factor:
3032 for (m = floor_log2 (t - 1); m >= 2; m--)
3034 unsigned HOST_WIDE_INT d;
3036 d = (HOST_WIDE_INT_1U << m) + 1;
3037 if (t % d == 0 && t > d && m < maxm
3038 && (!cache_hit || cache_alg == alg_add_factor))
3040 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3041 if (shiftadd_cost (speed, mode, m) <= op_cost)
3042 op_cost = shiftadd_cost (speed, mode, m);
3044 op_latency = op_cost;
3047 new_limit.cost = best_cost.cost - op_cost;
3048 new_limit.latency = best_cost.latency - op_latency;
3049 synth_mult (alg_in, t / d, &new_limit, mode);
3051 alg_in->cost.cost += op_cost;
3052 alg_in->cost.latency += op_latency;
3053 if (alg_in->cost.latency < op_cost)
3054 alg_in->cost.latency = op_cost;
3055 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3057 best_cost = alg_in->cost;
3058 std::swap (alg_in, best_alg);
3059 best_alg->log[best_alg->ops] = m;
3060 best_alg->op[best_alg->ops] = alg_add_factor;
3062 /* Other factors will have been taken care of in the recursion. */
3063 break;
3066 d = (HOST_WIDE_INT_1U << m) - 1;
3067 if (t % d == 0 && t > d && m < maxm
3068 && (!cache_hit || cache_alg == alg_sub_factor))
3070 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3071 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3072 op_cost = shiftsub0_cost (speed, mode, m);
3074 op_latency = op_cost;
3076 new_limit.cost = best_cost.cost - op_cost;
3077 new_limit.latency = best_cost.latency - op_latency;
3078 synth_mult (alg_in, t / d, &new_limit, mode);
3080 alg_in->cost.cost += op_cost;
3081 alg_in->cost.latency += op_latency;
3082 if (alg_in->cost.latency < op_cost)
3083 alg_in->cost.latency = op_cost;
3084 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3086 best_cost = alg_in->cost;
3087 std::swap (alg_in, best_alg);
3088 best_alg->log[best_alg->ops] = m;
3089 best_alg->op[best_alg->ops] = alg_sub_factor;
3091 break;
3094 if (cache_hit)
3095 goto done;
3097 /* Try shift-and-add (load effective address) instructions,
3098 i.e. do a*3, a*5, a*9. */
3099 if ((t & 1) != 0)
3101 do_alg_add_t2_m:
3102 q = t - 1;
3103 m = ctz_hwi (q);
3104 if (q && m < maxm)
3106 op_cost = shiftadd_cost (speed, mode, m);
3107 new_limit.cost = best_cost.cost - op_cost;
3108 new_limit.latency = best_cost.latency - op_cost;
3109 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3111 alg_in->cost.cost += op_cost;
3112 alg_in->cost.latency += op_cost;
3113 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3115 best_cost = alg_in->cost;
3116 std::swap (alg_in, best_alg);
3117 best_alg->log[best_alg->ops] = m;
3118 best_alg->op[best_alg->ops] = alg_add_t2_m;
3121 if (cache_hit)
3122 goto done;
3124 do_alg_sub_t2_m:
3125 q = t + 1;
3126 m = ctz_hwi (q);
3127 if (q && m < maxm)
3129 op_cost = shiftsub0_cost (speed, mode, m);
3130 new_limit.cost = best_cost.cost - op_cost;
3131 new_limit.latency = best_cost.latency - op_cost;
3132 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3134 alg_in->cost.cost += op_cost;
3135 alg_in->cost.latency += op_cost;
3136 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3138 best_cost = alg_in->cost;
3139 std::swap (alg_in, best_alg);
3140 best_alg->log[best_alg->ops] = m;
3141 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3144 if (cache_hit)
3145 goto done;
3148 done:
3149 /* If best_cost has not decreased, we have not found any algorithm. */
3150 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3152 /* We failed to find an algorithm. Record alg_impossible for
3153 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3154 we are asked to find an algorithm for T within the same or
3155 lower COST_LIMIT, we can immediately return to the
3156 caller. */
3157 entry_ptr->t = t;
3158 entry_ptr->mode = mode;
3159 entry_ptr->speed = speed;
3160 entry_ptr->alg = alg_impossible;
3161 entry_ptr->cost = *cost_limit;
3162 return;
3165 /* Cache the result. */
3166 if (!cache_hit)
3168 entry_ptr->t = t;
3169 entry_ptr->mode = mode;
3170 entry_ptr->speed = speed;
3171 entry_ptr->alg = best_alg->op[best_alg->ops];
3172 entry_ptr->cost.cost = best_cost.cost;
3173 entry_ptr->cost.latency = best_cost.latency;
3176 /* If we are getting a too long sequence for `struct algorithm'
3177 to record, make this search fail. */
3178 if (best_alg->ops == MAX_BITS_PER_WORD)
3179 return;
3181 /* Copy the algorithm from temporary space to the space at alg_out.
3182 We avoid using structure assignment because the majority of
3183 best_alg is normally undefined, and this is a critical function. */
3184 alg_out->ops = best_alg->ops + 1;
3185 alg_out->cost = best_cost;
3186 memcpy (alg_out->op, best_alg->op,
3187 alg_out->ops * sizeof *alg_out->op);
3188 memcpy (alg_out->log, best_alg->log,
3189 alg_out->ops * sizeof *alg_out->log);
3192 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3193 Try three variations:
3195 - a shift/add sequence based on VAL itself
3196 - a shift/add sequence based on -VAL, followed by a negation
3197 - a shift/add sequence based on VAL - 1, followed by an addition.
3199 Return true if the cheapest of these cost less than MULT_COST,
3200 describing the algorithm in *ALG and final fixup in *VARIANT. */
3202 bool
3203 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3204 struct algorithm *alg, enum mult_variant *variant,
3205 int mult_cost)
3207 struct algorithm alg2;
3208 struct mult_cost limit;
3209 int op_cost;
3210 bool speed = optimize_insn_for_speed_p ();
3212 /* Fail quickly for impossible bounds. */
3213 if (mult_cost < 0)
3214 return false;
3216 /* Ensure that mult_cost provides a reasonable upper bound.
3217 Any constant multiplication can be performed with less
3218 than 2 * bits additions. */
3219 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3220 if (mult_cost > op_cost)
3221 mult_cost = op_cost;
3223 *variant = basic_variant;
3224 limit.cost = mult_cost;
3225 limit.latency = mult_cost;
3226 synth_mult (alg, val, &limit, mode);
3228 /* This works only if the inverted value actually fits in an
3229 `unsigned int' */
3230 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3232 op_cost = neg_cost (speed, mode);
3233 if (MULT_COST_LESS (&alg->cost, mult_cost))
3235 limit.cost = alg->cost.cost - op_cost;
3236 limit.latency = alg->cost.latency - op_cost;
3238 else
3240 limit.cost = mult_cost - op_cost;
3241 limit.latency = mult_cost - op_cost;
3244 synth_mult (&alg2, -val, &limit, mode);
3245 alg2.cost.cost += op_cost;
3246 alg2.cost.latency += op_cost;
3247 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3248 *alg = alg2, *variant = negate_variant;
3251 /* This proves very useful for division-by-constant. */
3252 op_cost = add_cost (speed, mode);
3253 if (MULT_COST_LESS (&alg->cost, mult_cost))
3255 limit.cost = alg->cost.cost - op_cost;
3256 limit.latency = alg->cost.latency - op_cost;
3258 else
3260 limit.cost = mult_cost - op_cost;
3261 limit.latency = mult_cost - op_cost;
3264 synth_mult (&alg2, val - 1, &limit, mode);
3265 alg2.cost.cost += op_cost;
3266 alg2.cost.latency += op_cost;
3267 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3268 *alg = alg2, *variant = add_variant;
3270 return MULT_COST_LESS (&alg->cost, mult_cost);
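/* For example, a multiplication by -6 may be cheapest as the
   negate_variant: synthesize x * 6 as ((x << 1) + x) << 1 and negate
   the result.  The add_variant (synthesize VAL - 1, then add the
   operand once more) mostly pays off for the large "magic" constants
   used by division-by-constant, where VAL - 1 often has a simpler
   shift/add decomposition than VAL itself.  */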
3273 /* A subroutine of expand_mult, used for constant multiplications.
3274 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3275 convenient. Use the shift/add sequence described by ALG and apply
3276 the final fixup specified by VARIANT. */
3278 static rtx
3279 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3280 rtx target, const struct algorithm *alg,
3281 enum mult_variant variant)
3283 unsigned HOST_WIDE_INT val_so_far;
3284 rtx_insn *insn;
3285 rtx accum, tem;
3286 int opno;
3287 machine_mode nmode;
3289 /* Avoid referencing memory over and over and invalid sharing
3290 on SUBREGs. */
3291 op0 = force_reg (mode, op0);
3293 /* ACCUM starts out either as OP0 or as a zero, depending on
3294 the first operation. */
3296 if (alg->op[0] == alg_zero)
3298 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3299 val_so_far = 0;
3301 else if (alg->op[0] == alg_m)
3303 accum = copy_to_mode_reg (mode, op0);
3304 val_so_far = 1;
3306 else
3307 gcc_unreachable ();
3309 for (opno = 1; opno < alg->ops; opno++)
3311 int log = alg->log[opno];
3312 rtx shift_subtarget = optimize ? 0 : accum;
3313 rtx add_target
3314 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3315 && !optimize)
3316 ? target : 0;
3317 rtx accum_target = optimize ? 0 : accum;
3318 rtx accum_inner;
3320 switch (alg->op[opno])
3322 case alg_shift:
3323 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3324 /* REG_EQUAL note will be attached to the following insn. */
3325 emit_move_insn (accum, tem);
3326 val_so_far <<= log;
3327 break;
3329 case alg_add_t_m2:
3330 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3331 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3332 add_target ? add_target : accum_target);
3333 val_so_far += HOST_WIDE_INT_1U << log;
3334 break;
3336 case alg_sub_t_m2:
3337 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3338 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3339 add_target ? add_target : accum_target);
3340 val_so_far -= HOST_WIDE_INT_1U << log;
3341 break;
3343 case alg_add_t2_m:
3344 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3345 log, shift_subtarget, 0);
3346 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3347 add_target ? add_target : accum_target);
3348 val_so_far = (val_so_far << log) + 1;
3349 break;
3351 case alg_sub_t2_m:
3352 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3353 log, shift_subtarget, 0);
3354 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3355 add_target ? add_target : accum_target);
3356 val_so_far = (val_so_far << log) - 1;
3357 break;
3359 case alg_add_factor:
3360 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3361 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3362 add_target ? add_target : accum_target);
3363 val_so_far += val_so_far << log;
3364 break;
3366 case alg_sub_factor:
3367 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3368 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3369 (add_target
3370 ? add_target : (optimize ? 0 : tem)));
3371 val_so_far = (val_so_far << log) - val_so_far;
3372 break;
3374 default:
3375 gcc_unreachable ();
3378 if (SCALAR_INT_MODE_P (mode))
3380 /* Write a REG_EQUAL note on the last insn so that we can cse
3381 multiplication sequences. Note that if ACCUM is a SUBREG,
3382 we've set the inner register and must properly indicate that. */
3383 tem = op0, nmode = mode;
3384 accum_inner = accum;
3385 if (GET_CODE (accum) == SUBREG)
3387 accum_inner = SUBREG_REG (accum);
3388 nmode = GET_MODE (accum_inner);
3389 tem = gen_lowpart (nmode, op0);
3392 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3393 In that case, only the low bits of accum would be guaranteed to
3394 be equal to the content of the REG_EQUAL note, the upper bits
3395 can be anything. */
3396 if (!paradoxical_subreg_p (tem))
3398 insn = get_last_insn ();
3399 wide_int wval_so_far
3400 = wi::uhwi (val_so_far,
3401 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3402 rtx c = immed_wide_int_const (wval_so_far, nmode);
3403 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3404 accum_inner);
3409 if (variant == negate_variant)
3411 val_so_far = -val_so_far;
3412 accum = expand_unop (mode, neg_optab, accum, target, 0);
3414 else if (variant == add_variant)
3416 val_so_far = val_so_far + 1;
3417 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3420 /* Compare only the bits of val and val_so_far that are significant
3421 in the result mode, to avoid sign-/zero-extension confusion. */
3422 nmode = GET_MODE_INNER (mode);
3423 val &= GET_MODE_MASK (nmode);
3424 val_so_far &= GET_MODE_MASK (nmode);
3425 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3427 return accum;
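/* Example trace of the loop above for VAL == 10 with the algorithm
   { alg_m, alg_add_t2_m log 2, alg_shift log 1 }: ACCUM starts as OP0
   (val_so_far == 1), becomes (OP0 << 2) + OP0 (val_so_far == 5), and is
   finally shifted left by 1 (val_so_far == 10); the assertion above
   checks that val_so_far agrees with VAL in the result mode.  */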
3430 /* Perform a multiplication and return an rtx for the result.
3431 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3432 TARGET is a suggestion for where to store the result (an rtx).
3434 We check specially for a constant integer as OP1.
3435 If you want this check for OP0 as well, then before calling
3436 you should swap the two operands if OP0 would be constant. */
3439 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3440 int unsignedp, bool no_libcall)
3442 enum mult_variant variant;
3443 struct algorithm algorithm;
3444 rtx scalar_op1;
3445 int max_cost;
3446 bool speed = optimize_insn_for_speed_p ();
3447 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3449 if (CONSTANT_P (op0))
3450 std::swap (op0, op1);
3452 /* For vectors, there are several simplifications that can be made if
3453 all elements of the vector constant are identical. */
3454 scalar_op1 = unwrap_const_vec_duplicate (op1);
3456 if (INTEGRAL_MODE_P (mode))
3458 rtx fake_reg;
3459 HOST_WIDE_INT coeff;
3460 bool is_neg;
3461 int mode_bitsize;
3463 if (op1 == CONST0_RTX (mode))
3464 return op1;
3465 if (op1 == CONST1_RTX (mode))
3466 return op0;
3467 if (op1 == CONSTM1_RTX (mode))
3468 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3469 op0, target, 0);
3471 if (do_trapv)
3472 goto skip_synth;
3474 /* If MODE is an integer vector mode, check whether the backend supports
3475 vector lshift (by scalar or vector) at all. If not, we can't use a
3476 synthesized multiply. */
3477 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3478 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3479 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3480 goto skip_synth;
3482 /* These are the operations that are potentially turned into
3483 a sequence of shifts and additions. */
3484 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3486 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3487 less than or equal in size to `unsigned int' this doesn't matter.
3488 If the mode is larger than `unsigned int', then synth_mult works
3489 only if the constant value exactly fits in an `unsigned int' without
3490 any truncation. This means that multiplying by negative values does
3491 not work; results are off by 2^32 on a 32 bit machine. */
3492 if (CONST_INT_P (scalar_op1))
3494 coeff = INTVAL (scalar_op1);
3495 is_neg = coeff < 0;
3497 #if TARGET_SUPPORTS_WIDE_INT
3498 else if (CONST_WIDE_INT_P (scalar_op1))
3499 #else
3500 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3501 #endif
3503 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3504 /* Perfect power of 2 (other than 1, which is handled above). */
3505 if (shift > 0)
3506 return expand_shift (LSHIFT_EXPR, mode, op0,
3507 shift, target, unsignedp);
3508 else
3509 goto skip_synth;
3511 else
3512 goto skip_synth;
3514 /* We used to test optimize here, on the grounds that it's better to
3515 produce a smaller program when -O is not used. But this causes
3516 such a terrible slowdown sometimes that it seems better to always
3517 use synth_mult. */
3519 /* Special case powers of two. */
3520 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3521 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3522 return expand_shift (LSHIFT_EXPR, mode, op0,
3523 floor_log2 (coeff), target, unsignedp);
3525 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3527 /* Attempt to handle multiplication of DImode values by negative
3528 coefficients, by performing the multiplication by a positive
3529 multiplier and then inverting the result. */
3530 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3532 /* It's safe to use -coeff even for INT_MIN, as the
3533 result is interpreted as an unsigned coefficient.
3534 Exclude cost of op0 from max_cost to match the cost
3535 calculation of the synth_mult. */
3536 coeff = -(unsigned HOST_WIDE_INT) coeff;
3537 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3538 mode, speed)
3539 - neg_cost (speed, mode));
3540 if (max_cost <= 0)
3541 goto skip_synth;
3543 /* Special case powers of two. */
3544 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3546 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3547 floor_log2 (coeff), target, unsignedp);
3548 return expand_unop (mode, neg_optab, temp, target, 0);
3551 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3552 max_cost))
3554 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3555 &algorithm, variant);
3556 return expand_unop (mode, neg_optab, temp, target, 0);
3558 goto skip_synth;
3561 /* Exclude cost of op0 from max_cost to match the cost
3562 calculation of the synth_mult. */
3563 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3564 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3565 return expand_mult_const (mode, op0, coeff, target,
3566 &algorithm, variant);
3568 skip_synth:
3570 /* Expand x*2.0 as x+x. */
3571 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3572 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3574 op0 = force_reg (GET_MODE (op0), op0);
3575 return expand_binop (mode, add_optab, op0, op0,
3576 target, unsignedp,
3577 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3580 /* This used to use umul_optab if unsigned, but for non-widening multiply
3581 there is no difference between signed and unsigned. */
3582 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3583 op0, op1, target, unsignedp,
3584 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3585 gcc_assert (op0 || no_libcall);
3586 return op0;
3589 /* Return a cost estimate for multiplying a register by the given
3590 COEFFicient in the given MODE and SPEED. */
3593 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3595 int max_cost;
3596 struct algorithm algorithm;
3597 enum mult_variant variant;
3599 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3600 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3601 mode, speed);
3602 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3603 return algorithm.cost.cost;
3604 else
3605 return max_cost;
3608 /* Perform a widening multiplication and return an rtx for the result.
3609 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3610 TARGET is a suggestion for where to store the result (an rtx).
3611 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3612 or smul_widen_optab.
3614 We check specially for a constant integer as OP1, comparing the
3615 cost of a widening multiply against the cost of a sequence of shifts
3616 and adds. */
3619 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3620 int unsignedp, optab this_optab)
3622 bool speed = optimize_insn_for_speed_p ();
3623 rtx cop1;
3625 if (CONST_INT_P (op1)
3626 && GET_MODE (op0) != VOIDmode
3627 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3628 this_optab == umul_widen_optab))
3629 && CONST_INT_P (cop1)
3630 && (INTVAL (cop1) >= 0
3631 || HWI_COMPUTABLE_MODE_P (mode)))
3633 HOST_WIDE_INT coeff = INTVAL (cop1);
3634 int max_cost;
3635 enum mult_variant variant;
3636 struct algorithm algorithm;
3638 if (coeff == 0)
3639 return CONST0_RTX (mode);
3641 /* Special case powers of two. */
3642 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3644 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3645 return expand_shift (LSHIFT_EXPR, mode, op0,
3646 floor_log2 (coeff), target, unsignedp);
3649 /* Exclude cost of op0 from max_cost to match the cost
3650 calculation of the synth_mult. */
3651 max_cost = mul_widen_cost (speed, mode);
3652 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3653 max_cost))
3655 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3656 return expand_mult_const (mode, op0, coeff, target,
3657 &algorithm, variant);
3660 return expand_binop (mode, this_optab, op0, op1, target,
3661 unsignedp, OPTAB_LIB_WIDEN);
3664 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3665 replace division by D, and put the least significant N bits of the result
3666 in *MULTIPLIER_PTR and return the most significant bit.
3668 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3669 needed precision is in PRECISION (should be <= N).
3671 PRECISION should be as small as possible so this function can choose
3672 the multiplier more freely.
3674 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3675 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3677 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3678 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
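/* For example, with D = 7 and N = PRECISION = 32: lgup = 3,
   mlow = floor(2^35 / 7) = 4908534052 and mhigh = floor((2^35 + 2^3) / 7)
   = 4908534053; the two differ only in bit 0, so the reduction loop stops
   at once and *POST_SHIFT_PTR stays 3.  mhigh does not fit in 32 bits, so
   the low 32 bits, 0x24924925, go into *MULTIPLIER_PTR and the function
   returns 1 -- the usual "magic number" setup for unsigned division by 7.  */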
3680 unsigned HOST_WIDE_INT
3681 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3682 unsigned HOST_WIDE_INT *multiplier_ptr,
3683 int *post_shift_ptr, int *lgup_ptr)
3685 int lgup, post_shift;
3686 int pow, pow2;
3688 /* lgup = ceil(log2(divisor)); */
3689 lgup = ceil_log2 (d);
3691 gcc_assert (lgup <= n);
3693 pow = n + lgup;
3694 pow2 = n + lgup - precision;
3696 /* mlow = 2^(N + lgup)/d */
3697 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3698 wide_int mlow = wi::udiv_trunc (val, d);
3700 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3701 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3702 wide_int mhigh = wi::udiv_trunc (val, d);
3704 /* If precision == N, then mlow, mhigh exceed 2^N
3705 (but they do not exceed 2^(N+1)). */
3707 /* Reduce to lowest terms. */
3708 for (post_shift = lgup; post_shift > 0; post_shift--)
3710 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3711 HOST_BITS_PER_WIDE_INT);
3712 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3713 HOST_BITS_PER_WIDE_INT);
3714 if (ml_lo >= mh_lo)
3715 break;
3717 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3718 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3721 *post_shift_ptr = post_shift;
3722 *lgup_ptr = lgup;
3723 if (n < HOST_BITS_PER_WIDE_INT)
3725 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3726 *multiplier_ptr = mhigh.to_uhwi () & mask;
3727 return mhigh.to_uhwi () > mask;
3729 else
3731 *multiplier_ptr = mhigh.to_uhwi ();
3732 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3736 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3737 congruent to 1 (mod 2**N). */
3739 static unsigned HOST_WIDE_INT
3740 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3742 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3744 /* The algorithm notes that the choice y = x satisfies
3745 x*y == 1 mod 2^3, since x is assumed odd.
3746 Each iteration doubles the number of bits of significance in y. */
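/* For example, invert_mod2n (7, 8): y starts at 7 (7*7 == 49 == 1 mod 8);
   the first iteration gives y = 7 * (2 - 49) mod 256 = 183, and indeed
   7 * 183 == 1281 == 5*256 + 1 == 1 mod 256; the second iteration leaves
   183 unchanged, NBIT reaches 12 >= 8, and 183 is returned.  */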
3748 unsigned HOST_WIDE_INT mask;
3749 unsigned HOST_WIDE_INT y = x;
3750 int nbit = 3;
3752 mask = (n == HOST_BITS_PER_WIDE_INT
3753 ? HOST_WIDE_INT_M1U
3754 : (HOST_WIDE_INT_1U << n) - 1);
3756 while (nbit < n)
3758 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3759 nbit *= 2;
3761 return y;
3764 /* Emit code to adjust ADJ_OPERAND after a multiplication of the wrong-signedness
3765 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3766 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3767 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3768 become signed.
3770 The result is put in TARGET if that is convenient.
3772 MODE is the mode of operation. */
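/* The adjustment follows from viewing the operands alternately as signed
   and unsigned N-bit values: if x_u = x_s + 2^N * (x_s < 0), and likewise
   for y, then the high halves of the 2N-bit products are related by
   high_u (x*y) == high_s (x*y) + (x_s < 0 ? y_s : 0) + (y_s < 0 ? x_s : 0)
   modulo 2^N, which is exactly what the arithmetic-shift-and-AND sequence
   below adds (UNSIGNEDP nonzero) or subtracts (UNSIGNEDP zero).  */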
3775 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3776 rtx op1, rtx target, int unsignedp)
3778 rtx tem;
3779 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3781 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3782 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3783 tem = expand_and (mode, tem, op1, NULL_RTX);
3784 adj_operand
3785 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3786 adj_operand);
3788 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3789 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3790 tem = expand_and (mode, tem, op0, NULL_RTX);
3791 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3792 target);
3794 return target;
3797 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3799 static rtx
3800 extract_high_half (scalar_int_mode mode, rtx op)
3802 if (mode == word_mode)
3803 return gen_highpart (mode, op);
3805 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3807 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3808 GET_MODE_BITSIZE (mode), 0, 1);
3809 return convert_modes (mode, wider_mode, op, 0);
3812 /* Like expmed_mult_highpart, but only consider using a multiplication
3813 optab. OP1 is an rtx for the constant operand. */
3815 static rtx
3816 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3817 rtx target, int unsignedp, int max_cost)
3819 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3820 optab moptab;
3821 rtx tem;
3822 int size;
3823 bool speed = optimize_insn_for_speed_p ();
3825 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3827 size = GET_MODE_BITSIZE (mode);
3829 /* Firstly, try using a multiplication insn that only generates the needed
3830 high part of the product, and in the sign flavor of unsignedp. */
3831 if (mul_highpart_cost (speed, mode) < max_cost)
3833 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3834 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3835 unsignedp, OPTAB_DIRECT);
3836 if (tem)
3837 return tem;
3840 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3841 Need to adjust the result after the multiplication. */
3842 if (size - 1 < BITS_PER_WORD
3843 && (mul_highpart_cost (speed, mode)
3844 + 2 * shift_cost (speed, mode, size-1)
3845 + 4 * add_cost (speed, mode) < max_cost))
3847 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3848 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3849 unsignedp, OPTAB_DIRECT);
3850 if (tem)
3851 /* We used the wrong signedness. Adjust the result. */
3852 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3853 tem, unsignedp);
3856 /* Try widening multiplication. */
3857 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3858 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3859 && mul_widen_cost (speed, wider_mode) < max_cost)
3861 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3862 unsignedp, OPTAB_WIDEN);
3863 if (tem)
3864 return extract_high_half (mode, tem);
3867 /* Try widening the mode and performing a non-widening multiplication.
3868 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3869 && size - 1 < BITS_PER_WORD
3870 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3871 < max_cost))
3873 rtx_insn *insns;
3874 rtx wop0, wop1;
3876 /* We need to widen the operands, for example to ensure the
3877 constant multiplier is correctly sign or zero extended.
3878 Use a sequence to clean up any instructions emitted by
3879 the conversions if things don't work out. */
3880 start_sequence ();
3881 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3882 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3883 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3884 unsignedp, OPTAB_WIDEN);
3885 insns = get_insns ();
3886 end_sequence ();
3888 if (tem)
3890 emit_insn (insns);
3891 return extract_high_half (mode, tem);
3895 /* Try widening multiplication of opposite signedness, and adjust. */
3896 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3897 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3898 && size - 1 < BITS_PER_WORD
3899 && (mul_widen_cost (speed, wider_mode)
3900 + 2 * shift_cost (speed, mode, size-1)
3901 + 4 * add_cost (speed, mode) < max_cost))
3903 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3904 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3905 if (tem != 0)
3907 tem = extract_high_half (mode, tem);
3908 /* We used the wrong signedness. Adjust the result. */
3909 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3910 target, unsignedp);
3914 return 0;
3917 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3918 putting the high half of the result in TARGET if that is convenient,
3919 and return where the result is. If the operation cannot be performed,
3920 0 is returned.
3922 MODE is the mode of operation and result.
3924 UNSIGNEDP nonzero means unsigned multiply.
3926 MAX_COST is the total allowed cost for the expanded RTL. */
3928 static rtx
3929 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3930 rtx target, int unsignedp, int max_cost)
3932 unsigned HOST_WIDE_INT cnst1;
3933 int extra_cost;
3934 bool sign_adjust = false;
3935 enum mult_variant variant;
3936 struct algorithm alg;
3937 rtx tem;
3938 bool speed = optimize_insn_for_speed_p ();
3940 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3941 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3943 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3945 /* We can't optimize modes wider than BITS_PER_WORD.
3946 ??? We might be able to perform double-word arithmetic if
3947 mode == word_mode, however all the cost calculations in
3948 synth_mult etc. assume single-word operations. */
3949 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3950 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3951 return expmed_mult_highpart_optab (mode, op0, op1, target,
3952 unsignedp, max_cost);
3954 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3956 /* Check whether we try to multiply by a negative constant. */
3957 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3959 sign_adjust = true;
3960 extra_cost += add_cost (speed, mode);
3963 /* See whether shift/add multiplication is cheap enough. */
3964 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3965 max_cost - extra_cost))
3967 /* See whether the specialized multiplication optabs are
3968 cheaper than the shift/add version. */
3969 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3970 alg.cost.cost + extra_cost);
3971 if (tem)
3972 return tem;
3974 tem = convert_to_mode (wider_mode, op0, unsignedp);
3975 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3976 tem = extract_high_half (mode, tem);
3978 /* Adjust result for signedness. */
3979 if (sign_adjust)
3980 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3982 return tem;
3984 return expmed_mult_highpart_optab (mode, op0, op1, target,
3985 unsignedp, max_cost);
3989 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3991 static rtx
3992 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3994 rtx result, temp, shift;
3995 rtx_code_label *label;
3996 int logd;
3997 int prec = GET_MODE_PRECISION (mode);
3999 logd = floor_log2 (d);
4000 result = gen_reg_rtx (mode);
4002 /* Avoid conditional branches when they're expensive. */
4003 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4004 && optimize_insn_for_speed_p ())
4006 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4007 mode, 0, -1);
4008 if (signmask)
4010 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4011 signmask = force_reg (mode, signmask);
4012 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4014 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4015 which instruction sequence to use. If logical right shifts
4016 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4017 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
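/* E.g. for a 32-bit mode and D = 8 the cheaper-shift variant computes
   signmask = (op0 < 0 ? -1 : 0) >> 29, i.e. 7 or 0, and then
   ((op0 + signmask) & 7) - signmask: for op0 = -5 that is
   ((-5 + 7) & 7) - 7 = 2 - 7 = -5, the truncating remainder.  */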
4019 temp = gen_rtx_LSHIFTRT (mode, result, shift);
4020 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4021 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4022 > COSTS_N_INSNS (2)))
4024 temp = expand_binop (mode, xor_optab, op0, signmask,
4025 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4026 temp = expand_binop (mode, sub_optab, temp, signmask,
4027 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4028 temp = expand_binop (mode, and_optab, temp,
4029 gen_int_mode (masklow, mode),
4030 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4031 temp = expand_binop (mode, xor_optab, temp, signmask,
4032 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4033 temp = expand_binop (mode, sub_optab, temp, signmask,
4034 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4036 else
4038 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4039 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4040 signmask = force_reg (mode, signmask);
4042 temp = expand_binop (mode, add_optab, op0, signmask,
4043 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4044 temp = expand_binop (mode, and_optab, temp,
4045 gen_int_mode (masklow, mode),
4046 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4047 temp = expand_binop (mode, sub_optab, temp, signmask,
4048 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4050 return temp;
4054 /* Mask contains the mode's signbit and the significant bits of the
4055 modulus. By including the signbit in the operation, many targets
4056 can avoid an explicit compare operation in the following comparison
4057 against zero. */
4058 wide_int mask = wi::mask (logd, false, prec);
4059 mask = wi::set_bit (mask, prec - 1);
4061 temp = expand_binop (mode, and_optab, op0,
4062 immed_wide_int_const (mask, mode),
4063 result, 1, OPTAB_LIB_WIDEN);
4064 if (temp != result)
4065 emit_move_insn (result, temp);
4067 label = gen_label_rtx ();
4068 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4070 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4071 0, OPTAB_LIB_WIDEN);
4073 mask = wi::mask (logd, true, prec);
4074 temp = expand_binop (mode, ior_optab, temp,
4075 immed_wide_int_const (mask, mode),
4076 result, 1, OPTAB_LIB_WIDEN);
4077 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4078 0, OPTAB_LIB_WIDEN);
4079 if (temp != result)
4080 emit_move_insn (result, temp);
4081 emit_label (label);
4082 return result;
4085 /* Expand signed division of OP0 by a power of two D in mode MODE.
4086 This routine is only called for positive values of D. */
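/* All of the variants below implement the usual rounding fix for two's
   complement arithmetic shifts: signed truncating division by 2^k equals
   (OP0 + (OP0 < 0 ? 2^k - 1 : 0)) >> k.  E.g. -7 / 4: (-7 + 3) >> 2 = -1,
   whereas a plain arithmetic shift would give -7 >> 2 = -2.  */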
4088 static rtx
4089 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4091 rtx temp;
4092 rtx_code_label *label;
4093 int logd;
4095 logd = floor_log2 (d);
4097 if (d == 2
4098 && BRANCH_COST (optimize_insn_for_speed_p (),
4099 false) >= 1)
4101 temp = gen_reg_rtx (mode);
4102 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4103 if (temp != NULL_RTX)
4105 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4106 0, OPTAB_LIB_WIDEN);
4107 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4111 if (HAVE_conditional_move
4112 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4114 rtx temp2;
4116 start_sequence ();
4117 temp2 = copy_to_mode_reg (mode, op0);
4118 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4119 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4120 temp = force_reg (mode, temp);
4122 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4123 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4124 mode, temp, temp2, mode, 0);
4125 if (temp2)
4127 rtx_insn *seq = get_insns ();
4128 end_sequence ();
4129 emit_insn (seq);
4130 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4132 end_sequence ();
4135 if (BRANCH_COST (optimize_insn_for_speed_p (),
4136 false) >= 2)
4138 int ushift = GET_MODE_BITSIZE (mode) - logd;
4140 temp = gen_reg_rtx (mode);
4141 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4142 if (temp != NULL_RTX)
4144 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4145 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4146 > COSTS_N_INSNS (1))
4147 temp = expand_binop (mode, and_optab, temp,
4148 gen_int_mode (d - 1, mode),
4149 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4150 else
4151 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4152 ushift, NULL_RTX, 1);
4153 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4154 0, OPTAB_LIB_WIDEN);
4155 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4159 label = gen_label_rtx ();
4160 temp = copy_to_mode_reg (mode, op0);
4161 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4162 expand_inc (temp, gen_int_mode (d - 1, mode));
4163 emit_label (label);
4164 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4167 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4168 if that is convenient, and returning where the result is.
4169 You may request either the quotient or the remainder as the result;
4170 specify REM_FLAG nonzero to get the remainder.
4172 CODE is the expression code for which kind of division this is;
4173 it controls how rounding is done. MODE is the machine mode to use.
4174 UNSIGNEDP nonzero means do unsigned division. */
4176 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4177 and then correct it by or'ing in missing high bits
4178 if result of ANDI is nonzero.
4179 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4180 This could optimize to a bfexts instruction.
4181 But C doesn't use these operations, so their optimizations are
4182 left for later. */
4183 /* ??? For modulo, we don't actually need the highpart of the first product,
4184 the low part will do nicely. And for small divisors, the second multiply
4185 can also be a low-part only multiply or even be completely left out.
4186 E.g. to calculate the remainder of a division by 3 with a 32 bit
4187 multiply, multiply with 0x55555556 and extract the upper two bits;
4188 the result is exact for inputs up to 0x1fffffff.
4189 The input range can be reduced by using cross-sum rules.
4190 For odd divisors >= 3, the following table gives right shift counts
4191 so that if a number is shifted by an integer multiple of the given
4192 amount, the remainder stays the same:
4193 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4194 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4195 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4196 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4197 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4199 Cross-sum rules for even numbers can be derived by leaving as many bits
4200 to the right alone as the divisor has zeros to the right.
4201 E.g. if x is an unsigned 32 bit number:
4202 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
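   As a quick check of the mod-3 example above: 5 * 0x55555556 is
   0x1AAAAAAAAE, whose low 32 bits are 0xAAAAAAAE; the top two bits of
   that word are 2, which is indeed 5 mod 3.  */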
4206 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4207 rtx op0, rtx op1, rtx target, int unsignedp,
4208 enum optab_methods methods)
4210 machine_mode compute_mode;
4211 rtx tquotient;
4212 rtx quotient = 0, remainder = 0;
4213 rtx_insn *last;
4214 rtx_insn *insn;
4215 optab optab1, optab2;
4216 int op1_is_constant, op1_is_pow2 = 0;
4217 int max_cost, extra_cost;
4218 static HOST_WIDE_INT last_div_const = 0;
4219 bool speed = optimize_insn_for_speed_p ();
4221 op1_is_constant = CONST_INT_P (op1);
4222 if (op1_is_constant)
4224 wide_int ext_op1 = rtx_mode_t (op1, mode);
4225 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4226 || (! unsignedp
4227 && wi::popcount (wi::neg (ext_op1)) == 1));
4231 /* This is the structure of expand_divmod:
4233 First comes code to fix up the operands so we can perform the operations
4234 correctly and efficiently.
4236 Second comes a switch statement with code specific for each rounding mode.
4237 For some special operands this code emits all RTL for the desired
4238 operation, for other cases, it generates only a quotient and stores it in
4239 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4240 to indicate that it has not done anything.
4242 Last comes code that finishes the operation. If QUOTIENT is set and
4243 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4244 QUOTIENT is not set, it is computed using trunc rounding.
4246 We try to generate special code for division and remainder when OP1 is a
4247 constant. If |OP1| = 2**n we can use shifts and some other fast
4248 operations. For other values of OP1, we compute a carefully selected
4249 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4250 by m.
4252 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4253 half of the product. Different strategies for generating the product are
4254 implemented in expmed_mult_highpart.
4256 If what we actually want is the remainder, we generate that by another
4257 by-constant multiplication and a subtraction. */
4259 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4260 code below will malfunction if we are, so check here and handle
4261 the special case if so. */
4262 if (op1 == const1_rtx)
4263 return rem_flag ? const0_rtx : op0;
4265 /* When dividing by -1, we could get an overflow.
4266 negv_optab can handle overflows. */
4267 if (! unsignedp && op1 == constm1_rtx)
4269 if (rem_flag)
4270 return const0_rtx;
4271 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4272 ? negv_optab : neg_optab, op0, target, 0);
4275 if (target
4276 /* Don't use the function value register as a target
4277 since we have to read it as well as write it,
4278 and function-inlining gets confused by this. */
4279 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4280 /* Don't clobber an operand while doing a multi-step calculation. */
4281 || ((rem_flag || op1_is_constant)
4282 && (reg_mentioned_p (target, op0)
4283 || (MEM_P (op0) && MEM_P (target))))
4284 || reg_mentioned_p (target, op1)
4285 || (MEM_P (op1) && MEM_P (target))))
4286 target = 0;
4288 /* Get the mode in which to perform this computation. Normally it will
4289 be MODE, but sometimes we can't do the desired operation in MODE.
4290 If so, pick a wider mode in which we can do the operation. Convert
4291 to that mode at the start to avoid repeated conversions.
4293 First see what operations we need. These depend on the expression
4294 we are evaluating. (We assume that divxx3 insns exist under the
4295 same conditions as modxx3 insns do, and that these insns don't normally
4296 fail. If these assumptions are not correct, we may generate less
4297 efficient code in some cases.)
4299 Then see if we find a mode in which we can open-code that operation
4300 (either a division, modulus, or shift). Finally, check for the smallest
4301 mode for which we can do the operation with a library call. */
4303 /* We might want to refine this now that we have division-by-constant
4304 optimization. Since expmed_mult_highpart tries so many variants, it is
4305 not straightforward to generalize this. Maybe we should make an array
4306 of possible modes in init_expmed? Save this for GCC 2.7. */
4308 optab1 = (op1_is_pow2
4309 ? (unsignedp ? lshr_optab : ashr_optab)
4310 : (unsignedp ? udiv_optab : sdiv_optab));
4311 optab2 = (op1_is_pow2 ? optab1
4312 : (unsignedp ? udivmod_optab : sdivmod_optab));
4314 if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4316 FOR_EACH_MODE_FROM (compute_mode, mode)
4317 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4318 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4319 break;
4321 if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4322 FOR_EACH_MODE_FROM (compute_mode, mode)
4323 if (optab_libfunc (optab1, compute_mode)
4324 || optab_libfunc (optab2, compute_mode))
4325 break;
4327 else
4328 compute_mode = mode;
4330 /* If we still couldn't find a mode, use MODE, but expand_binop will
4331 probably die. */
4332 if (compute_mode == VOIDmode)
4333 compute_mode = mode;
4335 if (target && GET_MODE (target) == compute_mode)
4336 tquotient = target;
4337 else
4338 tquotient = gen_reg_rtx (compute_mode);
4340 #if 0
4341 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4342 (mode), and thereby get better code when OP1 is a constant. Do that
4343 later. It will require going over all usages of SIZE below. */
4344 size = GET_MODE_BITSIZE (mode);
4345 #endif
4347 /* Only deduct something for a REM if the last divide done was
4348 for a different constant. Then set the constant of the last
4349 divide. */
4350 max_cost = (unsignedp
4351 ? udiv_cost (speed, compute_mode)
4352 : sdiv_cost (speed, compute_mode));
4353 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4354 && INTVAL (op1) == last_div_const))
4355 max_cost -= (mul_cost (speed, compute_mode)
4356 + add_cost (speed, compute_mode));
4358 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4360 /* Now convert to the best mode to use. */
4361 if (compute_mode != mode)
4363 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4364 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4366 /* convert_modes may have placed op1 into a register, so we
4367 must recompute the following. */
4368 op1_is_constant = CONST_INT_P (op1);
4369 if (op1_is_constant)
4371 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4372 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4373 || (! unsignedp
4374 && wi::popcount (wi::neg (ext_op1)) == 1));
4376 else
4377 op1_is_pow2 = 0;
4380 /* If one of the operands is a volatile MEM, copy it into a register. */
4382 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4383 op0 = force_reg (compute_mode, op0);
4384 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4385 op1 = force_reg (compute_mode, op1);
4387 /* If we need the remainder or if OP1 is constant, we need to
4388 put OP0 in a register in case it has any queued subexpressions. */
4389 if (rem_flag || op1_is_constant)
4390 op0 = force_reg (compute_mode, op0);
4392 last = get_last_insn ();
4394 /* Promote floor rounding to trunc rounding for unsigned operations. */
4395 if (unsignedp)
4397 if (code == FLOOR_DIV_EXPR)
4398 code = TRUNC_DIV_EXPR;
4399 if (code == FLOOR_MOD_EXPR)
4400 code = TRUNC_MOD_EXPR;
4401 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4402 code = TRUNC_DIV_EXPR;
4405 if (op1 != const0_rtx)
4406 switch (code)
4408 case TRUNC_MOD_EXPR:
4409 case TRUNC_DIV_EXPR:
4410 if (op1_is_constant)
4412 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4413 int size = GET_MODE_BITSIZE (int_mode);
4414 if (unsignedp)
4416 unsigned HOST_WIDE_INT mh, ml;
4417 int pre_shift, post_shift;
4418 int dummy;
4419 wide_int wd = rtx_mode_t (op1, int_mode);
4420 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4422 if (wi::popcount (wd) == 1)
4424 pre_shift = floor_log2 (d);
4425 if (rem_flag)
4427 unsigned HOST_WIDE_INT mask
4428 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4429 remainder
4430 = expand_binop (int_mode, and_optab, op0,
4431 gen_int_mode (mask, int_mode),
4432 remainder, 1, methods);
4433 if (remainder)
4434 return gen_lowpart (mode, remainder);
4436 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4437 pre_shift, tquotient, 1);
4439 else if (size <= HOST_BITS_PER_WIDE_INT)
4441 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4443 /* Most significant bit of divisor is set; emit an scc
4444 insn. */
4445 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4446 int_mode, 1, 1);
4448 else
4450 /* Find a suitable multiplier and right shift count
4451 instead of multiplying with D. */
4453 mh = choose_multiplier (d, size, size,
4454 &ml, &post_shift, &dummy);
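/* E.g. for SIZE = 32 and D = 7 this yields MH = 1, ML = 0x24924925 and
   POST_SHIFT = 3; the MH != 0 path below then computes
   q = ((t1 + ((op0 - t1) >> 1)) >> 2) with t1 the high half of op0 * ML,
   so op0 = 100 gives t1 = 14 and q = (14 + 43) >> 2 = 14.  */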
4456 /* If the suggested multiplier is more than SIZE bits,
4457 we can do better for even divisors, using an
4458 initial right shift. */
4459 if (mh != 0 && (d & 1) == 0)
4461 pre_shift = ctz_or_zero (d);
4462 mh = choose_multiplier (d >> pre_shift, size,
4463 size - pre_shift,
4464 &ml, &post_shift, &dummy);
4465 gcc_assert (!mh);
4467 else
4468 pre_shift = 0;
4470 if (mh != 0)
4472 rtx t1, t2, t3, t4;
4474 if (post_shift - 1 >= BITS_PER_WORD)
4475 goto fail1;
4477 extra_cost
4478 = (shift_cost (speed, int_mode, post_shift - 1)
4479 + shift_cost (speed, int_mode, 1)
4480 + 2 * add_cost (speed, int_mode));
4481 t1 = expmed_mult_highpart
4482 (int_mode, op0, gen_int_mode (ml, int_mode),
4483 NULL_RTX, 1, max_cost - extra_cost);
4484 if (t1 == 0)
4485 goto fail1;
4486 t2 = force_operand (gen_rtx_MINUS (int_mode,
4487 op0, t1),
4488 NULL_RTX);
4489 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4490 t2, 1, NULL_RTX, 1);
4491 t4 = force_operand (gen_rtx_PLUS (int_mode,
4492 t1, t3),
4493 NULL_RTX);
4494 quotient = expand_shift
4495 (RSHIFT_EXPR, int_mode, t4,
4496 post_shift - 1, tquotient, 1);
4498 else
4500 rtx t1, t2;
4502 if (pre_shift >= BITS_PER_WORD
4503 || post_shift >= BITS_PER_WORD)
4504 goto fail1;
4506 t1 = expand_shift
4507 (RSHIFT_EXPR, int_mode, op0,
4508 pre_shift, NULL_RTX, 1);
4509 extra_cost
4510 = (shift_cost (speed, int_mode, pre_shift)
4511 + shift_cost (speed, int_mode, post_shift));
4512 t2 = expmed_mult_highpart
4513 (int_mode, t1,
4514 gen_int_mode (ml, int_mode),
4515 NULL_RTX, 1, max_cost - extra_cost);
4516 if (t2 == 0)
4517 goto fail1;
4518 quotient = expand_shift
4519 (RSHIFT_EXPR, int_mode, t2,
4520 post_shift, tquotient, 1);
4524 else /* Too wide mode to use tricky code */
4525 break;
4527 insn = get_last_insn ();
4528 if (insn != last)
4529 set_dst_reg_note (insn, REG_EQUAL,
4530 gen_rtx_UDIV (int_mode, op0, op1),
4531 quotient);
4533 else /* TRUNC_DIV, signed */
4535 unsigned HOST_WIDE_INT ml;
4536 int lgup, post_shift;
4537 rtx mlr;
4538 HOST_WIDE_INT d = INTVAL (op1);
4539 unsigned HOST_WIDE_INT abs_d;
4541 /* Not prepared to handle division/remainder by
4542 0xffffffffffffffff8000000000000000 etc. */
4543 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4544 break;
4546 /* Since d might be INT_MIN, we have to cast to
4547 unsigned HOST_WIDE_INT before negating to avoid
4548 undefined signed overflow. */
4549 abs_d = (d >= 0
4550 ? (unsigned HOST_WIDE_INT) d
4551 : - (unsigned HOST_WIDE_INT) d);
4553 /* n rem d = n rem -d */
4554 if (rem_flag && d < 0)
4556 d = abs_d;
4557 op1 = gen_int_mode (abs_d, int_mode);
4560 if (d == 1)
4561 quotient = op0;
4562 else if (d == -1)
4563 quotient = expand_unop (int_mode, neg_optab, op0,
4564 tquotient, 0);
4565 else if (size <= HOST_BITS_PER_WIDE_INT
4566 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4568 /* This case is not handled correctly below. */
4569 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4570 int_mode, 1, 1);
4571 if (quotient == 0)
4572 goto fail1;
4574 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4575 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4576 && (rem_flag
4577 ? smod_pow2_cheap (speed, int_mode)
4578 : sdiv_pow2_cheap (speed, int_mode))
4579 /* We assume that the cheap metric is true if the
4580 optab has an expander for this mode. */
4581 && ((optab_handler ((rem_flag ? smod_optab
4582 : sdiv_optab),
4583 int_mode)
4584 != CODE_FOR_nothing)
4585 || (optab_handler (sdivmod_optab, int_mode)
4586 != CODE_FOR_nothing)))
4588 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4590 if (rem_flag)
4592 remainder = expand_smod_pow2 (int_mode, op0, d);
4593 if (remainder)
4594 return gen_lowpart (mode, remainder);
4597 if (sdiv_pow2_cheap (speed, int_mode)
4598 && ((optab_handler (sdiv_optab, int_mode)
4599 != CODE_FOR_nothing)
4600 || (optab_handler (sdivmod_optab, int_mode)
4601 != CODE_FOR_nothing)))
4602 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4603 int_mode, op0,
4604 gen_int_mode (abs_d,
4605 int_mode),
4606 NULL_RTX, 0);
4607 else
4608 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4610 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4611 negate the quotient. */
4612 if (d < 0)
4614 insn = get_last_insn ();
4615 if (insn != last
4616 && abs_d < (HOST_WIDE_INT_1U
4617 << (HOST_BITS_PER_WIDE_INT - 1)))
4618 set_dst_reg_note (insn, REG_EQUAL,
4619 gen_rtx_DIV (int_mode, op0,
4620 gen_int_mode
4621 (abs_d,
4622 int_mode)),
4623 quotient);
4625 quotient = expand_unop (int_mode, neg_optab,
4626 quotient, quotient, 0);
4629 else if (size <= HOST_BITS_PER_WIDE_INT)
4631 choose_multiplier (abs_d, size, size - 1,
4632 &ml, &post_shift, &lgup);
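/* E.g. for SIZE = 32 and ABS_D = 7 this yields ML = 0x92492493 and
   POST_SHIFT = 2.  ML has its top bit set, so the second branch below
   sign-extends it and (for D > 0) computes
   q = ((op0 + high (op0 * ML)) >> 2) - (op0 >> 31): op0 = 100 gives
   (100 - 43) >> 2 = 14, op0 = -100 gives ((-100 + 42) >> 2) - (-1) = -14.  */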
4633 if (ml < HOST_WIDE_INT_1U << (size - 1))
4635 rtx t1, t2, t3;
4637 if (post_shift >= BITS_PER_WORD
4638 || size - 1 >= BITS_PER_WORD)
4639 goto fail1;
4641 extra_cost = (shift_cost (speed, int_mode, post_shift)
4642 + shift_cost (speed, int_mode, size - 1)
4643 + add_cost (speed, int_mode));
4644 t1 = expmed_mult_highpart
4645 (int_mode, op0, gen_int_mode (ml, int_mode),
4646 NULL_RTX, 0, max_cost - extra_cost);
4647 if (t1 == 0)
4648 goto fail1;
4649 t2 = expand_shift
4650 (RSHIFT_EXPR, int_mode, t1,
4651 post_shift, NULL_RTX, 0);
4652 t3 = expand_shift
4653 (RSHIFT_EXPR, int_mode, op0,
4654 size - 1, NULL_RTX, 0);
4655 if (d < 0)
4656 quotient
4657 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4658 tquotient);
4659 else
4660 quotient
4661 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4662 tquotient);
4664 else
4666 rtx t1, t2, t3, t4;
4668 if (post_shift >= BITS_PER_WORD
4669 || size - 1 >= BITS_PER_WORD)
4670 goto fail1;
4672 ml |= HOST_WIDE_INT_M1U << (size - 1);
4673 mlr = gen_int_mode (ml, int_mode);
4674 extra_cost = (shift_cost (speed, int_mode, post_shift)
4675 + shift_cost (speed, int_mode, size - 1)
4676 + 2 * add_cost (speed, int_mode));
4677 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4678 NULL_RTX, 0,
4679 max_cost - extra_cost);
4680 if (t1 == 0)
4681 goto fail1;
4682 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4683 NULL_RTX);
4684 t3 = expand_shift
4685 (RSHIFT_EXPR, int_mode, t2,
4686 post_shift, NULL_RTX, 0);
4687 t4 = expand_shift
4688 (RSHIFT_EXPR, int_mode, op0,
4689 size - 1, NULL_RTX, 0);
4690 if (d < 0)
4691 quotient
4692 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4693 tquotient);
4694 else
4695 quotient
4696 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4697 tquotient);
4700 else /* Too wide mode to use tricky code */
4701 break;
4703 insn = get_last_insn ();
4704 if (insn != last)
4705 set_dst_reg_note (insn, REG_EQUAL,
4706 gen_rtx_DIV (int_mode, op0, op1),
4707 quotient);
4709 break;
4711 fail1:
4712 delete_insns_since (last);
4713 break;
4715 case FLOOR_DIV_EXPR:
4716 case FLOOR_MOD_EXPR:
4717 /* We will come here only for signed operations. */
4718 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4720 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4721 int size = GET_MODE_BITSIZE (int_mode);
4722 unsigned HOST_WIDE_INT mh, ml;
4723 int pre_shift, lgup, post_shift;
4724 HOST_WIDE_INT d = INTVAL (op1);
4726 if (d > 0)
4728 /* We could just as easily deal with negative constants here,
4729 but it does not seem worth the trouble for GCC 2.6. */
4730 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4732 pre_shift = floor_log2 (d);
4733 if (rem_flag)
4735 unsigned HOST_WIDE_INT mask
4736 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4737 remainder = expand_binop
4738 (int_mode, and_optab, op0,
4739 gen_int_mode (mask, int_mode),
4740 remainder, 0, methods);
4741 if (remainder)
4742 return gen_lowpart (mode, remainder);
4744 quotient = expand_shift
4745 (RSHIFT_EXPR, int_mode, op0,
4746 pre_shift, tquotient, 0);
4748 else
4750 rtx t1, t2, t3, t4;
4752 mh = choose_multiplier (d, size, size - 1,
4753 &ml, &post_shift, &lgup);
4754 gcc_assert (!mh);
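/* The sequence below relies on
   floor (x / d) == (x >= 0 ? x / d : ~(~x / d)) for d > 0, with the
   unsigned division carried out via the multiplier ML: t1 is the sign
   mask of OP0, and the two XORs map x to ~x and back exactly when OP0
   is negative.  */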
4756 if (post_shift < BITS_PER_WORD
4757 && size - 1 < BITS_PER_WORD)
4759 t1 = expand_shift
4760 (RSHIFT_EXPR, int_mode, op0,
4761 size - 1, NULL_RTX, 0);
4762 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4763 NULL_RTX, 0, OPTAB_WIDEN);
4764 extra_cost = (shift_cost (speed, int_mode, post_shift)
4765 + shift_cost (speed, int_mode, size - 1)
4766 + 2 * add_cost (speed, int_mode));
4767 t3 = expmed_mult_highpart
4768 (int_mode, t2, gen_int_mode (ml, int_mode),
4769 NULL_RTX, 1, max_cost - extra_cost);
4770 if (t3 != 0)
4772 t4 = expand_shift
4773 (RSHIFT_EXPR, int_mode, t3,
4774 post_shift, NULL_RTX, 1);
4775 quotient = expand_binop (int_mode, xor_optab,
4776 t4, t1, tquotient, 0,
4777 OPTAB_WIDEN);
4782 else
4784 rtx nsign, t1, t2, t3, t4;
4785 t1 = force_operand (gen_rtx_PLUS (int_mode,
4786 op0, constm1_rtx), NULL_RTX);
4787 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4788 0, OPTAB_WIDEN);
4789 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4790 size - 1, NULL_RTX, 0);
4791 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4792 NULL_RTX);
4793 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4794 NULL_RTX, 0);
4795 if (t4)
4797 rtx t5;
4798 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4799 NULL_RTX, 0);
4800 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4801 tquotient);
4806 if (quotient != 0)
4807 break;
4808 delete_insns_since (last);
4810 /* Try using an instruction that produces both the quotient and
4811 remainder, using truncation. We can easily compensate the quotient
4812 or remainder to get floor rounding, once we have the remainder.
4813 Notice that we compute also the final remainder value here,
4814 and return the result right away. */
4815 if (target == 0 || GET_MODE (target) != compute_mode)
4816 target = gen_reg_rtx (compute_mode);
4818 if (rem_flag)
4820 remainder
4821 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4822 quotient = gen_reg_rtx (compute_mode);
4824 else
4826 quotient
4827 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4828 remainder = gen_reg_rtx (compute_mode);
4831 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4832 quotient, remainder, 0))
4834 /* This could be computed with a branch-less sequence.
4835 Save that for later. */
4836 rtx tem;
4837 rtx_code_label *label = gen_label_rtx ();
4838 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4839 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4840 NULL_RTX, 0, OPTAB_WIDEN);
4841 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4842 expand_dec (quotient, const1_rtx);
4843 expand_inc (remainder, op1);
4844 emit_label (label);
4845 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4848 /* No luck with division elimination or divmod. Have to do it
4849 by conditionally adjusting op0 *and* the result. */
4851 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4852 rtx adjusted_op0;
4853 rtx tem;
4855 quotient = gen_reg_rtx (compute_mode);
4856 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4857 label1 = gen_label_rtx ();
4858 label2 = gen_label_rtx ();
4859 label3 = gen_label_rtx ();
4860 label4 = gen_label_rtx ();
4861 label5 = gen_label_rtx ();
4862 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4863 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4864 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4865 quotient, 0, methods);
4866 if (tem != quotient)
4867 emit_move_insn (quotient, tem);
4868 emit_jump_insn (targetm.gen_jump (label5));
4869 emit_barrier ();
4870 emit_label (label1);
4871 expand_inc (adjusted_op0, const1_rtx);
4872 emit_jump_insn (targetm.gen_jump (label4));
4873 emit_barrier ();
4874 emit_label (label2);
4875 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4876 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4877 quotient, 0, methods);
4878 if (tem != quotient)
4879 emit_move_insn (quotient, tem);
4880 emit_jump_insn (targetm.gen_jump (label5));
4881 emit_barrier ();
4882 emit_label (label3);
4883 expand_dec (adjusted_op0, const1_rtx);
4884 emit_label (label4);
4885 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4886 quotient, 0, methods);
4887 if (tem != quotient)
4888 emit_move_insn (quotient, tem);
4889 expand_dec (quotient, const1_rtx);
4890 emit_label (label5);
4892 break;
4894 case CEIL_DIV_EXPR:
4895 case CEIL_MOD_EXPR:
4896 if (unsignedp)
4898 if (op1_is_constant
4899 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4900 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4901 || INTVAL (op1) >= 0))
4903 scalar_int_mode int_mode
4904 = as_a <scalar_int_mode> (compute_mode);
4905 rtx t1, t2, t3;
4906 unsigned HOST_WIDE_INT d = INTVAL (op1);
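/* Ceiling division by a power of two D:
   q = (OP0 >> log2 (D)) + ((OP0 & (D - 1)) != 0), e.g. 9 divided by 4
   rounded up is 2 + 1 = 3.  */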
4907 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4908 floor_log2 (d), tquotient, 1);
4909 t2 = expand_binop (int_mode, and_optab, op0,
4910 gen_int_mode (d - 1, int_mode),
4911 NULL_RTX, 1, methods);
4912 t3 = gen_reg_rtx (int_mode);
4913 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4914 if (t3 == 0)
4916 rtx_code_label *lab;
4917 lab = gen_label_rtx ();
4918 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4919 expand_inc (t1, const1_rtx);
4920 emit_label (lab);
4921 quotient = t1;
4923 else
4924 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4925 tquotient);
4926 break;
4929 /* Try using an instruction that produces both the quotient and
4930 remainder, using truncation. We can easily compensate the
4931 quotient or remainder to get ceiling rounding, once we have the
4932 remainder. Notice that we compute also the final remainder
4933 value here, and return the result right away. */
4934 if (target == 0 || GET_MODE (target) != compute_mode)
4935 target = gen_reg_rtx (compute_mode);
4937 if (rem_flag)
4939 remainder = (REG_P (target)
4940 ? target : gen_reg_rtx (compute_mode));
4941 quotient = gen_reg_rtx (compute_mode);
4943 else
4945 quotient = (REG_P (target)
4946 ? target : gen_reg_rtx (compute_mode));
4947 remainder = gen_reg_rtx (compute_mode);
4950 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4951 remainder, 1))
4953 /* This could be computed with a branch-less sequence.
4954 Save that for later. */
4955 rtx_code_label *label = gen_label_rtx ();
4956 do_cmp_and_jump (remainder, const0_rtx, EQ,
4957 compute_mode, label);
4958 expand_inc (quotient, const1_rtx);
4959 expand_dec (remainder, op1);
4960 emit_label (label);
4961 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4964 /* No luck with division elimination or divmod. Have to do it
4965 by conditionally adjusting op0 *and* the result. */
4967 rtx_code_label *label1, *label2;
4968 rtx adjusted_op0, tem;
4970 quotient = gen_reg_rtx (compute_mode);
4971 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4972 label1 = gen_label_rtx ();
4973 label2 = gen_label_rtx ();
4974 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4975 compute_mode, label1);
4976 emit_move_insn (quotient, const0_rtx);
4977 emit_jump_insn (targetm.gen_jump (label2));
4978 emit_barrier ();
4979 emit_label (label1);
4980 expand_dec (adjusted_op0, const1_rtx);
4981 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4982 quotient, 1, methods);
4983 if (tem != quotient)
4984 emit_move_insn (quotient, tem);
4985 expand_inc (quotient, const1_rtx);
4986 emit_label (label2);
4989 else /* signed */
4991 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4992 && INTVAL (op1) >= 0)
4994 /* This is extremely similar to the code for the unsigned case
4995 above. For 2.7 we should merge these variants, but for
4996 2.6.1 I don't want to touch the code for unsigned since that
4997 gets used in C. The signed case will only be used by other
4998 languages (Ada). */
5000 rtx t1, t2, t3;
5001 unsigned HOST_WIDE_INT d = INTVAL (op1);
5002 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5003 floor_log2 (d), tquotient, 0);
5004 t2 = expand_binop (compute_mode, and_optab, op0,
5005 gen_int_mode (d - 1, compute_mode),
5006 NULL_RTX, 1, methods);
5007 t3 = gen_reg_rtx (compute_mode);
5008 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5009 compute_mode, 1, 1);
5010 if (t3 == 0)
5012 rtx_code_label *lab;
5013 lab = gen_label_rtx ();
5014 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5015 expand_inc (t1, const1_rtx);
5016 emit_label (lab);
5017 quotient = t1;
5019 else
5020 quotient = force_operand (gen_rtx_PLUS (compute_mode,
5021 t1, t3),
5022 tquotient);
5023 break;
5026 /* Try using an instruction that produces both the quotient and
5027 remainder, using truncation. We can easily compensate the
5028 quotient or remainder to get ceiling rounding, once we have the
5029 remainder. Notice that we compute also the final remainder
5030 value here, and return the result right away. */
5031 if (target == 0 || GET_MODE (target) != compute_mode)
5032 target = gen_reg_rtx (compute_mode);
5033 if (rem_flag)
5035 remainder = (REG_P (target)
5036 ? target : gen_reg_rtx (compute_mode));
5037 quotient = gen_reg_rtx (compute_mode);
5039 else
5041 quotient = (REG_P (target)
5042 ? target : gen_reg_rtx (compute_mode));
5043 remainder = gen_reg_rtx (compute_mode);
5046 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5047 remainder, 0))
5049 /* This could be computed with a branch-less sequence.
5050 Save that for later. */
5051 rtx tem;
5052 rtx_code_label *label = gen_label_rtx ();
5053 do_cmp_and_jump (remainder, const0_rtx, EQ,
5054 compute_mode, label);
5055 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5056 NULL_RTX, 0, OPTAB_WIDEN);
5057 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5058 expand_inc (quotient, const1_rtx);
5059 expand_dec (remainder, op1);
5060 emit_label (label);
5061 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5064 /* No luck with division elimination or divmod. Have to do it
5065 by conditionally adjusting op0 *and* the result. */
5067 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5068 rtx adjusted_op0;
5069 rtx tem;
5071 quotient = gen_reg_rtx (compute_mode);
5072 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5073 label1 = gen_label_rtx ();
5074 label2 = gen_label_rtx ();
5075 label3 = gen_label_rtx ();
5076 label4 = gen_label_rtx ();
5077 label5 = gen_label_rtx ();
5078 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5079 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5080 compute_mode, label1);
5081 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5082 quotient, 0, methods);
5083 if (tem != quotient)
5084 emit_move_insn (quotient, tem);
5085 emit_jump_insn (targetm.gen_jump (label5));
5086 emit_barrier ();
5087 emit_label (label1);
5088 expand_dec (adjusted_op0, const1_rtx);
5089 emit_jump_insn (targetm.gen_jump (label4));
5090 emit_barrier ();
5091 emit_label (label2);
5092 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5093 compute_mode, label3);
5094 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5095 quotient, 0, methods);
5096 if (tem != quotient)
5097 emit_move_insn (quotient, tem);
5098 emit_jump_insn (targetm.gen_jump (label5));
5099 emit_barrier ();
5100 emit_label (label3);
5101 expand_inc (adjusted_op0, const1_rtx);
5102 emit_label (label4);
5103 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5104 quotient, 0, methods);
5105 if (tem != quotient)
5106 emit_move_insn (quotient, tem);
5107 expand_inc (quotient, const1_rtx);
5108 emit_label (label5);
5111 break;
5113 case EXACT_DIV_EXPR:
5114 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5116 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5117 int size = GET_MODE_BITSIZE (int_mode);
5118 HOST_WIDE_INT d = INTVAL (op1);
5119 unsigned HOST_WIDE_INT ml;
5120 int pre_shift;
5121 rtx t1;
5123 pre_shift = ctz_or_zero (d);
5124 ml = invert_mod2n (d >> pre_shift, size);
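/* An exact division by D is a right shift by the number of trailing zero
   bits of D followed by a multiplication by the inverse of the odd part
   of D modulo 2^SIZE.  E.g. D = 12, SIZE = 32: PRE_SHIFT = 2,
   ML = invert_mod2n (3, 32) = 0xAAAAAAAB, and
   36 / 12 = (36 >> 2) * 0xAAAAAAAB mod 2^32 = 3.  */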
5125 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5126 pre_shift, NULL_RTX, unsignedp);
5127 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5128 NULL_RTX, 1);
5130 insn = get_last_insn ();
5131 set_dst_reg_note (insn, REG_EQUAL,
5132 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5133 int_mode, op0, op1),
5134 quotient);
5136 break;
5138 case ROUND_DIV_EXPR:
5139 case ROUND_MOD_EXPR:
5140 if (unsignedp)
5142 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5143 rtx tem;
5144 rtx_code_label *label;
5145 label = gen_label_rtx ();
5146 quotient = gen_reg_rtx (int_mode);
5147 remainder = gen_reg_rtx (int_mode);
5148 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5150 rtx tem;
5151 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5152 quotient, 1, methods);
5153 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5154 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5155 remainder, 1, methods);
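/* Round to nearest: increment the quotient (and subtract OP1 from the
   remainder) whenever the remainder exceeds (OP1 - 1) / 2.  E.g. 10 / 4:
   quotient 2, remainder 2 > 1, so the result becomes 3 with
   remainder -2.  */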
5157 tem = plus_constant (int_mode, op1, -1);
5158 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5159 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5160 expand_inc (quotient, const1_rtx);
5161 expand_dec (remainder, op1);
5162 emit_label (label);
5164 else
5166 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5167 int size = GET_MODE_BITSIZE (int_mode);
5168 rtx abs_rem, abs_op1, tem, mask;
5169 rtx_code_label *label;
5170 label = gen_label_rtx ();
5171 quotient = gen_reg_rtx (int_mode);
5172 remainder = gen_reg_rtx (int_mode);
5173 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5175 rtx tem;
5176 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5177 quotient, 0, methods);
5178 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5179 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5180 remainder, 0, methods);
5182 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5183 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5184 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5185 1, NULL_RTX, 1);
5186 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
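/* We get here when 2 * |remainder| >= |OP1|, so the truncated
   quotient must move one step away from zero.  MASK below is
   (OP0 ^ OP1) >> (size - 1): 0 if the operands have the same sign,
   -1 if they differ.  (1 ^ MASK) - MASK is therefore +1 or -1 with
   the sign of the true quotient, and (OP1 ^ MASK) - MASK is OP1
   with that same sign, so the quotient is nudged away from zero and
   the remainder adjusted by a correspondingly signed OP1.  */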
5187 tem = expand_binop (int_mode, xor_optab, op0, op1,
5188 NULL_RTX, 0, OPTAB_WIDEN);
5189 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5190 size - 1, NULL_RTX, 0);
5191 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5192 NULL_RTX, 0, OPTAB_WIDEN);
5193 tem = expand_binop (int_mode, sub_optab, tem, mask,
5194 NULL_RTX, 0, OPTAB_WIDEN);
5195 expand_inc (quotient, tem);
5196 tem = expand_binop (int_mode, xor_optab, mask, op1,
5197 NULL_RTX, 0, OPTAB_WIDEN);
5198 tem = expand_binop (int_mode, sub_optab, tem, mask,
5199 NULL_RTX, 0, OPTAB_WIDEN);
5200 expand_dec (remainder, tem);
5201 emit_label (label);
5203 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5205 default:
5206 gcc_unreachable ();
5209 if (quotient == 0)
5211 if (target && GET_MODE (target) != compute_mode)
5212 target = 0;
5214 if (rem_flag)
5216 /* Try to produce the remainder without producing the quotient.
5217 If we seem to have a divmod pattern that does not require widening,
5218 don't try widening here. We should really have a WIDEN argument
5219 to expand_twoval_binop, since what we'd really like to do here is
5220 1) try a mod insn in compute_mode
5221 2) try a divmod insn in compute_mode
5222 3) try a div insn in compute_mode and multiply-subtract to get
5223 remainder
5224 4) try the same things with widening allowed. */
5225 remainder
5226 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5227 op0, op1, target,
5228 unsignedp,
5229 ((optab_handler (optab2, compute_mode)
5230 != CODE_FOR_nothing)
5231 ? OPTAB_DIRECT : OPTAB_WIDEN));
5232 if (remainder == 0)
5234 /* No luck there. Can we do remainder and divide at once
5235 without a library call? */
5236 remainder = gen_reg_rtx (compute_mode);
5237 if (! expand_twoval_binop ((unsignedp
5238 ? udivmod_optab
5239 : sdivmod_optab),
5240 op0, op1,
5241 NULL_RTX, remainder, unsignedp))
5242 remainder = 0;
5245 if (remainder)
5246 return gen_lowpart (mode, remainder);
5249 /* Produce the quotient. Try a quotient insn, but not a library call.
5250 If we have a divmod in this mode, use it in preference to widening
5251 the div (for this test we assume it will not fail). Note that optab2
5252 is set to whichever of the two optabs the call below will use. */
5253 quotient
5254 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5255 op0, op1, rem_flag ? NULL_RTX : target,
5256 unsignedp,
5257 ((optab_handler (optab2, compute_mode)
5258 != CODE_FOR_nothing)
5259 ? OPTAB_DIRECT : OPTAB_WIDEN));
5261 if (quotient == 0)
5263 /* No luck there. Try a quotient-and-remainder insn,
5264 keeping the quotient alone. */
5265 quotient = gen_reg_rtx (compute_mode);
5266 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5267 op0, op1,
5268 quotient, NULL_RTX, unsignedp))
5270 quotient = 0;
5271 if (! rem_flag)
5272 /* Still no luck. If we are not computing the remainder,
5273 use a library call for the quotient. */
5274 quotient = sign_expand_binop (compute_mode,
5275 udiv_optab, sdiv_optab,
5276 op0, op1, target,
5277 unsignedp, methods);
5282 if (rem_flag)
5284 if (target && GET_MODE (target) != compute_mode)
5285 target = 0;
5287 if (quotient == 0)
5289 /* No divide instruction either. Use library for remainder. */
5290 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5291 op0, op1, target,
5292 unsignedp, methods);
5293 /* No remainder function. Try a quotient-and-remainder
5294 function, keeping the remainder. */
5295 if (!remainder
5296 && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5298 remainder = gen_reg_rtx (compute_mode);
5299 if (!expand_twoval_binop_libfunc
5300 (unsignedp ? udivmod_optab : sdivmod_optab,
5301 op0, op1,
5302 NULL_RTX, remainder,
5303 unsignedp ? UMOD : MOD))
5304 remainder = NULL_RTX;
5307 else
5309 /* We divided. Now finish doing X - Y * (X / Y). */
5310 remainder = expand_mult (compute_mode, quotient, op1,
5311 NULL_RTX, unsignedp);
5312 remainder = expand_binop (compute_mode, sub_optab, op0,
5313 remainder, target, unsignedp,
5314 methods);
5318 if (methods != OPTAB_LIB_WIDEN
5319 && (rem_flag ? remainder : quotient) == NULL_RTX)
5320 return NULL_RTX;
5322 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5325 /* Return a tree node with data type TYPE, describing the value of X.
5326 Usually this is a VAR_DECL, if there is no obvious better choice.
5327 X may be an expression, however we only support those expressions
5328 generated by loop.c. */
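/* For instance, (mult:SI (reg:SI 60) (const_int 4)) - the register
   number purely illustrative - becomes a MULT_EXPR whose first
   operand is a temporary VAR_DECL carrying that register as its rtl
   and whose second operand is the integer constant 4.  */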
5330 tree
5331 make_tree (tree type, rtx x)
5333 tree t;
5335 switch (GET_CODE (x))
5337 case CONST_INT:
5338 case CONST_WIDE_INT:
5339 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5340 return t;
5342 case CONST_DOUBLE:
5343 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5344 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5345 t = wide_int_to_tree (type,
5346 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5347 HOST_BITS_PER_WIDE_INT * 2));
5348 else
5349 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5351 return t;
5353 case CONST_VECTOR:
5355 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5356 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5357 tree itype = TREE_TYPE (type);
5359 /* Build a tree with vector elements. */
5360 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5361 unsigned int count = elts.encoded_nelts ();
5362 for (unsigned int i = 0; i < count; ++i)
5364 rtx elt = CONST_VECTOR_ELT (x, i);
5365 elts.quick_push (make_tree (itype, elt));
5368 return elts.build ();
5371 case PLUS:
5372 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5373 make_tree (type, XEXP (x, 1)));
5375 case MINUS:
5376 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5377 make_tree (type, XEXP (x, 1)));
5379 case NEG:
5380 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5382 case MULT:
5383 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5384 make_tree (type, XEXP (x, 1)));
5386 case ASHIFT:
5387 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5388 make_tree (type, XEXP (x, 1)));
5390 case LSHIFTRT:
5391 t = unsigned_type_for (type);
5392 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5393 make_tree (t, XEXP (x, 0)),
5394 make_tree (type, XEXP (x, 1))));
5396 case ASHIFTRT:
5397 t = signed_type_for (type);
5398 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5399 make_tree (t, XEXP (x, 0)),
5400 make_tree (type, XEXP (x, 1))));
5402 case DIV:
5403 if (TREE_CODE (type) != REAL_TYPE)
5404 t = signed_type_for (type);
5405 else
5406 t = type;
5408 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5409 make_tree (t, XEXP (x, 0)),
5410 make_tree (t, XEXP (x, 1))));
5411 case UDIV:
5412 t = unsigned_type_for (type);
5413 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5414 make_tree (t, XEXP (x, 0)),
5415 make_tree (t, XEXP (x, 1))));
5417 case SIGN_EXTEND:
5418 case ZERO_EXTEND:
5419 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5420 GET_CODE (x) == ZERO_EXTEND);
5421 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5423 case CONST:
5424 return make_tree (type, XEXP (x, 0));
5426 case SYMBOL_REF:
5427 t = SYMBOL_REF_DECL (x);
5428 if (t)
5429 return fold_convert (type, build_fold_addr_expr (t));
5430 /* fall through. */
5432 default:
5433 if (CONST_POLY_INT_P (x))
5434 return wide_int_to_tree (type, const_poly_int_value (x));
5436 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5438 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5439 address mode to pointer mode. */
5440 if (POINTER_TYPE_P (type))
5441 x = convert_memory_address_addr_space
5442 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5444 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5445 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5446 t->decl_with_rtl.rtl = x;
5448 return t;
5452 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5453 and returning TARGET.
5455 If TARGET is 0, a pseudo-register or constant is returned. */
5458 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5460 rtx tem = 0;
5462 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5463 tem = simplify_binary_operation (AND, mode, op0, op1);
5464 if (tem == 0)
5465 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5467 if (target == 0)
5468 target = tem;
5469 else if (tem != target)
5470 emit_move_insn (target, tem);
5471 return target;
5474 /* Helper function for emit_store_flag. */
5476 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5477 machine_mode mode, machine_mode compare_mode,
5478 int unsignedp, rtx x, rtx y, int normalizep,
5479 machine_mode target_mode)
5481 class expand_operand ops[4];
5482 rtx op0, comparison, subtarget;
5483 rtx_insn *last;
5484 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5485 scalar_int_mode int_target_mode;
5487 last = get_last_insn ();
5488 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5489 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5490 if (!x || !y)
5492 delete_insns_since (last);
5493 return NULL_RTX;
5496 if (target_mode == VOIDmode)
5497 int_target_mode = result_mode;
5498 else
5499 int_target_mode = as_a <scalar_int_mode> (target_mode);
5500 if (!target)
5501 target = gen_reg_rtx (int_target_mode);
5503 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5505 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5506 create_fixed_operand (&ops[1], comparison);
5507 create_fixed_operand (&ops[2], x);
5508 create_fixed_operand (&ops[3], y);
5509 if (!maybe_expand_insn (icode, 4, ops))
5511 delete_insns_since (last);
5512 return NULL_RTX;
5514 subtarget = ops[0].value;
5516 /* If we are converting to a wider mode, first convert to
5517 INT_TARGET_MODE, then normalize. This produces better combining
5518 opportunities on machines that have a SIGN_EXTRACT when we are
5519 testing a single bit. This mostly benefits the 68k.
5521 If STORE_FLAG_VALUE does not have the sign bit set when
5522 interpreted in MODE, we can do this conversion as unsigned, which
5523 is usually more efficient. */
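/* E.g. with STORE_FLAG_VALUE == 1 the raw result is 0 or 1, so a
   zero extension to the wider mode is safe and usually cheaper;
   with STORE_FLAG_VALUE == -1 it must be sign extended to stay
   0 or -1.  */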
5524 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5526 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5527 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5529 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5530 convert_move (target, subtarget, unsignedp);
5532 op0 = target;
5533 result_mode = int_target_mode;
5535 else
5536 op0 = subtarget;
5538 /* If we want to keep subexpressions around, don't reuse our last
5539 target. */
5540 if (optimize)
5541 subtarget = 0;
5543 /* Now normalize to the proper value in MODE. Sometimes we don't
5544 have to do anything. */
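/* E.g. if the scc result is 0/-1 (STORE_FLAG_VALUE == -1) but a 0/1
   value was requested (NORMALIZEP == 1), negating is enough.  If
   STORE_FLAG_VALUE is just the sign bit, shifting it down by
   BITSIZE - 1 gives 0/1 (logical shift) or 0/-1 (arithmetic shift).
   Otherwise we AND with 1 and negate if a 0/-1 value was wanted.  */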
5545 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5547 /* STORE_FLAG_VALUE might be the most negative number, so write
5548 the comparison this way to avoid a compile-time warning. */
5549 else if (- normalizep == STORE_FLAG_VALUE)
5550 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5552 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5553 it hard to use a value of just the sign bit due to ANSI integer
5554 constant typing rules. */
5555 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5556 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5557 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5558 normalizep == 1);
5559 else
5561 gcc_assert (STORE_FLAG_VALUE & 1);
5563 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5564 if (normalizep == -1)
5565 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5568 /* If we were converting to a smaller mode, do the conversion now. */
5569 if (int_target_mode != result_mode)
5571 convert_move (target, op0, 0);
5572 return target;
5574 else
5575 return op0;
5579 /* A subroutine of emit_store_flag only including "tricks" that do not
5580 need a recursive call. These are kept separate to avoid infinite
5581 loops. */
5583 static rtx
5584 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5585 machine_mode mode, int unsignedp, int normalizep,
5586 machine_mode target_mode)
5588 rtx subtarget;
5589 enum insn_code icode;
5590 machine_mode compare_mode;
5591 enum mode_class mclass;
5592 enum rtx_code scode;
5594 if (unsignedp)
5595 code = unsigned_condition (code);
5596 scode = swap_condition (code);
5598 /* If one operand is constant, make it the second one. Only do this
5599 if the other operand is not constant as well. */
5601 if (swap_commutative_operands_p (op0, op1))
5603 std::swap (op0, op1);
5604 code = swap_condition (code);
5607 if (mode == VOIDmode)
5608 mode = GET_MODE (op0);
5610 if (CONST_SCALAR_INT_P (op1))
5611 canonicalize_comparison (mode, &code, &op1);
5613 /* For some comparisons with 1 and -1, we can convert this to
5614 comparisons with zero. This will often produce more opportunities for
5615 store-flag insns. */
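/* E.g. "x < 1" becomes "x <= 0" and "x >= 1" becomes "x > 0"; for
   the unsigned forms, "x >= 1" is simply "x != 0" and "x < 1" is
   "x == 0".  */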
5617 switch (code)
5619 case LT:
5620 if (op1 == const1_rtx)
5621 op1 = const0_rtx, code = LE;
5622 break;
5623 case LE:
5624 if (op1 == constm1_rtx)
5625 op1 = const0_rtx, code = LT;
5626 break;
5627 case GE:
5628 if (op1 == const1_rtx)
5629 op1 = const0_rtx, code = GT;
5630 break;
5631 case GT:
5632 if (op1 == constm1_rtx)
5633 op1 = const0_rtx, code = GE;
5634 break;
5635 case GEU:
5636 if (op1 == const1_rtx)
5637 op1 = const0_rtx, code = NE;
5638 break;
5639 case LTU:
5640 if (op1 == const1_rtx)
5641 op1 = const0_rtx, code = EQ;
5642 break;
5643 default:
5644 break;
5647 /* If we are comparing a double-word integer with zero or -1, we can
5648 convert the comparison into one involving a single word. */
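/* E.g. a double-word "x == 0" on a 32-bit target becomes
   "(low_word | high_word) == 0", "x == -1" becomes
   "(low_word & high_word) == -1", and "x < 0" only needs to look at
   the sign bit of the high word.  */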
5649 scalar_int_mode int_mode;
5650 if (is_int_mode (mode, &int_mode)
5651 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5652 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5654 rtx tem;
5655 if ((code == EQ || code == NE)
5656 && (op1 == const0_rtx || op1 == constm1_rtx))
5658 rtx op00, op01;
5660 /* Do a logical OR or AND of the two words and compare the
5661 result. */
5662 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5663 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5664 tem = expand_binop (word_mode,
5665 op1 == const0_rtx ? ior_optab : and_optab,
5666 op00, op01, NULL_RTX, unsignedp,
5667 OPTAB_DIRECT);
5669 if (tem != 0)
5670 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5671 unsignedp, normalizep);
5673 else if ((code == LT || code == GE) && op1 == const0_rtx)
5675 rtx op0h;
5677 /* If testing the sign bit, can just test on high word. */
5678 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5679 subreg_highpart_offset (word_mode,
5680 int_mode));
5681 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5682 unsignedp, normalizep);
5684 else
5685 tem = NULL_RTX;
5687 if (tem)
5689 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5690 return tem;
5691 if (!target)
5692 target = gen_reg_rtx (target_mode);
5694 convert_move (target, tem,
5695 !val_signbit_known_set_p (word_mode,
5696 (normalizep ? normalizep
5697 : STORE_FLAG_VALUE)));
5698 return target;
5702 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5703 complement of A (for GE) and shifting the sign bit to the low bit. */
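/* E.g. "x < 0" is just the sign bit, so a single logical shift right
   by BITSIZE - 1 yields 0 or 1; for "x >= 0" we first take the ones
   complement so that its sign bit is the answer.  When a 0/-1 result
   is wanted the shift is arithmetic instead.  */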
5704 if (op1 == const0_rtx && (code == LT || code == GE)
5705 && is_int_mode (mode, &int_mode)
5706 && (normalizep || STORE_FLAG_VALUE == 1
5707 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5709 scalar_int_mode int_target_mode;
5710 subtarget = target;
5712 if (!target)
5713 int_target_mode = int_mode;
5714 else
5716 /* If the result is to be wider than OP0, it is best to convert it
5717 first. If it is to be narrower, it is *incorrect* to convert it
5718 first. */
5719 int_target_mode = as_a <scalar_int_mode> (target_mode);
5720 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5722 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5723 int_mode = int_target_mode;
5727 if (int_target_mode != int_mode)
5728 subtarget = 0;
5730 if (code == GE)
5731 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5732 ((STORE_FLAG_VALUE == 1 || normalizep)
5733 ? 0 : subtarget), 0);
5735 if (STORE_FLAG_VALUE == 1 || normalizep)
5736 /* If we are supposed to produce a 0/1 value, we want to do
5737 a logical shift from the sign bit to the low-order bit; for
5738 a -1/0 value, we do an arithmetic shift. */
5739 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5740 GET_MODE_BITSIZE (int_mode) - 1,
5741 subtarget, normalizep != -1);
5743 if (int_mode != int_target_mode)
5744 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5746 return op0;
5749 mclass = GET_MODE_CLASS (mode);
5750 FOR_EACH_MODE_FROM (compare_mode, mode)
5752 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5753 icode = optab_handler (cstore_optab, optab_mode);
5754 if (icode != CODE_FOR_nothing)
5756 do_pending_stack_adjust ();
5757 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5758 unsignedp, op0, op1, normalizep, target_mode);
5759 if (tem)
5760 return tem;
5762 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5764 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5765 unsignedp, op1, op0, normalizep, target_mode);
5766 if (tem)
5767 return tem;
5769 break;
5773 return 0;
5776 /* Subroutine of emit_store_flag that handles cases in which the operands
5777 are scalar integers. SUBTARGET is the target to use for temporary
5778 operations and TRUEVAL is the value to store when the condition is
5779 true. All other arguments are as for emit_store_flag. */
5782 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5783 rtx op1, scalar_int_mode mode, int unsignedp,
5784 int normalizep, rtx trueval)
5786 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5787 rtx_insn *last = get_last_insn ();
5789 /* If this is an equality comparison of integers, we can try to exclusive-or
5790 (or subtract) the two operands and use a recursive call to try the
5791 comparison with zero. Don't do any of these cases if branches are
5792 very cheap. */
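/* E.g. "a == b" becomes "(a ^ b) == 0" (or "(a - b) == 0" when no
   usable XOR exists), which the zero-comparison tricks below can
   often handle without a branch.  */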
5794 if ((code == EQ || code == NE) && op1 != const0_rtx)
5796 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5797 OPTAB_WIDEN);
5799 if (tem == 0)
5800 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5801 OPTAB_WIDEN);
5802 if (tem != 0)
5803 tem = emit_store_flag (target, code, tem, const0_rtx,
5804 mode, unsignedp, normalizep);
5805 if (tem != 0)
5806 return tem;
5808 delete_insns_since (last);
5811 /* For integer comparisons, try the reverse comparison. However, for
5812 small X, and if we'd have to extend anyway, implementing "X != 0"
5813 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5814 rtx_code rcode = reverse_condition (code);
5815 if (can_compare_p (rcode, mode, ccp_store_flag)
5816 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5817 && code == NE
5818 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5819 && op1 == const0_rtx))
5821 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5822 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5824 /* Again, for the reverse comparison, use either an addition or a XOR. */
5825 if (want_add
5826 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5827 optimize_insn_for_speed_p ()) == 0)
5829 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5830 STORE_FLAG_VALUE, target_mode);
5831 if (tem != 0)
5832 tem = expand_binop (target_mode, add_optab, tem,
5833 gen_int_mode (normalizep, target_mode),
5834 target, 0, OPTAB_WIDEN);
5835 if (tem != 0)
5836 return tem;
5838 else if (!want_add
5839 && rtx_cost (trueval, mode, XOR, 1,
5840 optimize_insn_for_speed_p ()) == 0)
5842 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5843 normalizep, target_mode);
5844 if (tem != 0)
5845 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5846 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5847 if (tem != 0)
5848 return tem;
5851 delete_insns_since (last);
5854 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5855 the constant zero. Reject all other comparisons at this point. Only
5856 do LE and GT if branches are expensive since they are expensive on
5857 2-operand machines. */
5859 if (op1 != const0_rtx
5860 || (code != EQ && code != NE
5861 && (BRANCH_COST (optimize_insn_for_speed_p (),
5862 false) <= 1 || (code != LE && code != GT))))
5863 return 0;
5865 /* Try to put the result of the comparison in the sign bit. Assume we can't
5866 do the necessary operation below. */
5868 rtx tem = 0;
5870 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5871 the sign bit set. */
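/* E.g. A = 0: 0 | -1 = -1 (sign bit set); A = 5: 5 | 4 = 5 (sign bit
   clear); A = -3: -3 | -4 = -3 (sign bit set).  */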
5873 if (code == LE)
5875 /* This is destructive, so SUBTARGET can't be OP0. */
5876 if (rtx_equal_p (subtarget, op0))
5877 subtarget = 0;
5879 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5880 OPTAB_WIDEN);
5881 if (tem)
5882 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5883 OPTAB_WIDEN);
5886 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5887 number of bits in the mode of OP0, minus one. */
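/* E.g. for 32-bit A = 5: (5 >> 31) - 5 = -5 (sign bit set, so A > 0);
   A = 0: 0 - 0 = 0 (clear); A = -3: -1 - (-3) = 2 (clear).  A = INT_MIN
   wraps to -1 - INT_MIN = INT_MAX, also clear.  */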
5889 if (code == GT)
5891 if (rtx_equal_p (subtarget, op0))
5892 subtarget = 0;
5894 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5895 GET_MODE_BITSIZE (mode) - 1,
5896 subtarget, 0);
5897 if (tem)
5898 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5899 OPTAB_WIDEN);
5902 if (code == EQ || code == NE)
5904 /* For EQ or NE, one way to do the comparison is to apply an operation
5905 that converts the operand into a positive number if it is nonzero
5906 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5907 for NE we negate. This puts the result in the sign bit. Then we
5908 normalize with a shift, if needed.
5910 Two operations that can do the above actions are ABS and FFS, so try
5911 them. If that doesn't work, and MODE is smaller than a full word,
5912 we can use zero-extension to the wider mode (an unsigned conversion)
5913 as the operation. */
5915 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5916 that is compensated by the subsequent overflow when subtracting
5917 one / negating. */
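/* E.g. with ABS for EQ: |A| - 1 has the sign bit set exactly when
   A == 0 (0 - 1 = -1); for A == INT_MIN, ABS wraps to INT_MIN and the
   subtraction wraps to INT_MAX, which is still correct.  For NE,
   -|A| has the sign bit set exactly when A is nonzero, again with
   INT_MIN handled by the wraparound.  */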
5919 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5920 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5921 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5922 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5923 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5925 tem = convert_modes (word_mode, mode, op0, 1);
5926 mode = word_mode;
5929 if (tem != 0)
5931 if (code == EQ)
5932 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5933 0, OPTAB_WIDEN);
5934 else
5935 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5938 /* If we couldn't do it that way, for NE we can "or" the two's complement
5939 of the value with itself. For EQ, we take the one's complement of
5940 that "or", which is an extra insn, so we only handle EQ if branches
5941 are expensive. */
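/* E.g. (-A | A): for any nonzero A at least one of A and -A is
   negative (for A == INT_MIN both are), so the sign bit is set; for
   A == 0 the result is 0.  Complementing that result gives the EQ
   form.  */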
5943 if (tem == 0
5944 && (code == NE
5945 || BRANCH_COST (optimize_insn_for_speed_p (),
5946 false) > 1))
5948 if (rtx_equal_p (subtarget, op0))
5949 subtarget = 0;
5951 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5952 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5953 OPTAB_WIDEN);
5955 if (tem && code == EQ)
5956 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5960 if (tem && normalizep)
5961 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5962 GET_MODE_BITSIZE (mode) - 1,
5963 subtarget, normalizep == 1);
5965 if (tem)
5967 if (!target)
5969 else if (GET_MODE (tem) != target_mode)
5971 convert_move (target, tem, 0);
5972 tem = target;
5974 else if (!subtarget)
5976 emit_move_insn (target, tem);
5977 tem = target;
5980 else
5981 delete_insns_since (last);
5983 return tem;
5986 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5987 and storing in TARGET. Normally return TARGET.
5988 Return 0 if that cannot be done.
5990 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5991 it is VOIDmode, they cannot both be CONST_INT.
5993 UNSIGNEDP is for the case where we have to widen the operands
5994 to perform the operation. It says to use zero-extension.
5996 NORMALIZEP is 1 if we should convert the result to be either zero
5997 or one. NORMALIZEP is -1 if we should convert the result to be
5998 either zero or -1. If NORMALIZEP is zero, the result will be left
5999 "raw" out of the scc insn. */
6002 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6003 machine_mode mode, int unsignedp, int normalizep)
6005 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6006 enum rtx_code rcode;
6007 rtx subtarget;
6008 rtx tem, trueval;
6009 rtx_insn *last;
6011 /* If we compare constants, we shouldn't use a store-flag operation,
6012 but a constant load. We can get there via the vanilla route that
6013 usually generates a compare-branch sequence, but will in this case
6014 fold the comparison to a constant, and thus elide the branch. */
6015 if (CONSTANT_P (op0) && CONSTANT_P (op1))
6016 return NULL_RTX;
6018 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6019 target_mode);
6020 if (tem)
6021 return tem;
6023 /* If we reached here, we can't do this with a scc insn, however there
6024 are some comparisons that can be done in other ways. Don't do any
6025 of these cases if branches are very cheap. */
6026 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6027 return 0;
6029 /* See what we need to return. We can only return a 1, -1, or the
6030 sign bit. */
6032 if (normalizep == 0)
6034 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6035 normalizep = STORE_FLAG_VALUE;
6037 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6039 else
6040 return 0;
6043 last = get_last_insn ();
6045 /* If optimizing, use different pseudo registers for each insn, instead
6046 of reusing the same pseudo. This leads to better CSE, but slows
6047 down the compiler, since there are more pseudos. */
6048 subtarget = (!optimize
6049 && (target_mode == mode)) ? target : NULL_RTX;
6050 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6052 /* For floating-point comparisons, try the reverse comparison or try
6053 changing the "orderedness" of the comparison. */
6054 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6056 enum rtx_code first_code;
6057 bool and_them;
6059 rcode = reverse_condition_maybe_unordered (code);
6060 if (can_compare_p (rcode, mode, ccp_store_flag)
6061 && (code == ORDERED || code == UNORDERED
6062 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6063 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6065 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6066 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6068 /* For the reverse comparison, use either an addition or a XOR. */
6069 if (want_add
6070 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6071 optimize_insn_for_speed_p ()) == 0)
6073 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6074 STORE_FLAG_VALUE, target_mode);
6075 if (tem)
6076 return expand_binop (target_mode, add_optab, tem,
6077 gen_int_mode (normalizep, target_mode),
6078 target, 0, OPTAB_WIDEN);
6080 else if (!want_add
6081 && rtx_cost (trueval, mode, XOR, 1,
6082 optimize_insn_for_speed_p ()) == 0)
6084 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6085 normalizep, target_mode);
6086 if (tem)
6087 return expand_binop (target_mode, xor_optab, tem, trueval,
6088 target, INTVAL (trueval) >= 0,
6089 OPTAB_WIDEN);
6093 delete_insns_since (last);
6095 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6096 if (code == ORDERED || code == UNORDERED)
6097 return 0;
6099 and_them = split_comparison (code, mode, &first_code, &code);
6101 /* If there are no NaNs, the first comparison should always fall through.
6102 Effectively change the comparison to the other one. */
6103 if (!HONOR_NANS (mode))
6105 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6106 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6107 target_mode);
6110 if (!HAVE_conditional_move)
6111 return 0;
6113 /* Do not turn a trapping comparison into a non-trapping one. */
6114 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6115 && flag_trapping_math)
6116 return 0;
6118 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6119 conditional move. */
6120 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6121 normalizep, target_mode);
6122 if (tem == 0)
6123 return 0;
6125 if (and_them)
6126 tem = emit_conditional_move (target, code, op0, op1, mode,
6127 tem, const0_rtx, GET_MODE (tem), 0);
6128 else
6129 tem = emit_conditional_move (target, code, op0, op1, mode,
6130 trueval, tem, GET_MODE (tem), 0);
6132 if (tem == 0)
6133 delete_insns_since (last);
6134 return tem;
6137 /* The remaining tricks only apply to integer comparisons. */
6139 scalar_int_mode int_mode;
6140 if (is_int_mode (mode, &int_mode))
6141 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6142 unsignedp, normalizep, trueval);
6144 return 0;
6147 /* Like emit_store_flag, but always succeeds. */
6150 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6151 machine_mode mode, int unsignedp, int normalizep)
6153 rtx tem;
6154 rtx_code_label *label;
6155 rtx trueval, falseval;
6157 /* First see if emit_store_flag can do the job. */
6158 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6159 if (tem != 0)
6160 return tem;
6162 /* If one operand is constant, make it the second one. Only do this
6163 if the other operand is not constant as well. */
6164 if (swap_commutative_operands_p (op0, op1))
6166 std::swap (op0, op1);
6167 code = swap_condition (code);
6170 if (mode == VOIDmode)
6171 mode = GET_MODE (op0);
6173 if (!target)
6174 target = gen_reg_rtx (word_mode);
6176 /* If this failed, we have to do this with set/compare/jump/set code.
6177 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6178 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6179 if (code == NE
6180 && GET_MODE_CLASS (mode) == MODE_INT
6181 && REG_P (target)
6182 && op0 == target
6183 && op1 == const0_rtx)
6185 label = gen_label_rtx ();
6186 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6187 NULL_RTX, NULL, label,
6188 profile_probability::uninitialized ());
6189 emit_move_insn (target, trueval);
6190 emit_label (label);
6191 return target;
6194 if (!REG_P (target)
6195 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6196 target = gen_reg_rtx (GET_MODE (target));
6198 /* Jump in the right direction if the target cannot implement CODE
6199 but can jump on its reverse condition. */
6200 falseval = const0_rtx;
6201 if (! can_compare_p (code, mode, ccp_jump)
6202 && (! FLOAT_MODE_P (mode)
6203 || code == ORDERED || code == UNORDERED
6204 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6205 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6207 enum rtx_code rcode;
6208 if (FLOAT_MODE_P (mode))
6209 rcode = reverse_condition_maybe_unordered (code);
6210 else
6211 rcode = reverse_condition (code);
6213 /* Canonicalize to UNORDERED for the libcall. */
6214 if (can_compare_p (rcode, mode, ccp_jump)
6215 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6217 falseval = trueval;
6218 trueval = const0_rtx;
6219 code = rcode;
6223 emit_move_insn (target, trueval);
6224 label = gen_label_rtx ();
6225 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6226 label, profile_probability::uninitialized ());
6228 emit_move_insn (target, falseval);
6229 emit_label (label);
6231 return target;
6234 /* Helper function for canonicalize_comparison. Swap between inclusive
6235 and exclusive ranges in order to create an equivalent comparison. See
6236 canonicalize_comparison for the possible cases. */
6238 static enum rtx_code
6239 equivalent_cmp_code (enum rtx_code code)
6241 switch (code)
6243 case GT:
6244 return GE;
6245 case GE:
6246 return GT;
6247 case LT:
6248 return LE;
6249 case LE:
6250 return LT;
6251 case GTU:
6252 return GEU;
6253 case GEU:
6254 return GTU;
6255 case LTU:
6256 return LEU;
6257 case LEU:
6258 return LTU;
6260 default:
6261 return code;
6265 /* Choose the more appropriate immediate in scalar integer comparisons. The
6266 purpose of this is to end up with an immediate which can be loaded into a
6267 register in fewer moves, if possible.
6269 For each integer comparison there exists an equivalent choice:
6270 i) a > b or a >= b + 1
6271 ii) a <= b or a < b + 1
6272 iii) a >= b or a > b - 1
6273 iv) a < b or a <= b - 1
6275 MODE is the mode of the first operand.
6276 CODE points to the comparison code.
6277 IMM points to the rtx containing the immediate. *IMM must satisfy
6278 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6279 on exit. */
6281 void
6282 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6284 if (!SCALAR_INT_MODE_P (mode))
6285 return;
6287 int to_add = 0;
6288 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6290 /* Extract the immediate value from the rtx. */
6291 wide_int imm_val = rtx_mode_t (*imm, mode);
6293 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6294 to_add = 1;
6295 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6296 to_add = -1;
6297 else
6298 return;
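/* For instance, "x <= 0xffff" (LE) can become "x < 0x10000" (LT) when
   the power-of-two constant is cheaper to materialize; the insn_cost
   comparison below decides whether to make the switch.  */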
6300 /* Check for overflow/underflow in the case of signed values and
6301 wrapping around in the case of unsigned values. If any occur
6302 cancel the optimization. */
6303 wi::overflow_type overflow = wi::OVF_NONE;
6304 wide_int imm_modif;
6306 if (to_add == 1)
6307 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6308 else
6309 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6311 if (overflow)
6312 return;
6314 /* The following creates a pseudo; if we cannot do that, bail out. */
6315 if (!can_create_pseudo_p ())
6316 return;
6318 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6319 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6321 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6322 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6324 /* Update the immediate and the code. */
6325 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6327 *code = equivalent_cmp_code (*code);
6328 *imm = new_imm;
6334 /* Perform possibly multi-word comparison and conditional jump to LABEL
6335 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6336 now a thin wrapper around do_compare_rtx_and_jump. */
6338 static void
6339 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6340 rtx_code_label *label)
6342 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6343 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6344 NULL, label, profile_probability::uninitialized ());