gcc/expmed.cc

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2024 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* Work around tree-optimization/91825.  */
  22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "backend.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "predict.h"
  32 #include "memmodel.h"
  33 #include "tm_p.h"
  34 #include "optabs.h"
  35 #include "expmed.h"
  36 #include "regs.h"
  37 #include "emit-rtl.h"
  38 #include "diagnostic-core.h"
  39 #include "fold-const.h"
  40 #include "stor-layout.h"
  41 #include "dojump.h"
  42 #include "explow.h"
  43 #include "expr.h"
  44 #include "langhooks.h"
  45 #include "tree-vector-builder.h"
  46 #include "recog.h"
  47
  48 struct target_expmed default_target_expmed;
  49 #if SWITCHABLE_TARGET
  50 struct target_expmed *this_target_expmed = &default_target_expmed;
  51 #endif
  52
  53 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
  54                                       unsigned HOST_WIDE_INT,
  55                                       unsigned HOST_WIDE_INT,
  56                                       poly_uint64, poly_uint64,
  57                                       machine_mode, rtx, bool, bool);
  58 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
  59                                    unsigned HOST_WIDE_INT,
  60                                    unsigned HOST_WIDE_INT,
  61                                    poly_uint64, poly_uint64,
  62                                    rtx, scalar_int_mode, bool);
  63 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
  64                                      unsigned HOST_WIDE_INT,
  65                                      unsigned HOST_WIDE_INT,
  66                                      rtx, scalar_int_mode, bool);
  67 static void store_split_bit_field (rtx, opt_scalar_int_mode,
  68                                    unsigned HOST_WIDE_INT,
  69                                    unsigned HOST_WIDE_INT,
  70                                    poly_uint64, poly_uint64,
  71                                    rtx, scalar_int_mode, bool);
  72 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
  73                                        unsigned HOST_WIDE_INT,
  74                                        unsigned HOST_WIDE_INT, int, rtx,
  75                                        machine_mode, machine_mode, bool, bool);
  76 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
  77                                     unsigned HOST_WIDE_INT,
  78                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  79 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
  80                                       unsigned HOST_WIDE_INT,
  81                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  82 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  83 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
  84                                     unsigned HOST_WIDE_INT,
  85                                     unsigned HOST_WIDE_INT, int, bool);
  86 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  87 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  88 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  89
  90 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  91    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  92    The mask is truncated if necessary to the width of mode MODE.  The
  93    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  94
  95 static inline rtx
  96 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
  97 {
  98   return immed_wide_int_const
  99     (wi::shifted_mask (bitpos, bitsize, complement,
 100                        GET_MODE_PRECISION (mode)), mode);
 101 }
 102
 103 /* Test whether a value is zero of a power of two.  */
 104 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
 105   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
 106
 107 struct init_expmed_rtl
 108 {
 109   rtx reg;
 110   rtx plus;
 111   rtx neg;
 112   rtx mult;
 113   rtx sdiv;
 114   rtx udiv;
 115   rtx sdiv_32;
 116   rtx smod_32;
 117   rtx wide_mult;
 118   rtx wide_lshr;
 119   rtx wide_trunc;
 120   rtx shift;
 121   rtx shift_mult;
 122   rtx shift_add;
 123   rtx shift_sub0;
 124   rtx shift_sub1;
 125   rtx zext;
 126   rtx trunc;
 127
 128   rtx pow2[MAX_BITS_PER_WORD];
 129   rtx cint[MAX_BITS_PER_WORD];
 130 };
 131
 132 static void
 133 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
 134                       scalar_int_mode from_mode, bool speed)
 135 {
 136   int to_size, from_size;
 137   rtx which;
 138
 139   to_size = GET_MODE_PRECISION (to_mode);
 140   from_size = GET_MODE_PRECISION (from_mode);
 141
 142   /* Most partial integers have a precision less than the "full"
 143      integer it requires for storage.  In case one doesn't, for
 144      comparison purposes here, reduce the bit size by one in that
 145      case.  */
 146   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 147       && pow2p_hwi (to_size))
 148     to_size --;
 149   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 150       && pow2p_hwi (from_size))
 151     from_size --;
 152
 153   /* Assume cost of zero-extend and sign-extend is the same.  */
 154   which = (to_size < from_size ? all->trunc : all->zext);
 155
 156   PUT_MODE (all->reg, from_mode);
 157   set_convert_cost (to_mode, from_mode, speed,
 158                     set_src_cost (which, to_mode, speed));
 159   /* Restore all->reg's mode.  */
 160   PUT_MODE (all->reg, to_mode);
 161 }
 162
 163 static void
 164 init_expmed_one_mode (struct init_expmed_rtl *all,
 165                       machine_mode mode, int speed)
 166 {
 167   int m, n, mode_bitsize;
 168   machine_mode mode_from;
 169
 170   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 171
 172   PUT_MODE (all->reg, mode);
 173   PUT_MODE (all->plus, mode);
 174   PUT_MODE (all->neg, mode);
 175   PUT_MODE (all->mult, mode);
 176   PUT_MODE (all->sdiv, mode);
 177   PUT_MODE (all->udiv, mode);
 178   PUT_MODE (all->sdiv_32, mode);
 179   PUT_MODE (all->smod_32, mode);
 180   PUT_MODE (all->wide_trunc, mode);
 181   PUT_MODE (all->shift, mode);
 182   PUT_MODE (all->shift_mult, mode);
 183   PUT_MODE (all->shift_add, mode);
 184   PUT_MODE (all->shift_sub0, mode);
 185   PUT_MODE (all->shift_sub1, mode);
 186   PUT_MODE (all->zext, mode);
 187   PUT_MODE (all->trunc, mode);
 188
 189   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 190   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 191   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 192   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 193   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 194
 195   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 196                                      <= 2 * add_cost (speed, mode)));
 197   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 198                                      <= 4 * add_cost (speed, mode)));
 199
 200   set_shift_cost (speed, mode, 0, 0);
 201   {
 202     int cost = add_cost (speed, mode);
 203     set_shiftadd_cost (speed, mode, 0, cost);
 204     set_shiftsub0_cost (speed, mode, 0, cost);
 205     set_shiftsub1_cost (speed, mode, 0, cost);
 206   }
 207
 208   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 209   for (m = 1; m < n; m++)
 210     {
 211       XEXP (all->shift, 1) = all->cint[m];
 212       XEXP (all->shift_mult, 1) = all->pow2[m];
 213
 214       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 215       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 216                                                        speed));
 217       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 218                                                         speed));
 219       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 220                                                         speed));
 221     }
 222
 223   scalar_int_mode int_mode_to;
 224   if (is_a <scalar_int_mode> (mode, &int_mode_to))
 225     {
 226       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 227            mode_from = (machine_mode)(mode_from + 1))
 228         init_expmed_one_conv (all, int_mode_to,
 229                               as_a <scalar_int_mode> (mode_from), speed);
 230
 231       scalar_int_mode wider_mode;
 232       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
 233           && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
 234         {
 235           PUT_MODE (all->reg, mode);
 236           PUT_MODE (all->zext, wider_mode);
 237           PUT_MODE (all->wide_mult, wider_mode);
 238           PUT_MODE (all->wide_lshr, wider_mode);
 239           XEXP (all->wide_lshr, 1)
 240             = gen_int_shift_amount (wider_mode, mode_bitsize);
 241
 242           set_mul_widen_cost (speed, wider_mode,
 243                               set_src_cost (all->wide_mult, wider_mode, speed));
 244           set_mul_highpart_cost (speed, int_mode_to,
 245                                  set_src_cost (all->wide_trunc,
 246                                                int_mode_to, speed));
 247         }
 248     }
 249 }
 250
 251 void
 252 init_expmed (void)
 253 {
 254   struct init_expmed_rtl all;
 255   machine_mode mode = QImode;
 256   int m, speed;
 257
 258   memset (&all, 0, sizeof all);
 259   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 260     {
 261       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 262       all.cint[m] = GEN_INT (m);
 263     }
 264
 265   /* Avoid using hard regs in ways which may be unsupported.  */
 266   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 267   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 268   all.neg = gen_rtx_NEG (mode, all.reg);
 269   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 270   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 271   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 272   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 273   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 274   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 275   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 276   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 277   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 278   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 279   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 280   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 281   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 282   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 283   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 284
 285   for (speed = 0; speed < 2; speed++)
 286     {
 287       crtl->maybe_hot_insn_p = speed;
 288       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 289
 290       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 291            mode = (machine_mode)(mode + 1))
 292         init_expmed_one_mode (&all, mode, speed);
 293
 294       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 295         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 296              mode = (machine_mode)(mode + 1))
 297           init_expmed_one_mode (&all, mode, speed);
 298
 299       if (MIN_MODE_VECTOR_INT != VOIDmode)
 300         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 301              mode = (machine_mode)(mode + 1))
 302           init_expmed_one_mode (&all, mode, speed);
 303     }
 304
 305   if (alg_hash_used_p ())
 306     {
 307       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 308       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 309     }
 310   else
 311     set_alg_hash_used_p (true);
 312   default_rtl_profile ();
 313
 314   ggc_free (all.trunc);
 315   ggc_free (all.shift_sub1);
 316   ggc_free (all.shift_sub0);
 317   ggc_free (all.shift_add);
 318   ggc_free (all.shift_mult);
 319   ggc_free (all.shift);
 320   ggc_free (all.wide_trunc);
 321   ggc_free (all.wide_lshr);
 322   ggc_free (all.wide_mult);
 323   ggc_free (all.zext);
 324   ggc_free (all.smod_32);
 325   ggc_free (all.sdiv_32);
 326   ggc_free (all.udiv);
 327   ggc_free (all.sdiv);
 328   ggc_free (all.mult);
 329   ggc_free (all.neg);
 330   ggc_free (all.plus);
 331   ggc_free (all.reg);
 332 }
 333
 334 /* Return an rtx representing minus the value of X.
 335    MODE is the intended mode of the result,
 336    useful if X is a CONST_INT.  */
 337
 338 rtx
 339 negate_rtx (machine_mode mode, rtx x)
 340 {
 341   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 342
 343   if (result == 0)
 344     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 345
 346   return result;
 347 }
 348
 349 /* Whether reverse storage order is supported on the target.  */
 350 static int reverse_storage_order_supported = -1;
 351
 352 /* Check whether reverse storage order is supported on the target.  */
 353
 354 static void
 355 check_reverse_storage_order_support (void)
 356 {
 357   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 358     {
 359       reverse_storage_order_supported = 0;
 360       sorry ("reverse scalar storage order");
 361     }
 362   else
 363     reverse_storage_order_supported = 1;
 364 }
 365
 366 /* Whether reverse FP storage order is supported on the target.  */
 367 static int reverse_float_storage_order_supported = -1;
 368
 369 /* Check whether reverse FP storage order is supported on the target.  */
 370
 371 static void
 372 check_reverse_float_storage_order_support (void)
 373 {
 374   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 375     {
 376       reverse_float_storage_order_supported = 0;
 377       sorry ("reverse floating-point scalar storage order");
 378     }
 379   else
 380     reverse_float_storage_order_supported = 1;
 381 }
 382
 383 /* Return an rtx representing value of X with reverse storage order.
 384    MODE is the intended mode of the result,
 385    useful if X is a CONST_INT.  */
 386
 387 rtx
 388 flip_storage_order (machine_mode mode, rtx x)
 389 {
 390   scalar_int_mode int_mode;
 391   rtx result;
 392
 393   if (mode == QImode)
 394     return x;
 395
 396   if (COMPLEX_MODE_P (mode))
 397     {
 398       rtx real = read_complex_part (x, false);
 399       rtx imag = read_complex_part (x, true);
 400
 401       real = flip_storage_order (GET_MODE_INNER (mode), real);
 402       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 403
 404       return gen_rtx_CONCAT (mode, real, imag);
 405     }
 406
 407   if (UNLIKELY (reverse_storage_order_supported < 0))
 408     check_reverse_storage_order_support ();
 409
 410   if (!is_a <scalar_int_mode> (mode, &int_mode))
 411     {
 412       if (FLOAT_MODE_P (mode)
 413           && UNLIKELY (reverse_float_storage_order_supported < 0))
 414         check_reverse_float_storage_order_support ();
 415
 416       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
 417           || !targetm.scalar_mode_supported_p (int_mode))
 418         {
 419           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 420           return x;
 421         }
 422       x = gen_lowpart (int_mode, x);
 423     }
 424
 425   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 426   if (result == 0)
 427     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 428
 429   if (int_mode != mode)
 430     result = gen_lowpart (mode, result);
 431
 432   return result;
 433 }
 434
 435 /* If MODE is set, adjust bitfield memory MEM so that it points to the
 436    first unit of mode MODE that contains a bitfield of size BITSIZE at
 437    bit position BITNUM.  If MODE is not set, return a BLKmode reference
 438    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
 439    of the field within the new memory.  */
 440
 441 static rtx
 442 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
 443                       unsigned HOST_WIDE_INT bitsize,
 444                       unsigned HOST_WIDE_INT bitnum,
 445                       unsigned HOST_WIDE_INT *new_bitnum)
 446 {
 447   scalar_int_mode imode;
 448   if (mode.exists (&imode))
 449     {
 450       unsigned int unit = GET_MODE_BITSIZE (imode);
 451       *new_bitnum = bitnum % unit;
 452       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 453       return adjust_bitfield_address (mem, imode, offset);
 454     }
 455   else
 456     {
 457       *new_bitnum = bitnum % BITS_PER_UNIT;
 458       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 459       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 460                             / BITS_PER_UNIT);
 461       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
 462     }
 463 }
 464
 465 /* The caller wants to perform insertion or extraction PATTERN on a
 466    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 467    BITREGION_START and BITREGION_END are as for store_bit_field
 468    and FIELDMODE is the natural mode of the field.
 469
 470    Search for a mode that is compatible with the memory access
 471    restrictions and (where applicable) with a register insertion or
 472    extraction.  Return the new memory on success, storing the adjusted
 473    bit position in *NEW_BITNUM.  Return null otherwise.  */
 474
 475 static rtx
 476 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 477                               rtx op0, HOST_WIDE_INT bitsize,
 478                               HOST_WIDE_INT bitnum,
 479                               poly_uint64 bitregion_start,
 480                               poly_uint64 bitregion_end,
 481                               machine_mode fieldmode,
 482                               unsigned HOST_WIDE_INT *new_bitnum)
 483 {
 484   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 485                                 bitregion_end, MEM_ALIGN (op0),
 486                                 MEM_VOLATILE_P (op0));
 487   scalar_int_mode best_mode;
 488   if (iter.next_mode (&best_mode))
 489     {
 490       /* We can use a memory in BEST_MODE.  See whether this is true for
 491          any wider modes.  All other things being equal, we prefer to
 492          use the widest mode possible because it tends to expose more
 493          CSE opportunities.  */
 494       if (!iter.prefer_smaller_modes ())
 495         {
 496           /* Limit the search to the mode required by the corresponding
 497              register insertion or extraction instruction, if any.  */
 498           scalar_int_mode limit_mode = word_mode;
 499           extraction_insn insn;
 500           if (get_best_reg_extraction_insn (&insn, pattern,
 501                                             GET_MODE_BITSIZE (best_mode),
 502                                             fieldmode))
 503             limit_mode = insn.field_mode;
 504
 505           scalar_int_mode wider_mode;
 506           while (iter.next_mode (&wider_mode)
 507                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 508             best_mode = wider_mode;
 509         }
 510       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 511                                    new_bitnum);
 512     }
 513   return NULL_RTX;
 514 }
 515
 516 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 517    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 518    offset is then BITNUM / BITS_PER_UNIT.  */
 519
 520 static bool
 521 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
 522                      machine_mode struct_mode)
 523 {
 524   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
 525   if (BYTES_BIG_ENDIAN)
 526     return (multiple_p (bitnum, BITS_PER_UNIT)
 527             && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
 528                 || multiple_p (bitnum + bitsize,
 529                                regsize * BITS_PER_UNIT)));
 530   else
 531     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
 532 }
 533
 534 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 535    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 536    Return false if the access would touch memory outside the range
 537    BITREGION_START to BITREGION_END for conformance to the C++ memory
 538    model.  */
 539
 540 static bool
 541 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 542                             unsigned HOST_WIDE_INT bitnum,
 543                             scalar_int_mode fieldmode,
 544                             poly_uint64 bitregion_start,
 545                             poly_uint64 bitregion_end)
 546 {
 547   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 548
 549   /* -fstrict-volatile-bitfields must be enabled and we must have a
 550      volatile MEM.  */
 551   if (!MEM_P (op0)
 552       || !MEM_VOLATILE_P (op0)
 553       || flag_strict_volatile_bitfields <= 0)
 554     return false;
 555
 556   /* The bit size must not be larger than the field mode, and
 557      the field mode must not be larger than a word.  */
 558   if (bitsize > modesize || modesize > BITS_PER_WORD)
 559     return false;
 560
 561   /* Check for cases of unaligned fields that must be split.  */
 562   if (bitnum % modesize + bitsize > modesize)
 563     return false;
 564
 565   /* The memory must be sufficiently aligned for a MODESIZE access.
 566      This condition guarantees, that the memory access will not
 567      touch anything after the end of the structure.  */
 568   if (MEM_ALIGN (op0) < modesize)
 569     return false;
 570
 571   /* Check for cases where the C++ memory model applies.  */
 572   if (maybe_ne (bitregion_end, 0U)
 573       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
 574           || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
 575                        bitregion_end)))
 576     return false;
 577
 578   return true;
 579 }
 580
 581 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 582    bit number BITNUM can be treated as a simple value of mode MODE.
 583    Store the byte offset in *BYTENUM if so.  */
 584
 585 static bool
 586 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
 587                        machine_mode mode, poly_uint64 *bytenum)
 588 {
 589   return (MEM_P (op0)
 590           && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
 591           && known_eq (bitsize, GET_MODE_BITSIZE (mode))
 592           && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
 593               || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
 594                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 595 }
 596 \f
 597 /* Try to use instruction INSV to store VALUE into a field of OP0.
 598    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
 599    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
 600    are as for store_bit_field.  */
 601
 602 static bool
 603 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 604                             opt_scalar_int_mode op0_mode,
 605                             unsigned HOST_WIDE_INT bitsize,
 606                             unsigned HOST_WIDE_INT bitnum,
 607                             rtx value, scalar_int_mode value_mode)
 608 {
 609   class expand_operand ops[4];
 610   rtx value1;
 611   rtx xop0 = op0;
 612   rtx_insn *last = get_last_insn ();
 613   bool copy_back = false;
 614
 615   scalar_int_mode op_mode = insv->field_mode;
 616   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 617   if (bitsize == 0 || bitsize > unit)
 618     return false;
 619
 620   if (MEM_P (xop0))
 621     /* Get a reference to the first byte of the field.  */
 622     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 623                                  &bitnum);
 624   else
 625     {
 626       /* Convert from counting within OP0 to counting in OP_MODE.  */
 627       if (BYTES_BIG_ENDIAN)
 628         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
 629
 630       /* If xop0 is a register, we need it in OP_MODE
 631          to make it acceptable to the format of insv.  */
 632       if (GET_CODE (xop0) == SUBREG)
 633         {
 634           /* If such a SUBREG can't be created, give up.  */
 635           if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
 636                                 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
 637             return false;
 638           /* We can't just change the mode, because this might clobber op0,
 639              and we will need the original value of op0 if insv fails.  */
 640           xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
 641                                  SUBREG_BYTE (xop0));
 642         }
 643       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 644         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 645     }
 646
 647   /* If the destination is a paradoxical subreg such that we need a
 648      truncate to the inner mode, perform the insertion on a temporary and
 649      truncate the result to the original destination.  Note that we can't
 650      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 651      X) 0)) is (reg:N X).  */
 652   if (GET_CODE (xop0) == SUBREG
 653       && REG_P (SUBREG_REG (xop0))
 654       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 655                                          op_mode))
 656     {
 657       rtx tem = gen_reg_rtx (op_mode);
 658       emit_move_insn (tem, xop0);
 659       xop0 = tem;
 660       copy_back = true;
 661     }
 662
 663   /* There are similar overflow check at the start of store_bit_field_1,
 664      but that only check the situation where the field lies completely
 665      outside the register, while there do have situation where the field
 666      lies partialy in the register, we need to adjust bitsize for this
 667      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 668      will broken on those arch support bit insert instruction, like arm, aarch64
 669      etc.  */
 670   if (bitsize + bitnum > unit && bitnum < unit)
 671     {
 672       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 673                "destination object, data truncated into %wu-bit",
 674                bitsize, unit - bitnum);
 675       bitsize = unit - bitnum;
 676     }
 677
 678   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 679      "backwards" from the size of the unit we are inserting into.
 680      Otherwise, we count bits from the most significant on a
 681      BYTES/BITS_BIG_ENDIAN machine.  */
 682
 683   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 684     bitnum = unit - bitsize - bitnum;
 685
 686   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 687   value1 = value;
 688   if (value_mode != op_mode)
 689     {
 690       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
 691         {
 692           rtx tmp;
 693           /* Optimization: Don't bother really extending VALUE
 694              if it has all the bits we will actually use.  However,
 695              if we must narrow it, be sure we do it correctly.  */
 696
 697           if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
 698             {
 699               tmp = simplify_subreg (op_mode, value1, value_mode, 0);
 700               if (! tmp)
 701                 tmp = simplify_gen_subreg (op_mode,
 702                                            force_reg (value_mode, value1),
 703                                            value_mode, 0);
 704             }
 705           else
 706             {
 707               if (targetm.mode_rep_extended (op_mode, value_mode) != UNKNOWN)
 708                 tmp = simplify_gen_unary (TRUNCATE, op_mode,
 709                                           value1, value_mode);
 710               else
 711                 {
 712                   tmp = gen_lowpart_if_possible (op_mode, value1);
 713                   if (! tmp)
 714                     tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
 715                 }
 716             }
 717           value1 = tmp;
 718         }
 719       else if (CONST_INT_P (value))
 720         value1 = gen_int_mode (INTVAL (value), op_mode);
 721       else
 722         /* Parse phase is supposed to make VALUE's data type
 723            match that of the component reference, which is a type
 724            at least as wide as the field; so VALUE should have
 725            a mode that corresponds to that type.  */
 726         gcc_assert (CONSTANT_P (value));
 727     }
 728
 729   create_fixed_operand (&ops[0], xop0);
 730   create_integer_operand (&ops[1], bitsize);
 731   create_integer_operand (&ops[2], bitnum);
 732   create_input_operand (&ops[3], value1, op_mode);
 733   if (maybe_expand_insn (insv->icode, 4, ops))
 734     {
 735       if (copy_back)
 736         convert_move (op0, xop0, true);
 737       return true;
 738     }
 739   delete_insns_since (last);
 740   return false;
 741 }
 742
 743 /* A subroutine of store_bit_field, with the same arguments.  Return true
 744    if the operation could be implemented.
 745
 746    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 747    no other way of implementing the operation.  If FALLBACK_P is false,
 748    return false instead.
 749
  750    If UNDEFINED_P is true then STR_RTX is undefined and may be set using
  751    a subreg instead.  */
 752
 753 static bool
 754 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
 755                    poly_uint64 bitregion_start, poly_uint64 bitregion_end,
 756                    machine_mode fieldmode,
 757                    rtx value, bool reverse, bool fallback_p, bool undefined_p)
 758 {
 759   rtx op0 = str_rtx;
 760
 761   while (GET_CODE (op0) == SUBREG)
 762     {
 763       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
 764       op0 = SUBREG_REG (op0);
 765     }
 766
 767   /* No action is needed if the target is a register and if the field
 768      lies completely outside that register.  This can occur if the source
 769      code contains an out-of-bounds access to a small array.  */
 770   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
 771     return true;
 772
 773   /* Use vec_set patterns for inserting parts of vectors whenever
 774      available.  */
 775   machine_mode outermode = GET_MODE (op0);
 776   scalar_mode innermode = GET_MODE_INNER (outermode);
 777   poly_uint64 pos;
 778   if (VECTOR_MODE_P (outermode)
 779       && !MEM_P (op0)
 780       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
 781       && fieldmode == innermode
 782       && known_eq (bitsize, GET_MODE_PRECISION (innermode))
 783       && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
 784     {
 785       class expand_operand ops[3];
 786       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 787
 788       create_fixed_operand (&ops[0], op0);
 789       create_input_operand (&ops[1], value, innermode);
 790       create_integer_operand (&ops[2], pos);
 791       if (maybe_expand_insn (icode, 3, ops))
 792         return true;
 793     }
 794
 795   /* If the target is a register, overwriting the entire object, or storing
 796      a full-word or multi-word field can be done with just a SUBREG.  */
 797   if (!MEM_P (op0)
 798       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
 799     {
 800       /* Use the subreg machinery either to narrow OP0 to the required
 801          words or to cope with mode punning between equal-sized modes.
 802          In the latter case, use subreg on the rhs side, not lhs.  */
 803       rtx sub;
 804       poly_uint64 bytenum;
 805       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
 806       if (known_eq (bitnum, 0U)
 807           && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
 808         {
 809           sub = force_subreg (GET_MODE (op0), value, fieldmode, 0);
 810           if (sub)
 811             {
 812               if (reverse)
 813                 sub = flip_storage_order (GET_MODE (op0), sub);
 814               emit_move_insn (op0, sub);
 815               return true;
 816             }
 817         }
 818       else if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
 819                && (undefined_p
 820                    || (multiple_p (bitnum, regsize * BITS_PER_UNIT)
 821                        && multiple_p (bitsize, regsize * BITS_PER_UNIT)))
 822                && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
 823         {
 824           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), bytenum);
 825           if (sub)
 826             {
 827               if (reverse)
 828                 value = flip_storage_order (fieldmode, value);
 829               emit_move_insn (sub, value);
 830               return true;
 831             }
 832         }
 833     }
 834
 835   /* If the target is memory, storing any naturally aligned field can be
 836      done with a simple store.  For targets that support fast unaligned
 837      memory, any naturally sized, unit aligned field can be done directly.  */
 838   poly_uint64 bytenum;
 839   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
 840     {
 841       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
 842       if (reverse)
 843         value = flip_storage_order (fieldmode, value);
 844       emit_move_insn (op0, value);
 845       return true;
 846     }
 847
 848   /* It's possible we'll need to handle other cases here for
 849      polynomial bitnum and bitsize.  */
 850
 851   /* From here on we need to be looking at a fixed-size insertion.  */
 852   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
 853   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
 854
 855   /* Make sure we are playing with integral modes.  Pun with subregs
 856      if we aren't.  This must come after the entire register case above,
 857      since that case is valid for any mode.  The following cases are only
 858      valid for integral modes.  */
 859   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
 860   scalar_int_mode imode;
 861   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
 862     {
 863       if (MEM_P (op0))
 864         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
 865                                             0, MEM_SIZE (op0));
 866       else if (!op0_mode.exists ())
 867         {
 868           if (ibitnum == 0
 869               && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
 870               && MEM_P (value)
 871               && !reverse)
 872             {
 873               value = adjust_address (value, GET_MODE (op0), 0);
 874               emit_move_insn (op0, value);
 875               return true;
 876             }
 877           if (!fallback_p)
 878             return false;
 879           rtx temp = assign_stack_temp (GET_MODE (op0),
 880                                         GET_MODE_SIZE (GET_MODE (op0)));
 881           emit_move_insn (temp, op0);
 882           store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
 883                              reverse, fallback_p, undefined_p);
 884           emit_move_insn (op0, temp);
 885           return true;
 886         }
 887       else
 888         op0 = gen_lowpart (op0_mode.require (), op0);
 889     }
 890
 891   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
 892                                    bitregion_start, bitregion_end,
 893                                    fieldmode, value, reverse, fallback_p);
 894 }
 895
 896 /* Subroutine of store_bit_field_1, with the same arguments, except
 897    that BITSIZE and BITNUM are constant.  Handle cases specific to
 898    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
 899    otherwise OP0 is a BLKmode MEM.
        Return true if the operation could be implemented.  */
 900
 901 static bool
 902 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
 903                           unsigned HOST_WIDE_INT bitsize,
 904                           unsigned HOST_WIDE_INT bitnum,
 905                           poly_uint64 bitregion_start,
 906                           poly_uint64 bitregion_end,
 907                           machine_mode fieldmode,
 908                           rtx value, bool reverse, bool fallback_p)
 909 {
 910   /* Storing an lsb-aligned field in a register
 911      can be done with a movstrict instruction.  */
 912
 913   if (!MEM_P (op0)
 914       && !reverse
 915       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
 916       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
 917       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 918     {
 919       class expand_operand ops[2];
 920       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 921       rtx arg0 = op0;
 922       unsigned HOST_WIDE_INT subreg_off;
 923
 924       if (GET_CODE (arg0) == SUBREG)
 925         {
 926           /* Else we've got some float mode source being extracted into
 927              a different float mode destination -- this combination of
 928              subregs results in Severe Tire Damage.  */
 929           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 930                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 931                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 932           arg0 = SUBREG_REG (arg0);
 933         }
 934
 935       subreg_off = bitnum / BITS_PER_UNIT;
 936       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
 937           /* STRICT_LOW_PART must have a non-paradoxical subreg as
 938              operand.  */
 939           && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
 940         {
 941           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 942
 943           create_fixed_operand (&ops[0], arg0);
 944           /* Shrink the source operand to FIELDMODE.  */
 945           create_convert_operand_to (&ops[1], value, fieldmode, false);
 946           if (maybe_expand_insn (icode, 2, ops))
 947             return true;
 948         }
 949     }
 950
 951   /* Handle fields bigger than a word.  */
 952
 953   if (bitsize > BITS_PER_WORD)
 954     {
 955       /* Here we transfer the words of the field
 956          in the order least significant first.
 957          This is because the most significant word is the one which may
 958          be less than full.
 959          However, only do that if the value is not BLKmode.  */
 960
 961       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 962       const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 963       rtx_insn *last;
 964
 965       /* This is the mode we must force value to, so that there will be enough
 966          subwords to extract.  Note that fieldmode will often (always?) be
 967          VOIDmode, because that is what store_field uses to indicate that this
 968          is a bit field, but passing VOIDmode to operand_subword_force
 969          is not allowed.
 970
 971          The mode must be fixed-size, since insertions into variable-sized
 972          objects are meant to be handled before calling this function.  */
 973       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
 974       if (value_mode == VOIDmode)
 975         value_mode
 976           = smallest_int_mode_for_size (nwords * BITS_PER_WORD).require ();
 977
           /* Remember where we started so that a failed word store can
              discard everything emitted by this loop.  */
 978       last = get_last_insn ();
 979       for (int i = 0; i < nwords; i++)
 980         {
 981           /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
 982              except maybe for the last iteration.  */
 983           const unsigned HOST_WIDE_INT new_bitsize
 984             = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 985           /* Bit offset from the starting bit number in the target.  */
 986           const unsigned int bit_offset
 987             = backwards ^ reverse
 988               ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
 989               : i * BITS_PER_WORD;
 990
 991           /* No further action is needed if the target is a register and if
 992              this field lies completely outside that register.  */
 993           if (REG_P (op0) && known_ge (bitnum + bit_offset,
 994                                        GET_MODE_BITSIZE (GET_MODE (op0))))
 995             {
 996               if (backwards ^ reverse)
 997                 continue;
 998               /* For forward operation we are finished.  */
 999               return true;
1000             }
1001
1002           /* Starting word number in the value.  */
1003           const unsigned int wordnum
1004             = backwards
1005               ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
1006               : i;
1007           /* The chunk of the value in word_mode.  We use bit-field extraction
1008              in BLKmode to handle unaligned memory references and to shift the
1009              last chunk right on big-endian machines if need be.  */
1010           rtx value_word
1011             = fieldmode == BLKmode
1012               ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
1013                                    1, NULL_RTX, word_mode, word_mode, false,
1014                                    NULL)
1015               : operand_subword_force (value, wordnum, value_mode);
1016
1017           if (!store_bit_field_1 (op0, new_bitsize,
1018                                   bitnum + bit_offset,
1019                                   bitregion_start, bitregion_end,
1020                                   word_mode,
1021                                   value_word, reverse, fallback_p, false))
1022             {
1023               delete_insns_since (last);
1024               return false;
1025             }
1026         }
1027       return true;
1028     }
1029
1030   /* If VALUE has a floating-point or complex mode, access it as an
1031      integer of the corresponding size.  This can occur on a machine
1032      with 64 bit registers that uses SFmode for float.  It can also
1033      occur for unaligned float or complex fields.  */
1034   rtx orig_value = value;
1035   scalar_int_mode value_mode;
1036   if (GET_MODE (value) == VOIDmode)
1037     /* By this point we've dealt with values that are bigger than a word,
1038        so word_mode is a conservatively correct choice.  */
1039     value_mode = word_mode;
1040   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1041     {
1042       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1043       value = gen_reg_rtx (value_mode);
1044       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1045     }
1046
1047   /* If OP0 is a multi-word register, narrow it to the affected word.
1048      If the region spans two words, defer to store_split_bit_field.
1049      Don't do this if op0 is a single hard register wider than word
1050      such as a float or vector register.  */
1051   if (!MEM_P (op0)
1052       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1053       && (!REG_P (op0)
1054           || !HARD_REGISTER_P (op0)
1055           || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1056     {
1057       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1058         {
1059           if (!fallback_p)
1060             return false;
1061
1062           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1063                                  bitregion_start, bitregion_end,
1064                                  value, value_mode, reverse);
1065           return true;
1066         }
1067       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1068                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1069       gcc_assert (op0);
1070       op0_mode = word_mode;
1071       bitnum %= BITS_PER_WORD;
1072     }
1073
1074   /* From here on we can assume that the field to be stored in fits
1075      within a word.  If the destination is a register, it too fits
1076      in a word.  */
1077
1078   extraction_insn insv;
       /* For a non-MEM destination, try the target's insv pattern.  */
1079   if (!MEM_P (op0)
1080       && !reverse
1081       && get_best_reg_extraction_insn (&insv, EP_insv,
1082                                        GET_MODE_BITSIZE (op0_mode.require ()),
1083                                        fieldmode)
1084       && store_bit_field_using_insv (&insv, op0, op0_mode,
1085                                      bitsize, bitnum, value, value_mode))
1086     return true;
1087
1088   /* If OP0 is a memory, try copying it to a register and seeing if a
1089      cheap register alternative is available.  */
1090   if (MEM_P (op0) && !reverse)
1091     {
1092       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1093                                         fieldmode)
1094           && store_bit_field_using_insv (&insv, op0, op0_mode,
1095                                          bitsize, bitnum, value, value_mode))
1096         return true;
1097
1098       rtx_insn *last = get_last_insn ();
1099
1100       /* Try loading part of OP0 into a register, inserting the bitfield
1101          into that, and then copying the result back to OP0.  */
1102       unsigned HOST_WIDE_INT bitpos;
1103       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1104                                                bitregion_start, bitregion_end,
1105                                                fieldmode, &bitpos);
1106       if (xop0)
1107         {
1108           rtx tempreg = copy_to_reg (xop0);
               /* FALLBACK_P is false for this attempt, so the recursive call
                  reports failure instead of using store_fixed_bit_field.  */
1109           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1110                                  bitregion_start, bitregion_end,
1111                                  fieldmode, orig_value, reverse, false, false))
1112             {
1113               emit_move_insn (xop0, tempreg);
1114               return true;
1115             }
1116           delete_insns_since (last);
1117         }
1118     }
1119
       /* None of the strategies above applied.  Unless the caller asked us
          not to, fall back to store_fixed_bit_field.  */
1120   if (!fallback_p)
1121     return false;
1122
1123   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1124                          bitregion_end, value, value_mode, reverse);
1125   return true;
1126 }
1127
1128 /* Generate code to store value from rtx VALUE
1129    into a bit-field within structure STR_RTX
1130    containing BITSIZE bits starting at bit BITNUM.
1131
1132    BITREGION_START is bitpos of the first bitfield in this region.
1133    BITREGION_END is the bitpos of the ending bitfield in this region.
1134    These two fields are 0, if the C++ memory model does not apply,
1135    or we are not interested in keeping track of bitfield regions.
1136
1137    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1138
1139    If REVERSE is true, the store is to be done in reverse order.
1140
1141    If UNDEFINED_P is true then STR_RTX is currently undefined.  */
1142
1143 void
1144 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1145                  poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1146                  machine_mode fieldmode,
1147                  rtx value, bool reverse, bool undefined_p)
1148 {
1149   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1150   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1151   scalar_int_mode int_mode;
1152   if (bitsize.is_constant (&ibitsize)
1153       && bitnum.is_constant (&ibitnum)
1154       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1155       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1156                                      bitregion_start, bitregion_end))
1157     {
1158       /* Storing of a full word can be done with a simple store.
1159          We know here that the field can be accessed with one single
1160          instruction.  For targets that support unaligned memory,
1161          an unaligned access may be necessary.  */
1162       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1163         {
1164           str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1165                                              ibitnum / BITS_PER_UNIT);
1166           if (reverse)
1167             value = flip_storage_order (int_mode, value);
1168           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1169           emit_move_insn (str_rtx, value);
1170         }
1171       else
1172         {
1173           rtx temp;
1174
               /* Narrow STR_RTX to an INT_MODE reference that holds the whole
                  field, load it into a register, insert the field there and
                  store the register back.  */
1175           str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1176                                           ibitnum, &ibitnum);
1177           gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1178           temp = copy_to_reg (str_rtx);
1179           if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1180                                   int_mode, value, reverse, true, undefined_p))
1181             gcc_unreachable ();
1182
1183           emit_move_insn (str_rtx, temp);
1184         }
1185
1186       return;
1187     }
1188
1189   /* Under the C++0x memory model, we must not touch bits outside the
1190      bit region.  Adjust the address to start at the beginning of the
1191      bit region.  */
1192   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1193     {
1194       scalar_int_mode best_mode;
1195       machine_mode addr_mode = VOIDmode;
1196
           /* Rebase BITNUM and BITREGION_END so that they are relative to
              the start of the region; BITREGION_START then becomes 0.  */
1197       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1198       bitnum -= bitregion_start;
1199       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1200       bitregion_end -= bitregion_start;
1201       bitregion_start = 0;
           /* ADDR_MODE stays VOIDmode unless BITSIZE and BITNUM are constant
              and get_best_mode finds a mode for the access.  */
1202       if (bitsize.is_constant (&ibitsize)
1203           && bitnum.is_constant (&ibitnum)
1204           && get_best_mode (ibitsize, ibitnum,
1205                             bitregion_start, bitregion_end,
1206                             MEM_ALIGN (str_rtx), INT_MAX,
1207                             MEM_VOLATILE_P (str_rtx), &best_mode))
1208         addr_mode = best_mode;
1209       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1210                                               offset, size);
1211     }
1212
       /* FALLBACK_P is true here, so store_bit_field_1 is not expected
          to fail.  */
1213   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1214                           bitregion_start, bitregion_end,
1215                           fieldmode, value, reverse, true, undefined_p))
1216     gcc_unreachable ();
1217 }
1218 \f
1219 /* Use shifts and boolean operations to store VALUE into a bit field of
1220    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1221    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1222    the mode of VALUE.
1223
1224    If REVERSE is true, the store is to be done in reverse order.  */
1225
1226 static void
1227 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1228                        unsigned HOST_WIDE_INT bitsize,
1229                        unsigned HOST_WIDE_INT bitnum,
1230                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1231                        rtx value, scalar_int_mode value_mode, bool reverse)
1232 {
1233   /* There is a case not handled here:
1234      a structure with a known alignment of just a halfword
1235      and a field split across two aligned halfwords within the structure.
1236      Or likewise a structure with a known alignment of just a byte
1237      and a field split across two bytes.
1238      Such cases are not supposed to be able to occur.  */
1239
1240   scalar_int_mode best_mode;
1241   if (MEM_P (op0))
1242     {
           /* Never access more than a word at a time, nor more than OP0's
              own mode when that is narrower.  */
1243       unsigned int max_bitsize = BITS_PER_WORD;
1244       scalar_int_mode imode;
1245       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1246         max_bitsize = GET_MODE_BITSIZE (imode);
1247
1248       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1249                           MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1250                           &best_mode))
1251         {
1252           /* The only way this should occur is if the field spans word
1253              boundaries.  */
1254           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1255                                  bitregion_start, bitregion_end,
1256                                  value, value_mode, reverse);
1257           return;
1258         }
1259
           /* Narrow OP0 to a BEST_MODE reference; BITNUM is updated to the
              field's position within that reference.  */
1260       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1261     }
1262   else
         /* A non-MEM OP0 is accessed in its own mode.  */
1263     best_mode = op0_mode.require ();
1264
1265   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1266                            value, value_mode, reverse);
1267 }
1268
1269 /* Helper function for store_fixed_bit_field, stores
1270    the bit field always using MODE, which is the mode of OP0.  The other
1271    arguments are as for store_fixed_bit_field.  */
1272
1273 static void
1274 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1275                          unsigned HOST_WIDE_INT bitsize,
1276                          unsigned HOST_WIDE_INT bitnum,
1277                          rtx value, scalar_int_mode value_mode, bool reverse)
1278 {
1279   rtx temp;
       /* ALL_ZERO and ALL_ONE are set when VALUE is a constant that, truncated
          to BITSIZE bits, is all zeros or all ones respectively; the IOR or
          the AND step below is then skipped.  */
1280   int all_zero = 0;
1281   int all_one = 0;
1282
1283   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1284      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1285
1286   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1287     /* BITNUM is the distance between our msb
1288        and that of the containing datum.
1289        Convert it to the distance from the lsb.  */
1290     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1291
1292   /* Now BITNUM is always the distance between our lsb
1293      and that of OP0.  */
1294
1295   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1296      we must first convert its mode to MODE.  */
1297
1298   if (CONST_INT_P (value))
1299     {
1300       unsigned HOST_WIDE_INT v = UINTVAL (value);
1301
1302       if (bitsize < HOST_BITS_PER_WIDE_INT)
1303         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1304
1305       if (v == 0)
1306         all_zero = 1;
1307       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1308                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1309                || (bitsize == HOST_BITS_PER_WIDE_INT
1310                    && v == HOST_WIDE_INT_M1U))
1311         all_one = 1;
1312
1313       value = lshift_value (mode, v, bitnum);
1314     }
1315   else
1316     {
           /* VALUE must be masked to BITSIZE bits unless VALUE_MODE is exactly
              BITSIZE bits wide, or the field extends to the most significant
              bit of MODE so that the shift below discards the excess bits.  */
1317       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1318                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1319
1320       if (value_mode != mode)
1321         value = convert_to_mode (mode, value, 1);
1322
1323       if (must_and)
1324         value = expand_binop (mode, and_optab, value,
1325                               mask_rtx (mode, 0, bitsize, 0),
1326                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1327       if (bitnum > 0)
1328         value = expand_shift (LSHIFT_EXPR, mode, value,
1329                               bitnum, NULL_RTX, 1);
1330     }
1331
1332   if (reverse)
1333     value = flip_storage_order (mode, value);
1334
1335   /* Now clear the chosen bits in OP0,
1336      except that if VALUE is -1 we need not bother.  */
1337   /* We keep the intermediates in registers to allow CSE to combine
1338      consecutive bitfield assignments.  */
1339
1340   temp = force_reg (mode, op0);
1341
1342   if (! all_one)
1343     {
1344       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1345       if (reverse)
1346         mask = flip_storage_order (mode, mask);
1347       temp = expand_binop (mode, and_optab, temp, mask,
1348                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1349       temp = force_reg (mode, temp);
1350     }
1351
1352   /* Now logical-or VALUE into OP0, unless it is zero.  */
1353
1354   if (! all_zero)
1355     {
1356       temp = expand_binop (mode, ior_optab, temp, value,
1357                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1358       temp = force_reg (mode, temp);
1359     }
1360
       /* Store the result back into OP0 unless TEMP is OP0 itself.  */
1361   if (op0 != temp)
1362     {
1363       op0 = copy_rtx (op0);
1364       emit_move_insn (op0, temp);
1365     }
1366 }
1367 \f
1368 /* Store a bit field that is split across multiple accessible memory objects.
1369
1370    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1371    BITSIZE is the field width; BITPOS the position of its first bit
1372    (within the word).
1373    VALUE is the value to store, which has mode VALUE_MODE.
1374    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1375    a BLKmode MEM.
        BITREGION_START and BITREGION_END are passed on to
        store_fixed_bit_field; a nonzero BITREGION_END also limits how far
        each piece stored to a MEM may extend.
1376
1377    If REVERSE is true, the store is to be done in reverse order.
1378
1379    This does not yet handle fields wider than BITS_PER_WORD.  */
1380
1381 static void
1382 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1383                        unsigned HOST_WIDE_INT bitsize,
1384                        unsigned HOST_WIDE_INT bitpos,
1385                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1386                        rtx value, scalar_int_mode value_mode, bool reverse)
1387 {
1388   unsigned int unit, total_bits, bitsdone = 0;
1389
1390   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1391      much at a time.  */
1392   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1393     unit = BITS_PER_WORD;
1394   else
1395     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1396
1397   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1398      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1399      again, and we will mutually recurse forever.  */
1400   if (MEM_P (op0) && op0_mode.exists ())
1401     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1402
1403   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1404      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1405      that VALUE might be a floating-point constant.  */
1406   if (CONSTANT_P (value) && !CONST_INT_P (value))
1407     {
1408       rtx word = gen_lowpart_common (word_mode, value);
1409
1410       if (word && (value != word))
1411         value = word;
1412       else
1413         value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1414       value_mode = word_mode;
1415     }
1416
1417   total_bits = GET_MODE_BITSIZE (value_mode);
1418
1419   while (bitsdone < bitsize)
1420     {
1421       unsigned HOST_WIDE_INT thissize;
1422       unsigned HOST_WIDE_INT thispos;
1423       unsigned HOST_WIDE_INT offset;
1424       rtx part;
1425
1426       offset = (bitpos + bitsdone) / unit;
1427       thispos = (bitpos + bitsdone) % unit;
1428
1429       /* When region of bytes we can touch is restricted, decrease
1430          UNIT close to the end of the region as needed.  If op0 is a REG
1431          or SUBREG of REG, don't do this, as there can't be data races
1432          on a register and we can expand shorter code in some cases.  */
1433       if (maybe_ne (bitregion_end, 0U)
1434           && unit > BITS_PER_UNIT
1435           && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1436           && !REG_P (op0)
1437           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1438         {
1439           unit = unit / 2;
1440           continue;
1441         }
1442
1443       /* THISSIZE must not overrun a word boundary.  Otherwise,
1444          store_fixed_bit_field will call us again, and we will mutually
1445          recurse forever.  */
1446       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1447       thissize = MIN (thissize, unit - thispos);
1448
1449       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1450         {
1451           /* Fetch successively less significant portions.  */
               /* The mask is built from an unsigned 1 so that a THISSIZE of
                  HOST_BITS_PER_WIDE_INT - 1 does not overflow a signed
                  HOST_WIDE_INT.  */
1452           if (CONST_INT_P (value))
1453             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1454                              >> (bitsize - bitsdone - thissize))
1455                             & ((HOST_WIDE_INT_1U << thissize) - 1));
1456           /* Likewise, but the source is little-endian.  */
1457           else if (reverse)
1458             part = extract_fixed_bit_field (word_mode, value, value_mode,
1459                                             thissize,
1460                                             bitsize - bitsdone - thissize,
1461                                             NULL_RTX, 1, false);
1462           else
1463             /* The args are chosen so that the last part includes the
1464                lsb.  Give extract_bit_field the value it needs (with
1465                endianness compensation) to fetch the piece we want.  */
1466             part = extract_fixed_bit_field (word_mode, value, value_mode,
1467                                             thissize,
1468                                             total_bits - bitsize + bitsdone,
1469                                             NULL_RTX, 1, false);
1470         }
1471       else
1472         {
1473           /* Fetch successively more significant portions.  */
               /* As above, use an unsigned 1 when building the mask.  */
1474           if (CONST_INT_P (value))
1475             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1476                              >> bitsdone)
1477                             & ((HOST_WIDE_INT_1U << thissize) - 1));
1478           /* Likewise, but the source is big-endian.  */
1479           else if (reverse)
1480             part = extract_fixed_bit_field (word_mode, value, value_mode,
1481                                             thissize,
1482                                             total_bits - bitsdone - thissize,
1483                                             NULL_RTX, 1, false);
1484           else
1485             part = extract_fixed_bit_field (word_mode, value, value_mode,
1486                                             thissize, bitsdone, NULL_RTX,
1487                                             1, false);
1488         }
1489
1490       /* If OP0 is a register, then handle OFFSET here.  */
1491       rtx op0_piece = op0;
1492       opt_scalar_int_mode op0_piece_mode = op0_mode;
1493       if (SUBREG_P (op0) || REG_P (op0))
1494         {
1495           scalar_int_mode imode;
1496           if (op0_mode.exists (&imode)
1497               && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1498             {
1499               if (offset)
1500                 op0_piece = const0_rtx;
1501             }
1502           else
1503             {
1504               op0_piece = operand_subword_force (op0,
1505                                                  offset * unit / BITS_PER_WORD,
1506                                                  GET_MODE (op0));
1507               op0_piece_mode = word_mode;
1508             }
1509           offset &= BITS_PER_WORD / unit - 1;
1510         }
1511
1512       /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is const0_rtx,
1513          it is just an out-of-bounds access.  Ignore it.  */
1514       if (op0_piece != const0_rtx)
1515         store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1516                                offset * unit + thispos, bitregion_start,
1517                                bitregion_end, part, word_mode, reverse);
1518       bitsdone += thissize;
1519     }
1520 }
1521 \f
1522 /* A subroutine of extract_bit_field_1 that converts return value X
1523    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1524    to extract_bit_field.  */
1525
1526 static rtx
1527 convert_extracted_bit_field (rtx x, machine_mode mode,
1528                              machine_mode tmode, bool unsignedp)
1529 {
1530   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1531     return x;
1532
1533   /* If the x mode is not a scalar integral, first convert to the
1534      integer mode of that size and then access it as a floating-point
1535      value via a SUBREG.  */
1536   if (!SCALAR_INT_MODE_P (tmode))
1537     {
1538       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1539       x = convert_to_mode (int_mode, x, unsignedp);
1540       x = force_reg (int_mode, x);
1541       return gen_lowpart (tmode, x);
1542     }
1543
1544   return convert_to_mode (tmode, x, unsignedp);
1545 }
1546
1547 /* Try to use an ext(z)v pattern to extract a field from OP0.
1548    Return the extracted value on success, otherwise return null.
1549    EXTV describes the extraction instruction to use.  If OP0_MODE
1550    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1551    The other arguments are as for extract_bit_field.

        Null is returned when BITSIZE is zero or exceeds the width of
        EXTV's field mode, when OP0 is a SUBREG whose mode differs from
        that field mode, and when maybe_expand_insn does not accept the
        operands.  On success the result is TARGET itself if the pattern
        could write to it (directly or through a lowpart), otherwise the
        pattern's output passed through convert_extracted_bit_field.  */
1552
1553 static rtx
1554 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1555                               opt_scalar_int_mode op0_mode,
1556                               unsigned HOST_WIDE_INT bitsize,
1557                               unsigned HOST_WIDE_INT bitnum,
1558                               int unsignedp, rtx target,
1559                               machine_mode mode, machine_mode tmode)
1560 {
1561   class expand_operand ops[4];
       /* SPEC_TARGET remembers the destination we would like the value to
          end up in.  SPEC_TARGET_SUBREG, when set, is the EXT_MODE lowpart
          of SPEC_TARGET that is handed to the pattern instead.  */
1562   rtx spec_target = target;
1563   rtx spec_target_subreg = 0;
1564   scalar_int_mode ext_mode = extv->field_mode;
       /* UNIT is the width in bits of the mode the pattern operates on.  */
1565   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1566
       /* Give up on empty fields and on fields wider than EXT_MODE.  */
1567   if (bitsize == 0 || unit < bitsize)
1568     return NULL_RTX;
1569
1570   if (MEM_P (op0))
1571     /* Get a reference to the first byte of the field.  */
1572     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1573                                 &bitnum);
1574   else
1575     {
1576       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1577       if (BYTES_BIG_ENDIAN)
1578         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1579
1580       /* If op0 is a register, we need it in EXT_MODE to make it
1581          acceptable to the format of ext(z)v.  */
1582       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1583         return NULL_RTX;
1584       if (REG_P (op0) && op0_mode.require () != ext_mode)
1585         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1586     }
1587
1588   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1589      "backwards" from the size of the unit we are extracting from.
1590      Otherwise, we count bits from the most significant on a
1591      BYTES/BITS_BIG_ENDIAN machine.  */
1592
1593   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1594     bitnum = unit - bitsize - bitnum;
1595
       /* With no suggested target, aim for a fresh TMODE pseudo.  */
1596   if (target == 0)
1597     target = spec_target = gen_reg_rtx (tmode);
1598
1599   if (GET_MODE (target) != ext_mode)
1600     {
1601       rtx temp;
1602       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1603          between the mode of the extraction (word_mode) and the target
1604          mode.  Instead, create a temporary and use convert_move to set
1605          the target.  */
1606       if (REG_P (target)
1607           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1608           && (temp = gen_lowpart_if_possible (ext_mode, target)))
1609         {
1610           target = temp;
               /* Only remember the lowpart as an alias of SPEC_TARGET when
                  partial_subreg_p holds for (SPEC_TARGET's mode, EXT_MODE);
                  the final return statements test for this alias.  */
1611           if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1612             spec_target_subreg = target;
1613         }
1614       else
             /* Extract into a scratch EXT_MODE pseudo; the result is
                converted to the requested mode after expansion.  */
1615         target = gen_reg_rtx (ext_mode);
1616     }
1617
       /* Operands, in order: destination, source structure, field width
          and field position.  */
1618   create_output_operand (&ops[0], target, ext_mode);
1619   create_fixed_operand (&ops[1], op0);
1620   create_integer_operand (&ops[2], bitsize);
1621   create_integer_operand (&ops[3], bitnum);
1622   if (maybe_expand_insn (extv->icode, 4, ops))
1623     {
1624       target = ops[0].value;
           /* The pattern wrote straight into the requested destination.  */
1625       if (target == spec_target)
1626         return target;
           /* The pattern wrote to the EXT_MODE lowpart of SPEC_TARGET;
              hand back SPEC_TARGET itself.  */
1627       if (target == spec_target_subreg)
1628         return spec_target;
1629       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1630     }
1631   return NULL_RTX;
1632 }
1633
1634 /* See whether it would be valid to extract the part of OP0 with
1635    mode OP0_MODE described by BITNUM and BITSIZE into a value of
1636    mode MODE using a subreg operation.
1637    Return the subreg if so, otherwise return null.  */
1638
1639 static rtx
1640 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1641                              machine_mode op0_mode,
1642                              poly_uint64 bitsize, poly_uint64 bitnum)
1643 {
1644   poly_uint64 bytenum;
1645   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1646       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1647       && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1648       && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1649     return force_subreg (mode, op0, op0_mode, bytenum);
1650   return NULL_RTX;
1651 }
1652
1653 /* A subroutine of extract_bit_field, with the same arguments.
1654    If UNSIGNEDP is -1, the result need not be sign or zero extended.
1655    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1656    if we can find no other means of implementing the operation.
1657    If FALLBACK_P is false, return NULL instead.

        The function tries, in order: returning a whole register as-is,
        vec_extract patterns (vector-from-vector, then element-from-vector),
        a subreg of a vector, a subreg of an integer-mode register, and a
        simple memory load.  Anything left over is passed on to
        extract_integral_bit_field, which requires BITSIZE and BITNUM to
        be constants.

        If ALT_RTL is nonnull, *ALT_RTL is set to TARGET when one of the
        vec_extract patterns expands and ops[0].target is still set
        afterwards.  */
1658
1659 static rtx
1660 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1661                      int unsignedp, rtx target, machine_mode mode,
1662                      machine_mode tmode, bool reverse, bool fallback_p,
1663                      rtx *alt_rtl)
1664 {
1665   rtx op0 = str_rtx;
1666   machine_mode mode1;
1667
       /* A VOIDmode TMODE means the caller has no preference beyond MODE.  */
1668   if (tmode == VOIDmode)
1669     tmode = mode;
1670
       /* Look through SUBREGs, folding each SUBREG_BYTE into BITNUM so that
          BITNUM counts from the start of the innermost object.  */
1671   while (GET_CODE (op0) == SUBREG)
1672     {
1673       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1674       op0 = SUBREG_REG (op0);
1675     }
1676
1677   /* If we have an out-of-bounds access to a register, just return an
1678      uninitialized register of the required mode.  This can occur if the
1679      source code contains an out-of-bounds access to a small array.  */
1680   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1681     return gen_reg_rtx (tmode);
1682
1683   if (REG_P (op0)
1684       && mode == GET_MODE (op0)
1685       && known_eq (bitnum, 0U)
1686       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1687     {
1688       if (reverse)
1689         op0 = flip_storage_order (mode, op0);
1690       /* We're trying to extract a full register from itself.  */
1691       return op0;
1692     }
1693
1694   /* First try to check for vector from vector extractions.  */
1695   if (VECTOR_MODE_P (GET_MODE (op0))
1696       && !MEM_P (op0)
1697       && VECTOR_MODE_P (tmode)
1698       && known_eq (bitsize, GET_MODE_PRECISION (tmode))
1699       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1700     {
1701       machine_mode new_mode = GET_MODE (op0);
           /* If the element modes differ, look for a vector mode with
              TMODE's element mode and the same total size as OP0.
              NEW_MODE becomes VOIDmode when there is none, which disables
              the vec_extract attempt below.  */
1702       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1703         {
1704           scalar_mode inner_mode = GET_MODE_INNER (tmode);
1705           poly_uint64 nunits;
1706           if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1707                            GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1708               || !related_vector_mode (tmode, inner_mode,
1709                                        nunits).exists (&new_mode)
1710               || maybe_ne (GET_MODE_SIZE (new_mode),
1711                            GET_MODE_SIZE (GET_MODE (op0))))
1712             new_mode = VOIDmode;
1713         }
           /* POS is BITNUM expressed as a count of TMODE-sized pieces.  */
1714       poly_uint64 pos;
1715       if (new_mode != VOIDmode
1716           && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1717               != CODE_FOR_nothing)
1718           && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1719         {
1720           class expand_operand ops[3];
1721           machine_mode outermode = new_mode;
1722           machine_mode innermode = tmode;
1723           enum insn_code icode
1724             = convert_optab_handler (vec_extract_optab, outermode, innermode);
1725
1726           if (new_mode != GET_MODE (op0))
1727             op0 = gen_lowpart (new_mode, op0);
1728           create_output_operand (&ops[0], target, innermode);
1729           ops[0].target = 1;
1730           create_input_operand (&ops[1], op0, outermode);
1731           create_integer_operand (&ops[2], pos);
1732           if (maybe_expand_insn (icode, 3, ops))
1733             {
1734               if (alt_rtl && ops[0].target)
1735                 *alt_rtl = target;
1736               target = ops[0].value;
1737               if (GET_MODE (target) != mode)
1738                 return gen_lowpart (tmode, target);
1739               return target;
1740             }
1741         }
1742     }
1743
1744   /* See if we can get a better vector mode before extracting.  */
1745   if (VECTOR_MODE_P (GET_MODE (op0))
1746       && !MEM_P (op0)
1747       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1748     {
1749       machine_mode new_mode;
1750
           /* Start the search at the narrowest vector mode of the class
              that corresponds to TMODE's mode class.  */
1751       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1752         new_mode = MIN_MODE_VECTOR_FLOAT;
1753       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1754         new_mode = MIN_MODE_VECTOR_FRACT;
1755       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1756         new_mode = MIN_MODE_VECTOR_UFRACT;
1757       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1758         new_mode = MIN_MODE_VECTOR_ACCUM;
1759       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1760         new_mode = MIN_MODE_VECTOR_UACCUM;
1761       else
1762         new_mode = MIN_MODE_VECTOR_INT;
1763
           /* Stop at the first mode that has OP0's size, elements of
              TMODE's size and BITSIZE bits, a BITNUM that falls on an
              element boundary, and that the target supports and can tie
              with OP0's mode.
              NOTE(review): the test after the loop relies on NEW_MODE
              being VOIDmode when no mode matched -- confirm against
              FOR_EACH_MODE_FROM.  */
1764       FOR_EACH_MODE_FROM (new_mode, new_mode)
1765         if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1766             && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1767             && known_eq (bitsize, GET_MODE_UNIT_PRECISION (new_mode))
1768             && multiple_p (bitnum, GET_MODE_UNIT_PRECISION (new_mode))
1769             && targetm.vector_mode_supported_p (new_mode)
1770             && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1771           break;
1772       if (new_mode != VOIDmode)
1773         op0 = gen_lowpart (new_mode, op0);
1774     }
1775
1776   /* Use vec_extract patterns for extracting parts of vectors whenever
1777      available.  If that fails, see whether the current modes and bitregion
1778      give a natural subreg.  */
1779   machine_mode outermode = GET_MODE (op0);
1780   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1781     {
1782       scalar_mode innermode = GET_MODE_INNER (outermode);
1783
1784       enum insn_code icode
1785         = convert_optab_handler (vec_extract_optab, outermode, innermode);
1786
           /* POS is the index of the element being extracted.  */
1787       poly_uint64 pos;
1788       if (icode != CODE_FOR_nothing
1789           && known_eq (bitsize, GET_MODE_PRECISION (innermode))
1790           && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
1791         {
1792           class expand_operand ops[3];
1793
1794           create_output_operand (&ops[0], target,
1795                                  insn_data[icode].operand[0].mode);
1796           ops[0].target = 1;
1797           create_input_operand (&ops[1], op0, outermode);
1798           create_integer_operand (&ops[2], pos);
1799           if (maybe_expand_insn (icode, 3, ops))
1800             {
1801               if (alt_rtl && ops[0].target)
1802                 *alt_rtl = target;
1803               target = ops[0].value;
1804               if (GET_MODE (target) != mode)
1805                 return gen_lowpart (tmode, target);
1806               return target;
1807             }
1808         }
1809       /* Using subregs is useful if we're extracting one register vector
1810          from a multi-register vector.  extract_bit_field_as_subreg checks
1811          for valid bitsize and bitnum, so we don't need to do that here.  */
1812       if (VECTOR_MODE_P (mode))
1813         {
1814           rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
1815                                                  bitsize, bitnum);
1816           if (sub)
1817             return sub;
1818         }
1819     }
1820
1821   /* Make sure we are playing with integral modes.  Pun with subregs
1822      if we aren't.  */
1823   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1824   scalar_int_mode imode;
1825   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1826     {
1827       if (MEM_P (op0))
1828         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1829                                             0, MEM_SIZE (op0));
1830       else if (op0_mode.exists (&imode))
1831         {
1832           op0 = gen_lowpart (imode, op0);
1833
1834           /* If we got a SUBREG, force it into a register since we
1835              aren't going to be able to do another SUBREG on it.  */
1836           if (GET_CODE (op0) == SUBREG)
1837             op0 = force_reg (imode, op0);
1838         }
1839       else
1840         {
               /* No integer mode corresponds to OP0's mode: copy OP0 into
                  a stack temporary and continue with a BLKmode reference
                  to that memory.  */
1841           poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1842           rtx mem = assign_stack_temp (GET_MODE (op0), size);
1843           emit_move_insn (mem, op0);
1844           op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1845         }
1846     }
1847
1848   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1849      If that's wrong, the solution is to test for it and set TARGET to 0
1850      if needed.  */
1851
1852   /* Get the mode of the field to use for atomic access or subreg
1853      conversion.  This is the mode of TMODE's class that is exactly
           BITSIZE bits wide when TMODE is a scalar integer mode and such a
           mode exists, and MODE otherwise.  */
1854   if (!SCALAR_INT_MODE_P (tmode)
1855       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1856     mode1 = mode;
1857   gcc_assert (mode1 != BLKmode);
1858
1859   /* Extraction of a full MODE1 value can be done with a subreg as long
1860      as the least significant bit of the value is the least significant
1861      bit of either OP0 or a word of OP0.  */
1862   if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
1863     {
1864       rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
1865                                              bitsize, bitnum);
1866       if (sub)
1867         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1868     }
1869
1870   /* Extraction of a full MODE1 value can be done with a load as long as
1871      the field is on a byte boundary and is sufficiently aligned.  */
1872   poly_uint64 bytenum;
1873   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1874     {
1875       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1876       if (reverse)
1877         op0 = flip_storage_order (mode1, op0);
1878       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1879     }
1880
1881   /* If we have a memory source and a non-constant bit offset, restrict
1882      the memory to the referenced bytes.  This is a worst-case fallback
1883      but is useful for things like vector booleans.  */
1884   if (MEM_P (op0) && !bitnum.is_constant ())
1885     {
           /* Split BITNUM into a byte offset, which is applied to the
              address, and the bits left over within the first byte.  OP0
              becomes a BLKmode MEM, so OP0_MODE is reset to "no mode".  */
1886       bytenum = bits_to_bytes_round_down (bitnum);
1887       bitnum = num_trailing_bits (bitnum);
1888       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1889       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1890       op0_mode = opt_scalar_int_mode ();
1891     }
1892
1893   /* It's possible we'll need to handle other cases here for
1894      polynomial bitnum and bitsize.  */
1895
1896   /* From here on we need to be looking at a fixed-size extraction.  */
1897   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1898                                      bitnum.to_constant (), unsignedp,
1899                                      target, mode, tmode, reverse, fallback_p);
1900 }
1901
1902 /* Subroutine of extract_bit_field_1, with the same arguments, except
1903    that BITSIZE and BITNUM are constant.  Handle cases specific to
1904    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1905    otherwise OP0 is a BLKmode MEM.  */
1906
1907 static rtx
1908 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1909                             unsigned HOST_WIDE_INT bitsize,
1910                             unsigned HOST_WIDE_INT bitnum, int unsignedp,
1911                             rtx target, machine_mode mode, machine_mode tmode,
1912                             bool reverse, bool fallback_p)
1913 {
1914   /* Handle fields bigger than a word.  */
1915
1916   if (bitsize > BITS_PER_WORD)
1917     {
1918       /* Here we transfer the words of the field
1919          in the order least significant first.
1920          This is because the most significant word is the one which may
1921          be less than full.  */
1922
1923       const bool backwards = WORDS_BIG_ENDIAN;
1924       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1925       unsigned int i;
1926       rtx_insn *last;
1927
1928       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1929         target = gen_reg_rtx (mode);
1930
1931       /* In case we're about to clobber a base register or something
1932          (see gcc.c-torture/execute/20040625-1.c).   */
1933       if (reg_mentioned_p (target, op0))
1934         target = gen_reg_rtx (mode);
1935
1936       /* Indicate for flow that the entire target reg is being set.  */
1937       emit_clobber (target);
1938
1939       /* The mode must be fixed-size, since extract_bit_field_1 handles
1940          extractions from variable-sized objects before calling this
1941          function.  */
1942       unsigned int target_size
1943         = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1944       last = get_last_insn ();
1945       for (i = 0; i < nwords; i++)
1946         {
1947           /* If I is 0, use the low-order word in both field and target;
1948              if I is 1, use the next to lowest word; and so on.  */
1949           /* Word number in TARGET to use.  */
1950           unsigned int wordnum
1951             = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1952           /* Offset from start of field in OP0.  */
1953           unsigned int bit_offset = (backwards ^ reverse
1954                                      ? MAX ((int) bitsize - ((int) i + 1)
1955                                             * BITS_PER_WORD,
1956                                             0)
1957                                      : (int) i * BITS_PER_WORD);
1958           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1959           rtx result_part
1960             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1961                                              bitsize - i * BITS_PER_WORD),
1962                                    bitnum + bit_offset,
1963                                    (unsignedp ? 1 : -1), target_part,
1964                                    mode, word_mode, reverse, fallback_p, NULL);
1965
1966           gcc_assert (target_part);
1967           if (!result_part)
1968             {
1969               delete_insns_since (last);
1970               return NULL;
1971             }
1972
1973           if (result_part != target_part)
1974             emit_move_insn (target_part, result_part);
1975         }
1976
1977       if (unsignedp)
1978         {
1979           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1980              need to be zero'd out.  */
1981           if (target_size > nwords * UNITS_PER_WORD)
1982             {
1983               unsigned int i, total_words;
1984
1985               total_words = target_size / UNITS_PER_WORD;
1986               for (i = nwords; i < total_words; i++)
1987                 emit_move_insn
1988                   (operand_subword (target,
1989                                     backwards ? total_words - i - 1 : i,
1990                                     1, VOIDmode),
1991                    const0_rtx);
1992             }
1993           return target;
1994         }
1995
1996       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1997       target = expand_shift (LSHIFT_EXPR, mode, target,
1998                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1999       return expand_shift (RSHIFT_EXPR, mode, target,
2000                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
2001     }
2002
2003   /* If OP0 is a multi-word register, narrow it to the affected word.
2004      If the region spans two words, defer to extract_split_bit_field.  */
2005   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
2006     {
2007       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
2008         {
2009           if (!fallback_p)
2010             return NULL_RTX;
2011           target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2012                                             unsignedp, reverse);
2013           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
2014         }
2015       /* If OP0 is a hard register, copy it to a pseudo before calling
2016          force_subreg.  */
2017       if (REG_P (op0) && HARD_REGISTER_P (op0))
2018         op0 = copy_to_reg (op0);
2019       op0 = force_subreg (word_mode, op0, op0_mode.require (),
2020                           bitnum / BITS_PER_WORD * UNITS_PER_WORD);
2021       op0_mode = word_mode;
2022       bitnum %= BITS_PER_WORD;
2023     }
2024
2025   /* From here on we know the desired field is smaller than a word.
2026      If OP0 is a register, it too fits within a word.  */
2027   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
2028   extraction_insn extv;
2029   if (!MEM_P (op0)
2030       && !reverse
2031       /* ??? We could limit the structure size to the part of OP0 that
2032          contains the field, with appropriate checks for endianness
2033          and TARGET_TRULY_NOOP_TRUNCATION.  */
2034       && get_best_reg_extraction_insn (&extv, pattern,
2035                                        GET_MODE_BITSIZE (op0_mode.require ()),
2036                                        tmode))
2037     {
2038       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2039                                                  bitsize, bitnum,
2040                                                  unsignedp, target, mode,
2041                                                  tmode);
2042       if (result)
2043         return result;
2044     }
2045
2046   /* If OP0 is a memory, try copying it to a register and seeing if a
2047      cheap register alternative is available.  */
2048   if (MEM_P (op0) & !reverse)
2049     {
2050       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2051                                         tmode))
2052         {
2053           rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2054                                                      bitsize, bitnum,
2055                                                      unsignedp, target, mode,
2056                                                      tmode);
2057           if (result)
2058             return result;
2059         }
2060
2061       rtx_insn *last = get_last_insn ();
2062
2063       /* Try loading part of OP0 into a register and extracting the
2064          bitfield from that.  */
2065       unsigned HOST_WIDE_INT bitpos;
2066       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2067                                                0, 0, tmode, &bitpos);
2068       if (xop0)
2069         {
2070           xop0 = copy_to_reg (xop0);
2071           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2072                                             unsignedp, target,
2073                                             mode, tmode, reverse, false, NULL);
2074           if (result)
2075             return result;
2076           delete_insns_since (last);
2077         }
2078     }
2079
2080   if (!fallback_p)
2081     return NULL;
2082
2083   /* Find a correspondingly-sized integer field, so we can apply
2084      shifts and masks to it.  */
2085   scalar_int_mode int_mode;
2086   if (!int_mode_for_mode (tmode).exists (&int_mode))
2087     /* If this fails, we should probably push op0 out to memory and then
2088        do a load.  */
2089     int_mode = int_mode_for_mode (mode).require ();
2090
2091   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2092                                     bitnum, target, unsignedp, reverse);
2093
2094   /* Complex values must be reversed piecewise, so we need to undo the global
2095      reversal, convert to the complex mode and reverse again.  */
2096   if (reverse && COMPLEX_MODE_P (tmode))
2097     {
2098       target = flip_storage_order (int_mode, target);
2099       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2100       target = flip_storage_order (tmode, target);
2101     }
2102   else
2103     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2104
2105   return target;
2106 }
2107
2108 /* Generate code to extract a byte-field from STR_RTX
2109    containing BITSIZE bits, starting at BITNUM,
2110    and put it in TARGET if possible (if TARGET is nonzero).
2111    Regardless of TARGET, we return the rtx for where the value is placed.
2112
2113    STR_RTX is the structure containing the byte (a REG or MEM).
2114    UNSIGNEDP is nonzero if this is an unsigned bit field.
2115    MODE is the natural mode of the field value once extracted.
2116    TMODE is the mode the caller would like the value to have;
2117    but the value may be returned with type MODE instead.
2118
2119    If REVERSE is true, the extraction is to be done in reverse order.
2120
2121    If a TARGET is specified and we can store in it at no extra cost,
2122    we do so, and return TARGET.
2123    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2124    if they are equally easy.
2125
2126    If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2127    then *ALT_RTL is set to TARGET (before legitimziation).  */
2128
2129 rtx
2130 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2131                    int unsignedp, rtx target, machine_mode mode,
2132                    machine_mode tmode, bool reverse, rtx *alt_rtl)
2133 {
2134   machine_mode mode1;
2135
2136   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2137   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2138     mode1 = GET_MODE (str_rtx);
2139   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2140     mode1 = GET_MODE (target);
2141   else
2142     mode1 = tmode;
2143
2144   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2145   scalar_int_mode int_mode;
2146   if (bitsize.is_constant (&ibitsize)
2147       && bitnum.is_constant (&ibitnum)
2148       && is_a <scalar_int_mode> (mode1, &int_mode)
2149       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2150                                      int_mode, 0, 0))
2151     {
2152       /* Extraction of a full INT_MODE value can be done with a simple load.
2153          We know here that the field can be accessed with one single
2154          instruction.  For targets that support unaligned memory,
2155          an unaligned access may be necessary.  */
2156       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2157         {
2158           rtx result = adjust_bitfield_address (str_rtx, int_mode,
2159                                                 ibitnum / BITS_PER_UNIT);
2160           if (reverse)
2161             result = flip_storage_order (int_mode, result);
2162           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2163           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2164         }
2165
2166       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2167                                       &ibitnum);
2168       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2169       str_rtx = copy_to_reg (str_rtx);
2170       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2171                                   target, mode, tmode, reverse, true, alt_rtl);
2172     }
2173
2174   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2175                               target, mode, tmode, reverse, true, alt_rtl);
2176 }
2177 \f
2178 /* Extract the BITSIZE-bit field that starts at bit BITNUM of OP0, using
2179    shifts and boolean operations.  If OP0_MODE is defined, it is the mode
2180    of OP0; otherwise OP0 is a BLKmode MEM.
2181
2182    A nonzero UNSIGNEDP means an unsigned bit field (no sign extension).
2183    A true REVERSE means the extraction is to be done in reverse order.
2184
2185    A nonzero TARGET is a suggestion for where to store the value; the
2186    value may nevertheless be returned somewhere else.  If TARGET is not
2187    used, a pseudo-reg of mode TMODE is created for the value.  */
2188
2189 static rtx
2190 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2191                          opt_scalar_int_mode op0_mode,
2192                          unsigned HOST_WIDE_INT bitsize,
2193                          unsigned HOST_WIDE_INT bitnum, rtx target,
2194                          int unsignedp, bool reverse)
2195 {
2196   scalar_int_mode mode;
2197   if (!MEM_P (op0))
2198     /* Anything that is not a MEM must come with a defined OP0_MODE.  */
2199     mode = op0_mode.require ();
2200   else
2201     {
2202       bool found = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2203                                   BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode);
2204       if (!found)
2205         /* This should only occur if the field spans word boundaries.  */
2206         return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2207                                         unsignedp, reverse);
2208
2209       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2210     }
2211   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2212                                     target, unsignedp, reverse);
2213 }
2214
2215 /* Helper function for extract_fixed_bit_field: extract the BITSIZE-bit field
2216    at BITNUM of OP0, always using MODE, which is the mode of OP0.  If UNSIGNEDP
2217    is 0 the field is sign-extended; if it is -1, the result need not be sign or
2218    zero extended.  The other arguments are as for extract_fixed_bit_field.  */
2219
2220 static rtx
2221 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2222                            unsigned HOST_WIDE_INT bitsize,
2223                            unsigned HOST_WIDE_INT bitnum, rtx target,
2224                            int unsignedp, bool reverse)
2225 {
2226   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2227      for invalid input, such as extract equivalent of f5 from
2228      gcc.dg/pr48335-2.c.  */
2229
2230   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2231     /* BITNUM is the distance between our msb and that of OP0.
2232        Convert it to the distance from the lsb.  */
2233     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2234
2235   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2236      We have reduced the big-endian case to the little-endian case.  */
2237   if (reverse)
2238     op0 = flip_storage_order (mode, op0);
2239
2240   if (unsignedp)  /* Nonzero covers both the unsigned and the -1 case.  */
2241     {
2242       if (bitnum)
2243         {
2244           /* If the field does not already start at the lsb,
2245              shift it so it does.  */
2246           /* Maybe propagate the target for the shift.  */
2247           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2248           if (tmode != mode)
2249             subtarget = 0;  /* The shift is done in MODE, not TMODE.  */
2250           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2251         }
2252       /* Convert the value to the desired mode.  TMODE must also be a
2253          scalar integer for this conversion to make sense, since we
2254          shouldn't reinterpret the bits.  */
2255       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2256       if (mode != new_mode)
2257         op0 = convert_to_mode (new_mode, op0, 1);
2258
2259       /* Unless the msb of the field used to be the msb when we shifted,
2260          mask out the upper bits.  The mask is also skipped when the caller
2261          passed UNSIGNEDP == -1, i.e. does not care about the upper bits.  */
2262       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize
2263           && unsignedp != -1)
2264         return expand_binop (new_mode, and_optab, op0,
2265                              mask_rtx (new_mode, 0, bitsize, 0),
2266                              target, 1, OPTAB_LIB_WIDEN);
2267       return op0;
2268     }
2269
2270   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2271      then arithmetic-shift its lsb to the lsb of the word.  */
2272   op0 = force_reg (mode, op0);
2273
2274   /* Find the narrowest integer mode that contains the field.  */
2275
2276   opt_scalar_int_mode mode_iter;
2277   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2278     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2279       break;
2280
2281   mode = mode_iter.require ();
2282   op0 = convert_to_mode (mode, op0, 0);
2283
2284   if (mode != tmode)
2285     target = 0;  /* Both shifts below are done in MODE, not TMODE.  */
2286
2287   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2288     {
2289       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2290       /* Maybe propagate the target for the shift.  */
2291       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2292       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2293     }
2294
2295   return expand_shift (RSHIFT_EXPR, mode, op0,
2296                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2297 }
2298
2299 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx, built for
2300    MODE by immed_wide_int_const, with the value VALUE << BITPOS.  */
2301
2302 static rtx
2303 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2304               int bitpos)
2305 {
2306   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2307 }
2308 \f
2309 /* Extract a bit field that is split across two words
2310    and return an RTX for the result, which is computed in word_mode.
2311
2312    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2313    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2314    UNSIGNEDP is nonzero to zero-extend the contents; zero to sign-extend them.
2315    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2316    a BLKmode MEM.
2317
2318    If REVERSE is true, the extraction is to be done in reverse order.  */
2319
2320 static rtx
2321 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2322                          unsigned HOST_WIDE_INT bitsize,
2323                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2324                          bool reverse)
2325 {
2326   unsigned int unit;
2327   unsigned int bitsdone = 0;  /* Bits of the field extracted so far.  */
2328   rtx result = NULL_RTX;
2329   int first = 1;  /* Nonzero until the first part has been stored.  */
2330
2331   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2332      much at a time.  */
2333   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2334     unit = BITS_PER_WORD;
2335   else
2336     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2337
2338   while (bitsdone < bitsize)
2339     {
2340       unsigned HOST_WIDE_INT thissize;
2341       rtx part;
2342       unsigned HOST_WIDE_INT thispos;
2343       unsigned HOST_WIDE_INT offset;
2344
2345       offset = (bitpos + bitsdone) / unit;
2346       thispos = (bitpos + bitsdone) % unit;
2347
2348       /* THISSIZE must not overrun a word boundary.  Otherwise,
2349          extract_fixed_bit_field will call us again, and we will mutually
2350          recurse forever.  */
2351       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2352       thissize = MIN (thissize, unit - thispos);
2353
2354       /* If OP0 is a register, then handle OFFSET here.  */
2355       rtx op0_piece = op0;
2356       opt_scalar_int_mode op0_piece_mode = op0_mode;
2357       if (SUBREG_P (op0) || REG_P (op0))
2358         {
2359           op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2360           op0_piece_mode = word_mode;
2361           offset = 0;
2362         }
2363
2364       /* Extract the parts in bit-counting order,
2365          whose meaning is determined by BYTES_PER_UNIT.
2366          OFFSET is in UNITs, and UNIT is in bits.  */
2367       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2368                                       thissize, offset * unit + thispos,
2369                                       0, 1, reverse);  /* No target; unsigned.  */
2370       bitsdone += thissize;
2371
2372       /* Shift this part into place for the result.  */
2373       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2374         {
2375           if (bitsize != bitsdone)
2376             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2377                                  bitsize - bitsdone, 0, 1);
2378         }
2379       else
2380         {
2381           if (bitsdone != thissize)
2382             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2383                                  bitsdone - thissize, 0, 1);
2384         }
2385
2386       if (first)
2387         result = part;
2388       else
2389         /* Combine the parts with bitwise or.  This works
2390            because we extracted each part as an unsigned bit field.  */
2391         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2392                                OPTAB_LIB_WIDEN);
2393
2394       first = 0;
2395     }
2396
2397   /* Unsigned bit field: we are done.  */
2398   if (unsignedp)
2399     return result;
2400   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2401   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2402                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2403   return expand_shift (RSHIFT_EXPR, word_mode, result,
2404                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2405 }
2406 \f
2407 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2408    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2409    MODE, fill the upper bits with zeros.  Fail if the layout of either
2410    mode is unknown (as for CC modes) or if the extraction would involve
2411    unprofitable mode punning.  Return the value on success, otherwise
2412    return null.
2413
2414    This is different from gen_lowpart* in these respects:
2415
2416      - the returned value must always be considered an rvalue
2417
2418      - when MODE is wider than SRC_MODE, the extraction involves
2419        a zero extension
2420
2421      - when MODE is smaller than SRC_MODE, the extraction involves
2422        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2423
2424    In other words, this routine performs a computation, whereas the
2425    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2426    operations.  */
2427
2428 rtx
2429 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2430 {
2431   scalar_int_mode int_mode, src_int_mode;
2432
2433   if (mode == src_mode)
2434     return src;  /* Same mode: nothing to extract.  */
2435
2436   if (CONSTANT_P (src))
2437     {
2438       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2439          fails, it will happily create (subreg (symbol_ref)) or similar
2440          invalid SUBREGs.  */
2441       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2442       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2443       if (ret)
2444         return ret;
2445
2446       if (GET_MODE (src) == VOIDmode
2447           || !validate_subreg (mode, src_mode, src, byte))
2448         return NULL_RTX;
2449
2450       src = force_reg (GET_MODE (src), src);
2451       return gen_rtx_SUBREG (mode, src, byte);
2452     }
2453
2454   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2455     return NULL_RTX;  /* The layout of CC modes is unknown.  */
2456
2457   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2458       && targetm.modes_tieable_p (mode, src_mode))
2459     {
2460       rtx x = gen_lowpart_common (mode, src);
2461       if (x)
2462         return x;
2463     }
2464
2465   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2466       || !int_mode_for_mode (mode).exists (&int_mode))
2467     return NULL_RTX;  /* No integer modes to pun through.  */
2468
2469   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2470     return NULL_RTX;
2471   if (!targetm.modes_tieable_p (int_mode, mode))
2472     return NULL_RTX;
2473
2474   src = gen_lowpart (src_int_mode, src);
2475   if (!validate_subreg (int_mode, src_int_mode, src,
2476                         subreg_lowpart_offset (int_mode, src_int_mode)))
2477     return NULL_RTX;
2478
2479   src = convert_modes (int_mode, src_int_mode, src, true);  /* Unsigned.  */
2480   src = gen_lowpart (mode, src);
2481   return src;
2482 }
2483 \f
2484 /* Emit RTL computing TARGET + INC and leave the sum in TARGET.  */
2485
2486 void
2487 expand_inc (rtx target, rtx inc)
2488 {
2489   rtx sum = expand_binop (GET_MODE (target), add_optab, target, inc, target,
2490                           0, OPTAB_LIB_WIDEN);
2491   if (sum == target)
2492     return;
2493   emit_move_insn (target, sum);
2494 }
2495
2496 /* Emit RTL computing TARGET - DEC and leave the difference in TARGET.  */
2497
2498 void
2499 expand_dec (rtx target, rtx dec)
2500 {
2501   rtx diff = expand_binop (GET_MODE (target), sub_optab, target, dec, target,
2502                            0, OPTAB_LIB_WIDEN);
2503   if (diff == target)
2504     return;
2505   emit_move_insn (target, diff);
2506 }
2507 \f
2508 /* Output a shift or rotate instruction for expression code CODE
2509    (LSHIFT_EXPR, RSHIFT_EXPR, LROTATE_EXPR or RROTATE_EXPR), with SHIFTED
2510    being the rtx for the value to shift, and AMOUNT the rtx for the amount
2511    to shift by.  Store the result in the rtx TARGET, if that is convenient.
2512    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2513    Return the rtx for where the value is.
2514    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2515    in which case 0 is returned.  */
2516
2517 static rtx
2518 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2519                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2520 {
2521   rtx op1, temp = 0;
2522   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2523   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2524   optab lshift_optab = ashl_optab;
2525   optab rshift_arith_optab = ashr_optab;
2526   optab rshift_uns_optab = lshr_optab;
2527   optab lrotate_optab = rotl_optab;
2528   optab rrotate_optab = rotr_optab;
2529   machine_mode op1_mode;
2530   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2531   int attempt;
2532   bool speed = optimize_insn_for_speed_p ();
2533
2534   op1 = amount;
2535   op1_mode = GET_MODE (op1);
2536
2537   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2538      shift amount is a vector, use the vector/vector shift patterns.  */
2539   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2540     {
2541       lshift_optab = vashl_optab;
2542       rshift_arith_optab = vashr_optab;
2543       rshift_uns_optab = vlshr_optab;
2544       lrotate_optab = vrotl_optab;
2545       rrotate_optab = vrotr_optab;
2546     }
2547
2548   /* Previously detected shift-counts computed by NEGATE_EXPR
2549      and shifted in the other direction; but that does not work
2550      on all machines.  */
2551
2552   if (SHIFT_COUNT_TRUNCATED)
2553     {
2554       if (CONST_INT_P (op1)
2555           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2556               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2557         op1 = gen_int_shift_amount (mode,
2558                                     (unsigned HOST_WIDE_INT) INTVAL (op1)
2559                                     % GET_MODE_BITSIZE (scalar_mode));
2560       else if (GET_CODE (op1) == SUBREG
2561                && subreg_lowpart_p (op1)
2562                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2563                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2564         op1 = SUBREG_REG (op1);
2565     }
2566
2567   /* Canonicalize rotates by constant amount.  We may canonicalize
2568      to reduce the immediate or if the ISA can rotate by constants
2569      in only one direction.  */
2570   if (rotate && reverse_rotate_by_imm_p (scalar_mode, left, op1))
2571     {
2572       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2573                                          - INTVAL (op1)));
2574       left = !left;
2575       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2576     }
2577
2578   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2579      Note that this is not the case for bigger values.  For instance a rotation
2580      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2581      0x04030201 (bswapsi).  */
2582   if (rotate
2583       && CONST_INT_P (op1)
2584       && INTVAL (op1) == BITS_PER_UNIT
2585       && GET_MODE_SIZE (scalar_mode) == 2
2586       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2587     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2588
2589   if (op1 == const0_rtx)
2590     return shifted;  /* A shift or rotate by zero is a no-op.  */
2591
2592   /* Check whether it's cheaper to implement a left shift by a constant
2593      bit count by a sequence of additions.  */
2594   if (code == LSHIFT_EXPR
2595       && CONST_INT_P (op1)
2596       && INTVAL (op1) > 0
2597       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2598       && INTVAL (op1) < MAX_BITS_PER_WORD
2599       && (shift_cost (speed, mode, INTVAL (op1))
2600           > INTVAL (op1) * add_cost (speed, mode))
2601       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2602     {
2603       int i;
2604       for (i = 0; i < INTVAL (op1); i++)
2605         {
2606           temp = force_reg (mode, shifted);
2607           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2608                                   unsignedp, OPTAB_LIB_WIDEN);
2609         }
2610       return shifted;
2611     }
2612
2613   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2614     {
2615       enum optab_methods methods;
2616
2617       if (attempt == 0)
2618         methods = OPTAB_DIRECT;
2619       else if (attempt == 1)
2620         methods = OPTAB_WIDEN;
2621       else
2622         methods = OPTAB_LIB_WIDEN;
2623
2624       if (rotate)
2625         {
2626           /* Widening does not work for rotation.  */
2627           if (methods == OPTAB_WIDEN)
2628             continue;
2629           else if (methods == OPTAB_LIB_WIDEN)
2630             {
2631               /* If we have been unable to open-code this by a rotation,
2632                  do it as the IOR or PLUS of two shifts.  I.e., to rotate
2633                  A by N bits, compute
2634                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2635                  where C is the bitsize of A.  If N cannot be zero,
2636                  use PLUS instead of IOR.
2637
2638                  It is theoretically possible that the target machine might
2639                  not be able to perform either shift and hence we would
2640                  be making two libcalls rather than just the one for the
2641                  shift (similarly if IOR could not be done).  We will allow
2642                  this extremely unlikely lossage to avoid complicating the
2643                  code below.  */
2644
2645               rtx subtarget = target == shifted ? 0 : target;
2646               rtx new_amount, other_amount;
2647               rtx temp1;
2648
2649               new_amount = op1;
2650               if (op1 == const0_rtx)
2651                 return shifted;
2652               else if (CONST_INT_P (op1))
2653                 other_amount = gen_int_shift_amount
2654                   (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2655               else
2656                 {
2657                   other_amount
2658                     = simplify_gen_unary (NEG, GET_MODE (op1),
2659                                           op1, GET_MODE (op1));
2660                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2661                   other_amount
2662                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2663                                            gen_int_mode (mask, GET_MODE (op1)));
2664                 }
2665
2666               shifted = force_reg (mode, shifted);
2667
2668               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2669                                      mode, shifted, new_amount, 0, 1);
2670               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2671                                       mode, shifted, other_amount,
2672                                       subtarget, 1);
2673               return expand_binop (mode,
2674                                    CONST_INT_P (op1) ? add_optab : ior_optab,
2675                                    temp, temp1, target, unsignedp, methods);
2676             }
2677
2678           temp = expand_binop (mode,
2679                                left ? lrotate_optab : rrotate_optab,
2680                                shifted, op1, target, unsignedp, methods);
2681         }
2682       else if (unsignedp)
2683         temp = expand_binop (mode,
2684                              left ? lshift_optab : rshift_uns_optab,
2685                              shifted, op1, target, unsignedp, methods);
2686
2687       /* Do arithmetic shifts.
2688          Also, if we are going to widen the operand, we can just as well
2689          use an arithmetic right-shift instead of a logical one.  */
2690       if (temp == 0 && ! rotate
2691           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2692         {
2693           enum optab_methods methods1 = methods;
2694
2695           /* If trying to widen a log shift to an arithmetic shift,
2696              don't accept an arithmetic shift of the same size.  */
2697           if (unsignedp)
2698             methods1 = OPTAB_MUST_WIDEN;
2699
2700           /* Arithmetic shift */
2701
2702           temp = expand_binop (mode,
2703                                left ? lshift_optab : rshift_arith_optab,
2704                                shifted, op1, target, unsignedp, methods1);
2705         }
2706
2707       /* We used to try extzv here for logical right shifts, but that was
2708          only useful for one machine, the VAX, and caused poor code
2709          generation there for lshrdi3, so the code was deleted and a
2710          define_expand for lshrsi3 was added to vax.md.  */
2711     }
2712
2713   gcc_assert (temp != NULL_RTX || may_fail);
2714   return temp;
2715 }
2716
2717 /* Emit a shift for expression code CODE.
2718    SHIFTED is the rtx for the value being shifted and AMOUNT is the
2719    number of bits to shift it by.
2720    The result is stored in the rtx TARGET when that is convenient.
2721    A nonzero UNSIGNEDP asks for a logical shift, zero for an arithmetic one.
2722    The return value is the rtx that holds the result.  */
2723
2724 rtx
2725 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2726               poly_int64 amount, rtx target, int unsignedp)
2727 {
2728   /* Turn AMOUNT into a shift-count rtx, then defer to expand_shift_1.  */
2729   rtx amount_rtx = gen_int_shift_amount (mode, amount);
2730   return expand_shift_1 (code, mode, shifted, amount_rtx, target, unsignedp);
2731 }
2732
2733 /* Like expand_shift with an int AMOUNT, but return 0 if that cannot be done.  */
2734
2735 rtx
2736 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2737                     int amount, rtx target, int unsignedp)
2738 {
2739   rtx count = GEN_INT (amount);
2740   return expand_shift_1 (code, mode, shifted, count, target, unsignedp, true);
2741 }
2742
2743 /* Emit a shift for expression code CODE.
2744    SHIFTED is the rtx for the value being shifted and AMOUNT is the
2745    tree for the number of bits to shift it by.
2746    The result is stored in the rtx TARGET when that is convenient.
2747    A nonzero UNSIGNEDP asks for a logical shift, zero for an arithmetic one.
2748    The return value is the rtx that holds the result.  */
2749
2750 rtx
2751 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2752                        tree amount, rtx target, int unsignedp)
2753 {
2754   rtx amount_rtx = expand_normal (amount);
2755   return expand_shift_1 (code, mode, shifted, amount_rtx, target, unsignedp);
2756 }
2757
2758 \f
2759 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2760                         const struct mult_cost *, machine_mode mode);
2761 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2762                               const struct algorithm *, enum mult_variant);
2763 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2764 static rtx extract_high_half (scalar_int_mode, rtx);
2765 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2766
2767 /* Compute the best algorithm for multiplying by T and store it in *ALG_OUT.
2768    The algorithm must cost less than COST_LIMIT.
2769    If ALG_OUT->cost >= COST_LIMIT, no algorithm was found and all
2770    other fields of *ALG_OUT are undefined.
2771    MODE is the machine mode of the multiplication.  */
2772
2773 static void
2774 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2775             const struct mult_cost *cost_limit, machine_mode mode)
2776 {
2777   int m;
2778   struct algorithm *alg_in, *best_alg;
2779   struct mult_cost best_cost;
2780   struct mult_cost new_limit;
2781   int op_cost, op_latency;
2782   unsigned HOST_WIDE_INT orig_t = t;
2783   unsigned HOST_WIDE_INT q;
2784   int maxm, hash_index;
2785   bool cache_hit = false;
2786   enum alg_code cache_alg = alg_zero;
2787   bool speed = optimize_insn_for_speed_p ();
2788   scalar_int_mode imode;
2789   struct alg_hash_entry *entry_ptr;
2790
2791   /* Indicate that no algorithm is yet found.  If no algorithm
2792      is found, this value will be returned and indicate failure.  */
2793   alg_out->cost.cost = cost_limit->cost + 1;
2794   alg_out->cost.latency = cost_limit->latency + 1;
2795
2796   if (cost_limit->cost < 0
2797       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2798     return;
2799
2800   /* Be prepared for vector modes.  */
2801   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2802
2803   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2804
2805   /* Restrict the bits of "t" to the multiplication's mode.  */
2806   t &= GET_MODE_MASK (imode);
2807
2808   /* t == 1 can be done in zero cost.  */
2809   if (t == 1)
2810     {
2811       alg_out->ops = 1;
2812       alg_out->cost.cost = 0;
2813       alg_out->cost.latency = 0;
2814       alg_out->op[0] = alg_m;
2815       return;
2816     }
2817
2818   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2819      fail now.  */
2820   if (t == 0)
2821     {
2822       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2823         return;
2824       else
2825         {
2826           alg_out->ops = 1;
2827           alg_out->cost.cost = zero_cost (speed);
2828           alg_out->cost.latency = zero_cost (speed);
2829           alg_out->op[0] = alg_zero;
2830           return;
2831         }
2832     }
2833
2834   /* We'll be needing a couple extra algorithm structures now.  */
2835
2836   alg_in = XALLOCA (struct algorithm);
2837   best_alg = XALLOCA (struct algorithm);
2838   best_cost = *cost_limit;
2839
2840   /* Compute the hash index.  */
2841   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2842
2843   /* See if we already know what to do for T.  */
2844   entry_ptr = alg_hash_entry_ptr (hash_index);
2845   if (entry_ptr->t == t
2846       && entry_ptr->mode == mode
2847       && entry_ptr->speed == speed
2848       && entry_ptr->alg != alg_unknown)
2849     {
2850       cache_alg = entry_ptr->alg;
2851
2852       if (cache_alg == alg_impossible)
2853         {
2854           /* The cache tells us that it's impossible to synthesize
2855              multiplication by T within entry_ptr->cost.  */
2856           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2857             /* COST_LIMIT is at least as restrictive as the one
2858                recorded in the hash table, in which case we have no
2859                hope of synthesizing a multiplication.  Just
2860                return.  */
2861             return;
2862
2863           /* If we get here, COST_LIMIT is less restrictive than the
2864              one recorded in the hash table, so we may be able to
2865              synthesize a multiplication.  Proceed as if we didn't
2866              have the cache entry.  */
2867         }
2868       else
2869         {
2870           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2871             /* The cached algorithm shows that this multiplication
2872                requires more cost than COST_LIMIT.  Just return.  This
2873                way, we don't clobber this cache entry with
2874                alg_impossible but retain useful information.  */
2875             return;
2876
2877           cache_hit = true;
2878
2879           switch (cache_alg)
2880             {
2881             case alg_shift:
2882               goto do_alg_shift;
2883
2884             case alg_add_t_m2:
2885             case alg_sub_t_m2:
2886               goto do_alg_addsub_t_m2;
2887
2888             case alg_add_factor:
2889             case alg_sub_factor:
2890               goto do_alg_addsub_factor;
2891
2892             case alg_add_t2_m:
2893               goto do_alg_add_t2_m;
2894
2895             case alg_sub_t2_m:
2896               goto do_alg_sub_t2_m;
2897
2898             default:
2899               gcc_unreachable ();
2900             }
2901         }
2902     }
2903
2904   /* If we have a group of zero bits at the low-order part of T, try
2905      multiplying by the remaining bits and then doing a shift.  */
2906
2907   if ((t & 1) == 0)
2908     {
2909     do_alg_shift:
2910       m = ctz_or_zero (t); /* m = number of low zero bits */
2911       if (m < maxm)
2912         {
2913           q = t >> m;
2914           /* The function expand_shift will choose between a shift and
2915              a sequence of additions, so the observed cost is given as
2916              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2917           op_cost = m * add_cost (speed, mode);
2918           if (shift_cost (speed, mode, m) < op_cost)
2919             op_cost = shift_cost (speed, mode, m);
2920           new_limit.cost = best_cost.cost - op_cost;
2921           new_limit.latency = best_cost.latency - op_cost;
2922           synth_mult (alg_in, q, &new_limit, mode);
2923
2924           alg_in->cost.cost += op_cost;
2925           alg_in->cost.latency += op_cost;
2926           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2927             {
2928               best_cost = alg_in->cost;
2929               std::swap (alg_in, best_alg);
2930               best_alg->log[best_alg->ops] = m;
2931               best_alg->op[best_alg->ops] = alg_shift;
2932             }
2933
2934           /* See if treating ORIG_T as a signed number yields a better
2935              sequence.  Try this sequence only for a negative ORIG_T
2936              as it would be useless for a non-negative ORIG_T.  */
2937           if ((HOST_WIDE_INT) orig_t < 0)
2938             {
2939               /* Shift ORIG_T as follows because a right shift of a
2940                  negative-valued signed type is implementation
2941                  defined.  */
2942               q = ~(~orig_t >> m);
2943               /* The function expand_shift will choose between a shift
2944                  and a sequence of additions, so the observed cost is
2945                  given as MIN (m * add_cost(speed, mode),
2946                  shift_cost(speed, mode, m)).  */
2947               op_cost = m * add_cost (speed, mode);
2948               if (shift_cost (speed, mode, m) < op_cost)
2949                 op_cost = shift_cost (speed, mode, m);
2950               new_limit.cost = best_cost.cost - op_cost;
2951               new_limit.latency = best_cost.latency - op_cost;
2952               synth_mult (alg_in, q, &new_limit, mode);
2953
2954               alg_in->cost.cost += op_cost;
2955               alg_in->cost.latency += op_cost;
2956               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2957                 {
2958                   best_cost = alg_in->cost;
2959                   std::swap (alg_in, best_alg);
2960                   best_alg->log[best_alg->ops] = m;
2961                   best_alg->op[best_alg->ops] = alg_shift;
2962                 }
2963             }
2964         }
2965       if (cache_hit)
2966         goto done;
2967     }
2968
2969   /* If we have an odd number, add or subtract one.  */
2970   if ((t & 1) != 0)
2971     {
2972       unsigned HOST_WIDE_INT w;
2973
2974     do_alg_addsub_t_m2:
2975       for (w = 1; (w & t) != 0; w <<= 1)
2976         ;
2977       /* If T was -1, then W will be zero after the loop.  This is another
2978          case where T ends with ...111.  Handling this with (T + 1) and
2979          subtract 1 produces slightly better code and results in algorithm
2980          selection much faster than treating it like the ...0111 case
2981          below.  */
2982       if (w == 0
2983           || (w > 2
2984               /* Reject the case where t is 3.
2985                  Thus we prefer addition in that case.  */
2986               && t != 3))
2987         {
2988           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2989
2990           op_cost = add_cost (speed, mode);
2991           new_limit.cost = best_cost.cost - op_cost;
2992           new_limit.latency = best_cost.latency - op_cost;
2993           synth_mult (alg_in, t + 1, &new_limit, mode);
2994
2995           alg_in->cost.cost += op_cost;
2996           alg_in->cost.latency += op_cost;
2997           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2998             {
2999               best_cost = alg_in->cost;
3000               std::swap (alg_in, best_alg);
3001               best_alg->log[best_alg->ops] = 0;
3002               best_alg->op[best_alg->ops] = alg_sub_t_m2;
3003             }
3004         }
3005       else
3006         {
3007           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
3008
3009           op_cost = add_cost (speed, mode);
3010           new_limit.cost = best_cost.cost - op_cost;
3011           new_limit.latency = best_cost.latency - op_cost;
3012           synth_mult (alg_in, t - 1, &new_limit, mode);
3013
3014           alg_in->cost.cost += op_cost;
3015           alg_in->cost.latency += op_cost;
3016           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3017             {
3018               best_cost = alg_in->cost;
3019               std::swap (alg_in, best_alg);
3020               best_alg->log[best_alg->ops] = 0;
3021               best_alg->op[best_alg->ops] = alg_add_t_m2;
3022             }
3023         }
3024
3025       /* We may be able to calculate a * -7, a * -15, a * -31, etc
3026          quickly with a - a * n for some appropriate constant n.  */
3027       m = exact_log2 (-orig_t + 1);
3028       if (m >= 0 && m < maxm)
3029         {
3030           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3031           /* If the target has a cheap shift-and-subtract insn use
3032              that in preference to a shift insn followed by a sub insn.
3033              Assume that the shift-and-sub is "atomic" with a latency
3034              equal to its cost, otherwise assume that on superscalar
3035              hardware the shift may be executed concurrently with the
3036              earlier steps in the algorithm.  */
3037           if (shiftsub1_cost (speed, mode, m) <= op_cost)
3038             {
3039               op_cost = shiftsub1_cost (speed, mode, m);
3040               op_latency = op_cost;
3041             }
3042           else
3043             op_latency = add_cost (speed, mode);
3044
3045           new_limit.cost = best_cost.cost - op_cost;
3046           new_limit.latency = best_cost.latency - op_latency;
3047           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3048                       &new_limit, mode);
3049
3050           alg_in->cost.cost += op_cost;
3051           alg_in->cost.latency += op_latency;
3052           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3053             {
3054               best_cost = alg_in->cost;
3055               std::swap (alg_in, best_alg);
3056               best_alg->log[best_alg->ops] = m;
3057               best_alg->op[best_alg->ops] = alg_sub_t_m2;
3058             }
3059         }
3060
3061       if (cache_hit)
3062         goto done;
3063     }
3064
3065   /* Look for factors of t of the form
3066      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3067      If we find such a factor, we can multiply by t using an algorithm that
3068      multiplies by q, shift the result by m and add/subtract it to itself.
3069
3070      We search for large factors first and loop down, even if large factors
3071      are less probable than small; if we find a large factor we will find a
3072      good sequence quickly, and therefore be able to prune (by decreasing
3073      COST_LIMIT) the search.  */
3074
3075  do_alg_addsub_factor:
3076   for (m = floor_log2 (t - 1); m >= 2; m--)
3077     {
3078       unsigned HOST_WIDE_INT d;
3079
3080       d = (HOST_WIDE_INT_1U << m) + 1;
3081       if (t % d == 0 && t > d && m < maxm
3082           && (!cache_hit || cache_alg == alg_add_factor))
3083         {
3084           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3085           if (shiftadd_cost (speed, mode, m) <= op_cost)
3086             op_cost = shiftadd_cost (speed, mode, m);
3087
3088           op_latency = op_cost;
3089
3090
3091           new_limit.cost = best_cost.cost - op_cost;
3092           new_limit.latency = best_cost.latency - op_latency;
3093           synth_mult (alg_in, t / d, &new_limit, mode);
3094
3095           alg_in->cost.cost += op_cost;
3096           alg_in->cost.latency += op_latency;
3097           if (alg_in->cost.latency < op_cost)
3098             alg_in->cost.latency = op_cost;
3099           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3100             {
3101               best_cost = alg_in->cost;
3102               std::swap (alg_in, best_alg);
3103               best_alg->log[best_alg->ops] = m;
3104               best_alg->op[best_alg->ops] = alg_add_factor;
3105             }
3106           /* Other factors will have been taken care of in the recursion.  */
3107           break;
3108         }
3109
3110       d = (HOST_WIDE_INT_1U << m) - 1;
3111       if (t % d == 0 && t > d && m < maxm
3112           && (!cache_hit || cache_alg == alg_sub_factor))
3113         {
3114           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3115           if (shiftsub0_cost (speed, mode, m) <= op_cost)
3116             op_cost = shiftsub0_cost (speed, mode, m);
3117
3118           op_latency = op_cost;
3119
3120           new_limit.cost = best_cost.cost - op_cost;
3121           new_limit.latency = best_cost.latency - op_latency;
3122           synth_mult (alg_in, t / d, &new_limit, mode);
3123
3124           alg_in->cost.cost += op_cost;
3125           alg_in->cost.latency += op_latency;
3126           if (alg_in->cost.latency < op_cost)
3127             alg_in->cost.latency = op_cost;
3128           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3129             {
3130               best_cost = alg_in->cost;
3131               std::swap (alg_in, best_alg);
3132               best_alg->log[best_alg->ops] = m;
3133               best_alg->op[best_alg->ops] = alg_sub_factor;
3134             }
3135           break;
3136         }
3137     }
3138   if (cache_hit)
3139     goto done;
3140
3141   /* Try shift-and-add (load effective address) instructions,
3142      i.e. do a*3, a*5, a*9.  */
3143   if ((t & 1) != 0)
3144     {
3145     do_alg_add_t2_m:
3146       q = t - 1;
3147       m = ctz_hwi (q);
3148       if (q && m < maxm)
3149         {
3150           op_cost = shiftadd_cost (speed, mode, m);
3151           new_limit.cost = best_cost.cost - op_cost;
3152           new_limit.latency = best_cost.latency - op_cost;
3153           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3154
3155           alg_in->cost.cost += op_cost;
3156           alg_in->cost.latency += op_cost;
3157           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3158             {
3159               best_cost = alg_in->cost;
3160               std::swap (alg_in, best_alg);
3161               best_alg->log[best_alg->ops] = m;
3162               best_alg->op[best_alg->ops] = alg_add_t2_m;
3163             }
3164         }
3165       if (cache_hit)
3166         goto done;
3167
3168     do_alg_sub_t2_m:
3169       q = t + 1;
3170       m = ctz_hwi (q);
3171       if (q && m < maxm)
3172         {
3173           op_cost = shiftsub0_cost (speed, mode, m);
3174           new_limit.cost = best_cost.cost - op_cost;
3175           new_limit.latency = best_cost.latency - op_cost;
3176           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3177
3178           alg_in->cost.cost += op_cost;
3179           alg_in->cost.latency += op_cost;
3180           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3181             {
3182               best_cost = alg_in->cost;
3183               std::swap (alg_in, best_alg);
3184               best_alg->log[best_alg->ops] = m;
3185               best_alg->op[best_alg->ops] = alg_sub_t2_m;
3186             }
3187         }
3188       if (cache_hit)
3189         goto done;
3190     }
3191
3192  done:
3193   /* If best_cost has not decreased, we have not found any algorithm.  */
3194   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3195     {
3196       /* We failed to find an algorithm.  Record alg_impossible for
3197          this case (that is, <T, MODE, COST_LIMIT>) so that next time
3198          we are asked to find an algorithm for T within the same or
3199          lower COST_LIMIT, we can immediately return to the
3200          caller.  */
3201       entry_ptr->t = t;
3202       entry_ptr->mode = mode;
3203       entry_ptr->speed = speed;
3204       entry_ptr->alg = alg_impossible;
3205       entry_ptr->cost = *cost_limit;
3206       return;
3207     }
3208
3209   /* Cache the result.  */
3210   if (!cache_hit)
3211     {
3212       entry_ptr->t = t;
3213       entry_ptr->mode = mode;
3214       entry_ptr->speed = speed;
3215       entry_ptr->alg = best_alg->op[best_alg->ops];
3216       entry_ptr->cost.cost = best_cost.cost;
3217       entry_ptr->cost.latency = best_cost.latency;
3218     }
3219
3220   /* If we are getting a too long sequence for `struct algorithm'
3221      to record, make this search fail.  */
3222   if (best_alg->ops == MAX_BITS_PER_WORD)
3223     return;
3224
3225   /* Copy the algorithm from temporary space to the space at alg_out.
3226      We avoid using structure assignment because the majority of
3227      best_alg is normally undefined, and this is a critical function.  */
3228   alg_out->ops = best_alg->ops + 1;
3229   alg_out->cost = best_cost;
3230   memcpy (alg_out->op, best_alg->op,
3231           alg_out->ops * sizeof *alg_out->op);
3232   memcpy (alg_out->log, best_alg->log,
3233           alg_out->ops * sizeof *alg_out->log);
3234 }
3235 \f
3236 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3237    Try three variations:
3238
3239        - a shift/add sequence based on VAL itself
3240        - a shift/add sequence based on -VAL, followed by a negation
3241        - a shift/add sequence based on VAL - 1, followed by an addition.
3242
3243    Return true if the cheapest of these cost less than MULT_COST,
3244    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3245
3246 bool
3247 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3248                      struct algorithm *alg, enum mult_variant *variant,
3249                      int mult_cost)
3250 {
3251   struct algorithm alg2;
3252   struct mult_cost limit;
3253   int op_cost;
3254   bool speed = optimize_insn_for_speed_p ();
3255
3256   /* Fail quickly for impossible bounds.  */
3257   if (mult_cost < 0)
3258     return false;
3259
3260   /* Ensure that mult_cost provides a reasonable upper bound.
3261      Any constant multiplication can be performed with less
3262      than 2 * bits additions.  */
3263   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3264   if (mult_cost > op_cost)
3265     mult_cost = op_cost;
3266
3267   *variant = basic_variant;
3268   limit.cost = mult_cost;
3269   limit.latency = mult_cost;
3270   synth_mult (alg, val, &limit, mode);
3271
3272   /* This works only if the inverted value actually fits in an
3273      `unsigned int' */
3274   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3275     {
3276       op_cost = neg_cost (speed, mode);
3277       if (MULT_COST_LESS (&alg->cost, mult_cost))
3278         {
3279           limit.cost = alg->cost.cost - op_cost;
3280           limit.latency = alg->cost.latency - op_cost;
3281         }
3282       else
3283         {
3284           limit.cost = mult_cost - op_cost;
3285           limit.latency = mult_cost - op_cost;
3286         }
3287
3288       synth_mult (&alg2, -val, &limit, mode);
3289       alg2.cost.cost += op_cost;
3290       alg2.cost.latency += op_cost;
3291       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3292         *alg = alg2, *variant = negate_variant;
3293     }
3294
3295   /* This proves very useful for division-by-constant.  */
3296   op_cost = add_cost (speed, mode);
3297   if (MULT_COST_LESS (&alg->cost, mult_cost))
3298     {
3299       limit.cost = alg->cost.cost - op_cost;
3300       limit.latency = alg->cost.latency - op_cost;
3301     }
3302   else
3303     {
3304       limit.cost = mult_cost - op_cost;
3305       limit.latency = mult_cost - op_cost;
3306     }
3307
3308   if (val != HOST_WIDE_INT_MIN
3309       || GET_MODE_UNIT_PRECISION (mode) == HOST_BITS_PER_WIDE_INT)
3310     {
3311       synth_mult (&alg2, val - HOST_WIDE_INT_1U, &limit, mode);
3312       alg2.cost.cost += op_cost;
3313       alg2.cost.latency += op_cost;
3314       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3315         *alg = alg2, *variant = add_variant;
3316     }
3317
3318   return MULT_COST_LESS (&alg->cost, mult_cost);
3319 }
3320
3321 /* A subroutine of expand_mult, used for constant multiplications.
3322    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3323    convenient.  Use the shift/add sequence described by ALG and apply
3324    the final fixup specified by VARIANT.  */
3325
3326 static rtx
3327 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3328                    rtx target, const struct algorithm *alg,
3329                    enum mult_variant variant)
3330 {
3331   unsigned HOST_WIDE_INT val_so_far;
3332   rtx_insn *insn;
3333   rtx accum, tem;
3334   int opno;
3335   machine_mode nmode;
3336
3337   /* Avoid referencing memory over and over and invalid sharing
3338      on SUBREGs.  */
3339   op0 = force_reg (mode, op0);
3340
3341   /* ACCUM starts out either as OP0 or as a zero, depending on
3342      the first operation.  */
3343
3344   if (alg->op[0] == alg_zero)
3345     {
3346       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3347       val_so_far = 0;
3348     }
3349   else if (alg->op[0] == alg_m)
3350     {
3351       accum = copy_to_mode_reg (mode, op0);
3352       val_so_far = 1;
3353     }
3354   else
3355     gcc_unreachable ();
3356
3357   for (opno = 1; opno < alg->ops; opno++)
3358     {
3359       int log = alg->log[opno];
3360       rtx shift_subtarget = optimize ? 0 : accum;
3361       rtx add_target
3362         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3363            && !optimize)
3364           ? target : 0;
3365       rtx accum_target = optimize ? 0 : accum;
3366       rtx accum_inner;
3367
3368       switch (alg->op[opno])
3369         {
3370         case alg_shift:
3371           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3372           /* REG_EQUAL note will be attached to the following insn.  */
3373           emit_move_insn (accum, tem);
3374           val_so_far <<= log;
3375           break;
3376
3377         case alg_add_t_m2:
3378           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3379           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3380                                  add_target ? add_target : accum_target);
3381           val_so_far += HOST_WIDE_INT_1U << log;
3382           break;
3383
3384         case alg_sub_t_m2:
3385           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3386           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3387                                  add_target ? add_target : accum_target);
3388           val_so_far -= HOST_WIDE_INT_1U << log;
3389           break;
3390
3391         case alg_add_t2_m:
3392           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3393                                 log, shift_subtarget, 0);
3394           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3395                                  add_target ? add_target : accum_target);
3396           val_so_far = (val_so_far << log) + 1;
3397           break;
3398
3399         case alg_sub_t2_m:
3400           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3401                                 log, shift_subtarget, 0);
3402           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3403                                  add_target ? add_target : accum_target);
3404           val_so_far = (val_so_far << log) - 1;
3405           break;
3406
3407         case alg_add_factor:
3408           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3409           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3410                                  add_target ? add_target : accum_target);
3411           val_so_far += val_so_far << log;
3412           break;
3413
3414         case alg_sub_factor:
3415           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3416           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3417                                  (add_target
3418                                   ? add_target : (optimize ? 0 : tem)));
3419           val_so_far = (val_so_far << log) - val_so_far;
3420           break;
3421
3422         default:
3423           gcc_unreachable ();
3424         }
3425
3426       if (SCALAR_INT_MODE_P (mode))
3427         {
3428           /* Write a REG_EQUAL note on the last insn so that we can cse
3429              multiplication sequences.  Note that if ACCUM is a SUBREG,
3430              we've set the inner register and must properly indicate that.  */
3431           tem = op0, nmode = mode;
3432           accum_inner = accum;
3433           if (GET_CODE (accum) == SUBREG)
3434             {
3435               accum_inner = SUBREG_REG (accum);
3436               nmode = GET_MODE (accum_inner);
3437               tem = gen_lowpart (nmode, op0);
3438             }
3439
3440           /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3441              In that case, only the low bits of accum would be guaranteed to
3442              be equal to the content of the REG_EQUAL note, the upper bits
3443              can be anything.  */
3444           if (!paradoxical_subreg_p (tem))
3445             {
3446               insn = get_last_insn ();
3447               wide_int wval_so_far
3448                 = wi::uhwi (val_so_far,
3449                             GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3450               rtx c = immed_wide_int_const (wval_so_far, nmode);
3451               set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3452                                 accum_inner);
3453             }
3454         }
3455     }
3456
3457   if (variant == negate_variant)
3458     {
3459       val_so_far = -val_so_far;
3460       accum = expand_unop (mode, neg_optab, accum, target, 0);
3461     }
3462   else if (variant == add_variant)
3463     {
3464       val_so_far = val_so_far + 1;
3465       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3466     }
3467
3468   /* Compare only the bits of val and val_so_far that are significant
3469      in the result mode, to avoid sign-/zero-extension confusion.  */
3470   nmode = GET_MODE_INNER (mode);
3471   val &= GET_MODE_MASK (nmode);
3472   val_so_far &= GET_MODE_MASK (nmode);
3473   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3474
3475   return accum;
3476 }
3477
3478 /* Perform a multiplication and return an rtx for the result.
3479    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3480    TARGET is a suggestion for where to store the result (an rtx).
3481
3482    We check specially for a constant integer as OP1.
3483    If you want this check for OP0 as well, then before calling
3484    you should swap the two operands if OP0 would be constant.  */
3485
3486 rtx
3487 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3488              int unsignedp, bool no_libcall)
3489 {
3490   enum mult_variant variant;
3491   struct algorithm algorithm;
3492   rtx scalar_op1;
3493   int max_cost;
3494   bool speed = optimize_insn_for_speed_p ();
3495   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3496
3497   if (CONSTANT_P (op0))
3498     std::swap (op0, op1);
3499
3500   /* For vectors, there are several simplifications that can be made if
3501      all elements of the vector constant are identical.  */
3502   scalar_op1 = unwrap_const_vec_duplicate (op1);
3503
3504   if (INTEGRAL_MODE_P (mode))
3505     {
3506       rtx fake_reg;
3507       HOST_WIDE_INT coeff;
3508       bool is_neg;
3509       int mode_bitsize;
3510
3511       if (op1 == CONST0_RTX (mode))
3512         return op1;
3513       if (op1 == CONST1_RTX (mode))
3514         return op0;
3515       if (op1 == CONSTM1_RTX (mode))
3516         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3517                             op0, target, 0);
3518
3519       if (do_trapv)
3520         goto skip_synth;
3521
3522       /* If mode is integer vector mode, check if the backend supports
3523          vector lshift (by scalar or vector) at all.  If not, we can't use
3524          synthetized multiply.  */
3525       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3526           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3527           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3528         goto skip_synth;
3529
3530       /* These are the operations that are potentially turned into
3531          a sequence of shifts and additions.  */
3532       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3533
3534       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3535          less than or equal in size to `unsigned int' this doesn't matter.
3536          If the mode is larger than `unsigned int', then synth_mult works
3537          only if the constant value exactly fits in an `unsigned int' without
3538          any truncation.  This means that multiplying by negative values does
3539          not work; results are off by 2^32 on a 32 bit machine.  */
3540       if (CONST_INT_P (scalar_op1))
3541         {
3542           coeff = INTVAL (scalar_op1);
3543           is_neg = coeff < 0;
3544         }
3545 #if TARGET_SUPPORTS_WIDE_INT
3546       else if (CONST_WIDE_INT_P (scalar_op1))
3547 #else
3548       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3549 #endif
3550         {
3551           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3552           /* Perfect power of 2 (other than 1, which is handled above).  */
3553           if (shift > 0)
3554             return expand_shift (LSHIFT_EXPR, mode, op0,
3555                                  shift, target, unsignedp);
3556           else
3557             goto skip_synth;
3558         }
3559       else
3560         goto skip_synth;
3561
3562       /* We used to test optimize here, on the grounds that it's better to
3563          produce a smaller program when -O is not used.  But this causes
3564          such a terrible slowdown sometimes that it seems better to always
3565          use synth_mult.  */
3566
3567       /* Special case powers of two.  */
3568       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3569           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3570         return expand_shift (LSHIFT_EXPR, mode, op0,
3571                              floor_log2 (coeff), target, unsignedp);
3572
3573       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3574
3575       /* Attempt to handle multiplication of DImode values by negative
3576          coefficients, by performing the multiplication by a positive
3577          multiplier and then inverting the result.  */
3578       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3579         {
3580           /* Its safe to use -coeff even for INT_MIN, as the
3581              result is interpreted as an unsigned coefficient.
3582              Exclude cost of op0 from max_cost to match the cost
3583              calculation of the synth_mult.  */
3584           coeff = -(unsigned HOST_WIDE_INT) coeff;
3585           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3586                                     mode, speed)
3587                       - neg_cost (speed, mode));
3588           if (max_cost <= 0)
3589             goto skip_synth;
3590
3591           /* Special case powers of two.  */
3592           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3593             {
3594               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3595                                        floor_log2 (coeff), target, unsignedp);
3596               return expand_unop (mode, neg_optab, temp, target, 0);
3597             }
3598
3599           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3600                                    max_cost))
3601             {
3602               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3603                                             &algorithm, variant);
3604               return expand_unop (mode, neg_optab, temp, target, 0);
3605             }
3606           goto skip_synth;
3607         }
3608
3609       /* Exclude cost of op0 from max_cost to match the cost
3610          calculation of the synth_mult.  */
3611       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3612       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3613         return expand_mult_const (mode, op0, coeff, target,
3614                                   &algorithm, variant);
3615     }
3616  skip_synth:
3617
3618   /* Expand x*2.0 as x+x.  */
3619   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3620       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3621     {
3622       op0 = force_reg (GET_MODE (op0), op0);
3623       return expand_binop (mode, add_optab, op0, op0,
3624                            target, unsignedp,
3625                            no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3626     }
3627
3628   /* This used to use umul_optab if unsigned, but for non-widening multiply
3629      there is no difference between signed and unsigned.  */
3630   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3631                       op0, op1, target, unsignedp,
3632                       no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3633   gcc_assert (op0 || no_libcall);
3634   return op0;
3635 }
3636
3637 /* Return a cost estimate for multiplying a register by the given
3638    COEFFicient in the given MODE and SPEED.  */
3639
3640 int
3641 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3642 {
3643   int max_cost;
3644   struct algorithm algorithm;
3645   enum mult_variant variant;
3646
3647   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3648   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3649                            mode, speed);
3650   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3651     return algorithm.cost.cost;
3652   else
3653     return max_cost;
3654 }
3655
3656 /* Perform a widening multiplication and return an rtx for the result.
3657    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3658    TARGET is a suggestion for where to store the result (an rtx).
3659    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3660    or smul_widen_optab.
3661
3662    We check specially for a constant integer as OP1, comparing the
3663    cost of a widening multiply against the cost of a sequence of shifts
3664    and adds.  */
3665
3666 rtx
3667 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3668                       int unsignedp, optab this_optab)
3669 {
3670   bool speed = optimize_insn_for_speed_p ();
3671   rtx cop1;
3672
3673   if (CONST_INT_P (op1)
3674       && GET_MODE (op0) != VOIDmode
3675       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3676                                 this_optab == umul_widen_optab))
3677       && CONST_INT_P (cop1)
3678       && (INTVAL (cop1) >= 0
3679           || HWI_COMPUTABLE_MODE_P (mode)))
3680     {
3681       HOST_WIDE_INT coeff = INTVAL (cop1);
3682       int max_cost;
3683       enum mult_variant variant;
3684       struct algorithm algorithm;
3685
3686       if (coeff == 0)
3687         return CONST0_RTX (mode);
3688
3689       /* Special case powers of two.  */
3690       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3691         {
3692           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3693           return expand_shift (LSHIFT_EXPR, mode, op0,
3694                                floor_log2 (coeff), target, unsignedp);
3695         }
3696
3697       /* Exclude cost of op0 from max_cost to match the cost
3698          calculation of the synth_mult.  */
3699       max_cost = mul_widen_cost (speed, mode);
3700       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3701                                max_cost))
3702         {
3703           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3704           return expand_mult_const (mode, op0, coeff, target,
3705                                     &algorithm, variant);
3706         }
3707     }
3708   return expand_binop (mode, this_optab, op0, op1, target,
3709                        unsignedp, OPTAB_LIB_WIDEN);
3710 }
3711 \f
3712 /* Choose a minimal N + 1 bit approximation to 2**K / D that can be used to
3713    replace division by D, put the least significant N bits of the result in
3714    *MULTIPLIER_PTR, the value K - N in *POST_SHIFT_PTR, and return the most
3715    significant bit.
3716
3717    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3718    needed precision is PRECISION (should be <= N).
3719
3720    PRECISION should be as small as possible so this function can choose the
3721    multiplier more freely.  If PRECISION is <= N - 1, the most significant
3722    bit returned by the function will be zero.
3723
3724    Using this function, x / D is equal to (x*m) / 2**N >> (*POST_SHIFT_PTR),
3725    where m is the full N + 1 bit multiplier.  */
3726
3727 unsigned HOST_WIDE_INT
3728 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3729                    unsigned HOST_WIDE_INT *multiplier_ptr,
3730                    int *post_shift_ptr)
3731 {
3732   int lgup, post_shift;
3733   int pow1, pow2;
3734
3735   /* lgup = ceil(log2(d)) */
3736   /* Assuming d > 1, we have d >= 2^(lgup-1) + 1 */
3737   lgup = ceil_log2 (d);
3738
3739   gcc_assert (lgup <= n);
3740   gcc_assert (lgup <= precision);
3741
3742   pow1 = n + lgup;
3743   pow2 = n + lgup - precision;
3744
3745   /* mlow = 2^(n + lgup)/d */
3746   /* Trivially from above we have mlow < 2^(n+1) */
3747   wide_int val = wi::set_bit_in_zero (pow1, HOST_BITS_PER_DOUBLE_INT);
3748   wide_int mlow = wi::udiv_trunc (val, d);
3749
3750   /* mhigh = (2^(n + lgup) + 2^(n + lgup - precision))/d */
3751   /* From above we have mhigh < 2^(n+1) assuming lgup <= precision */
3752   /* From precision <= n, the difference between the numerators of mhigh and
3753      mlow is >= 2^lgup >= d.  Therefore the difference of the quotients in
3754      the Euclidean division by d is at least 1, so we have mlow < mhigh and
3755      the exact value of 2^(n + lgup)/d lies in the interval [mlow; mhigh).  */
3756   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3757   wide_int mhigh = wi::udiv_trunc (val, d);
3758
3759   /* Reduce to lowest terms.  */
3760   /* If precision <= n - 1, then the difference between the numerators of
3761      mhigh and mlow is >= 2^(lgup + 1) >= 2 * 2^lgup >= 2 * d.  Therefore
3762      the difference of the quotients in the Euclidean division by d is at
3763      least 2, which means that mhigh and mlow differ by at least one bit
3764      not in the last place.  The conclusion is that the first iteration of
3765      the loop below completes and shifts mhigh and mlow by 1 bit, which in
3766      particular means that mhigh < 2^n, that is to say, the most significant
3767      bit in the n + 1 bit value is zero.  */
3768   for (post_shift = lgup; post_shift > 0; post_shift--)
3769     {
3770       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3771                                                        HOST_BITS_PER_WIDE_INT);
3772       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3773                                                        HOST_BITS_PER_WIDE_INT);
3774       if (ml_lo >= mh_lo)
3775         break;
3776
3777       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3778       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3779     }
3780
3781   *post_shift_ptr = post_shift;
3782
3783   if (n < HOST_BITS_PER_WIDE_INT)
3784     {
3785       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3786       *multiplier_ptr = mhigh.to_uhwi () & mask;
3787       return mhigh.to_uhwi () > mask;
3788     }
3789   else
3790     {
3791       *multiplier_ptr = mhigh.to_uhwi ();
3792       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3793     }
3794 }
3795
3796 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is congruent
3797    to 1 modulo 2**N, assuming that X is odd.  Bézout's lemma guarantees that Y
3798    exists for any given positive N.  */
3799
3800 static unsigned HOST_WIDE_INT
3801 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3802 {
3803   gcc_assert ((x & 1) == 1);
3804
3805   /* The algorithm notes that the choice Y = Z satisfies X*Y == 1 mod 2^3,
3806      since X is odd.  Then each iteration doubles the number of bits of
3807      significance in Y.  */
3808
3809   const unsigned HOST_WIDE_INT mask
3810     = (n == HOST_BITS_PER_WIDE_INT
3811        ? HOST_WIDE_INT_M1U
3812        : (HOST_WIDE_INT_1U << n) - 1);
3813   unsigned HOST_WIDE_INT y = x;
3814   int nbit = 3;
3815
3816   while (nbit < n)
3817     {
3818       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3819       nbit *= 2;
3820     }
3821
3822   return y;
3823 }
3824
3825 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3826    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3827    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3828    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3829    become signed.
3830
3831    The result is put in TARGET if that is convenient.
3832
3833    MODE is the mode of operation.  */
3834
3835 rtx
3836 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3837                              rtx op1, rtx target, int unsignedp)
3838 {
3839   rtx tem;
3840   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3841
3842   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3843                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3844   tem = expand_and (mode, tem, op1, NULL_RTX);
3845   adj_operand
3846     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3847                      adj_operand);
3848
3849   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3850                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3851   tem = expand_and (mode, tem, op0, NULL_RTX);
3852   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3853                           target);
3854
3855   return target;
3856 }
3857
3858 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3859
3860 static rtx
3861 extract_high_half (scalar_int_mode mode, rtx op)
3862 {
3863   if (mode == word_mode)
3864     return gen_highpart (mode, op);
3865
3866   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3867
3868   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3869                      GET_MODE_BITSIZE (mode), 0, 1);
3870   return convert_modes (mode, wider_mode, op, 0);
3871 }
3872
3873 /* Like expmed_mult_highpart, but only consider using multiplication optab.  */
3874
3875 rtx
3876 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3877                             rtx target, int unsignedp, int max_cost)
3878 {
3879   const scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3880   const bool speed = optimize_insn_for_speed_p ();
3881   const int size = GET_MODE_BITSIZE (mode);
3882   optab moptab;
3883   rtx tem;
3884
3885   /* Firstly, try using a multiplication insn that only generates the needed
3886      high part of the product, and in the sign flavor of unsignedp.  */
3887   if (mul_highpart_cost (speed, mode) < max_cost)
3888     {
3889       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3890       tem = expand_binop (mode, moptab, op0, op1, target, unsignedp,
3891                           OPTAB_DIRECT);
3892       if (tem)
3893         return tem;
3894     }
3895
3896   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3897      Need to adjust the result after the multiplication.  */
3898   if (size - 1 < BITS_PER_WORD
3899       && (mul_highpart_cost (speed, mode)
3900           + 2 * shift_cost (speed, mode, size-1)
3901           + 4 * add_cost (speed, mode) < max_cost))
3902     {
3903       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3904       tem = expand_binop (mode, moptab, op0, op1, target, !unsignedp,
3905                           OPTAB_DIRECT);
3906       if (tem)
3907         /* We used the wrong signedness.  Adjust the result.  */
3908         return expand_mult_highpart_adjust (mode, tem, op0, op1, tem,
3909                                             unsignedp);
3910     }
3911
3912   /* Try widening multiplication.  */
3913   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3914   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3915       && mul_widen_cost (speed, wider_mode) < max_cost)
3916     {
3917       tem = expand_binop (wider_mode, moptab, op0, op1, NULL_RTX, unsignedp,
3918                           OPTAB_WIDEN);
3919       if (tem)
3920         return extract_high_half (mode, tem);
3921     }
3922
3923   /* Try widening the mode and perform a non-widening multiplication.  */
3924   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3925       && size - 1 < BITS_PER_WORD
3926       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3927           < max_cost))
3928     {
3929       rtx_insn *insns;
3930       rtx wop0, wop1;
3931
3932       /* We need to widen the operands, for example to ensure the
3933          constant multiplier is correctly sign or zero extended.
3934          Use a sequence to clean-up any instructions emitted by
3935          the conversions if things don't work out.  */
3936       start_sequence ();
3937       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3938       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3939       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3940                           unsignedp, OPTAB_WIDEN);
3941       insns = get_insns ();
3942       end_sequence ();
3943
3944       if (tem)
3945         {
3946           emit_insn (insns);
3947           return extract_high_half (mode, tem);
3948         }
3949     }
3950
3951   /* Try widening multiplication of opposite signedness, and adjust.  */
3952   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3953   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3954       && size - 1 < BITS_PER_WORD
3955       && (mul_widen_cost (speed, wider_mode)
3956           + 2 * shift_cost (speed, mode, size-1)
3957           + 4 * add_cost (speed, mode) < max_cost))
3958     {
3959       tem = expand_binop (wider_mode, moptab, op0, op1, NULL_RTX, !unsignedp,
3960                           OPTAB_WIDEN);
3961       if (tem != 0)
3962         {
3963           tem = extract_high_half (mode, tem);
3964           /* We used the wrong signedness.  Adjust the result.  */
3965           return expand_mult_highpart_adjust (mode, tem, op0, op1, target,
3966                                               unsignedp);
3967         }
3968     }
3969
3970   return 0;
3971 }
3972
3973 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3974    putting the high half of the result in TARGET if that is convenient,
3975    and return where the result is.  If the operation cannot be performed,
3976    0 is returned.
3977
3978    MODE is the mode of operation and result.
3979
3980    UNSIGNEDP nonzero means unsigned multiply.
3981
3982    MAX_COST is the total allowed cost for the expanded RTL.  */
3983
3984 static rtx
3985 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3986                       rtx target, int unsignedp, int max_cost)
3987 {
3988   const bool speed = optimize_insn_for_speed_p ();
3989   unsigned HOST_WIDE_INT cnst1;
3990   int extra_cost;
3991   bool sign_adjust = false;
3992   enum mult_variant variant;
3993   struct algorithm alg;
3994   rtx narrow_op1, tem;
3995
3996   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3997   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3998
3999   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
4000   narrow_op1 = gen_int_mode (INTVAL (op1), mode);
4001
4002   /* We can't optimize modes wider than BITS_PER_WORD.
4003      ??? We might be able to perform double-word arithmetic if
4004      mode == word_mode, however all the cost calculations in
4005      synth_mult etc. assume single-word operations.  */
4006   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
4007   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
4008     return expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
4009                                        unsignedp, max_cost);
4010
4011   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
4012
4013   /* Check whether we try to multiply by a negative constant.  */
4014   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
4015     {
4016       sign_adjust = true;
4017       extra_cost += add_cost (speed, mode);
4018     }
4019
4020   /* See whether shift/add multiplication is cheap enough.  */
4021   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
4022                            max_cost - extra_cost))
4023     {
4024       /* See whether the specialized multiplication optabs are
4025          cheaper than the shift/add version.  */
4026       tem = expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
4027                                         unsignedp,
4028                                         alg.cost.cost + extra_cost);
4029       if (tem)
4030         return tem;
4031
4032       tem = convert_to_mode (wider_mode, op0, unsignedp);
4033       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
4034       tem = extract_high_half (mode, tem);
4035
4036       /* Adjust result for signedness.  */
4037       if (sign_adjust)
4038         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
4039
4040       return tem;
4041     }
4042   return expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
4043                                      unsignedp, max_cost);
4044 }
4045
4046
4047 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
4048
4049 static rtx
4050 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4051 {
4052   rtx result, temp, shift;
4053   rtx_code_label *label;
4054   int logd;
4055   int prec = GET_MODE_PRECISION (mode);
4056
4057   logd = floor_log2 (d);
4058   result = gen_reg_rtx (mode);
4059
4060   /* Avoid conditional branches when they're expensive.  */
4061   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4062       && optimize_insn_for_speed_p ())
4063     {
4064       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4065                                       mode, 0, -1);
4066       if (signmask)
4067         {
4068           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4069           signmask = force_reg (mode, signmask);
4070           shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4071
4072           /* Use the rtx_cost of a LSHIFTRT instruction to determine
4073              which instruction sequence to use.  If logical right shifts
4074              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
4075              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
4076
4077           temp = gen_rtx_LSHIFTRT (mode, result, shift);
4078           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4079               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4080                   > COSTS_N_INSNS (2)))
4081             {
4082               temp = expand_binop (mode, xor_optab, op0, signmask,
4083                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4084               temp = expand_binop (mode, sub_optab, temp, signmask,
4085                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4086               temp = expand_binop (mode, and_optab, temp,
4087                                    gen_int_mode (masklow, mode),
4088                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4089               temp = expand_binop (mode, xor_optab, temp, signmask,
4090                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4091               temp = expand_binop (mode, sub_optab, temp, signmask,
4092                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4093             }
4094           else
4095             {
4096               signmask = expand_binop (mode, lshr_optab, signmask, shift,
4097                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
4098               signmask = force_reg (mode, signmask);
4099
4100               temp = expand_binop (mode, add_optab, op0, signmask,
4101                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4102               temp = expand_binop (mode, and_optab, temp,
4103                                    gen_int_mode (masklow, mode),
4104                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4105               temp = expand_binop (mode, sub_optab, temp, signmask,
4106                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4107             }
4108           return temp;
4109         }
4110     }
4111
4112   /* Mask contains the mode's signbit and the significant bits of the
4113      modulus.  By including the signbit in the operation, many targets
4114      can avoid an explicit compare operation in the following comparison
4115      against zero.  */
4116   wide_int mask = wi::mask (logd, false, prec);
4117   mask = wi::set_bit (mask, prec - 1);
4118
4119   temp = expand_binop (mode, and_optab, op0,
4120                        immed_wide_int_const (mask, mode),
4121                        result, 1, OPTAB_LIB_WIDEN);
4122   if (temp != result)
4123     emit_move_insn (result, temp);
4124
4125   label = gen_label_rtx ();
4126   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4127
4128   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4129                        0, OPTAB_LIB_WIDEN);
4130
4131   mask = wi::mask (logd, true, prec);
4132   temp = expand_binop (mode, ior_optab, temp,
4133                        immed_wide_int_const (mask, mode),
4134                        result, 1, OPTAB_LIB_WIDEN);
4135   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4136                        0, OPTAB_LIB_WIDEN);
4137   if (temp != result)
4138     emit_move_insn (result, temp);
4139   emit_label (label);
4140   return result;
4141 }
4142
4143 /* Expand signed division of OP0 by a power of two D in mode MODE.
4144    This routine is only called for positive values of D.  */
4145
4146 static rtx
4147 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4148 {
4149   rtx temp;
4150   rtx_code_label *label;
4151   int logd;
4152
4153   logd = floor_log2 (d);
4154
4155   if (d == 2
4156       && BRANCH_COST (optimize_insn_for_speed_p (),
4157                       false) >= 1)
4158     {
4159       temp = gen_reg_rtx (mode);
4160       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4161       if (temp != NULL_RTX)
4162         {
4163           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4164                                0, OPTAB_LIB_WIDEN);
4165           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4166         }
4167     }
4168
4169   if (HAVE_conditional_move
4170       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4171     {
4172       rtx temp2;
4173
4174       start_sequence ();
4175       temp2 = copy_to_mode_reg (mode, op0);
4176       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4177                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
4178       temp = force_reg (mode, temp);
4179
4180       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4181       temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
4182                                      temp, temp2, mode, 0);
4183       if (temp2)
4184         {
4185           rtx_insn *seq = get_insns ();
4186           end_sequence ();
4187           emit_insn (seq);
4188           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4189         }
4190       end_sequence ();
4191     }
4192
4193   if (BRANCH_COST (optimize_insn_for_speed_p (),
4194                    false) >= 2)
4195     {
4196       int ushift = GET_MODE_BITSIZE (mode) - logd;
4197
4198       temp = gen_reg_rtx (mode);
4199       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4200       if (temp != NULL_RTX)
4201         {
4202           if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4203               || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4204               > COSTS_N_INSNS (1))
4205             temp = expand_binop (mode, and_optab, temp,
4206                                  gen_int_mode (d - 1, mode),
4207                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
4208           else
4209             temp = expand_shift (RSHIFT_EXPR, mode, temp,
4210                                  ushift, NULL_RTX, 1);
4211           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4212                                0, OPTAB_LIB_WIDEN);
4213           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4214         }
4215     }
4216
4217   label = gen_label_rtx ();
4218   temp = copy_to_mode_reg (mode, op0);
4219   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4220   expand_inc (temp, gen_int_mode (d - 1, mode));
4221   emit_label (label);
4222   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4223 }
4224 \f
4225 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4226    if that is convenient, and returning where the result is.
4227    You may request either the quotient or the remainder as the result;
4228    specify REM_FLAG nonzero to get the remainder.
4229
4230    CODE is the expression code for which kind of division this is;
4231    it controls how rounding is done.  MODE is the machine mode to use.
4232    UNSIGNEDP nonzero means do unsigned division.  */
4233
4234 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4235    and then correct it by or'ing in missing high bits
4236    if result of ANDI is nonzero.
4237    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4238    This could optimize to a bfexts instruction.
4239    But C doesn't use these operations, so their optimizations are
4240    left for later.  */
4241 /* ??? For modulo, we don't actually need the highpart of the first product,
4242    the low part will do nicely.  And for small divisors, the second multiply
4243    can also be a low-part only multiply or even be completely left out.
4244    E.g. to calculate the remainder of a division by 3 with a 32 bit
4245    multiply, multiply with 0x55555556 and extract the upper two bits;
4246    the result is exact for inputs up to 0x1fffffff.
4247    The input range can be reduced by using cross-sum rules.
4248    For odd divisors >= 3, the following table gives right shift counts
4249    so that if a number is shifted by an integer multiple of the given
4250    amount, the remainder stays the same:
4251    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4252    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4253    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4254    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4255    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4256
4257    Cross-sum rules for even numbers can be derived by leaving as many bits
4258    to the right alone as the divisor has zeros to the right.
4259    E.g. if x is an unsigned 32 bit number:
4260    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4261    */
4262
4263 rtx
4264 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4265                rtx op0, rtx op1, rtx target, int unsignedp,
4266                enum optab_methods methods)
4267 {
4268   machine_mode compute_mode;
4269   rtx tquotient;
4270   rtx quotient = 0, remainder = 0;
4271   rtx_insn *last;
4272   rtx_insn *insn;
4273   optab optab1, optab2;
4274   int op1_is_constant, op1_is_pow2 = 0;
4275   int max_cost, extra_cost;
4276   static HOST_WIDE_INT last_div_const = 0;
4277   bool speed = optimize_insn_for_speed_p ();
4278
4279   op1_is_constant = CONST_INT_P (op1);
4280   if (op1_is_constant)
4281     {
4282       wide_int ext_op1 = rtx_mode_t (op1, mode);
4283       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4284                      || (! unsignedp
4285                          && wi::popcount (wi::neg (ext_op1)) == 1));
4286     }
4287
4288   /*
4289      This is the structure of expand_divmod:
4290
4291      First comes code to fix up the operands so we can perform the operations
4292      correctly and efficiently.
4293
4294      Second comes a switch statement with code specific for each rounding mode.
4295      For some special operands this code emits all RTL for the desired
4296      operation, for other cases, it generates only a quotient and stores it in
4297      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4298      to indicate that it has not done anything.
4299
4300      Last comes code that finishes the operation.  If QUOTIENT is set and
4301      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4302      QUOTIENT is not set, it is computed using trunc rounding.
4303
4304      We try to generate special code for division and remainder when OP1 is a
4305      constant.  If |OP1| = 2**n we can use shifts and some other fast
4306      operations.  For other values of OP1, we compute a carefully selected
4307      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4308      by m.
4309
4310      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4311      half of the product.  Different strategies for generating the product are
4312      implemented in expmed_mult_highpart.
4313
4314      If what we actually want is the remainder, we generate that by another
4315      by-constant multiplication and a subtraction.  */
4316
4317   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4318      code below will malfunction if we are, so check here and handle
4319      the special case if so.  */
4320   if (op1 == const1_rtx)
4321     return rem_flag ? const0_rtx : op0;
4322
4323     /* When dividing by -1, we could get an overflow.
4324      negv_optab can handle overflows.  */
4325   if (! unsignedp && op1 == constm1_rtx)
4326     {
4327       if (rem_flag)
4328         return const0_rtx;
4329       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4330                           ? negv_optab : neg_optab, op0, target, 0);
4331     }
4332
4333   if (target
4334       /* Don't use the function value register as a target
4335          since we have to read it as well as write it,
4336          and function-inlining gets confused by this.  */
4337       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4338           /* Don't clobber an operand while doing a multi-step calculation.  */
4339           || ((rem_flag || op1_is_constant)
4340               && (reg_mentioned_p (target, op0)
4341                   || (MEM_P (op0) && MEM_P (target))))
4342           || reg_mentioned_p (target, op1)
4343           || (MEM_P (op1) && MEM_P (target))))
4344     target = 0;
4345
4346   /* Get the mode in which to perform this computation.  Normally it will
4347      be MODE, but sometimes we can't do the desired operation in MODE.
4348      If so, pick a wider mode in which we can do the operation.  Convert
4349      to that mode at the start to avoid repeated conversions.
4350
4351      First see what operations we need.  These depend on the expression
4352      we are evaluating.  (We assume that divxx3 insns exist under the
4353      same conditions that modxx3 insns and that these insns don't normally
4354      fail.  If these assumptions are not correct, we may generate less
4355      efficient code in some cases.)
4356
4357      Then see if we find a mode in which we can open-code that operation
4358      (either a division, modulus, or shift).  Finally, check for the smallest
4359      mode for which we can do the operation with a library call.  */
4360
4361   /* We might want to refine this now that we have division-by-constant
4362      optimization.  Since expmed_mult_highpart tries so many variants, it is
4363      not straightforward to generalize this.  Maybe we should make an array
4364      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4365
4366   optab1 = (op1_is_pow2
4367             ? (unsignedp ? lshr_optab : ashr_optab)
4368             : (unsignedp ? udiv_optab : sdiv_optab));
4369   optab2 = (op1_is_pow2 ? optab1
4370             : (unsignedp ? udivmod_optab : sdivmod_optab));
4371
4372   if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4373     {
4374       FOR_EACH_MODE_FROM (compute_mode, mode)
4375       if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4376           || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4377         break;
4378
4379       if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4380         FOR_EACH_MODE_FROM (compute_mode, mode)
4381           if (optab_libfunc (optab1, compute_mode)
4382               || optab_libfunc (optab2, compute_mode))
4383             break;
4384     }
4385   else
4386     compute_mode = mode;
4387
4388   /* If we still couldn't find a mode, use MODE, but expand_binop will
4389      probably die.  */
4390   if (compute_mode == VOIDmode)
4391     compute_mode = mode;
4392
4393   if (target && GET_MODE (target) == compute_mode)
4394     tquotient = target;
4395   else
4396     tquotient = gen_reg_rtx (compute_mode);
4397
4398 #if 0
4399   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4400      (mode), and thereby get better code when OP1 is a constant.  Do that
4401      later.  It will require going over all usages of SIZE below.  */
4402   size = GET_MODE_BITSIZE (mode);
4403 #endif
4404
4405   /* Only deduct something for a REM if the last divide done was
4406      for a different constant.   Then set the constant of the last
4407      divide.  */
4408   max_cost = (unsignedp
4409               ? udiv_cost (speed, compute_mode)
4410               : sdiv_cost (speed, compute_mode));
4411   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4412                      && INTVAL (op1) == last_div_const))
4413     max_cost -= (mul_cost (speed, compute_mode)
4414                  + add_cost (speed, compute_mode));
4415
4416   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4417
4418   /* Now convert to the best mode to use.  */
4419   if (compute_mode != mode)
4420     {
4421       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4422       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4423
4424       /* convert_modes may have placed op1 into a register, so we
4425          must recompute the following.  */
4426       op1_is_constant = CONST_INT_P (op1);
4427       if (op1_is_constant)
4428         {
4429           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4430           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4431                          || (! unsignedp
4432                              && wi::popcount (wi::neg (ext_op1)) == 1));
4433         }
4434       else
4435         op1_is_pow2 = 0;
4436     }
4437
4438   /* If one of the operands is a volatile MEM, copy it into a register.  */
4439
4440   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4441     op0 = force_reg (compute_mode, op0);
4442   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4443     op1 = force_reg (compute_mode, op1);
4444
4445   /* If we need the remainder or if OP1 is constant, we need to
4446      put OP0 in a register in case it has any queued subexpressions.  */
4447   if (rem_flag || op1_is_constant)
4448     op0 = force_reg (compute_mode, op0);
4449
4450   last = get_last_insn ();
4451
4452   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4453   if (unsignedp)
4454     {
4455       if (code == FLOOR_DIV_EXPR)
4456         code = TRUNC_DIV_EXPR;
4457       if (code == FLOOR_MOD_EXPR)
4458         code = TRUNC_MOD_EXPR;
4459       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4460         code = TRUNC_DIV_EXPR;
4461     }
4462
4463   if (op1 != const0_rtx)
4464     switch (code)
4465       {
4466       case TRUNC_MOD_EXPR:
4467       case TRUNC_DIV_EXPR:
4468         if (op1_is_constant)
4469           {
4470             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4471             int size = GET_MODE_BITSIZE (int_mode);
4472             if (unsignedp)
4473               {
4474                 unsigned HOST_WIDE_INT mh, ml;
4475                 int pre_shift, post_shift;
4476                 wide_int wd = rtx_mode_t (op1, int_mode);
4477                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4478
4479                 if (wi::popcount (wd) == 1)
4480                   {
4481                     pre_shift = floor_log2 (d);
4482                     if (rem_flag)
4483                       {
4484                         unsigned HOST_WIDE_INT mask
4485                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4486                         remainder
4487                           = expand_binop (int_mode, and_optab, op0,
4488                                           gen_int_mode (mask, int_mode),
4489                                           remainder, 1, methods);
4490                         if (remainder)
4491                           return gen_lowpart (mode, remainder);
4492                       }
4493                     quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4494                                              pre_shift, tquotient, 1);
4495                   }
4496                 else if (size <= HOST_BITS_PER_WIDE_INT)
4497                   {
4498                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4499                       {
4500                         /* Most significant bit of divisor is set; emit an scc
4501                            insn.  */
4502                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4503                                                           int_mode, 1, 1);
4504                       }
4505                     else
4506                       {
4507                         /* Find a suitable multiplier and right shift count
4508                            instead of directly dividing by D.  */
4509                         mh = choose_multiplier (d, size, size,
4510                                                 &ml, &post_shift);
4511
4512                         /* If the suggested multiplier is more than SIZE bits,
4513                            we can do better for even divisors, using an
4514                            initial right shift.  */
4515                         if (mh != 0 && (d & 1) == 0)
4516                           {
4517                             pre_shift = ctz_or_zero (d);
4518                             mh = choose_multiplier (d >> pre_shift, size,
4519                                                     size - pre_shift,
4520                                                     &ml, &post_shift);
4521                             gcc_assert (!mh);
4522                           }
4523                         else
4524                           pre_shift = 0;
4525
4526                         if (mh != 0)
4527                           {
4528                             rtx t1, t2, t3, t4;
4529
4530                             if (post_shift - 1 >= BITS_PER_WORD)
4531                               goto fail1;
4532
4533                             extra_cost
4534                               = (shift_cost (speed, int_mode, post_shift - 1)
4535                                  + shift_cost (speed, int_mode, 1)
4536                                  + 2 * add_cost (speed, int_mode));
4537                             t1 = expmed_mult_highpart
4538                               (int_mode, op0, gen_int_mode (ml, int_mode),
4539                                NULL_RTX, 1, max_cost - extra_cost);
4540                             if (t1 == 0)
4541                               goto fail1;
4542                             t2 = force_operand (gen_rtx_MINUS (int_mode,
4543                                                                op0, t1),
4544                                                 NULL_RTX);
4545                             t3 = expand_shift (RSHIFT_EXPR, int_mode,
4546                                                t2, 1, NULL_RTX, 1);
4547                             t4 = force_operand (gen_rtx_PLUS (int_mode,
4548                                                               t1, t3),
4549                                                 NULL_RTX);
4550                             quotient = expand_shift
4551                               (RSHIFT_EXPR, int_mode, t4,
4552                                post_shift - 1, tquotient, 1);
4553                           }
4554                         else
4555                           {
4556                             rtx t1, t2;
4557
4558                             if (pre_shift >= BITS_PER_WORD
4559                                 || post_shift >= BITS_PER_WORD)
4560                               goto fail1;
4561
4562                             t1 = expand_shift
4563                               (RSHIFT_EXPR, int_mode, op0,
4564                                pre_shift, NULL_RTX, 1);
4565                             extra_cost
4566                               = (shift_cost (speed, int_mode, pre_shift)
4567                                  + shift_cost (speed, int_mode, post_shift));
4568                             t2 = expmed_mult_highpart
4569                               (int_mode, t1,
4570                                gen_int_mode (ml, int_mode),
4571                                NULL_RTX, 1, max_cost - extra_cost);
4572                             if (t2 == 0)
4573                               goto fail1;
4574                             quotient = expand_shift
4575                               (RSHIFT_EXPR, int_mode, t2,
4576                                post_shift, tquotient, 1);
4577                           }
4578                       }
4579                   }
4580                 else            /* Too wide mode to use tricky code */
4581                   break;
4582
4583                 insn = get_last_insn ();
4584                 if (insn != last)
4585                   set_dst_reg_note (insn, REG_EQUAL,
4586                                     gen_rtx_UDIV (int_mode, op0, op1),
4587                                     quotient);
4588               }
4589             else                /* TRUNC_DIV, signed */
4590               {
4591                 unsigned HOST_WIDE_INT ml;
4592                 int post_shift;
4593                 rtx mlr;
4594                 HOST_WIDE_INT d = INTVAL (op1);
4595                 unsigned HOST_WIDE_INT abs_d;
4596
4597                 /* Not prepared to handle division/remainder by
4598                    0xffffffffffffffff8000000000000000 etc.  */
4599                 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4600                   break;
4601
4602                 /* Since d might be INT_MIN, we have to cast to
4603                    unsigned HOST_WIDE_INT before negating to avoid
4604                    undefined signed overflow.  */
4605                 abs_d = (d >= 0
4606                          ? (unsigned HOST_WIDE_INT) d
4607                          : - (unsigned HOST_WIDE_INT) d);
4608
4609                 /* n rem d = n rem -d */
4610                 if (rem_flag && d < 0)
4611                   {
4612                     d = abs_d;
4613                     op1 = gen_int_mode (abs_d, int_mode);
4614                   }
4615
4616                 if (d == 1)
4617                   quotient = op0;
4618                 else if (d == -1)
4619                   quotient = expand_unop (int_mode, neg_optab, op0,
4620                                           tquotient, 0);
4621                 else if (size <= HOST_BITS_PER_WIDE_INT
4622                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4623                   {
4624                     /* This case is not handled correctly below.  */
4625                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4626                                                 int_mode, 1, 1);
4627                     if (quotient == 0)
4628                       goto fail1;
4629                   }
4630                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4631                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4632                          && (rem_flag
4633                              ? smod_pow2_cheap (speed, int_mode)
4634                              : sdiv_pow2_cheap (speed, int_mode))
4635                          /* We assume that cheap metric is true if the
4636                             optab has an expander for this mode.  */
4637                          && ((optab_handler ((rem_flag ? smod_optab
4638                                               : sdiv_optab),
4639                                              int_mode)
4640                               != CODE_FOR_nothing)
4641                              || (optab_handler (sdivmod_optab, int_mode)
4642                                  != CODE_FOR_nothing)))
4643                   ;
4644                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4645                   {
4646                     if (rem_flag)
4647                       {
4648                         remainder = expand_smod_pow2 (int_mode, op0, d);
4649                         if (remainder)
4650                           return gen_lowpart (mode, remainder);
4651                       }
4652
4653                     if (sdiv_pow2_cheap (speed, int_mode)
4654                         && ((optab_handler (sdiv_optab, int_mode)
4655                              != CODE_FOR_nothing)
4656                             || (optab_handler (sdivmod_optab, int_mode)
4657                                 != CODE_FOR_nothing)))
4658                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4659                                                 int_mode, op0,
4660                                                 gen_int_mode (abs_d,
4661                                                               int_mode),
4662                                                 NULL_RTX, 0);
4663                     else
4664                       quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4665
4666                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4667                        negate the quotient.  */
4668                     if (d < 0)
4669                       {
4670                         insn = get_last_insn ();
4671                         if (insn != last
4672                             && abs_d < (HOST_WIDE_INT_1U
4673                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4674                           set_dst_reg_note (insn, REG_EQUAL,
4675                                             gen_rtx_DIV (int_mode, op0,
4676                                                          gen_int_mode
4677                                                            (abs_d,
4678                                                             int_mode)),
4679                                             quotient);
4680
4681                         quotient = expand_unop (int_mode, neg_optab,
4682                                                 quotient, quotient, 0);
4683                       }
4684                   }
4685                 else if (size <= HOST_BITS_PER_WIDE_INT)
4686                   {
4687                     choose_multiplier (abs_d, size, size - 1,
4688                                        &ml, &post_shift);
4689                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4690                       {
4691                         rtx t1, t2, t3;
4692
4693                         if (post_shift >= BITS_PER_WORD
4694                             || size - 1 >= BITS_PER_WORD)
4695                           goto fail1;
4696
4697                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4698                                       + shift_cost (speed, int_mode, size - 1)
4699                                       + add_cost (speed, int_mode));
4700                         t1 = expmed_mult_highpart
4701                           (int_mode, op0, gen_int_mode (ml, int_mode),
4702                            NULL_RTX, 0, max_cost - extra_cost);
4703                         if (t1 == 0)
4704                           goto fail1;
4705                         t2 = expand_shift
4706                           (RSHIFT_EXPR, int_mode, t1,
4707                            post_shift, NULL_RTX, 0);
4708                         t3 = expand_shift
4709                           (RSHIFT_EXPR, int_mode, op0,
4710                            size - 1, NULL_RTX, 0);
4711                         if (d < 0)
4712                           quotient
4713                             = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4714                                              tquotient);
4715                         else
4716                           quotient
4717                             = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4718                                              tquotient);
4719                       }
4720                     else
4721                       {
4722                         rtx t1, t2, t3, t4;
4723
4724                         if (post_shift >= BITS_PER_WORD
4725                             || size - 1 >= BITS_PER_WORD)
4726                           goto fail1;
4727
4728                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4729                         mlr = gen_int_mode (ml, int_mode);
4730                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4731                                       + shift_cost (speed, int_mode, size - 1)
4732                                       + 2 * add_cost (speed, int_mode));
4733                         t1 = expmed_mult_highpart (int_mode, op0, mlr,
4734                                                    NULL_RTX, 0,
4735                                                    max_cost - extra_cost);
4736                         if (t1 == 0)
4737                           goto fail1;
4738                         t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4739                                             NULL_RTX);
4740                         t3 = expand_shift
4741                           (RSHIFT_EXPR, int_mode, t2,
4742                            post_shift, NULL_RTX, 0);
4743                         t4 = expand_shift
4744                           (RSHIFT_EXPR, int_mode, op0,
4745                            size - 1, NULL_RTX, 0);
4746                         if (d < 0)
4747                           quotient
4748                             = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4749                                              tquotient);
4750                         else
4751                           quotient
4752                             = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4753                                              tquotient);
4754                       }
4755                   }
4756                 else            /* Too wide mode to use tricky code */
4757                   break;
4758
4759                 insn = get_last_insn ();
4760                 if (insn != last)
4761                   set_dst_reg_note (insn, REG_EQUAL,
4762                                     gen_rtx_DIV (int_mode, op0, op1),
4763                                     quotient);
4764               }
4765             break;
4766           }
4767       fail1:
4768         delete_insns_since (last);
4769         break;
4770
4771       case FLOOR_DIV_EXPR:
4772       case FLOOR_MOD_EXPR:
4773       /* We will come here only for signed operations.  */
4774         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4775           {
4776             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4777             int size = GET_MODE_BITSIZE (int_mode);
4778             unsigned HOST_WIDE_INT mh, ml;
4779             int pre_shift, post_shift;
4780             HOST_WIDE_INT d = INTVAL (op1);
4781
4782             if (d > 0)
4783               {
4784                 /* We could just as easily deal with negative constants here,
4785                    but it does not seem worth the trouble for GCC 2.6.  */
4786                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4787                   {
4788                     pre_shift = floor_log2 (d);
4789                     if (rem_flag)
4790                       {
4791                         unsigned HOST_WIDE_INT mask
4792                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4793                         remainder = expand_binop
4794                           (int_mode, and_optab, op0,
4795                            gen_int_mode (mask, int_mode),
4796                            remainder, 0, methods);
4797                         if (remainder)
4798                           return gen_lowpart (mode, remainder);
4799                       }
4800                     quotient = expand_shift
4801                       (RSHIFT_EXPR, int_mode, op0,
4802                        pre_shift, tquotient, 0);
4803                   }
4804                 else
4805                   {
4806                     rtx t1, t2, t3, t4;
4807
4808                     mh = choose_multiplier (d, size, size - 1,
4809                                             &ml, &post_shift);
4810                     gcc_assert (!mh);
4811
4812                     if (post_shift < BITS_PER_WORD
4813                         && size - 1 < BITS_PER_WORD)
4814                       {
4815                         t1 = expand_shift
4816                           (RSHIFT_EXPR, int_mode, op0,
4817                            size - 1, NULL_RTX, 0);
4818                         t2 = expand_binop (int_mode, xor_optab, op0, t1,
4819                                            NULL_RTX, 0, OPTAB_WIDEN);
4820                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4821                                       + shift_cost (speed, int_mode, size - 1)
4822                                       + 2 * add_cost (speed, int_mode));
4823                         t3 = expmed_mult_highpart
4824                           (int_mode, t2, gen_int_mode (ml, int_mode),
4825                            NULL_RTX, 1, max_cost - extra_cost);
4826                         if (t3 != 0)
4827                           {
4828                             t4 = expand_shift
4829                               (RSHIFT_EXPR, int_mode, t3,
4830                                post_shift, NULL_RTX, 1);
4831                             quotient = expand_binop (int_mode, xor_optab,
4832                                                      t4, t1, tquotient, 0,
4833                                                      OPTAB_WIDEN);
4834                           }
4835                       }
4836                   }
4837               }
4838             else
4839               {
4840                 rtx nsign, t1, t2, t3, t4;
4841                 t1 = force_operand (gen_rtx_PLUS (int_mode,
4842                                                   op0, constm1_rtx), NULL_RTX);
4843                 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4844                                    0, OPTAB_WIDEN);
4845                 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4846                                       size - 1, NULL_RTX, 0);
4847                 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4848                                     NULL_RTX);
4849                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4850                                     NULL_RTX, 0);
4851                 if (t4)
4852                   {
4853                     rtx t5;
4854                     t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4855                                       NULL_RTX, 0);
4856                     quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4857                                               tquotient);
4858                   }
4859               }
4860           }
4861
4862         if (quotient != 0)
4863           break;
4864         delete_insns_since (last);
4865
4866         /* Try using an instruction that produces both the quotient and
4867            remainder, using truncation.  We can easily compensate the quotient
4868            or remainder to get floor rounding, once we have the remainder.
4869            Notice that we compute also the final remainder value here,
4870            and return the result right away.  */
4871         if (target == 0 || GET_MODE (target) != compute_mode)
4872           target = gen_reg_rtx (compute_mode);
4873
4874         if (rem_flag)
4875           {
4876             remainder
4877               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4878             quotient = gen_reg_rtx (compute_mode);
4879           }
4880         else
4881           {
4882             quotient
4883               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4884             remainder = gen_reg_rtx (compute_mode);
4885           }
4886
4887         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4888                                  quotient, remainder, 0))
4889           {
4890             /* This could be computed with a branch-less sequence.
4891                Save that for later.  */
4892             rtx tem;
4893             rtx_code_label *label = gen_label_rtx ();
4894             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4895             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4896                                 NULL_RTX, 0, OPTAB_WIDEN);
4897             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4898             expand_dec (quotient, const1_rtx);
4899             expand_inc (remainder, op1);
4900             emit_label (label);
4901             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4902           }
4903
4904         /* No luck with division elimination or divmod.  Have to do it
4905            by conditionally adjusting op0 *and* the result.  */
4906         {
4907           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4908           rtx adjusted_op0;
4909           rtx tem;
4910
4911           quotient = gen_reg_rtx (compute_mode);
4912           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4913           label1 = gen_label_rtx ();
4914           label2 = gen_label_rtx ();
4915           label3 = gen_label_rtx ();
4916           label4 = gen_label_rtx ();
4917           label5 = gen_label_rtx ();
4918           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4919           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4920           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4921                               quotient, 0, methods);
4922           if (tem != quotient)
4923             emit_move_insn (quotient, tem);
4924           emit_jump_insn (targetm.gen_jump (label5));
4925           emit_barrier ();
4926           emit_label (label1);
4927           expand_inc (adjusted_op0, const1_rtx);
4928           emit_jump_insn (targetm.gen_jump (label4));
4929           emit_barrier ();
4930           emit_label (label2);
4931           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4932           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4933                               quotient, 0, methods);
4934           if (tem != quotient)
4935             emit_move_insn (quotient, tem);
4936           emit_jump_insn (targetm.gen_jump (label5));
4937           emit_barrier ();
4938           emit_label (label3);
4939           expand_dec (adjusted_op0, const1_rtx);
4940           emit_label (label4);
4941           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4942                               quotient, 0, methods);
4943           if (tem != quotient)
4944             emit_move_insn (quotient, tem);
4945           expand_dec (quotient, const1_rtx);
4946           emit_label (label5);
4947         }
4948         break;
4949
4950       case CEIL_DIV_EXPR:
4951       case CEIL_MOD_EXPR:
4952         if (unsignedp)
4953           {
4954             if (op1_is_constant
4955                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4956                 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4957                     || INTVAL (op1) >= 0))
4958               {
4959                 scalar_int_mode int_mode
4960                   = as_a <scalar_int_mode> (compute_mode);
4961                 rtx t1, t2, t3;
4962                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4963                 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4964                                    floor_log2 (d), tquotient, 1);
4965                 t2 = expand_binop (int_mode, and_optab, op0,
4966                                    gen_int_mode (d - 1, int_mode),
4967                                    NULL_RTX, 1, methods);
4968                 t3 = gen_reg_rtx (int_mode);
4969                 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4970                 if (t3 == 0)
4971                   {
4972                     rtx_code_label *lab;
4973                     lab = gen_label_rtx ();
4974                     do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4975                     expand_inc (t1, const1_rtx);
4976                     emit_label (lab);
4977                     quotient = t1;
4978                   }
4979                 else
4980                   quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4981                                             tquotient);
4982                 break;
4983               }
4984
4985             /* Try using an instruction that produces both the quotient and
4986                remainder, using truncation.  We can easily compensate the
4987                quotient or remainder to get ceiling rounding, once we have the
4988                remainder.  Notice that we compute also the final remainder
4989                value here, and return the result right away.  */
4990             if (target == 0 || GET_MODE (target) != compute_mode)
4991               target = gen_reg_rtx (compute_mode);
4992
4993             if (rem_flag)
4994               {
4995                 remainder = (REG_P (target)
4996                              ? target : gen_reg_rtx (compute_mode));
4997                 quotient = gen_reg_rtx (compute_mode);
4998               }
4999             else
5000               {
5001                 quotient = (REG_P (target)
5002                             ? target : gen_reg_rtx (compute_mode));
5003                 remainder = gen_reg_rtx (compute_mode);
5004               }
5005
5006             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
5007                                      remainder, 1))
5008               {
5009                 /* This could be computed with a branch-less sequence.
5010                    Save that for later.  */
5011                 rtx_code_label *label = gen_label_rtx ();
5012                 do_cmp_and_jump (remainder, const0_rtx, EQ,
5013                                  compute_mode, label);
5014                 expand_inc (quotient, const1_rtx);
5015                 expand_dec (remainder, op1);
5016                 emit_label (label);
5017                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5018               }
5019
5020             /* No luck with division elimination or divmod.  Have to do it
5021                by conditionally adjusting op0 *and* the result.  */
5022             {
5023               rtx_code_label *label1, *label2;
5024               rtx adjusted_op0, tem;
5025
5026               quotient = gen_reg_rtx (compute_mode);
5027               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5028               label1 = gen_label_rtx ();
5029               label2 = gen_label_rtx ();
5030               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
5031                                compute_mode, label1);
5032               emit_move_insn  (quotient, const0_rtx);
5033               emit_jump_insn (targetm.gen_jump (label2));
5034               emit_barrier ();
5035               emit_label (label1);
5036               expand_dec (adjusted_op0, const1_rtx);
5037               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
5038                                   quotient, 1, methods);
5039               if (tem != quotient)
5040                 emit_move_insn (quotient, tem);
5041               expand_inc (quotient, const1_rtx);
5042               emit_label (label2);
5043             }
5044           }
5045         else /* signed */
5046           {
5047             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
5048                 && INTVAL (op1) >= 0)
5049               {
5050                 /* This is extremely similar to the code for the unsigned case
5051                    above.  For 2.7 we should merge these variants, but for
5052                    2.6.1 I don't want to touch the code for unsigned since that
5053                    gets used in C.  The signed case will only be used by other
5054                    languages (Ada).  */
5055
5056                 rtx t1, t2, t3;
5057                 unsigned HOST_WIDE_INT d = INTVAL (op1);
5058                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5059                                    floor_log2 (d), tquotient, 0);
5060                 t2 = expand_binop (compute_mode, and_optab, op0,
5061                                    gen_int_mode (d - 1, compute_mode),
5062                                    NULL_RTX, 1, methods);
5063                 t3 = gen_reg_rtx (compute_mode);
5064                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5065                                       compute_mode, 1, 1);
5066                 if (t3 == 0)
5067                   {
5068                     rtx_code_label *lab;
5069                     lab = gen_label_rtx ();
5070                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5071                     expand_inc (t1, const1_rtx);
5072                     emit_label (lab);
5073                     quotient = t1;
5074                   }
5075                 else
5076                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
5077                                                           t1, t3),
5078                                             tquotient);
5079                 break;
5080               }
5081
5082             /* Try using an instruction that produces both the quotient and
5083                remainder, using truncation.  We can easily compensate the
5084                quotient or remainder to get ceiling rounding, once we have the
5085                remainder.  Notice that we compute also the final remainder
5086                value here, and return the result right away.  */
5087             if (target == 0 || GET_MODE (target) != compute_mode)
5088               target = gen_reg_rtx (compute_mode);
5089             if (rem_flag)
5090               {
5091                 remainder= (REG_P (target)
5092                             ? target : gen_reg_rtx (compute_mode));
5093                 quotient = gen_reg_rtx (compute_mode);
5094               }
5095             else
5096               {
5097                 quotient = (REG_P (target)
5098                             ? target : gen_reg_rtx (compute_mode));
5099                 remainder = gen_reg_rtx (compute_mode);
5100               }
5101
5102             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5103                                      remainder, 0))
5104               {
5105                 /* This could be computed with a branch-less sequence.
5106                    Save that for later.  */
5107                 rtx tem;
5108                 rtx_code_label *label = gen_label_rtx ();
5109                 do_cmp_and_jump (remainder, const0_rtx, EQ,
5110                                  compute_mode, label);
5111                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5112                                     NULL_RTX, 0, OPTAB_WIDEN);
5113                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5114                 expand_inc (quotient, const1_rtx);
5115                 expand_dec (remainder, op1);
5116                 emit_label (label);
5117                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5118               }
5119
5120             /* No luck with division elimination or divmod.  Have to do it
5121                by conditionally adjusting op0 *and* the result.  */
5122             {
5123               rtx_code_label *label1, *label2, *label3, *label4, *label5;
5124               rtx adjusted_op0;
5125               rtx tem;
5126
5127               quotient = gen_reg_rtx (compute_mode);
5128               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5129               label1 = gen_label_rtx ();
5130               label2 = gen_label_rtx ();
5131               label3 = gen_label_rtx ();
5132               label4 = gen_label_rtx ();
5133               label5 = gen_label_rtx ();
5134               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5135               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5136                                compute_mode, label1);
5137               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5138                                   quotient, 0, methods);
5139               if (tem != quotient)
5140                 emit_move_insn (quotient, tem);
5141               emit_jump_insn (targetm.gen_jump (label5));
5142               emit_barrier ();
5143               emit_label (label1);
5144               expand_dec (adjusted_op0, const1_rtx);
5145               emit_jump_insn (targetm.gen_jump (label4));
5146               emit_barrier ();
5147               emit_label (label2);
5148               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5149                                compute_mode, label3);
5150               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5151                                   quotient, 0, methods);
5152               if (tem != quotient)
5153                 emit_move_insn (quotient, tem);
5154               emit_jump_insn (targetm.gen_jump (label5));
5155               emit_barrier ();
5156               emit_label (label3);
5157               expand_inc (adjusted_op0, const1_rtx);
5158               emit_label (label4);
5159               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5160                                   quotient, 0, methods);
5161               if (tem != quotient)
5162                 emit_move_insn (quotient, tem);
5163               expand_inc (quotient, const1_rtx);
5164               emit_label (label5);
5165             }
5166           }
5167         break;
5168
5169       case EXACT_DIV_EXPR:
5170         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5171           {
5172             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5173             int size = GET_MODE_BITSIZE (int_mode);
5174             HOST_WIDE_INT d = INTVAL (op1);
5175             unsigned HOST_WIDE_INT ml;
5176             int pre_shift;
5177             rtx t1;
5178
5179             pre_shift = ctz_or_zero (d);
5180             ml = invert_mod2n (d >> pre_shift, size);
5181             t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5182                                pre_shift, NULL_RTX, unsignedp);
5183             quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5184                                     NULL_RTX, 1);
5185
5186             insn = get_last_insn ();
5187             set_dst_reg_note (insn, REG_EQUAL,
5188                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5189                                               int_mode, op0, op1),
5190                               quotient);
5191           }
5192         break;
5193
5194       case ROUND_DIV_EXPR:
5195       case ROUND_MOD_EXPR:
5196         if (unsignedp)
5197           {
5198             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5199             rtx tem;
5200             rtx_code_label *label;
5201             label = gen_label_rtx ();
5202             quotient = gen_reg_rtx (int_mode);
5203             remainder = gen_reg_rtx (int_mode);
5204             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5205               {
5206                 rtx tem;
5207                 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5208                                          quotient, 1, methods);
5209                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5210                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5211                                           remainder, 1, methods);
5212               }
5213             tem = plus_constant (int_mode, op1, -1);
5214             tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5215             do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5216             expand_inc (quotient, const1_rtx);
5217             expand_dec (remainder, op1);
5218             emit_label (label);
5219           }
5220         else
5221           {
5222             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5223             int size = GET_MODE_BITSIZE (int_mode);
5224             rtx abs_rem, abs_op1, tem, mask;
5225             rtx_code_label *label;
5226             label = gen_label_rtx ();
5227             quotient = gen_reg_rtx (int_mode);
5228             remainder = gen_reg_rtx (int_mode);
5229             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5230               {
5231                 rtx tem;
5232                 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5233                                          quotient, 0, methods);
5234                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5235                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5236                                           remainder, 0, methods);
5237               }
5238             abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5239             abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5240             tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5241                                 1, NULL_RTX, 1);
5242             do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5243             tem = expand_binop (int_mode, xor_optab, op0, op1,
5244                                 NULL_RTX, 0, OPTAB_WIDEN);
5245             mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5246                                  size - 1, NULL_RTX, 0);
5247             tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5248                                 NULL_RTX, 0, OPTAB_WIDEN);
5249             tem = expand_binop (int_mode, sub_optab, tem, mask,
5250                                 NULL_RTX, 0, OPTAB_WIDEN);
5251             expand_inc (quotient, tem);
5252             tem = expand_binop (int_mode, xor_optab, mask, op1,
5253                                 NULL_RTX, 0, OPTAB_WIDEN);
5254             tem = expand_binop (int_mode, sub_optab, tem, mask,
5255                                 NULL_RTX, 0, OPTAB_WIDEN);
5256             expand_dec (remainder, tem);
5257             emit_label (label);
5258           }
5259         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5260
5261       default:
5262         gcc_unreachable ();
5263       }
5264
5265   if (quotient == 0)
5266     {
5267       if (target && GET_MODE (target) != compute_mode)
5268         target = 0;
5269
5270       if (rem_flag)
5271         {
5272           /* Try to produce the remainder without producing the quotient.
5273              If we seem to have a divmod pattern that does not require widening,
5274              don't try widening here.  We should really have a WIDEN argument
5275              to expand_twoval_binop, since what we'd really like to do here is
5276              1) try a mod insn in compute_mode
5277              2) try a divmod insn in compute_mode
5278              3) try a div insn in compute_mode and multiply-subtract to get
5279                 remainder
5280              4) try the same things with widening allowed.  */
5281           remainder
5282             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5283                                  op0, op1, target,
5284                                  unsignedp,
5285                                  ((optab_handler (optab2, compute_mode)
5286                                    != CODE_FOR_nothing)
5287                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5288           if (remainder == 0)
5289             {
5290               /* No luck there.  Can we do remainder and divide at once
5291                  without a library call?  */
5292               remainder = gen_reg_rtx (compute_mode);
5293               if (! expand_twoval_binop ((unsignedp
5294                                           ? udivmod_optab
5295                                           : sdivmod_optab),
5296                                          op0, op1,
5297                                          NULL_RTX, remainder, unsignedp))
5298                 remainder = 0;
5299             }
5300
5301           if (remainder)
5302             return gen_lowpart (mode, remainder);
5303         }
5304
5305       /* Produce the quotient.  Try a quotient insn, but not a library call.
5306          If we have a divmod in this mode, use it in preference to widening
5307          the div (for this test we assume it will not fail). Note that optab2
5308          is set to the one of the two optabs that the call below will use.  */
5309       quotient
5310         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5311                              op0, op1, rem_flag ? NULL_RTX : target,
5312                              unsignedp,
5313                              ((optab_handler (optab2, compute_mode)
5314                                != CODE_FOR_nothing)
5315                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5316
5317       if (quotient == 0)
5318         {
5319           /* No luck there.  Try a quotient-and-remainder insn,
5320              keeping the quotient alone.  */
5321           quotient = gen_reg_rtx (compute_mode);
5322           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5323                                      op0, op1,
5324                                      quotient, NULL_RTX, unsignedp))
5325             {
5326               quotient = 0;
5327               if (! rem_flag)
5328                 /* Still no luck.  If we are not computing the remainder,
5329                    use a library call for the quotient.  */
5330                 quotient = sign_expand_binop (compute_mode,
5331                                               udiv_optab, sdiv_optab,
5332                                               op0, op1, target,
5333                                               unsignedp, methods);
5334             }
5335         }
5336     }
5337
5338   if (rem_flag)
5339     {
5340       if (target && GET_MODE (target) != compute_mode)
5341         target = 0;
5342
5343       if (quotient == 0)
5344         {
5345           /* No divide instruction either.  Use library for remainder.  */
5346           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5347                                          op0, op1, target,
5348                                          unsignedp, methods);
5349           /* No remainder function.  Try a quotient-and-remainder
5350              function, keeping the remainder.  */
5351           if (!remainder
5352               && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5353             {
5354               remainder = gen_reg_rtx (compute_mode);
5355               if (!expand_twoval_binop_libfunc
5356                   (unsignedp ? udivmod_optab : sdivmod_optab,
5357                    op0, op1,
5358                    NULL_RTX, remainder,
5359                    unsignedp ? UMOD : MOD))
5360                 remainder = NULL_RTX;
5361             }
5362         }
5363       else
5364         {
5365           /* We divided.  Now finish doing X - Y * (X / Y).  */
5366           remainder = expand_mult (compute_mode, quotient, op1,
5367                                    NULL_RTX, unsignedp);
5368           remainder = expand_binop (compute_mode, sub_optab, op0,
5369                                     remainder, target, unsignedp,
5370                                     methods);
5371         }
5372     }
5373
5374   if (methods != OPTAB_LIB_WIDEN
5375       && (rem_flag ? remainder : quotient) == NULL_RTX)
5376     return NULL_RTX;
5377
5378   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5379 }
5380 \f
5381 /* Return a tree node with data type TYPE, describing the value of X.
5382    Usually this is an VAR_DECL, if there is no obvious better choice.
5383    X may be an expression, however we only support those expressions
5384    generated by loop.c.  */
5385
5386 tree
5387 make_tree (tree type, rtx x)
5388 {
5389   tree t;
5390
5391   switch (GET_CODE (x))
5392     {
5393     case CONST_INT:
5394     case CONST_WIDE_INT:
5395       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5396       return t;
5397
5398     case CONST_DOUBLE:
5399       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5400       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5401         t = wide_int_to_tree (type,
5402                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5403                                                     HOST_BITS_PER_WIDE_INT * 2));
5404       else
5405         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5406
5407       return t;
5408
5409     case CONST_VECTOR:
5410       {
5411         unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5412         unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5413         tree itype = TREE_TYPE (type);
5414
5415         /* Build a tree with vector elements.  */
5416         tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5417         unsigned int count = elts.encoded_nelts ();
5418         for (unsigned int i = 0; i < count; ++i)
5419           {
5420             rtx elt = CONST_VECTOR_ELT (x, i);
5421             elts.quick_push (make_tree (itype, elt));
5422           }
5423
5424         return elts.build ();
5425       }
5426
5427     case PLUS:
5428       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5429                           make_tree (type, XEXP (x, 1)));
5430
5431     case MINUS:
5432       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5433                           make_tree (type, XEXP (x, 1)));
5434
5435     case NEG:
5436       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5437
5438     case MULT:
5439       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5440                           make_tree (type, XEXP (x, 1)));
5441
5442     case ASHIFT:
5443       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5444                           make_tree (type, XEXP (x, 1)));
5445
5446     case LSHIFTRT:
5447       t = unsigned_type_for (type);
5448       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5449                                          make_tree (t, XEXP (x, 0)),
5450                                          make_tree (type, XEXP (x, 1))));
5451
5452     case ASHIFTRT:
5453       t = signed_type_for (type);
5454       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5455                                          make_tree (t, XEXP (x, 0)),
5456                                          make_tree (type, XEXP (x, 1))));
5457
5458     case DIV:
5459       if (TREE_CODE (type) != REAL_TYPE)
5460         t = signed_type_for (type);
5461       else
5462         t = type;
5463
5464       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5465                                          make_tree (t, XEXP (x, 0)),
5466                                          make_tree (t, XEXP (x, 1))));
5467     case UDIV:
5468       t = unsigned_type_for (type);
5469       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5470                                          make_tree (t, XEXP (x, 0)),
5471                                          make_tree (t, XEXP (x, 1))));
5472
5473     case SIGN_EXTEND:
5474     case ZERO_EXTEND:
5475       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5476                                           GET_CODE (x) == ZERO_EXTEND);
5477       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5478
5479     case CONST:
5480       return make_tree (type, XEXP (x, 0));
5481
5482     case SYMBOL_REF:
5483       t = SYMBOL_REF_DECL (x);
5484       if (t)
5485         return fold_convert (type, build_fold_addr_expr (t));
5486       /* fall through.  */
5487
5488     default:
5489       if (CONST_POLY_INT_P (x))
5490         return wide_int_to_tree (t, const_poly_int_value (x));
5491
5492       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5493
5494       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5495          address mode to pointer mode.  */
5496       if (POINTER_TYPE_P (type))
5497         x = convert_memory_address_addr_space
5498           (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5499
5500       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5501          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5502       t->decl_with_rtl.rtl = x;
5503
5504       return t;
5505     }
5506 }
5507 \f
5508 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5509    and returning TARGET.
5510
5511    If TARGET is 0, a pseudo-register or constant is returned.  */
5512
5513 rtx
5514 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5515 {
5516   rtx tem = 0;
5517
5518   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5519     tem = simplify_binary_operation (AND, mode, op0, op1);
5520   if (tem == 0)
5521     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5522
5523   if (target == 0)
5524     target = tem;
5525   else if (tem != target)
5526     emit_move_insn (target, tem);
5527   return target;
5528 }
5529
5530 /* Helper function for emit_store_flag.  */
5531 rtx
5532 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5533              machine_mode mode, machine_mode compare_mode,
5534              int unsignedp, rtx x, rtx y, int normalizep,
5535              machine_mode target_mode)
5536 {
5537   class expand_operand ops[4];
5538   rtx op0, comparison, subtarget;
5539   rtx_insn *last;
5540   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5541   scalar_int_mode int_target_mode;
5542
5543   last = get_last_insn ();
5544   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5545   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5546   if (!x || !y)
5547     {
5548       delete_insns_since (last);
5549       return NULL_RTX;
5550     }
5551
5552   if (target_mode == VOIDmode)
5553     int_target_mode = result_mode;
5554   else
5555     int_target_mode = as_a <scalar_int_mode> (target_mode);
5556   if (!target)
5557     target = gen_reg_rtx (int_target_mode);
5558
5559   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5560
5561   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5562   create_fixed_operand (&ops[1], comparison);
5563   create_fixed_operand (&ops[2], x);
5564   create_fixed_operand (&ops[3], y);
5565   if (!maybe_expand_insn (icode, 4, ops))
5566     {
5567       delete_insns_since (last);
5568       return NULL_RTX;
5569     }
5570   subtarget = ops[0].value;
5571
5572   /* If we are converting to a wider mode, first convert to
5573      INT_TARGET_MODE, then normalize.  This produces better combining
5574      opportunities on machines that have a SIGN_EXTRACT when we are
5575      testing a single bit.  This mostly benefits the 68k.
5576
5577      If STORE_FLAG_VALUE does not have the sign bit set when
5578      interpreted in MODE, we can do this conversion as unsigned, which
5579      is usually more efficient.  */
5580   if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5581     {
5582       gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5583                   || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5584
5585       bool unsignedp = (STORE_FLAG_VALUE >= 0);
5586       convert_move (target, subtarget, unsignedp);
5587
5588       op0 = target;
5589       result_mode = int_target_mode;
5590     }
5591   else
5592     op0 = subtarget;
5593
5594   /* If we want to keep subexpressions around, don't reuse our last
5595      target.  */
5596   if (optimize)
5597     subtarget = 0;
5598
5599   /* Now normalize to the proper value in MODE.  Sometimes we don't
5600      have to do anything.  */
5601   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5602     ;
5603   /* STORE_FLAG_VALUE might be the most negative number, so write
5604      the comparison this way to avoid a compiler-time warning.  */
5605   else if (- normalizep == STORE_FLAG_VALUE)
5606     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5607
5608   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5609      it hard to use a value of just the sign bit due to ANSI integer
5610      constant typing rules.  */
5611   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5612     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5613                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5614                         normalizep == 1);
5615   else
5616     {
5617       gcc_assert (STORE_FLAG_VALUE & 1);
5618
5619       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5620       if (normalizep == -1)
5621         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5622     }
5623
5624   /* If we were converting to a smaller mode, do the conversion now.  */
5625   if (int_target_mode != result_mode)
5626     {
5627       convert_move (target, op0, 0);
5628       return target;
5629     }
5630   else
5631     return op0;
5632 }
5633
5634
5635 /* A subroutine of emit_store_flag only including "tricks" that do not
5636    need a recursive call.  These are kept separate to avoid infinite
5637    loops.  */
5638
5639 static rtx
5640 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5641                    machine_mode mode, int unsignedp, int normalizep,
5642                    machine_mode target_mode)
5643 {
5644   rtx subtarget;
5645   enum insn_code icode;
5646   machine_mode compare_mode;
5647   enum mode_class mclass;
5648
5649   if (unsignedp)
5650     code = unsigned_condition (code);
5651
5652   /* If one operand is constant, make it the second one.  Only do this
5653      if the other operand is not constant as well.  */
5654
5655   if (swap_commutative_operands_p (op0, op1))
5656     {
5657       std::swap (op0, op1);
5658       code = swap_condition (code);
5659     }
5660
5661   if (mode == VOIDmode)
5662     mode = GET_MODE (op0);
5663
5664   if (CONST_SCALAR_INT_P (op1))
5665     canonicalize_comparison (mode, &code, &op1);
5666
5667   /* For some comparisons with 1 and -1, we can convert this to
5668      comparisons with zero.  This will often produce more opportunities for
5669      store-flag insns.  */
5670
5671   switch (code)
5672     {
5673     case LT:
5674       if (op1 == const1_rtx)
5675         op1 = const0_rtx, code = LE;
5676       break;
5677     case LE:
5678       if (op1 == constm1_rtx)
5679         op1 = const0_rtx, code = LT;
5680       break;
5681     case GE:
5682       if (op1 == const1_rtx)
5683         op1 = const0_rtx, code = GT;
5684       break;
5685     case GT:
5686       if (op1 == constm1_rtx)
5687         op1 = const0_rtx, code = GE;
5688       break;
5689     case GEU:
5690       if (op1 == const1_rtx)
5691         op1 = const0_rtx, code = NE;
5692       break;
5693     case LTU:
5694       if (op1 == const1_rtx)
5695         op1 = const0_rtx, code = EQ;
5696       break;
5697     default:
5698       break;
5699     }
5700
5701   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5702      complement of A (for GE) and shifting the sign bit to the low bit.  */
5703   scalar_int_mode int_mode;
5704   if (op1 == const0_rtx && (code == LT || code == GE)
5705       && is_int_mode (mode, &int_mode)
5706       && (normalizep || STORE_FLAG_VALUE == 1
5707           || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5708     {
5709       scalar_int_mode int_target_mode;
5710       subtarget = target;
5711
5712       if (!target)
5713         int_target_mode = int_mode;
5714       else
5715         {
5716           /* If the result is to be wider than OP0, it is best to convert it
5717              first.  If it is to be narrower, it is *incorrect* to convert it
5718              first.  */
5719           int_target_mode = as_a <scalar_int_mode> (target_mode);
5720           if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5721             {
5722               op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5723               int_mode = int_target_mode;
5724             }
5725         }
5726
5727       if (int_target_mode != int_mode)
5728         subtarget = 0;
5729
5730       if (code == GE)
5731         op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5732                            ((STORE_FLAG_VALUE == 1 || normalizep)
5733                             ? 0 : subtarget), 0);
5734
5735       if (STORE_FLAG_VALUE == 1 || normalizep)
5736         /* If we are supposed to produce a 0/1 value, we want to do
5737            a logical shift from the sign bit to the low-order bit; for
5738            a -1/0 value, we do an arithmetic shift.  */
5739         op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5740                             GET_MODE_BITSIZE (int_mode) - 1,
5741                             subtarget, normalizep != -1);
5742
5743       if (int_mode != int_target_mode)
5744         op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5745
5746       return op0;
5747     }
5748
5749   /* Next try expanding this via the backend's cstore<mode>4.  */
5750   mclass = GET_MODE_CLASS (mode);
5751   FOR_EACH_WIDER_MODE_FROM (compare_mode, mode)
5752     {
5753      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5754      icode = optab_handler (cstore_optab, optab_mode);
5755      if (icode != CODE_FOR_nothing)
5756         {
5757           do_pending_stack_adjust ();
5758           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5759                                  unsignedp, op0, op1, normalizep, target_mode);
5760           if (tem)
5761             return tem;
5762
5763           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5764             {
5765               enum rtx_code scode = swap_condition (code);
5766
5767               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5768                                  unsignedp, op1, op0, normalizep, target_mode);
5769               if (tem)
5770                 return tem;
5771             }
5772           break;
5773         }
5774     }
5775
5776   /* If we are comparing a double-word integer with zero or -1, we can
5777      convert the comparison into one involving a single word.  */
5778   if (is_int_mode (mode, &int_mode)
5779       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5780       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5781     {
5782       rtx tem;
5783       if ((code == EQ || code == NE)
5784           && (op1 == const0_rtx || op1 == constm1_rtx))
5785         {
5786           rtx op00, op01;
5787
5788           /* Do a logical OR or AND of the two words and compare the
5789              result.  */
5790           op00 = force_subreg (word_mode, op0, int_mode, 0);
5791           op01 = force_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5792           tem = expand_binop (word_mode,
5793                               op1 == const0_rtx ? ior_optab : and_optab,
5794                               op00, op01, NULL_RTX, unsignedp,
5795                               OPTAB_DIRECT);
5796
5797           if (tem != 0)
5798             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5799                                    unsignedp, normalizep);
5800         }
5801       else if ((code == LT || code == GE) && op1 == const0_rtx)
5802         {
5803           rtx op0h;
5804
5805           /* If testing the sign bit, can just test on high word.  */
5806           op0h = force_highpart_subreg (word_mode, op0, int_mode);
5807           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5808                                  unsignedp, normalizep);
5809         }
5810       else
5811         tem = NULL_RTX;
5812
5813       if (tem)
5814         {
5815           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5816             return tem;
5817           if (!target)
5818             target = gen_reg_rtx (target_mode);
5819
5820           convert_move (target, tem,
5821                         !val_signbit_known_set_p (word_mode,
5822                                                   (normalizep ? normalizep
5823                                                    : STORE_FLAG_VALUE)));
5824           return target;
5825         }
5826     }
5827
5828   return 0;
5829 }
5830
5831 /* Subroutine of emit_store_flag that handles cases in which the operands
5832    are scalar integers.  SUBTARGET is the target to use for temporary
5833    operations and TRUEVAL is the value to store when the condition is
5834    true.  All other arguments are as for emit_store_flag.  */
5835
5836 rtx
5837 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5838                      rtx op1, scalar_int_mode mode, int unsignedp,
5839                      int normalizep, rtx trueval)
5840 {
5841   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5842   rtx_insn *last = get_last_insn ();
5843
5844   /* If this is an equality comparison of integers, we can try to exclusive-or
5845      (or subtract) the two operands and use a recursive call to try the
5846      comparison with zero.  Don't do any of these cases if branches are
5847      very cheap.  */
5848
5849   if ((code == EQ || code == NE) && op1 != const0_rtx)
5850     {
5851       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5852                               OPTAB_WIDEN);
5853
5854       if (tem == 0)
5855         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5856                             OPTAB_WIDEN);
5857       if (tem != 0)
5858         tem = emit_store_flag (target, code, tem, const0_rtx,
5859                                mode, unsignedp, normalizep);
5860       if (tem != 0)
5861         return tem;
5862
5863       delete_insns_since (last);
5864     }
5865
5866   /* For integer comparisons, try the reverse comparison.  However, for
5867      small X and if we'd have anyway to extend, implementing "X != 0"
5868      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5869   rtx_code rcode = reverse_condition (code);
5870   if (can_compare_p (rcode, mode, ccp_store_flag)
5871       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5872             && code == NE
5873             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5874             && op1 == const0_rtx))
5875     {
5876       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5877                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5878
5879       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5880       if (want_add
5881           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5882                        optimize_insn_for_speed_p ()) == 0)
5883         {
5884           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5885                                        STORE_FLAG_VALUE, target_mode);
5886           if (tem != 0)
5887             tem = expand_binop (target_mode, add_optab, tem,
5888                                 gen_int_mode (normalizep, target_mode),
5889                                 target, 0, OPTAB_WIDEN);
5890           if (tem != 0)
5891             return tem;
5892         }
5893       else if (!want_add
5894                && rtx_cost (trueval, mode, XOR, 1,
5895                             optimize_insn_for_speed_p ()) == 0)
5896         {
5897           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5898                                        normalizep, target_mode);
5899           if (tem != 0)
5900             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5901                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5902           if (tem != 0)
5903             return tem;
5904         }
5905
5906       delete_insns_since (last);
5907     }
5908
5909   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5910      the constant zero.  Reject all other comparisons at this point.  Only
5911      do LE and GT if branches are expensive since they are expensive on
5912      2-operand machines.  */
5913
5914   if (op1 != const0_rtx
5915       || (code != EQ && code != NE
5916           && (BRANCH_COST (optimize_insn_for_speed_p (),
5917                            false) <= 1 || (code != LE && code != GT))))
5918     return 0;
5919
5920   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5921      do the necessary operation below.  */
5922
5923   rtx tem = 0;
5924
5925   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5926      the sign bit set.  */
5927
5928   if (code == LE)
5929     {
5930       /* This is destructive, so SUBTARGET can't be OP0.  */
5931       if (rtx_equal_p (subtarget, op0))
5932         subtarget = 0;
5933
5934       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5935                           OPTAB_WIDEN);
5936       if (tem)
5937         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5938                             OPTAB_WIDEN);
5939     }
5940
5941   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5942      number of bits in the mode of OP0, minus one.  */
5943
5944   if (code == GT)
5945     {
5946       if (rtx_equal_p (subtarget, op0))
5947         subtarget = 0;
5948
5949       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5950                                 GET_MODE_BITSIZE (mode) - 1,
5951                                 subtarget, 0);
5952       if (tem)
5953         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5954                             OPTAB_WIDEN);
5955     }
5956
5957   if (code == EQ || code == NE)
5958     {
5959       /* For EQ or NE, one way to do the comparison is to apply an operation
5960          that converts the operand into a positive number if it is nonzero
5961          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5962          for NE we negate.  This puts the result in the sign bit.  Then we
5963          normalize with a shift, if needed.
5964
5965          Two operations that can do the above actions are ABS and FFS, so try
5966          them.  If that doesn't work, and MODE is smaller than a full word,
5967          we can use zero-extension to the wider mode (an unsigned conversion)
5968          as the operation.  */
5969
5970       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5971          that is compensated by the subsequent overflow when subtracting
5972          one / negating.  */
5973
5974       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5975         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5976       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5977         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5978       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5979         {
5980           tem = convert_modes (word_mode, mode, op0, 1);
5981           mode = word_mode;
5982         }
5983
5984       if (tem != 0)
5985         {
5986           if (code == EQ)
5987             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5988                                 0, OPTAB_WIDEN);
5989           else
5990             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5991         }
5992
5993       /* If we couldn't do it that way, for NE we can "or" the two's complement
5994          of the value with itself.  For EQ, we take the one's complement of
5995          that "or", which is an extra insn, so we only handle EQ if branches
5996          are expensive.  */
5997
5998       if (tem == 0
5999           && (code == NE
6000               || BRANCH_COST (optimize_insn_for_speed_p (),
6001                               false) > 1))
6002         {
6003           if (rtx_equal_p (subtarget, op0))
6004             subtarget = 0;
6005
6006           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
6007           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
6008                               OPTAB_WIDEN);
6009
6010           if (tem && code == EQ)
6011             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
6012         }
6013     }
6014
6015   if (tem && normalizep)
6016     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
6017                               GET_MODE_BITSIZE (mode) - 1,
6018                               subtarget, normalizep == 1);
6019
6020   if (tem)
6021     {
6022       if (!target)
6023         ;
6024       else if (GET_MODE (tem) != target_mode)
6025         {
6026           convert_move (target, tem, 0);
6027           tem = target;
6028         }
6029       else if (!subtarget)
6030         {
6031           emit_move_insn (target, tem);
6032           tem = target;
6033         }
6034     }
6035   else
6036     delete_insns_since (last);
6037
6038   return tem;
6039 }
6040
6041 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
6042    and storing in TARGET.  Normally return TARGET.
6043    Return 0 if that cannot be done.
6044
6045    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
6046    it is VOIDmode, they cannot both be CONST_INT.
6047
6048    UNSIGNEDP is for the case where we have to widen the operands
6049    to perform the operation.  It says to use zero-extension.
6050
6051    NORMALIZEP is 1 if we should convert the result to be either zero
6052    or one.  NORMALIZEP is -1 if we should convert the result to be
6053    either zero or -1.  If NORMALIZEP is zero, the result will be left
6054    "raw" out of the scc insn.  */
     /* Overview of the attempts made below, in order:
        - return NULL_RTX at once if both operands are constants;
        - ask emit_store_flag_1 for the comparison exactly as given;
        - give up if BRANCH_COST is 0, or if a raw result was requested and
          STORE_FLAG_VALUE is neither 1, -1 nor the sign bit of MODE;
        - for MODE_FLOAT modes, try the reversed comparison and then a split
          comparison combined with a conditional move;
        - for scalar integer modes, defer to emit_store_flag_int.  */
6055
6056 rtx
6057 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6058                  machine_mode mode, int unsignedp, int normalizep)
6059 {
6060   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6061   enum rtx_code rcode;
6062   rtx subtarget;
6063   rtx tem, trueval;
6064   rtx_insn *last;
6065
6066   /* If we compare constants, we shouldn't use a store-flag operation,
6067      but a constant load.  We can get there via the vanilla route that
6068      usually generates a compare-branch sequence, but will in this case
6069      fold the comparison to a constant, and thus elide the branch.  */
6070   if (CONSTANT_P (op0) && CONSTANT_P (op1))
6071     return NULL_RTX;
6072
       /* First choice: a store-flag sequence for the comparison exactly as
          requested.  */
6073   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6074                            target_mode);
6075   if (tem)
6076     return tem;
6077
6078   /* If we reached here, we can't do this with a scc insn, however there
6079      are some comparisons that can be done in other ways.  Don't do any
6080      of these cases if branches are very cheap.  */
6081   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6082     return 0;
6083
6084   /* See what we need to return.  We can only return a 1, -1, or the
6085      sign bit.  */
6086
6087   if (normalizep == 0)
6088     {
6089       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6090         normalizep = STORE_FLAG_VALUE;
6091
           /* Sign-bit STORE_FLAG_VALUE: NORMALIZEP stays 0 and the raw
              result is kept.  */
6092       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6093         ;
6094       else
6095         return 0;
6096     }
6097
       /* Mark the current end of the insn stream; failed attempts below are
          discarded with delete_insns_since (last).  */
6098   last = get_last_insn ();
6099
6100   /* If optimizing, use different pseudo registers for each insn, instead
6101      of reusing the same pseudo.  This leads to better CSE, but slows
6102      down the compiler, since there are more pseudos.  */
6103   subtarget = (!optimize
6104                && (target_mode == mode)) ? target : NULL_RTX;
       /* TRUEVAL is the constant a true comparison must produce: NORMALIZEP
          if nonzero, otherwise STORE_FLAG_VALUE.  */
6105   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6106
6107   /* For floating-point comparisons, try the reverse comparison or try
6108      changing the "orderedness" of the comparison.  */
6109   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6110     {
6111       enum rtx_code first_code;
6112       bool and_them;
6113
6114       rcode = reverse_condition_maybe_unordered (code);
6115       if (can_compare_p (rcode, mode, ccp_store_flag)
6116           && (code == ORDERED || code == UNORDERED
6117               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6118               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6119         {
               /* WANT_ADD is set when the requested value (1 or -1) is the
                  negation of STORE_FLAG_VALUE.  Adding NORMALIZEP to the raw
                  flag of the reversed comparison then maps STORE_FLAG_VALUE
                  to 0 and 0 to NORMALIZEP.  */
6120           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6121                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6122
6123           /* For the reverse comparison, use either an addition or a XOR.  */
               /* Either form is used only when rtx_cost reports the constant
                  operand as free (cost 0).
                  NOTE(review): both branches return the expand_binop result
                  directly.  If expand_binop fails, the insns emitted by
                  emit_store_flag_1 are not removed and the split-comparison
                  attempt below is skipped, unlike the corresponding integer
                  path in emit_store_flag_int, which falls through to
                  delete_insns_since -- confirm this is intentional.  */
6124           if (want_add
6125               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6126                            optimize_insn_for_speed_p ()) == 0)
6127             {
6128               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6129                                        STORE_FLAG_VALUE, target_mode);
6130               if (tem)
6131                 return expand_binop (target_mode, add_optab, tem,
6132                                      gen_int_mode (normalizep, target_mode),
6133                                      target, 0, OPTAB_WIDEN);
6134             }
               /* Otherwise flip the flag of the reversed comparison by
                  XORing it with TRUEVAL.  */
6135           else if (!want_add
6136                    && rtx_cost (trueval, mode, XOR, 1,
6137                                 optimize_insn_for_speed_p ()) == 0)
6138             {
6139               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6140                                        normalizep, target_mode);
6141               if (tem)
6142                 return expand_binop (target_mode, xor_optab, tem, trueval,
6143                                      target, INTVAL (trueval) >= 0,
6144                                      OPTAB_WIDEN);
6145             }
6146         }
6147
           /* Drop any insns left behind by the attempt above.  */
6148       delete_insns_since (last);
6149
6150       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6151       if (code == ORDERED || code == UNORDERED)
6152         return 0;
6153
           /* Split CODE into two comparisons: FIRST_CODE and the updated CODE.
              AND_THEM is used below to choose how their results are
              combined.  */
6154       and_them = split_comparison (code, mode, &first_code, &code);
6155
6156       /* If there are no NaNs, the first comparison should always fall through.
6157          Effectively change the comparison to the other one.  */
6158       if (!HONOR_NANS (mode))
6159         {
6160           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6161           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6162                                     target_mode);
6163         }
6164
6165       if (!HAVE_conditional_move)
6166         return 0;
6167
6168       /* Do not turn a trapping comparison into a non-trapping one.  */
6169       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6170           && flag_trapping_math)
6171         return 0;
6172
6173       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6174          conditional move.  */
6175       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6176                                normalizep, target_mode);
6177       if (tem == 0)
6178         return 0;
6179
           /* NOTE(review): presumably emit_conditional_move yields its first
              value operand when CODE holds and the second otherwise, so this
              computes "FIRST_CODE and CODE" when AND_THEM and
              "FIRST_CODE or CODE" otherwise -- confirm against
              emit_conditional_move.  */
6180       if (and_them)
6181         tem = emit_conditional_move (target, { code, op0, op1, mode },
6182                                      tem, const0_rtx, GET_MODE (tem), 0);
6183       else
6184         tem = emit_conditional_move (target, { code, op0, op1, mode },
6185                                      trueval, tem, GET_MODE (tem), 0);
6186
           /* A failed conditional move also discards the setcc emitted just
              above.  */
6187       if (tem == 0)
6188         delete_insns_since (last);
6189       return tem;
6190     }
6191
6192   /* The remaining tricks only apply to integer comparisons.  */
6193
6194   scalar_int_mode int_mode;
6195   if (is_int_mode (mode, &int_mode))
6196     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6197                                 unsignedp, normalizep, trueval);
6198
6199   return 0;
6200 }
6201
6202 /* Like emit_store_flag, but always succeeds.
        If emit_store_flag cannot do the job, fall back to a
        set/compare/jump/set sequence.  The arguments are passed on to
        emit_store_flag unchanged, except that a null TARGET is replaced
        by a new word_mode register in the fallback, and that with
        NORMALIZEP zero the fallback stores 1 for a true comparison.
        Return the rtx holding the result: whatever emit_store_flag
        returned, or the register the fallback sequence stored into.  */
6203
6204 rtx
6205 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6206                        machine_mode mode, int unsignedp, int normalizep)
6207 {
6208   rtx tem;
6209   rtx_code_label *label;
6210   rtx trueval, falseval;
6211
6212   /* First see if emit_store_flag can do the job.  */
6213   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6214   if (tem != 0)
6215     return tem;
6216
6217   /* If one operand is constant, make it the second one.  Only do this
6218      if the other operand is not constant as well.  */
6219   if (swap_commutative_operands_p (op0, op1))
6220     {
6221       std::swap (op0, op1);
6222       code = swap_condition (code);
6223     }
6224
       /* No mode given: take it from OP0 (after the possible swap above).  */
6225   if (mode == VOIDmode)
6226     mode = GET_MODE (op0);
6227
       /* No destination supplied: use a new word_mode register.  */
6228   if (!target)
6229     target = gen_reg_rtx (word_mode);
6230
6231   /* If this failed, we have to do this with set/compare/jump/set code.
6232      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6233   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6234   if (code == NE
6235       && GET_MODE_CLASS (mode) == MODE_INT
6236       && REG_P (target)
6237       && op0 == target
6238       && op1 == const0_rtx)
6239     {
           /* Jump over the store when TARGET is already zero; otherwise
              overwrite it with TRUEVAL.  */
6240       label = gen_label_rtx ();
6241       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6242                                NULL_RTX, NULL, label,
6243                                profile_probability::uninitialized ());
6244       emit_move_insn (target, trueval);
6245       emit_label (label);
6246       return target;
6247     }
6248
       /* TARGET is written before the comparison is evaluated (see the
          sequence at the end), so it must be a register that neither operand
          mentions; otherwise switch to a new register of the same mode.  */
6249   if (!REG_P (target)
6250       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6251     target = gen_reg_rtx (GET_MODE (target));
6252
6253   /* Jump in the right direction if the target cannot implement CODE
6254      but can jump on its reverse condition.  */
6255   falseval = const0_rtx;
6256   if (! can_compare_p (code, mode, ccp_jump)
6257       && (! FLOAT_MODE_P (mode)
6258           || code == ORDERED || code == UNORDERED
6259           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6260           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6261     {
6262       enum rtx_code rcode;
6263       if (FLOAT_MODE_P (mode))
6264         rcode = reverse_condition_maybe_unordered (code);
6265       else
6266         rcode = reverse_condition (code);
6267
6268       /* Canonicalize to UNORDERED for the libcall.  */
           /* NOTE(review): inside this block can_compare_p (code, ...) is
              already known to be false, so for CODE == ORDERED the second
              can_compare_p test below is always true -- confirm whether a
              different code was intended there.  */
6269       if (can_compare_p (rcode, mode, ccp_jump)
6270           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6271         {
               /* Swap the two stored values so that jumping on RCODE still
                  leaves the value requested for CODE in TARGET.  */
6272           falseval = trueval;
6273           trueval = const0_rtx;
6274           code = rcode;
6275         }
6276     }
6277
       /* Preload TRUEVAL, jump to LABEL when the comparison holds, otherwise
          fall through and overwrite TARGET with FALSEVAL.  */
6278   emit_move_insn (target, trueval);
6279   label = gen_label_rtx ();
6280   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6281                            label, profile_probability::uninitialized ());
6282
6283   emit_move_insn (target, falseval);
6284   emit_label (label);
6285
6286   return target;
6287 }
6288
6289 /* Expand a vector (left) rotate of MODE of X by an immediate AMT as a vector
6290    permute operation.  Emit code to put the result in DST if successful and
6291    return it.  Otherwise return NULL.  This is intended to implement vector
6292    rotates by byte amounts using vector permutes when the target does not offer
6293    native vector rotate operations.  */
6294 rtx
6295 expand_rotate_as_vec_perm (machine_mode mode, rtx dst, rtx x, rtx amt)
6296 {
6297   rtx amt_unwrap = unwrap_const_vec_duplicate (amt);
6298   /* For now handle only rotate by the same integer constant in all lanes.
6299      In principle rotates by any constant vector are representable through
6300      permutes as long as the individual rotate amounts are multiples of
6301      BITS_PER_UNIT.  */
6302   if (!CONST_INT_P (amt_unwrap))
6303     return NULL_RTX;
6304
       /* Only rotates by a whole number of units can be expressed by moving
          units around.  */
6305   int rotamnt = INTVAL (amt_unwrap);
6306   if (rotamnt % BITS_PER_UNIT != 0)
6307     return NULL_RTX;
       /* The permute is carried out in QIMODE, the mode qimode_for_vec_perm
          associates with MODE; give up if there is none.  */
6308   machine_mode qimode;
6309   if (!qimode_for_vec_perm (mode).exists (&qimode))
6310     return NULL_RTX;
6311
       /* NUNITS is the size of one element of MODE and TOTAL_UNITS the size
          of the whole vector.  The loops below push 3 * NUNITS selector
          indices: for each of three consecutive elements (offset J), unit I
          of the result is taken from unit (ROT_TO_PERM + I) % NUNITS of the
          same element.
          NOTE(review): presumably new_vector (total_units, nunits, 3)
          declares NUNITS patterns of 3 elements each, from which the rest of
          the selector is extrapolated -- confirm against vec_perm_builder.  */
6312   vec_perm_builder builder;
6313   unsigned nunits = GET_MODE_SIZE (GET_MODE_INNER (mode));
6314   poly_uint64 total_units = GET_MODE_SIZE (mode);
6315   builder.new_vector (total_units, nunits, 3);
6316   unsigned rot_bytes = rotamnt / BITS_PER_UNIT;
       /* The starting offset within an element depends on the byte order.
          NOTE(review): NUNITS - ROT_BYTES wraps around as unsigned when the
          rotate amount exceeds the element width (or is negative); the
          "% nunits" below only compensates if NUNITS is a power of two --
          confirm callers pass a reduced, non-negative amount.  */
6317   unsigned rot_to_perm = BYTES_BIG_ENDIAN ? rot_bytes : nunits - rot_bytes;
6318   for (unsigned j = 0; j < 3 * nunits; j += nunits)
6319     for (unsigned i = 0; i < nunits; i++)
6320       builder.quick_push ((rot_to_perm + i) % nunits + j);
6321
       /* View X and DST in QIMODE; X is used for both permute inputs.  */
6322   rtx perm_src = lowpart_subreg (qimode, x, mode);
6323   rtx perm_dst = lowpart_subreg (qimode, dst, mode);
6324   rtx res
6325     = expand_vec_perm_const (qimode, perm_src, perm_src, builder,
6326                              qimode, perm_dst);
6327   if (!res)
6328     return NULL_RTX;
       /* Copy the permute result back into DST in the original MODE.
          NOTE(review): if RES is PERM_DST itself this presumably emits a move
          of DST onto itself; a guard such as rtx_equal_p (res, perm_dst)
          could skip it -- confirm.  */
6329   emit_move_insn (dst, lowpart_subreg (mode, res, qimode));
6330   return dst;
6331 }
6332
6333 /* Helper function for canonicalize_comparison.  Swap between inclusive
6334    and exclusive ranges in order to create an equivalent comparison.  See
6335    canonicalize_comparison for the possible cases.
        Only the comparison code is mapped here (GT <-> GE, LT <-> LE and
        their unsigned counterparts); adjusting the immediate by one is left
        to the caller.  Any other CODE is returned unchanged.  */
6336
6337 static enum rtx_code
6338 equivalent_cmp_code (enum rtx_code code)
6339 {
6340   switch (code)
6341     {
         /* Signed comparisons.  */
6342     case GT:
6343       return GE;
6344     case GE:
6345       return GT;
6346     case LT:
6347       return LE;
6348     case LE:
6349       return LT;
         /* Unsigned comparisons.  */
6350     case GTU:
6351       return GEU;
6352     case GEU:
6353       return GTU;
6354     case LTU:
6355       return LEU;
6356     case LEU:
6357       return LTU;
6358
         /* No inclusive/exclusive counterpart: leave CODE as it is.  */
6359     default:
6360       return code;
6361     }
6362 }
6363
6364 /* Choose the more appropriate immediate in scalar integer comparisons.  The
6365    purpose of this is to end up with an immediate which can be loaded into a
6366    register in fewer moves, if possible.
6367
6368    For each integer comparison there exists an equivalent choice:
6369      i)   a >  b or a >= b + 1
6370      ii)  a <= b or a <  b + 1
6371      iii) a >= b or a >  b - 1
6372      iv)  a <  b or a <= b - 1
6373
6374    MODE is the mode of the first operand.
6375    CODE points to the comparison code.
6376    IMM points to the rtx containing the immediate.  *IMM must satisfy
6377    CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6378    on exit.
        *CODE and *IMM are updated together, and only when the adjusted
        immediate is strictly cheaper to load according to insn_cost; in
        every other case both are left untouched.  */
6379
6380 void
6381 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6382 {
6383   if (!SCALAR_INT_MODE_P (mode))
6384     return;
6385
6386   int to_add = 0;
       /* Unsigned comparison codes get unsigned overflow detection below, all
          others signed.  */
6387   enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6388
6389   /* Extract the immediate value from the rtx.  */
6390   wide_int imm_val = rtx_mode_t (*imm, mode);
6391
       /* TO_ADD is the adjustment that turns *IMM into the immediate of the
          equivalent comparison: +1 for cases i and ii above, -1 for cases iii
          and iv.  Any other code is left alone.  */
6392   if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6393     to_add = 1;
6394   else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6395     to_add = -1;
6396   else
6397     return;
6398
6399   /* Check for overflow/underflow in the case of signed values and
6400      wrapping around in the case of unsigned values.  If any occur
6401      cancel the optimization.  */
6402   wi::overflow_type overflow = wi::OVF_NONE;
6403   wide_int imm_modif;
6404
6405   if (to_add == 1)
6406     imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6407   else
6408     imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6409
6410   if (overflow)
6411     return;
6412
6413   /* The following creates a pseudo; if we cannot do that, bail out.  */
6414   if (!can_create_pseudo_p ())
6415     return;
6416
       /* REG serves only as the destination of the two candidate move insns
          whose costs are compared below.
          NOTE(review): LAST_VIRTUAL_REGISTER + 1 is presumably the first
          pseudo register number -- confirm.  */
6417   rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6418   rtx new_imm = immed_wide_int_const (imm_modif, mode);
6419
6420   rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6421   rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6422
6423   /* Update the immediate and the code.  */
       /* Switch only when loading the new immediate is strictly cheaper; on a
          tie the original comparison is kept.  */
6424   if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6425     {
6426       *code = equivalent_cmp_code (*code);
6427       *imm = new_imm;
6428     }
6429 }
6430
6431
6432 \f
6433 /* Perform possibly multi-word comparison and conditional jump to LABEL
6434    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6435    now a thin wrapper around do_compare_rtx_and_jump.
        The signedness handed to do_compare_rtx_and_jump is derived from OP
        alone.  */
6436
6437 static void
6438 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6439                  rtx_code_label *label)
6440 {
       /* Treat the comparison as unsigned exactly for the four unsigned
          ordering codes.  */
6441   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
       /* LABEL is passed as the last label argument, with NULL for the one
          before it and NULL_RTX for the argument before that.  */
6442   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6443                            NULL, label, profile_probability::uninitialized ());
6444 }