1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2024 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "optabs.h"
35 #include "expmed.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
46 #include "recog.h"
48 struct target_expmed default_target_expmed;
49 #if SWITCHABLE_TARGET
50 struct target_expmed *this_target_expmed = &default_target_expmed;
51 #endif
53 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 poly_uint64, poly_uint64,
57 machine_mode, rtx, bool, bool);
58 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT,
61 poly_uint64, poly_uint64,
62 rtx, scalar_int_mode, bool);
63 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
64 unsigned HOST_WIDE_INT,
65 unsigned HOST_WIDE_INT,
66 rtx, scalar_int_mode, bool);
67 static void store_split_bit_field (rtx, opt_scalar_int_mode,
68 unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT,
70 poly_uint64, poly_uint64,
71 rtx, scalar_int_mode, bool);
72 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
73 unsigned HOST_WIDE_INT,
74 unsigned HOST_WIDE_INT, int, rtx,
75 machine_mode, machine_mode, bool, bool);
76 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
77 unsigned HOST_WIDE_INT,
78 unsigned HOST_WIDE_INT, rtx, int, bool);
79 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
80 unsigned HOST_WIDE_INT,
81 unsigned HOST_WIDE_INT, rtx, int, bool);
82 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
83 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
84 unsigned HOST_WIDE_INT,
85 unsigned HOST_WIDE_INT, int, bool);
86 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
87 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
90 /* Return a constant integer mask value of mode MODE with BITSIZE ones
91 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
92 The mask is truncated if necessary to the width of mode MODE. The
93 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
95 static inline rtx
96 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
98 return immed_wide_int_const
99 (wi::shifted_mask (bitpos, bitsize, complement,
100 GET_MODE_PRECISION (mode)), mode);
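/* Illustrative example (added commentary, not part of the original source):
   with the semantics documented above, mask_rtx (SImode, 8, 4, false) yields
   the SImode constant 0xf00 -- four one bits sitting above eight zero bits --
   while passing COMPLEMENT = true yields the 32-bit complement 0xfffff0ff.  */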
 103 /* Test whether a value is zero or a power of two. */
104 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
105 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
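/* Illustrative examples (not part of the original source): the test
   (x & (x - 1)) == 0 accepts x == 0 (0 & ~0 == 0) and exact powers of two
   such as x == 8 (8 & 7 == 0), but rejects values like x == 6, since
   6 & 5 == 4.  */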
107 struct init_expmed_rtl
109 rtx reg;
110 rtx plus;
111 rtx neg;
112 rtx mult;
113 rtx sdiv;
114 rtx udiv;
115 rtx sdiv_32;
116 rtx smod_32;
117 rtx wide_mult;
118 rtx wide_lshr;
119 rtx wide_trunc;
120 rtx shift;
121 rtx shift_mult;
122 rtx shift_add;
123 rtx shift_sub0;
124 rtx shift_sub1;
125 rtx zext;
126 rtx trunc;
128 rtx pow2[MAX_BITS_PER_WORD];
129 rtx cint[MAX_BITS_PER_WORD];
132 static void
133 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
134 scalar_int_mode from_mode, bool speed)
136 int to_size, from_size;
137 rtx which;
139 to_size = GET_MODE_PRECISION (to_mode);
140 from_size = GET_MODE_PRECISION (from_mode);
 142 /* Most partial integers have a precision less than that of the
 143 "full" integer they require for storage.  In case one doesn't,
 144 reduce its bit size by one for comparison purposes
 145 here. */
146 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
147 && pow2p_hwi (to_size))
148 to_size --;
149 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
150 && pow2p_hwi (from_size))
151 from_size --;
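/* For instance (an illustrative reading, not from the original source):
   if a partial mode such as PSImode reported a full 32-bit precision, the
   decrement above makes a conversion from SImode to that partial mode count
   as a truncation, and the reverse direction as an extension, when choosing
   which rtx to cost below.  */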
153 /* Assume cost of zero-extend and sign-extend is the same. */
154 which = (to_size < from_size ? all->trunc : all->zext);
156 PUT_MODE (all->reg, from_mode);
157 set_convert_cost (to_mode, from_mode, speed,
158 set_src_cost (which, to_mode, speed));
159 /* Restore all->reg's mode. */
160 PUT_MODE (all->reg, to_mode);
163 static void
164 init_expmed_one_mode (struct init_expmed_rtl *all,
165 machine_mode mode, int speed)
167 int m, n, mode_bitsize;
168 machine_mode mode_from;
170 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
172 PUT_MODE (all->reg, mode);
173 PUT_MODE (all->plus, mode);
174 PUT_MODE (all->neg, mode);
175 PUT_MODE (all->mult, mode);
176 PUT_MODE (all->sdiv, mode);
177 PUT_MODE (all->udiv, mode);
178 PUT_MODE (all->sdiv_32, mode);
179 PUT_MODE (all->smod_32, mode);
180 PUT_MODE (all->wide_trunc, mode);
181 PUT_MODE (all->shift, mode);
182 PUT_MODE (all->shift_mult, mode);
183 PUT_MODE (all->shift_add, mode);
184 PUT_MODE (all->shift_sub0, mode);
185 PUT_MODE (all->shift_sub1, mode);
186 PUT_MODE (all->zext, mode);
187 PUT_MODE (all->trunc, mode);
189 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
190 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
191 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
192 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
193 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
195 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
196 <= 2 * add_cost (speed, mode)));
197 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
198 <= 4 * add_cost (speed, mode)));
200 set_shift_cost (speed, mode, 0, 0);
202 int cost = add_cost (speed, mode);
203 set_shiftadd_cost (speed, mode, 0, cost);
204 set_shiftsub0_cost (speed, mode, 0, cost);
205 set_shiftsub1_cost (speed, mode, 0, cost);
208 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
209 for (m = 1; m < n; m++)
211 XEXP (all->shift, 1) = all->cint[m];
212 XEXP (all->shift_mult, 1) = all->pow2[m];
214 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
215 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
216 speed));
217 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
218 speed));
219 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
220 speed));
223 scalar_int_mode int_mode_to;
224 if (is_a <scalar_int_mode> (mode, &int_mode_to))
226 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
227 mode_from = (machine_mode)(mode_from + 1))
228 init_expmed_one_conv (all, int_mode_to,
229 as_a <scalar_int_mode> (mode_from), speed);
231 scalar_int_mode wider_mode;
232 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
233 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
235 PUT_MODE (all->reg, mode);
236 PUT_MODE (all->zext, wider_mode);
237 PUT_MODE (all->wide_mult, wider_mode);
238 PUT_MODE (all->wide_lshr, wider_mode);
239 XEXP (all->wide_lshr, 1)
240 = gen_int_shift_amount (wider_mode, mode_bitsize);
242 set_mul_widen_cost (speed, wider_mode,
243 set_src_cost (all->wide_mult, wider_mode, speed));
244 set_mul_highpart_cost (speed, int_mode_to,
245 set_src_cost (all->wide_trunc,
246 int_mode_to, speed));
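/* Note (added commentary, not from the original source): the rtxes costed
   just above model a widening multiply and a highpart multiply.  WIDE_MULT
   is (mult (zero_extend X) (zero_extend Y)) in the wider mode, and
   WIDE_TRUNC truncates that product shifted right by the narrow mode's bit
   size, which is the usual expansion of a highpart multiply.  */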
251 void
252 init_expmed (void)
254 struct init_expmed_rtl all;
255 machine_mode mode = QImode;
256 int m, speed;
258 memset (&all, 0, sizeof all);
259 for (m = 1; m < MAX_BITS_PER_WORD; m++)
261 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
262 all.cint[m] = GEN_INT (m);
265 /* Avoid using hard regs in ways which may be unsupported. */
266 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
267 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
268 all.neg = gen_rtx_NEG (mode, all.reg);
269 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
270 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
271 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
272 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
273 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
274 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
275 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
276 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
277 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
278 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
279 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
280 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
281 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
282 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
283 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
285 for (speed = 0; speed < 2; speed++)
287 crtl->maybe_hot_insn_p = speed;
288 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
290 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
291 mode = (machine_mode)(mode + 1))
292 init_expmed_one_mode (&all, mode, speed);
294 if (MIN_MODE_PARTIAL_INT != VOIDmode)
295 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
296 mode = (machine_mode)(mode + 1))
297 init_expmed_one_mode (&all, mode, speed);
299 if (MIN_MODE_VECTOR_INT != VOIDmode)
300 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
301 mode = (machine_mode)(mode + 1))
302 init_expmed_one_mode (&all, mode, speed);
305 if (alg_hash_used_p ())
307 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
308 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
310 else
311 set_alg_hash_used_p (true);
312 default_rtl_profile ();
314 ggc_free (all.trunc);
315 ggc_free (all.shift_sub1);
316 ggc_free (all.shift_sub0);
317 ggc_free (all.shift_add);
318 ggc_free (all.shift_mult);
319 ggc_free (all.shift);
320 ggc_free (all.wide_trunc);
321 ggc_free (all.wide_lshr);
322 ggc_free (all.wide_mult);
323 ggc_free (all.zext);
324 ggc_free (all.smod_32);
325 ggc_free (all.sdiv_32);
326 ggc_free (all.udiv);
327 ggc_free (all.sdiv);
328 ggc_free (all.mult);
329 ggc_free (all.neg);
330 ggc_free (all.plus);
331 ggc_free (all.reg);
334 /* Return an rtx representing minus the value of X.
335 MODE is the intended mode of the result,
336 useful if X is a CONST_INT. */
339 negate_rtx (machine_mode mode, rtx x)
341 rtx result = simplify_unary_operation (NEG, mode, x, mode);
343 if (result == 0)
344 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
346 return result;
349 /* Whether reverse storage order is supported on the target. */
350 static int reverse_storage_order_supported = -1;
352 /* Check whether reverse storage order is supported on the target. */
354 static void
355 check_reverse_storage_order_support (void)
357 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
359 reverse_storage_order_supported = 0;
360 sorry ("reverse scalar storage order");
362 else
363 reverse_storage_order_supported = 1;
366 /* Whether reverse FP storage order is supported on the target. */
367 static int reverse_float_storage_order_supported = -1;
369 /* Check whether reverse FP storage order is supported on the target. */
371 static void
372 check_reverse_float_storage_order_support (void)
374 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
376 reverse_float_storage_order_supported = 0;
377 sorry ("reverse floating-point scalar storage order");
379 else
380 reverse_float_storage_order_supported = 1;
383 /* Return an rtx representing value of X with reverse storage order.
384 MODE is the intended mode of the result,
385 useful if X is a CONST_INT. */
388 flip_storage_order (machine_mode mode, rtx x)
390 scalar_int_mode int_mode;
391 rtx result;
393 if (mode == QImode)
394 return x;
396 if (COMPLEX_MODE_P (mode))
398 rtx real = read_complex_part (x, false);
399 rtx imag = read_complex_part (x, true);
401 real = flip_storage_order (GET_MODE_INNER (mode), real);
402 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
404 return gen_rtx_CONCAT (mode, real, imag);
407 if (UNLIKELY (reverse_storage_order_supported < 0))
408 check_reverse_storage_order_support ();
410 if (!is_a <scalar_int_mode> (mode, &int_mode))
412 if (FLOAT_MODE_P (mode)
413 && UNLIKELY (reverse_float_storage_order_supported < 0))
414 check_reverse_float_storage_order_support ();
416 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
417 || !targetm.scalar_mode_supported_p (int_mode))
419 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
420 return x;
422 x = gen_lowpart (int_mode, x);
425 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
426 if (result == 0)
427 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
429 if (int_mode != mode)
430 result = gen_lowpart (mode, result);
432 return result;
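/* Illustrative example (not part of the original source): for a scalar
   integer mode the flip is a byte swap, so an SImode value 0x12345678
   becomes 0x78563412, while QImode values are returned unchanged by the
   early exit above.  */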
435 /* If MODE is set, adjust bitfield memory MEM so that it points to the
436 first unit of mode MODE that contains a bitfield of size BITSIZE at
437 bit position BITNUM. If MODE is not set, return a BLKmode reference
438 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
439 of the field within the new memory. */
441 static rtx
442 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
443 unsigned HOST_WIDE_INT bitsize,
444 unsigned HOST_WIDE_INT bitnum,
445 unsigned HOST_WIDE_INT *new_bitnum)
447 scalar_int_mode imode;
448 if (mode.exists (&imode))
450 unsigned int unit = GET_MODE_BITSIZE (imode);
451 *new_bitnum = bitnum % unit;
452 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
453 return adjust_bitfield_address (mem, imode, offset);
455 else
457 *new_bitnum = bitnum % BITS_PER_UNIT;
458 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
459 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
460 / BITS_PER_UNIT);
461 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
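/* Worked example (added for illustration, not from the original source):
   with MODE = SImode (32 bits) and BITNUM = 37, the code above sets
   *NEW_BITNUM to 37 % 32 = 5 and advances the address by
   (37 - 5) / BITS_PER_UNIT = 4 bytes.  */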
465 /* The caller wants to perform insertion or extraction PATTERN on a
466 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
467 BITREGION_START and BITREGION_END are as for store_bit_field
468 and FIELDMODE is the natural mode of the field.
470 Search for a mode that is compatible with the memory access
471 restrictions and (where applicable) with a register insertion or
472 extraction. Return the new memory on success, storing the adjusted
473 bit position in *NEW_BITNUM. Return null otherwise. */
475 static rtx
476 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
477 rtx op0, HOST_WIDE_INT bitsize,
478 HOST_WIDE_INT bitnum,
479 poly_uint64 bitregion_start,
480 poly_uint64 bitregion_end,
481 machine_mode fieldmode,
482 unsigned HOST_WIDE_INT *new_bitnum)
484 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
485 bitregion_end, MEM_ALIGN (op0),
486 MEM_VOLATILE_P (op0));
487 scalar_int_mode best_mode;
488 if (iter.next_mode (&best_mode))
490 /* We can use a memory in BEST_MODE. See whether this is true for
491 any wider modes. All other things being equal, we prefer to
492 use the widest mode possible because it tends to expose more
493 CSE opportunities. */
494 if (!iter.prefer_smaller_modes ())
496 /* Limit the search to the mode required by the corresponding
497 register insertion or extraction instruction, if any. */
498 scalar_int_mode limit_mode = word_mode;
499 extraction_insn insn;
500 if (get_best_reg_extraction_insn (&insn, pattern,
501 GET_MODE_BITSIZE (best_mode),
502 fieldmode))
503 limit_mode = insn.field_mode;
505 scalar_int_mode wider_mode;
506 while (iter.next_mode (&wider_mode)
507 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
508 best_mode = wider_mode;
510 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
511 new_bitnum);
513 return NULL_RTX;
516 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
517 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
518 offset is then BITNUM / BITS_PER_UNIT. */
520 static bool
521 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
522 machine_mode struct_mode)
524 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
525 if (BYTES_BIG_ENDIAN)
526 return (multiple_p (bitnum, BITS_PER_UNIT)
527 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
528 || multiple_p (bitnum + bitsize,
529 regsize * BITS_PER_UNIT)));
530 else
531 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
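/* Example (illustrative, assuming a little-endian target whose
   REGMODE_NATURAL_SIZE is 8 bytes): a field starting at bit 64 is a lowpart
   because 64 is a multiple of 8 * BITS_PER_UNIT, whereas a field starting
   at bit 32 is not.  */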
534 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
535 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
536 Return false if the access would touch memory outside the range
537 BITREGION_START to BITREGION_END for conformance to the C++ memory
538 model. */
540 static bool
541 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
542 unsigned HOST_WIDE_INT bitnum,
543 scalar_int_mode fieldmode,
544 poly_uint64 bitregion_start,
545 poly_uint64 bitregion_end)
547 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
549 /* -fstrict-volatile-bitfields must be enabled and we must have a
550 volatile MEM. */
551 if (!MEM_P (op0)
552 || !MEM_VOLATILE_P (op0)
553 || flag_strict_volatile_bitfields <= 0)
554 return false;
556 /* The bit size must not be larger than the field mode, and
557 the field mode must not be larger than a word. */
558 if (bitsize > modesize || modesize > BITS_PER_WORD)
559 return false;
561 /* Check for cases of unaligned fields that must be split. */
562 if (bitnum % modesize + bitsize > modesize)
563 return false;
565 /* The memory must be sufficiently aligned for a MODESIZE access.
 566 This condition guarantees that the memory access will not
567 touch anything after the end of the structure. */
568 if (MEM_ALIGN (op0) < modesize)
569 return false;
571 /* Check for cases where the C++ memory model applies. */
572 if (maybe_ne (bitregion_end, 0U)
573 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
574 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
575 bitregion_end)))
576 return false;
578 return true;
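/* Example (illustrative, not from the original source): with
   FIELDMODE = SImode, a volatile 8-bit field starting at bit 28 is rejected
   by the splitting check above because 28 % 32 + 8 > 32, while the same
   field starting at bit 8 passes it.  */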
581 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
582 bit number BITNUM can be treated as a simple value of mode MODE.
583 Store the byte offset in *BYTENUM if so. */
585 static bool
586 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
587 machine_mode mode, poly_uint64 *bytenum)
589 return (MEM_P (op0)
590 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
591 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
592 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
593 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
594 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
597 /* Try to use instruction INSV to store VALUE into a field of OP0.
598 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
599 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
600 are as for store_bit_field. */
602 static bool
603 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
604 opt_scalar_int_mode op0_mode,
605 unsigned HOST_WIDE_INT bitsize,
606 unsigned HOST_WIDE_INT bitnum,
607 rtx value, scalar_int_mode value_mode)
609 class expand_operand ops[4];
610 rtx value1;
611 rtx xop0 = op0;
612 rtx_insn *last = get_last_insn ();
613 bool copy_back = false;
615 scalar_int_mode op_mode = insv->field_mode;
616 unsigned int unit = GET_MODE_BITSIZE (op_mode);
617 if (bitsize == 0 || bitsize > unit)
618 return false;
620 if (MEM_P (xop0))
621 /* Get a reference to the first byte of the field. */
622 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
623 &bitnum);
624 else
626 /* Convert from counting within OP0 to counting in OP_MODE. */
627 if (BYTES_BIG_ENDIAN)
628 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
630 /* If xop0 is a register, we need it in OP_MODE
631 to make it acceptable to the format of insv. */
632 if (GET_CODE (xop0) == SUBREG)
634 /* If such a SUBREG can't be created, give up. */
635 if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
636 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
637 return false;
638 /* We can't just change the mode, because this might clobber op0,
639 and we will need the original value of op0 if insv fails. */
640 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
641 SUBREG_BYTE (xop0));
643 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
644 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
647 /* If the destination is a paradoxical subreg such that we need a
648 truncate to the inner mode, perform the insertion on a temporary and
649 truncate the result to the original destination. Note that we can't
650 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
651 X) 0)) is (reg:N X). */
652 if (GET_CODE (xop0) == SUBREG
653 && REG_P (SUBREG_REG (xop0))
654 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
655 op_mode))
657 rtx tem = gen_reg_rtx (op_mode);
658 emit_move_insn (tem, xop0);
659 xop0 = tem;
660 copy_back = true;
 663 /* There is a similar overflow check at the start of store_bit_field_1,
 664 but it only handles the case where the field lies completely outside
 665 the register.  There are also cases where the field lies only partially
 666 in the register, and we need to adjust BITSIZE for that partial
 667 overflow.  Without this fix, pr48335-2.c would be broken on big-endian
 668 targets that have a bit-insert instruction, such as arm and
 669 aarch64. */
670 if (bitsize + bitnum > unit && bitnum < unit)
672 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
673 "destination object, data truncated into %wu-bit",
674 bitsize, unit - bitnum);
675 bitsize = unit - bitnum;
678 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
679 "backwards" from the size of the unit we are inserting into.
680 Otherwise, we count bits from the most significant on a
681 BYTES/BITS_BIG_ENDIAN machine. */
683 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
684 bitnum = unit - bitsize - bitnum;
686 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
687 value1 = value;
688 if (value_mode != op_mode)
690 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
692 rtx tmp;
693 /* Optimization: Don't bother really extending VALUE
694 if it has all the bits we will actually use. However,
695 if we must narrow it, be sure we do it correctly. */
697 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
699 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
700 if (! tmp)
701 tmp = simplify_gen_subreg (op_mode,
702 force_reg (value_mode, value1),
703 value_mode, 0);
705 else
707 tmp = gen_lowpart_if_possible (op_mode, value1);
708 if (! tmp)
709 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
711 value1 = tmp;
713 else if (CONST_INT_P (value))
714 value1 = gen_int_mode (INTVAL (value), op_mode);
715 else
 716 /* The parse phase is supposed to make VALUE's data type
717 match that of the component reference, which is a type
718 at least as wide as the field; so VALUE should have
719 a mode that corresponds to that type. */
720 gcc_assert (CONSTANT_P (value));
723 create_fixed_operand (&ops[0], xop0);
724 create_integer_operand (&ops[1], bitsize);
725 create_integer_operand (&ops[2], bitnum);
726 create_input_operand (&ops[3], value1, op_mode);
727 if (maybe_expand_insn (insv->icode, 4, ops))
729 if (copy_back)
730 convert_move (op0, xop0, true);
731 return true;
733 delete_insns_since (last);
734 return false;
737 /* A subroutine of store_bit_field, with the same arguments. Return true
738 if the operation could be implemented.
740 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
741 no other way of implementing the operation. If FALLBACK_P is false,
742 return false instead.
 744 If UNDEFINED_P is true then STR_RTX is undefined and may be set using
745 a subreg instead. */
747 static bool
748 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
749 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
750 machine_mode fieldmode,
751 rtx value, bool reverse, bool fallback_p, bool undefined_p)
753 rtx op0 = str_rtx;
755 while (GET_CODE (op0) == SUBREG)
757 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
758 op0 = SUBREG_REG (op0);
761 /* No action is needed if the target is a register and if the field
762 lies completely outside that register. This can occur if the source
763 code contains an out-of-bounds access to a small array. */
764 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
765 return true;
767 /* Use vec_set patterns for inserting parts of vectors whenever
768 available. */
769 machine_mode outermode = GET_MODE (op0);
770 scalar_mode innermode = GET_MODE_INNER (outermode);
771 poly_uint64 pos;
772 if (VECTOR_MODE_P (outermode)
773 && !MEM_P (op0)
774 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
775 && fieldmode == innermode
776 && known_eq (bitsize, GET_MODE_PRECISION (innermode))
777 && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
779 class expand_operand ops[3];
780 enum insn_code icode = optab_handler (vec_set_optab, outermode);
782 create_fixed_operand (&ops[0], op0);
783 create_input_operand (&ops[1], value, innermode);
784 create_integer_operand (&ops[2], pos);
785 if (maybe_expand_insn (icode, 3, ops))
786 return true;
789 /* If the target is a register, overwriting the entire object, or storing
790 a full-word or multi-word field can be done with just a SUBREG. */
791 if (!MEM_P (op0)
792 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
794 /* Use the subreg machinery either to narrow OP0 to the required
795 words or to cope with mode punning between equal-sized modes.
796 In the latter case, use subreg on the rhs side, not lhs. */
797 rtx sub;
798 poly_uint64 bytenum;
799 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
800 if (known_eq (bitnum, 0U)
801 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
803 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
804 if (sub)
806 if (reverse)
807 sub = flip_storage_order (GET_MODE (op0), sub);
808 emit_move_insn (op0, sub);
809 return true;
812 else if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
813 && (undefined_p
814 || (multiple_p (bitnum, regsize * BITS_PER_UNIT)
815 && multiple_p (bitsize, regsize * BITS_PER_UNIT)))
816 && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
818 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), bytenum);
819 if (sub)
821 if (reverse)
822 value = flip_storage_order (fieldmode, value);
823 emit_move_insn (sub, value);
824 return true;
829 /* If the target is memory, storing any naturally aligned field can be
830 done with a simple store. For targets that support fast unaligned
831 memory, any naturally sized, unit aligned field can be done directly. */
832 poly_uint64 bytenum;
833 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
835 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
836 if (reverse)
837 value = flip_storage_order (fieldmode, value);
838 emit_move_insn (op0, value);
839 return true;
842 /* It's possible we'll need to handle other cases here for
843 polynomial bitnum and bitsize. */
845 /* From here on we need to be looking at a fixed-size insertion. */
846 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
847 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
849 /* Make sure we are playing with integral modes. Pun with subregs
850 if we aren't. This must come after the entire register case above,
851 since that case is valid for any mode. The following cases are only
852 valid for integral modes. */
853 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
854 scalar_int_mode imode;
855 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
857 if (MEM_P (op0))
858 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
859 0, MEM_SIZE (op0));
860 else if (!op0_mode.exists ())
862 if (ibitnum == 0
863 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
864 && MEM_P (value)
865 && !reverse)
867 value = adjust_address (value, GET_MODE (op0), 0);
868 emit_move_insn (op0, value);
869 return true;
871 if (!fallback_p)
872 return false;
873 rtx temp = assign_stack_temp (GET_MODE (op0),
874 GET_MODE_SIZE (GET_MODE (op0)));
875 emit_move_insn (temp, op0);
876 store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
877 reverse, fallback_p, undefined_p);
878 emit_move_insn (op0, temp);
879 return true;
881 else
882 op0 = gen_lowpart (op0_mode.require (), op0);
885 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
886 bitregion_start, bitregion_end,
887 fieldmode, value, reverse, fallback_p);
890 /* Subroutine of store_bit_field_1, with the same arguments, except
891 that BITSIZE and BITNUM are constant. Handle cases specific to
892 integral modes. If OP0_MODE is defined, it is the mode of OP0,
893 otherwise OP0 is a BLKmode MEM. */
895 static bool
896 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
897 unsigned HOST_WIDE_INT bitsize,
898 unsigned HOST_WIDE_INT bitnum,
899 poly_uint64 bitregion_start,
900 poly_uint64 bitregion_end,
901 machine_mode fieldmode,
902 rtx value, bool reverse, bool fallback_p)
904 /* Storing an lsb-aligned field in a register
905 can be done with a movstrict instruction. */
907 if (!MEM_P (op0)
908 && !reverse
909 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
910 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
911 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
913 class expand_operand ops[2];
914 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
915 rtx arg0 = op0;
916 unsigned HOST_WIDE_INT subreg_off;
918 if (GET_CODE (arg0) == SUBREG)
920 /* Else we've got some float mode source being extracted into
921 a different float mode destination -- this combination of
922 subregs results in Severe Tire Damage. */
923 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
924 || GET_MODE_CLASS (fieldmode) == MODE_INT
925 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
926 arg0 = SUBREG_REG (arg0);
929 subreg_off = bitnum / BITS_PER_UNIT;
930 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
931 /* STRICT_LOW_PART must have a non-paradoxical subreg as
932 operand. */
933 && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
935 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
937 create_fixed_operand (&ops[0], arg0);
938 /* Shrink the source operand to FIELDMODE. */
939 create_convert_operand_to (&ops[1], value, fieldmode, false);
940 if (maybe_expand_insn (icode, 2, ops))
941 return true;
945 /* Handle fields bigger than a word. */
947 if (bitsize > BITS_PER_WORD)
949 /* Here we transfer the words of the field
950 in the order least significant first.
951 This is because the most significant word is the one which may
952 be less than full.
953 However, only do that if the value is not BLKmode. */
955 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
956 const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
957 rtx_insn *last;
959 /* This is the mode we must force value to, so that there will be enough
960 subwords to extract. Note that fieldmode will often (always?) be
961 VOIDmode, because that is what store_field uses to indicate that this
962 is a bit field, but passing VOIDmode to operand_subword_force
963 is not allowed.
965 The mode must be fixed-size, since insertions into variable-sized
966 objects are meant to be handled before calling this function. */
967 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
968 if (value_mode == VOIDmode)
969 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
971 last = get_last_insn ();
972 for (int i = 0; i < nwords; i++)
974 /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
975 except maybe for the last iteration. */
976 const unsigned HOST_WIDE_INT new_bitsize
977 = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
978 /* Bit offset from the starting bit number in the target. */
979 const unsigned int bit_offset
980 = backwards ^ reverse
981 ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
982 : i * BITS_PER_WORD;
983 /* Starting word number in the value. */
984 const unsigned int wordnum
985 = backwards
986 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
987 : i;
988 /* The chunk of the value in word_mode. We use bit-field extraction
989 in BLKmode to handle unaligned memory references and to shift the
990 last chunk right on big-endian machines if need be. */
991 rtx value_word
992 = fieldmode == BLKmode
993 ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
994 1, NULL_RTX, word_mode, word_mode, false,
995 NULL)
996 : operand_subword_force (value, wordnum, value_mode);
998 if (!store_bit_field_1 (op0, new_bitsize,
999 bitnum + bit_offset,
1000 bitregion_start, bitregion_end,
1001 word_mode,
1002 value_word, reverse, fallback_p, false))
1004 delete_insns_since (last);
1005 return false;
1008 return true;
1011 /* If VALUE has a floating-point or complex mode, access it as an
1012 integer of the corresponding size. This can occur on a machine
1013 with 64 bit registers that uses SFmode for float. It can also
1014 occur for unaligned float or complex fields. */
1015 rtx orig_value = value;
1016 scalar_int_mode value_mode;
1017 if (GET_MODE (value) == VOIDmode)
1018 /* By this point we've dealt with values that are bigger than a word,
1019 so word_mode is a conservatively correct choice. */
1020 value_mode = word_mode;
1021 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1023 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1024 value = gen_reg_rtx (value_mode);
1025 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1028 /* If OP0 is a multi-word register, narrow it to the affected word.
1029 If the region spans two words, defer to store_split_bit_field.
1030 Don't do this if op0 is a single hard register wider than word
1031 such as a float or vector register. */
1032 if (!MEM_P (op0)
1033 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1034 && (!REG_P (op0)
1035 || !HARD_REGISTER_P (op0)
1036 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1038 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1040 if (!fallback_p)
1041 return false;
1043 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1044 bitregion_start, bitregion_end,
1045 value, value_mode, reverse);
1046 return true;
1048 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1049 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1050 gcc_assert (op0);
1051 op0_mode = word_mode;
1052 bitnum %= BITS_PER_WORD;
1055 /* From here on we can assume that the field to be stored in fits
1056 within a word. If the destination is a register, it too fits
1057 in a word. */
1059 extraction_insn insv;
1060 if (!MEM_P (op0)
1061 && !reverse
1062 && get_best_reg_extraction_insn (&insv, EP_insv,
1063 GET_MODE_BITSIZE (op0_mode.require ()),
1064 fieldmode)
1065 && store_bit_field_using_insv (&insv, op0, op0_mode,
1066 bitsize, bitnum, value, value_mode))
1067 return true;
1069 /* If OP0 is a memory, try copying it to a register and seeing if a
1070 cheap register alternative is available. */
1071 if (MEM_P (op0) && !reverse)
1073 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1074 fieldmode)
1075 && store_bit_field_using_insv (&insv, op0, op0_mode,
1076 bitsize, bitnum, value, value_mode))
1077 return true;
1079 rtx_insn *last = get_last_insn ();
1081 /* Try loading part of OP0 into a register, inserting the bitfield
1082 into that, and then copying the result back to OP0. */
1083 unsigned HOST_WIDE_INT bitpos;
1084 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1085 bitregion_start, bitregion_end,
1086 fieldmode, &bitpos);
1087 if (xop0)
1089 rtx tempreg = copy_to_reg (xop0);
1090 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1091 bitregion_start, bitregion_end,
1092 fieldmode, orig_value, reverse, false, false))
1094 emit_move_insn (xop0, tempreg);
1095 return true;
1097 delete_insns_since (last);
1101 if (!fallback_p)
1102 return false;
1104 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1105 bitregion_end, value, value_mode, reverse);
1106 return true;
1109 /* Generate code to store value from rtx VALUE
1110 into a bit-field within structure STR_RTX
1111 containing BITSIZE bits starting at bit BITNUM.
1113 BITREGION_START is bitpos of the first bitfield in this region.
1114 BITREGION_END is the bitpos of the ending bitfield in this region.
1115 These two fields are 0, if the C++ memory model does not apply,
1116 or we are not interested in keeping track of bitfield regions.
1118 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1120 If REVERSE is true, the store is to be done in reverse order.
1122 If UNDEFINED_P is true then STR_RTX is currently undefined. */
1124 void
1125 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1126 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1127 machine_mode fieldmode,
1128 rtx value, bool reverse, bool undefined_p)
1130 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1131 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1132 scalar_int_mode int_mode;
1133 if (bitsize.is_constant (&ibitsize)
1134 && bitnum.is_constant (&ibitnum)
1135 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1136 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1137 bitregion_start, bitregion_end))
1139 /* Storing of a full word can be done with a simple store.
1140 We know here that the field can be accessed with one single
1141 instruction. For targets that support unaligned memory,
1142 an unaligned access may be necessary. */
1143 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1145 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1146 ibitnum / BITS_PER_UNIT);
1147 if (reverse)
1148 value = flip_storage_order (int_mode, value);
1149 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1150 emit_move_insn (str_rtx, value);
1152 else
1154 rtx temp;
1156 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1157 ibitnum, &ibitnum);
1158 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1159 temp = copy_to_reg (str_rtx);
1160 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1161 int_mode, value, reverse, true, undefined_p))
1162 gcc_unreachable ();
1164 emit_move_insn (str_rtx, temp);
1167 return;
1170 /* Under the C++0x memory model, we must not touch bits outside the
1171 bit region. Adjust the address to start at the beginning of the
1172 bit region. */
1173 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1175 scalar_int_mode best_mode;
1176 machine_mode addr_mode = VOIDmode;
1178 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1179 bitnum -= bitregion_start;
1180 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1181 bitregion_end -= bitregion_start;
1182 bitregion_start = 0;
1183 if (bitsize.is_constant (&ibitsize)
1184 && bitnum.is_constant (&ibitnum)
1185 && get_best_mode (ibitsize, ibitnum,
1186 bitregion_start, bitregion_end,
1187 MEM_ALIGN (str_rtx), INT_MAX,
1188 MEM_VOLATILE_P (str_rtx), &best_mode))
1189 addr_mode = best_mode;
1190 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1191 offset, size);
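/* Worked example (added for illustration): if BITREGION_START is 32 and
   BITNUM is 40, the MEM is advanced by 32 / BITS_PER_UNIT = 4 bytes and
   BITNUM becomes 8, so the store can no longer touch bytes in front of the
   bit region.  */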
1194 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1195 bitregion_start, bitregion_end,
1196 fieldmode, value, reverse, true, undefined_p))
1197 gcc_unreachable ();
1200 /* Use shifts and boolean operations to store VALUE into a bit field of
1201 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1202 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1203 the mode of VALUE.
1205 If REVERSE is true, the store is to be done in reverse order. */
1207 static void
1208 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1209 unsigned HOST_WIDE_INT bitsize,
1210 unsigned HOST_WIDE_INT bitnum,
1211 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1212 rtx value, scalar_int_mode value_mode, bool reverse)
1214 /* There is a case not handled here:
1215 a structure with a known alignment of just a halfword
1216 and a field split across two aligned halfwords within the structure.
1217 Or likewise a structure with a known alignment of just a byte
1218 and a field split across two bytes.
1219 Such cases are not supposed to be able to occur. */
1221 scalar_int_mode best_mode;
1222 if (MEM_P (op0))
1224 unsigned int max_bitsize = BITS_PER_WORD;
1225 scalar_int_mode imode;
1226 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1227 max_bitsize = GET_MODE_BITSIZE (imode);
1229 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1230 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1231 &best_mode))
1233 /* The only way this should occur is if the field spans word
1234 boundaries. */
1235 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1236 bitregion_start, bitregion_end,
1237 value, value_mode, reverse);
1238 return;
1241 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1243 else
1244 best_mode = op0_mode.require ();
1246 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1247 value, value_mode, reverse);
1250 /* Helper function for store_fixed_bit_field, stores
1251 the bit field always using MODE, which is the mode of OP0. The other
1252 arguments are as for store_fixed_bit_field. */
1254 static void
1255 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1256 unsigned HOST_WIDE_INT bitsize,
1257 unsigned HOST_WIDE_INT bitnum,
1258 rtx value, scalar_int_mode value_mode, bool reverse)
1260 rtx temp;
1261 int all_zero = 0;
1262 int all_one = 0;
1264 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1265 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1267 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1268 /* BITNUM is the distance between our msb
1269 and that of the containing datum.
1270 Convert it to the distance from the lsb. */
1271 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1273 /* Now BITNUM is always the distance between our lsb
1274 and that of OP0. */
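/* Example (illustrative, not from the original source): with MODE = SImode,
   BITSIZE = 5 and a big-endian BITNUM of 3 measured from the msb, the
   conversion above gives an lsb-relative position of 32 - 5 - 3 = 24.  */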
1276 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1277 we must first convert its mode to MODE. */
1279 if (CONST_INT_P (value))
1281 unsigned HOST_WIDE_INT v = UINTVAL (value);
1283 if (bitsize < HOST_BITS_PER_WIDE_INT)
1284 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1286 if (v == 0)
1287 all_zero = 1;
1288 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1289 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1290 || (bitsize == HOST_BITS_PER_WIDE_INT
1291 && v == HOST_WIDE_INT_M1U))
1292 all_one = 1;
1294 value = lshift_value (mode, v, bitnum);
1296 else
1298 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1299 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1301 if (value_mode != mode)
1302 value = convert_to_mode (mode, value, 1);
1304 if (must_and)
1305 value = expand_binop (mode, and_optab, value,
1306 mask_rtx (mode, 0, bitsize, 0),
1307 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1308 if (bitnum > 0)
1309 value = expand_shift (LSHIFT_EXPR, mode, value,
1310 bitnum, NULL_RTX, 1);
1313 if (reverse)
1314 value = flip_storage_order (mode, value);
1316 /* Now clear the chosen bits in OP0,
1317 except that if VALUE is -1 we need not bother. */
1318 /* We keep the intermediates in registers to allow CSE to combine
1319 consecutive bitfield assignments. */
1321 temp = force_reg (mode, op0);
1323 if (! all_one)
1325 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1326 if (reverse)
1327 mask = flip_storage_order (mode, mask);
1328 temp = expand_binop (mode, and_optab, temp, mask,
1329 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1330 temp = force_reg (mode, temp);
1333 /* Now logical-or VALUE into OP0, unless it is zero. */
1335 if (! all_zero)
1337 temp = expand_binop (mode, ior_optab, temp, value,
1338 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1339 temp = force_reg (mode, temp);
1342 if (op0 != temp)
1344 op0 = copy_rtx (op0);
1345 emit_move_insn (op0, temp);
1349 /* Store a bit field that is split across multiple accessible memory objects.
1351 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1352 BITSIZE is the field width; BITPOS the position of its first bit
1353 (within the word).
1354 VALUE is the value to store, which has mode VALUE_MODE.
1355 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1356 a BLKmode MEM.
1358 If REVERSE is true, the store is to be done in reverse order.
1360 This does not yet handle fields wider than BITS_PER_WORD. */
1362 static void
1363 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1364 unsigned HOST_WIDE_INT bitsize,
1365 unsigned HOST_WIDE_INT bitpos,
1366 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1367 rtx value, scalar_int_mode value_mode, bool reverse)
1369 unsigned int unit, total_bits, bitsdone = 0;
1371 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1372 much at a time. */
1373 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1374 unit = BITS_PER_WORD;
1375 else
1376 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1378 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1379 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1380 again, and we will mutually recurse forever. */
1381 if (MEM_P (op0) && op0_mode.exists ())
1382 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1384 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1385 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1386 that VALUE might be a floating-point constant. */
1387 if (CONSTANT_P (value) && !CONST_INT_P (value))
1389 rtx word = gen_lowpart_common (word_mode, value);
1391 if (word && (value != word))
1392 value = word;
1393 else
1394 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1395 value_mode = word_mode;
1398 total_bits = GET_MODE_BITSIZE (value_mode);
1400 while (bitsdone < bitsize)
1402 unsigned HOST_WIDE_INT thissize;
1403 unsigned HOST_WIDE_INT thispos;
1404 unsigned HOST_WIDE_INT offset;
1405 rtx part;
1407 offset = (bitpos + bitsdone) / unit;
1408 thispos = (bitpos + bitsdone) % unit;
 1410 /* When the region of bytes we can touch is restricted, decrease
1411 UNIT close to the end of the region as needed. If op0 is a REG
1412 or SUBREG of REG, don't do this, as there can't be data races
1413 on a register and we can expand shorter code in some cases. */
1414 if (maybe_ne (bitregion_end, 0U)
1415 && unit > BITS_PER_UNIT
1416 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1417 && !REG_P (op0)
1418 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1420 unit = unit / 2;
1421 continue;
1424 /* THISSIZE must not overrun a word boundary. Otherwise,
1425 store_fixed_bit_field will call us again, and we will mutually
1426 recurse forever. */
1427 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1428 thissize = MIN (thissize, unit - thispos);
1430 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1432 /* Fetch successively less significant portions. */
1433 if (CONST_INT_P (value))
1434 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1435 >> (bitsize - bitsdone - thissize))
1436 & ((HOST_WIDE_INT_1 << thissize) - 1));
1437 /* Likewise, but the source is little-endian. */
1438 else if (reverse)
1439 part = extract_fixed_bit_field (word_mode, value, value_mode,
1440 thissize,
1441 bitsize - bitsdone - thissize,
1442 NULL_RTX, 1, false);
1443 else
1444 /* The args are chosen so that the last part includes the
1445 lsb. Give extract_bit_field the value it needs (with
1446 endianness compensation) to fetch the piece we want. */
1447 part = extract_fixed_bit_field (word_mode, value, value_mode,
1448 thissize,
1449 total_bits - bitsize + bitsdone,
1450 NULL_RTX, 1, false);
1452 else
1454 /* Fetch successively more significant portions. */
1455 if (CONST_INT_P (value))
1456 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1457 >> bitsdone)
1458 & ((HOST_WIDE_INT_1 << thissize) - 1));
1459 /* Likewise, but the source is big-endian. */
1460 else if (reverse)
1461 part = extract_fixed_bit_field (word_mode, value, value_mode,
1462 thissize,
1463 total_bits - bitsdone - thissize,
1464 NULL_RTX, 1, false);
1465 else
1466 part = extract_fixed_bit_field (word_mode, value, value_mode,
1467 thissize, bitsdone, NULL_RTX,
1468 1, false);
1471 /* If OP0 is a register, then handle OFFSET here. */
1472 rtx op0_piece = op0;
1473 opt_scalar_int_mode op0_piece_mode = op0_mode;
1474 if (SUBREG_P (op0) || REG_P (op0))
1476 scalar_int_mode imode;
1477 if (op0_mode.exists (&imode)
1478 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1480 if (offset)
1481 op0_piece = const0_rtx;
1483 else
1485 op0_piece = operand_subword_force (op0,
1486 offset * unit / BITS_PER_WORD,
1487 GET_MODE (op0));
1488 op0_piece_mode = word_mode;
1490 offset &= BITS_PER_WORD / unit - 1;
 1493 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1494 it is just an out-of-bounds access. Ignore it. */
1495 if (op0_piece != const0_rtx)
1496 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1497 offset * unit + thispos, bitregion_start,
1498 bitregion_end, part, word_mode, reverse);
1499 bitsdone += thissize;
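/* Worked example (added for illustration): storing a 10-bit field at
   BITPOS = 28 with UNIT = 32 performs two iterations of the loop above:
   first a 4-bit piece at bit 28 of word 0 (THISSIZE is limited by
   UNIT - THISPOS), then a 6-bit piece at bit 0 of word 1.  */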
1503 /* A subroutine of extract_bit_field_1 that converts return value X
1504 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1505 to extract_bit_field. */
1507 static rtx
1508 convert_extracted_bit_field (rtx x, machine_mode mode,
1509 machine_mode tmode, bool unsignedp)
1511 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1512 return x;
 1514 /* If TMODE is not a scalar integral mode (e.g. it is a float mode),
 1515 first convert X to the integer mode of that size and then access
 1516 the result as TMODE via a SUBREG. */
1517 if (!SCALAR_INT_MODE_P (tmode))
1519 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1520 x = convert_to_mode (int_mode, x, unsignedp);
1521 x = force_reg (int_mode, x);
1522 return gen_lowpart (tmode, x);
1525 return convert_to_mode (tmode, x, unsignedp);
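/* Example (illustrative, not from the original source): when TMODE is
   SFmode, the path above first converts X to the 32-bit integer mode of
   that size and then reinterprets the bits as SFmode via gen_lowpart,
   rather than performing a numeric integer-to-float conversion.  */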
1528 /* Try to use an ext(z)v pattern to extract a field from OP0.
1529 Return the extracted value on success, otherwise return null.
1530 EXTV describes the extraction instruction to use. If OP0_MODE
1531 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1532 The other arguments are as for extract_bit_field. */
1534 static rtx
1535 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1536 opt_scalar_int_mode op0_mode,
1537 unsigned HOST_WIDE_INT bitsize,
1538 unsigned HOST_WIDE_INT bitnum,
1539 int unsignedp, rtx target,
1540 machine_mode mode, machine_mode tmode)
1542 class expand_operand ops[4];
1543 rtx spec_target = target;
1544 rtx spec_target_subreg = 0;
1545 scalar_int_mode ext_mode = extv->field_mode;
1546 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1548 if (bitsize == 0 || unit < bitsize)
1549 return NULL_RTX;
1551 if (MEM_P (op0))
1552 /* Get a reference to the first byte of the field. */
1553 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1554 &bitnum);
1555 else
1557 /* Convert from counting within OP0 to counting in EXT_MODE. */
1558 if (BYTES_BIG_ENDIAN)
1559 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1561 /* If op0 is a register, we need it in EXT_MODE to make it
1562 acceptable to the format of ext(z)v. */
1563 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1564 return NULL_RTX;
1565 if (REG_P (op0) && op0_mode.require () != ext_mode)
1566 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1569 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1570 "backwards" from the size of the unit we are extracting from.
1571 Otherwise, we count bits from the most significant on a
1572 BYTES/BITS_BIG_ENDIAN machine. */
1574 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1575 bitnum = unit - bitsize - bitnum;
1577 if (target == 0)
1578 target = spec_target = gen_reg_rtx (tmode);
1580 if (GET_MODE (target) != ext_mode)
1582 rtx temp;
1583 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1584 between the mode of the extraction (word_mode) and the target
1585 mode. Instead, create a temporary and use convert_move to set
1586 the target. */
1587 if (REG_P (target)
1588 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1589 && (temp = gen_lowpart_if_possible (ext_mode, target)))
1591 target = temp;
1592 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1593 spec_target_subreg = target;
1595 else
1596 target = gen_reg_rtx (ext_mode);
1599 create_output_operand (&ops[0], target, ext_mode);
1600 create_fixed_operand (&ops[1], op0);
1601 create_integer_operand (&ops[2], bitsize);
1602 create_integer_operand (&ops[3], bitnum);
1603 if (maybe_expand_insn (extv->icode, 4, ops))
1605 target = ops[0].value;
1606 if (target == spec_target)
1607 return target;
1608 if (target == spec_target_subreg)
1609 return spec_target;
1610 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1612 return NULL_RTX;
1615 /* See whether it would be valid to extract the part of OP0 with
1616 mode OP0_MODE described by BITNUM and BITSIZE into a value of
1617 mode MODE using a subreg operation.
1618 Return the subreg if so, otherwise return null. */
1620 static rtx
1621 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1622 machine_mode op0_mode,
1623 poly_uint64 bitsize, poly_uint64 bitnum)
1625 poly_uint64 bytenum;
1626 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1627 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1628 && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1629 && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1630 return simplify_gen_subreg (mode, op0, op0_mode, bytenum);
1631 return NULL_RTX;
1634 /* A subroutine of extract_bit_field, with the same arguments.
1635 If UNSIGNEDP is -1, the result need not be sign or zero extended.
1636 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1637 if we can find no other means of implementing the operation.
1638 if FALLBACK_P is false, return NULL instead. */
1640 static rtx
1641 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1642 int unsignedp, rtx target, machine_mode mode,
1643 machine_mode tmode, bool reverse, bool fallback_p,
1644 rtx *alt_rtl)
1646 rtx op0 = str_rtx;
1647 machine_mode mode1;
1649 if (tmode == VOIDmode)
1650 tmode = mode;
1652 while (GET_CODE (op0) == SUBREG)
1654 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1655 op0 = SUBREG_REG (op0);
1658 /* If we have an out-of-bounds access to a register, just return an
1659 uninitialized register of the required mode. This can occur if the
1660 source code contains an out-of-bounds access to a small array. */
1661 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1662 return gen_reg_rtx (tmode);
1664 if (REG_P (op0)
1665 && mode == GET_MODE (op0)
1666 && known_eq (bitnum, 0U)
1667 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1669 if (reverse)
1670 op0 = flip_storage_order (mode, op0);
1671 /* We're trying to extract a full register from itself. */
1672 return op0;
1675 /* First try to check for vector from vector extractions. */
1676 if (VECTOR_MODE_P (GET_MODE (op0))
1677 && !MEM_P (op0)
1678 && VECTOR_MODE_P (tmode)
1679 && known_eq (bitsize, GET_MODE_PRECISION (tmode))
1680 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1682 machine_mode new_mode = GET_MODE (op0);
1683 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1685 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1686 poly_uint64 nunits;
1687 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1688 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1689 || !related_vector_mode (tmode, inner_mode,
1690 nunits).exists (&new_mode)
1691 || maybe_ne (GET_MODE_SIZE (new_mode),
1692 GET_MODE_SIZE (GET_MODE (op0))))
1693 new_mode = VOIDmode;
1695 poly_uint64 pos;
1696 if (new_mode != VOIDmode
1697 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1698 != CODE_FOR_nothing)
1699 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1701 class expand_operand ops[3];
1702 machine_mode outermode = new_mode;
1703 machine_mode innermode = tmode;
1704 enum insn_code icode
1705 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1707 if (new_mode != GET_MODE (op0))
1708 op0 = gen_lowpart (new_mode, op0);
1709 create_output_operand (&ops[0], target, innermode);
1710 ops[0].target = 1;
1711 create_input_operand (&ops[1], op0, outermode);
1712 create_integer_operand (&ops[2], pos);
1713 if (maybe_expand_insn (icode, 3, ops))
1715 if (alt_rtl && ops[0].target)
1716 *alt_rtl = target;
1717 target = ops[0].value;
1718 if (GET_MODE (target) != mode)
1719 return gen_lowpart (tmode, target);
1720 return target;
1725 /* See if we can get a better vector mode before extracting. */
1726 if (VECTOR_MODE_P (GET_MODE (op0))
1727 && !MEM_P (op0)
1728 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1730 machine_mode new_mode;
1732 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1733 new_mode = MIN_MODE_VECTOR_FLOAT;
1734 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1735 new_mode = MIN_MODE_VECTOR_FRACT;
1736 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1737 new_mode = MIN_MODE_VECTOR_UFRACT;
1738 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1739 new_mode = MIN_MODE_VECTOR_ACCUM;
1740 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1741 new_mode = MIN_MODE_VECTOR_UACCUM;
1742 else
1743 new_mode = MIN_MODE_VECTOR_INT;
1745 FOR_EACH_MODE_FROM (new_mode, new_mode)
1746 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1747 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1748 && known_eq (bitsize, GET_MODE_UNIT_PRECISION (new_mode))
1749 && multiple_p (bitnum, GET_MODE_UNIT_PRECISION (new_mode))
1750 && targetm.vector_mode_supported_p (new_mode)
1751 && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1752 break;
1753 if (new_mode != VOIDmode)
1754 op0 = gen_lowpart (new_mode, op0);
1757 /* Use vec_extract patterns for extracting parts of vectors whenever
1758 available. If that fails, see whether the current modes and bitregion
1759 give a natural subreg. */
1760 machine_mode outermode = GET_MODE (op0);
1761 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1763 scalar_mode innermode = GET_MODE_INNER (outermode);
1765 enum insn_code icode
1766 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1768 poly_uint64 pos;
1769 if (icode != CODE_FOR_nothing
1770 && known_eq (bitsize, GET_MODE_PRECISION (innermode))
1771 && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
1773 class expand_operand ops[3];
1775 create_output_operand (&ops[0], target,
1776 insn_data[icode].operand[0].mode);
1777 ops[0].target = 1;
1778 create_input_operand (&ops[1], op0, outermode);
1779 create_integer_operand (&ops[2], pos);
1780 if (maybe_expand_insn (icode, 3, ops))
1782 if (alt_rtl && ops[0].target)
1783 *alt_rtl = target;
1784 target = ops[0].value;
1785 if (GET_MODE (target) != mode)
1786 return gen_lowpart (tmode, target);
1787 return target;
1790 /* Using subregs is useful if we're extracting one register vector
1791 from a multi-register vector. extract_bit_field_as_subreg checks
1792 for valid bitsize and bitnum, so we don't need to do that here. */
1793 if (VECTOR_MODE_P (mode))
1795 rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
1796 bitsize, bitnum);
1797 if (sub)
1798 return sub;
1802 /* Make sure we are playing with integral modes. Pun with subregs
1803 if we aren't. */
1804 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1805 scalar_int_mode imode;
1806 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1808 if (MEM_P (op0))
1809 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1810 0, MEM_SIZE (op0));
1811 else if (op0_mode.exists (&imode))
1813 op0 = gen_lowpart (imode, op0);
1815 /* If we got a SUBREG, force it into a register since we
1816 aren't going to be able to do another SUBREG on it. */
1817 if (GET_CODE (op0) == SUBREG)
1818 op0 = force_reg (imode, op0);
1820 else
1822 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1823 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1824 emit_move_insn (mem, op0);
1825 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1829 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1830 If that's wrong, the solution is to test for it and set TARGET to 0
1831 if needed. */
1833 /* Get the mode of the field to use for atomic access or subreg
1834 conversion. */
1835 if (!SCALAR_INT_MODE_P (tmode)
1836 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1837 mode1 = mode;
1838 gcc_assert (mode1 != BLKmode);
1840 /* Extraction of a full MODE1 value can be done with a subreg as long
1841 as the least significant bit of the value is the least significant
1842 bit of either OP0 or a word of OP0. */
1843 if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
1845 rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
1846 bitsize, bitnum);
1847 if (sub)
1848 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1851 /* Extraction of a full MODE1 value can be done with a load as long as
1852 the field is on a byte boundary and is sufficiently aligned. */
1853 poly_uint64 bytenum;
1854 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1856 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1857 if (reverse)
1858 op0 = flip_storage_order (mode1, op0);
1859 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1862 /* If we have a memory source and a non-constant bit offset, restrict
1863 the memory to the referenced bytes. This is a worst-case fallback
1864 but is useful for things like vector booleans. */
1865 if (MEM_P (op0) && !bitnum.is_constant ())
1867 bytenum = bits_to_bytes_round_down (bitnum);
1868 bitnum = num_trailing_bits (bitnum);
1869 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1870 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1871 op0_mode = opt_scalar_int_mode ();
1874 /* It's possible we'll need to handle other cases here for
1875 polynomial bitnum and bitsize. */
1877   /* From here on we need to be looking at a fixed-size extraction.  */
1878 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1879 bitnum.to_constant (), unsignedp,
1880 target, mode, tmode, reverse, fallback_p);
1883 /* Subroutine of extract_bit_field_1, with the same arguments, except
1884 that BITSIZE and BITNUM are constant. Handle cases specific to
1885 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1886 otherwise OP0 is a BLKmode MEM. */
1888 static rtx
1889 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1890 unsigned HOST_WIDE_INT bitsize,
1891 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1892 rtx target, machine_mode mode, machine_mode tmode,
1893 bool reverse, bool fallback_p)
1895 /* Handle fields bigger than a word. */
1897 if (bitsize > BITS_PER_WORD)
1899 /* Here we transfer the words of the field
1900 in the order least significant first.
1901 This is because the most significant word is the one which may
1902 be less than full. */
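      /* Worked example: with 32-bit words, a 40-bit field is transferred as
	 two pieces: the full low-order 32 bits first, then the remaining
	 8 bits of the partial high-order word.  */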
1904 const bool backwards = WORDS_BIG_ENDIAN;
1905 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1906 unsigned int i;
1907 rtx_insn *last;
1909 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1910 target = gen_reg_rtx (mode);
1912 /* In case we're about to clobber a base register or something
1913 (see gcc.c-torture/execute/20040625-1.c). */
1914 if (reg_mentioned_p (target, op0))
1915 target = gen_reg_rtx (mode);
1917 /* Indicate for flow that the entire target reg is being set. */
1918 emit_clobber (target);
1920 /* The mode must be fixed-size, since extract_bit_field_1 handles
1921 extractions from variable-sized objects before calling this
1922 function. */
1923 unsigned int target_size
1924 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1925 last = get_last_insn ();
1926 for (i = 0; i < nwords; i++)
1928 /* If I is 0, use the low-order word in both field and target;
1929 if I is 1, use the next to lowest word; and so on. */
1930 /* Word number in TARGET to use. */
1931 unsigned int wordnum
1932 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1933 /* Offset from start of field in OP0. */
1934 unsigned int bit_offset = (backwards ^ reverse
1935 ? MAX ((int) bitsize - ((int) i + 1)
1936 * BITS_PER_WORD,
1938 : (int) i * BITS_PER_WORD);
1939 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1940 rtx result_part
1941 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1942 bitsize - i * BITS_PER_WORD),
1943 bitnum + bit_offset,
1944 (unsignedp ? 1 : -1), target_part,
1945 mode, word_mode, reverse, fallback_p, NULL);
1947 gcc_assert (target_part);
1948 if (!result_part)
1950 delete_insns_since (last);
1951 return NULL;
1954 if (result_part != target_part)
1955 emit_move_insn (target_part, result_part);
1958 if (unsignedp)
1960 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1961 need to be zero'd out. */
1962 if (target_size > nwords * UNITS_PER_WORD)
1964 unsigned int i, total_words;
1966 total_words = target_size / UNITS_PER_WORD;
1967 for (i = nwords; i < total_words; i++)
1968 emit_move_insn
1969 (operand_subword (target,
1970 backwards ? total_words - i - 1 : i,
1971 1, VOIDmode),
1972 const0_rtx);
1974 return target;
1977 /* Signed bit field: sign-extend with two arithmetic shifts. */
1978 target = expand_shift (LSHIFT_EXPR, mode, target,
1979 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1980 return expand_shift (RSHIFT_EXPR, mode, target,
1981 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1984 /* If OP0 is a multi-word register, narrow it to the affected word.
1985 If the region spans two words, defer to extract_split_bit_field. */
1986 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1988 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1990 if (!fallback_p)
1991 return NULL_RTX;
1992 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1993 unsignedp, reverse);
1994 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1996 /* If OP0 is a hard register, copy it to a pseudo before calling
1997 simplify_gen_subreg. */
1998 if (REG_P (op0) && HARD_REGISTER_P (op0))
1999 op0 = copy_to_reg (op0);
2000 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
2001 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
2002 op0_mode = word_mode;
2003 bitnum %= BITS_PER_WORD;
2006 /* From here on we know the desired field is smaller than a word.
2007 If OP0 is a register, it too fits within a word. */
2008 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
2009 extraction_insn extv;
2010 if (!MEM_P (op0)
2011 && !reverse
2012 /* ??? We could limit the structure size to the part of OP0 that
2013 contains the field, with appropriate checks for endianness
2014 and TARGET_TRULY_NOOP_TRUNCATION. */
2015 && get_best_reg_extraction_insn (&extv, pattern,
2016 GET_MODE_BITSIZE (op0_mode.require ()),
2017 tmode))
2019 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2020 bitsize, bitnum,
2021 unsignedp, target, mode,
2022 tmode);
2023 if (result)
2024 return result;
2027 /* If OP0 is a memory, try copying it to a register and seeing if a
2028 cheap register alternative is available. */
2029   if (MEM_P (op0) && !reverse)
2031 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2032 tmode))
2034 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2035 bitsize, bitnum,
2036 unsignedp, target, mode,
2037 tmode);
2038 if (result)
2039 return result;
2042 rtx_insn *last = get_last_insn ();
2044 /* Try loading part of OP0 into a register and extracting the
2045 bitfield from that. */
2046 unsigned HOST_WIDE_INT bitpos;
2047 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2048 0, 0, tmode, &bitpos);
2049 if (xop0)
2051 xop0 = copy_to_reg (xop0);
2052 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2053 unsignedp, target,
2054 mode, tmode, reverse, false, NULL);
2055 if (result)
2056 return result;
2057 delete_insns_since (last);
2061 if (!fallback_p)
2062 return NULL;
2064 /* Find a correspondingly-sized integer field, so we can apply
2065 shifts and masks to it. */
2066 scalar_int_mode int_mode;
2067 if (!int_mode_for_mode (tmode).exists (&int_mode))
2068 /* If this fails, we should probably push op0 out to memory and then
2069 do a load. */
2070 int_mode = int_mode_for_mode (mode).require ();
2072 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2073 bitnum, target, unsignedp, reverse);
2075 /* Complex values must be reversed piecewise, so we need to undo the global
2076 reversal, convert to the complex mode and reverse again. */
2077 if (reverse && COMPLEX_MODE_P (tmode))
2079 target = flip_storage_order (int_mode, target);
2080 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2081 target = flip_storage_order (tmode, target);
2083 else
2084 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2086 return target;
2089 /* Generate code to extract a bit-field from STR_RTX
2090 containing BITSIZE bits, starting at BITNUM,
2091 and put it in TARGET if possible (if TARGET is nonzero).
2092 Regardless of TARGET, we return the rtx for where the value is placed.
2094 STR_RTX is the structure containing the byte (a REG or MEM).
2095 UNSIGNEDP is nonzero if this is an unsigned bit field.
2096 MODE is the natural mode of the field value once extracted.
2097 TMODE is the mode the caller would like the value to have;
2098 but the value may be returned with type MODE instead.
2100 If REVERSE is true, the extraction is to be done in reverse order.
2102 If a TARGET is specified and we can store in it at no extra cost,
2103 we do so, and return TARGET.
2104 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2105 if they are equally easy.
2107 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2108    then *ALT_RTL is set to TARGET (before legitimization).  */
2111 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2112 int unsignedp, rtx target, machine_mode mode,
2113 machine_mode tmode, bool reverse, rtx *alt_rtl)
2115 machine_mode mode1;
2117 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2118 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2119 mode1 = GET_MODE (str_rtx);
2120 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2121 mode1 = GET_MODE (target);
2122 else
2123 mode1 = tmode;
2125 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2126 scalar_int_mode int_mode;
2127 if (bitsize.is_constant (&ibitsize)
2128 && bitnum.is_constant (&ibitnum)
2129 && is_a <scalar_int_mode> (mode1, &int_mode)
2130 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2131 int_mode, 0, 0))
2133 /* Extraction of a full INT_MODE value can be done with a simple load.
2134 We know here that the field can be accessed with one single
2135 instruction. For targets that support unaligned memory,
2136 an unaligned access may be necessary. */
2137 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2139 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2140 ibitnum / BITS_PER_UNIT);
2141 if (reverse)
2142 result = flip_storage_order (int_mode, result);
2143 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2144 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2147 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2148 &ibitnum);
2149 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2150 str_rtx = copy_to_reg (str_rtx);
2151 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2152 target, mode, tmode, reverse, true, alt_rtl);
2155 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2156 target, mode, tmode, reverse, true, alt_rtl);
2159 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2160 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2161 otherwise OP0 is a BLKmode MEM.
2163 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2164 If REVERSE is true, the extraction is to be done in reverse order.
2166 If TARGET is nonzero, attempts to store the value there
2167 and return TARGET, but this is not guaranteed.
2168 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2170 static rtx
2171 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2172 opt_scalar_int_mode op0_mode,
2173 unsigned HOST_WIDE_INT bitsize,
2174 unsigned HOST_WIDE_INT bitnum, rtx target,
2175 int unsignedp, bool reverse)
2177 scalar_int_mode mode;
2178 if (MEM_P (op0))
2180 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2181 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2182 /* The only way this should occur is if the field spans word
2183 boundaries. */
2184 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2185 unsignedp, reverse);
2187 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2189 else
2190 mode = op0_mode.require ();
2192 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2193 target, unsignedp, reverse);
2196 /* Helper function for extract_fixed_bit_field, extracts
2197 the bit field always using MODE, which is the mode of OP0.
2198 If UNSIGNEDP is -1, the result need not be sign or zero extended.
2199 The other arguments are as for extract_fixed_bit_field. */
2201 static rtx
2202 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2203 unsigned HOST_WIDE_INT bitsize,
2204 unsigned HOST_WIDE_INT bitnum, rtx target,
2205 int unsignedp, bool reverse)
2207 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2208 for invalid input, such as extract equivalent of f5 from
2209 gcc.dg/pr48335-2.c. */
2211 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2212 /* BITNUM is the distance between our msb and that of OP0.
2213 Convert it to the distance from the lsb. */
2214 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2216 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2217 We have reduced the big-endian case to the little-endian case. */
2218 if (reverse)
2219 op0 = flip_storage_order (mode, op0);
2221 if (unsignedp)
2223 if (bitnum)
2225 /* If the field does not already start at the lsb,
2226 shift it so it does. */
2227 /* Maybe propagate the target for the shift. */
2228 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2229 if (tmode != mode)
2230 subtarget = 0;
2231 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2233 /* Convert the value to the desired mode. TMODE must also be a
2234 scalar integer for this conversion to make sense, since we
2235 shouldn't reinterpret the bits. */
2236 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2237 if (mode != new_mode)
2238 op0 = convert_to_mode (new_mode, op0, 1);
2240 /* Unless the msb of the field used to be the msb when we shifted,
2241 mask out the upper bits. */
2243 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize
2244 && unsignedp != -1)
2245 return expand_binop (new_mode, and_optab, op0,
2246 mask_rtx (new_mode, 0, bitsize, 0),
2247 target, 1, OPTAB_LIB_WIDEN);
2248 return op0;
2251 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2252 then arithmetic-shift its lsb to the lsb of the word. */
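  /* For example, with BITSIZE = 5 and BITNUM = 1 the narrowest containing
     integer mode found below is the 8-bit one: the left shift by
     8 - (5 + 1) = 2 puts the field's msb in bit 7, and the arithmetic right
     shift by 8 - 5 = 3 leaves the sign-extended field in bits 0..4.  */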
2253 op0 = force_reg (mode, op0);
2255 /* Find the narrowest integer mode that contains the field. */
2257 opt_scalar_int_mode mode_iter;
2258 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2259 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2260 break;
2262 mode = mode_iter.require ();
2263 op0 = convert_to_mode (mode, op0, 0);
2265 if (mode != tmode)
2266 target = 0;
2268 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2270 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2271 /* Maybe propagate the target for the shift. */
2272 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2273 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2276 return expand_shift (RSHIFT_EXPR, mode, op0,
2277 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2280 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2281 VALUE << BITPOS. */
2283 static rtx
2284 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2285 int bitpos)
2287 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2290 /* Extract a bit field that is split across two words
2291 and return an RTX for the result.
2293 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2294 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2295    UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend.
2296 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2297 a BLKmode MEM.
2299 If REVERSE is true, the extraction is to be done in reverse order. */
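/* For example, with 32-bit words a 10-bit field starting at BITPOS 28 is
   read as 4 bits from the first word and 6 bits from the second; when
   !BYTES_BIG_ENDIAN and !REVERSE the second part is shifted left by 4
   before the two parts are IORed together.  */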
2301 static rtx
2302 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2303 unsigned HOST_WIDE_INT bitsize,
2304 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2305 bool reverse)
2307 unsigned int unit;
2308 unsigned int bitsdone = 0;
2309 rtx result = NULL_RTX;
2310 int first = 1;
2312   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2313      much at a time.  */
2314 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2315 unit = BITS_PER_WORD;
2316 else
2317 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2319 while (bitsdone < bitsize)
2321 unsigned HOST_WIDE_INT thissize;
2322 rtx part;
2323 unsigned HOST_WIDE_INT thispos;
2324 unsigned HOST_WIDE_INT offset;
2326 offset = (bitpos + bitsdone) / unit;
2327 thispos = (bitpos + bitsdone) % unit;
2329 /* THISSIZE must not overrun a word boundary. Otherwise,
2330 extract_fixed_bit_field will call us again, and we will mutually
2331 recurse forever. */
2332 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2333 thissize = MIN (thissize, unit - thispos);
2335 /* If OP0 is a register, then handle OFFSET here. */
2336 rtx op0_piece = op0;
2337 opt_scalar_int_mode op0_piece_mode = op0_mode;
2338 if (SUBREG_P (op0) || REG_P (op0))
2340 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2341 op0_piece_mode = word_mode;
2342 offset = 0;
2345 /* Extract the parts in bit-counting order,
2346 whose meaning is determined by BYTES_PER_UNIT.
2347 OFFSET is in UNITs, and UNIT is in bits. */
2348 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2349 thissize, offset * unit + thispos,
2350 0, 1, reverse);
2351 bitsdone += thissize;
2353 /* Shift this part into place for the result. */
2354 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2356 if (bitsize != bitsdone)
2357 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2358 bitsize - bitsdone, 0, 1);
2360 else
2362 if (bitsdone != thissize)
2363 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2364 bitsdone - thissize, 0, 1);
2367 if (first)
2368 result = part;
2369 else
2370 /* Combine the parts with bitwise or. This works
2371 because we extracted each part as an unsigned bit field. */
2372 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2373 OPTAB_LIB_WIDEN);
2375 first = 0;
2378 /* Unsigned bit field: we are done. */
2379 if (unsignedp)
2380 return result;
2381 /* Signed bit field: sign-extend with two arithmetic shifts. */
2382 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2383 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2384 return expand_shift (RSHIFT_EXPR, word_mode, result,
2385 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2388 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2389 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2390 MODE, fill the upper bits with zeros. Fail if the layout of either
2391 mode is unknown (as for CC modes) or if the extraction would involve
2392 unprofitable mode punning. Return the value on success, otherwise
2393 return null.
2395 This is different from gen_lowpart* in these respects:
2397 - the returned value must always be considered an rvalue
2399 - when MODE is wider than SRC_MODE, the extraction involves
2400 a zero extension
2402 - when MODE is smaller than SRC_MODE, the extraction involves
2403 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2405 In other words, this routine performs a computation, whereas the
2406 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2407 operations. */
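/* For example, on a target where SImode is 32 bits and DImode is 64 bits,
   extract_low_bits (DImode, SImode, x) zero-extends the 32-bit pattern to
   64 bits, while extract_low_bits (SImode, DImode, x) keeps only the low
   32 bits and is therefore subject to TARGET_TRULY_NOOP_TRUNCATION.  */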
2410 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2412 scalar_int_mode int_mode, src_int_mode;
2414 if (mode == src_mode)
2415 return src;
2417 if (CONSTANT_P (src))
2419 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2420 fails, it will happily create (subreg (symbol_ref)) or similar
2421 invalid SUBREGs. */
2422 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2423 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2424 if (ret)
2425 return ret;
2427 if (GET_MODE (src) == VOIDmode
2428 || !validate_subreg (mode, src_mode, src, byte))
2429 return NULL_RTX;
2431 src = force_reg (GET_MODE (src), src);
2432 return gen_rtx_SUBREG (mode, src, byte);
2435 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2436 return NULL_RTX;
2438 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2439 && targetm.modes_tieable_p (mode, src_mode))
2441 rtx x = gen_lowpart_common (mode, src);
2442 if (x)
2443 return x;
2446 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2447 || !int_mode_for_mode (mode).exists (&int_mode))
2448 return NULL_RTX;
2450 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2451 return NULL_RTX;
2452 if (!targetm.modes_tieable_p (int_mode, mode))
2453 return NULL_RTX;
2455 src = gen_lowpart (src_int_mode, src);
2456 if (!validate_subreg (int_mode, src_int_mode, src,
2457 subreg_lowpart_offset (int_mode, src_int_mode)))
2458 return NULL_RTX;
2460 src = convert_modes (int_mode, src_int_mode, src, true);
2461 src = gen_lowpart (mode, src);
2462 return src;
2465 /* Add INC into TARGET. */
2467 void
2468 expand_inc (rtx target, rtx inc)
2470 rtx value = expand_binop (GET_MODE (target), add_optab,
2471 target, inc,
2472 target, 0, OPTAB_LIB_WIDEN);
2473 if (value != target)
2474 emit_move_insn (target, value);
2477 /* Subtract DEC from TARGET. */
2479 void
2480 expand_dec (rtx target, rtx dec)
2482 rtx value = expand_binop (GET_MODE (target), sub_optab,
2483 target, dec,
2484 target, 0, OPTAB_LIB_WIDEN);
2485 if (value != target)
2486 emit_move_insn (target, value);
2489 /* Output a shift instruction for expression code CODE,
2490 with SHIFTED being the rtx for the value to shift,
2491 and AMOUNT the rtx for the amount to shift by.
2492 Store the result in the rtx TARGET, if that is convenient.
2493 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2494 Return the rtx for where the value is.
2495 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2496 in which case 0 is returned. */
2498 static rtx
2499 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2500 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2502 rtx op1, temp = 0;
2503 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2504 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2505 optab lshift_optab = ashl_optab;
2506 optab rshift_arith_optab = ashr_optab;
2507 optab rshift_uns_optab = lshr_optab;
2508 optab lrotate_optab = rotl_optab;
2509 optab rrotate_optab = rotr_optab;
2510 machine_mode op1_mode;
2511 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2512 int attempt;
2513 bool speed = optimize_insn_for_speed_p ();
2515 op1 = amount;
2516 op1_mode = GET_MODE (op1);
2518   /* Determine whether the shift/rotate amount is a vector or a scalar.  If the
2519 shift amount is a vector, use the vector/vector shift patterns. */
2520 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2522 lshift_optab = vashl_optab;
2523 rshift_arith_optab = vashr_optab;
2524 rshift_uns_optab = vlshr_optab;
2525 lrotate_optab = vrotl_optab;
2526 rrotate_optab = vrotr_optab;
2529 /* Previously detected shift-counts computed by NEGATE_EXPR
2530 and shifted in the other direction; but that does not work
2531 on all machines. */
2533 if (SHIFT_COUNT_TRUNCATED)
2535 if (CONST_INT_P (op1)
2536 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2537 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2538 op1 = gen_int_shift_amount (mode,
2539 (unsigned HOST_WIDE_INT) INTVAL (op1)
2540 % GET_MODE_BITSIZE (scalar_mode));
2541 else if (GET_CODE (op1) == SUBREG
2542 && subreg_lowpart_p (op1)
2543 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2544 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2545 op1 = SUBREG_REG (op1);
2548   /* Canonicalize rotates by a constant amount.  We may canonicalize
2549      to reduce the immediate or if the ISA can rotate by constants
2550      in only one direction.  */
2551 if (rotate && reverse_rotate_by_imm_p (scalar_mode, left, op1))
2553 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2554 - INTVAL (op1)));
2555 left = !left;
2556 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2559   /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2560 Note that this is not the case for bigger values. For instance a rotation
2561 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2562 0x04030201 (bswapsi). */
2563 if (rotate
2564 && CONST_INT_P (op1)
2565 && INTVAL (op1) == BITS_PER_UNIT
2566 && GET_MODE_SIZE (scalar_mode) == 2
2567 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2568 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2570 if (op1 == const0_rtx)
2571 return shifted;
2573   /* Check whether it's cheaper to implement a left shift by a constant
2574 bit count by a sequence of additions. */
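  /* For instance, x << 2 can be emitted as two self-additions
     (t = x + x; t = t + t); the loop below does this when
     shift_cost (speed, mode, 2) exceeds 2 * add_cost (speed, mode).  */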
2575 if (code == LSHIFT_EXPR
2576 && CONST_INT_P (op1)
2577 && INTVAL (op1) > 0
2578 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2579 && INTVAL (op1) < MAX_BITS_PER_WORD
2580 && (shift_cost (speed, mode, INTVAL (op1))
2581 > INTVAL (op1) * add_cost (speed, mode))
2582 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2584 int i;
2585 for (i = 0; i < INTVAL (op1); i++)
2587 temp = force_reg (mode, shifted);
2588 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2589 unsignedp, OPTAB_LIB_WIDEN);
2591 return shifted;
2594 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2596 enum optab_methods methods;
2598 if (attempt == 0)
2599 methods = OPTAB_DIRECT;
2600 else if (attempt == 1)
2601 methods = OPTAB_WIDEN;
2602 else
2603 methods = OPTAB_LIB_WIDEN;
2605 if (rotate)
2607 /* Widening does not work for rotation. */
2608 if (methods == OPTAB_WIDEN)
2609 continue;
2610 else if (methods == OPTAB_LIB_WIDEN)
2612 /* If we have been unable to open-code this by a rotation,
2613 do it as the IOR of two shifts. I.e., to rotate A
2614 by N bits, compute
2615 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2616 where C is the bitsize of A.
2618 It is theoretically possible that the target machine might
2619 not be able to perform either shift and hence we would
2620 be making two libcalls rather than just the one for the
2621 shift (similarly if IOR could not be done). We will allow
2622 this extremely unlikely lossage to avoid complicating the
2623 code below. */
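	     /* For example, with a 32-bit mode and N = 8 this computes
		(A << 8) | ((unsigned) A >> 24), since (-8) & 31 == 24; for a
		variable N the NEG/AND sequence built below produces the same
		complementary shift count.  */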
2625 rtx subtarget = target == shifted ? 0 : target;
2626 rtx new_amount, other_amount;
2627 rtx temp1;
2629 new_amount = op1;
2630 if (op1 == const0_rtx)
2631 return shifted;
2632 else if (CONST_INT_P (op1))
2633 other_amount = gen_int_shift_amount
2634 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2635 else
2637 other_amount
2638 = simplify_gen_unary (NEG, GET_MODE (op1),
2639 op1, GET_MODE (op1));
2640 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2641 other_amount
2642 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2643 gen_int_mode (mask, GET_MODE (op1)));
2646 shifted = force_reg (mode, shifted);
2648 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2649 mode, shifted, new_amount, 0, 1);
2650 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2651 mode, shifted, other_amount,
2652 subtarget, 1);
2653 return expand_binop (mode, ior_optab, temp, temp1, target,
2654 unsignedp, methods);
2657 temp = expand_binop (mode,
2658 left ? lrotate_optab : rrotate_optab,
2659 shifted, op1, target, unsignedp, methods);
2661 else if (unsignedp)
2662 temp = expand_binop (mode,
2663 left ? lshift_optab : rshift_uns_optab,
2664 shifted, op1, target, unsignedp, methods);
2666 /* Do arithmetic shifts.
2667 Also, if we are going to widen the operand, we can just as well
2668 use an arithmetic right-shift instead of a logical one. */
2669 if (temp == 0 && ! rotate
2670 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2672 enum optab_methods methods1 = methods;
2674 /* If trying to widen a log shift to an arithmetic shift,
2675 don't accept an arithmetic shift of the same size. */
2676 if (unsignedp)
2677 methods1 = OPTAB_MUST_WIDEN;
2679 /* Arithmetic shift */
2681 temp = expand_binop (mode,
2682 left ? lshift_optab : rshift_arith_optab,
2683 shifted, op1, target, unsignedp, methods1);
2686 /* We used to try extzv here for logical right shifts, but that was
2687 only useful for one machine, the VAX, and caused poor code
2688 generation there for lshrdi3, so the code was deleted and a
2689 define_expand for lshrsi3 was added to vax.md. */
2692 gcc_assert (temp != NULL_RTX || may_fail);
2693 return temp;
2696 /* Output a shift instruction for expression code CODE,
2697 with SHIFTED being the rtx for the value to shift,
2698 and AMOUNT the amount to shift by.
2699 Store the result in the rtx TARGET, if that is convenient.
2700 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2701 Return the rtx for where the value is. */
2704 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2705 poly_int64 amount, rtx target, int unsignedp)
2707 return expand_shift_1 (code, mode, shifted,
2708 gen_int_shift_amount (mode, amount),
2709 target, unsignedp);
2712 /* Likewise, but return 0 if that cannot be done. */
2715 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2716 int amount, rtx target, int unsignedp)
2718 return expand_shift_1 (code, mode,
2719 shifted, GEN_INT (amount), target, unsignedp, true);
2722 /* Output a shift instruction for expression code CODE,
2723 with SHIFTED being the rtx for the value to shift,
2724 and AMOUNT the tree for the amount to shift by.
2725 Store the result in the rtx TARGET, if that is convenient.
2726 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2727 Return the rtx for where the value is. */
2730 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2731 tree amount, rtx target, int unsignedp)
2733 return expand_shift_1 (code, mode,
2734 shifted, expand_normal (amount), target, unsignedp);
2738 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2739 const struct mult_cost *, machine_mode mode);
2740 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2741 const struct algorithm *, enum mult_variant);
2742 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2743 static rtx extract_high_half (scalar_int_mode, rtx);
2744 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2745 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2746 int, int);
2747 /* Compute and return the best algorithm for multiplying by T.
2748    The algorithm must cost less than COST_LIMIT.
2749    If retval.cost >= COST_LIMIT, no algorithm was found and all
2750    other fields of the returned struct are undefined.
2751 MODE is the machine mode of the multiplication. */
2753 static void
2754 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2755 const struct mult_cost *cost_limit, machine_mode mode)
2757 int m;
2758 struct algorithm *alg_in, *best_alg;
2759 struct mult_cost best_cost;
2760 struct mult_cost new_limit;
2761 int op_cost, op_latency;
2762 unsigned HOST_WIDE_INT orig_t = t;
2763 unsigned HOST_WIDE_INT q;
2764 int maxm, hash_index;
2765 bool cache_hit = false;
2766 enum alg_code cache_alg = alg_zero;
2767 bool speed = optimize_insn_for_speed_p ();
2768 scalar_int_mode imode;
2769 struct alg_hash_entry *entry_ptr;
2771 /* Indicate that no algorithm is yet found. If no algorithm
2772 is found, this value will be returned and indicate failure. */
2773 alg_out->cost.cost = cost_limit->cost + 1;
2774 alg_out->cost.latency = cost_limit->latency + 1;
2776 if (cost_limit->cost < 0
2777 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2778 return;
2780 /* Be prepared for vector modes. */
2781 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2783 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2785 /* Restrict the bits of "t" to the multiplication's mode. */
2786 t &= GET_MODE_MASK (imode);
2788 /* t == 1 can be done in zero cost. */
2789 if (t == 1)
2791 alg_out->ops = 1;
2792 alg_out->cost.cost = 0;
2793 alg_out->cost.latency = 0;
2794 alg_out->op[0] = alg_m;
2795 return;
2798 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2799 fail now. */
2800 if (t == 0)
2802 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2803 return;
2804 else
2806 alg_out->ops = 1;
2807 alg_out->cost.cost = zero_cost (speed);
2808 alg_out->cost.latency = zero_cost (speed);
2809 alg_out->op[0] = alg_zero;
2810 return;
2814 /* We'll be needing a couple extra algorithm structures now. */
2816 alg_in = XALLOCA (struct algorithm);
2817 best_alg = XALLOCA (struct algorithm);
2818 best_cost = *cost_limit;
2820 /* Compute the hash index. */
2821 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2823 /* See if we already know what to do for T. */
2824 entry_ptr = alg_hash_entry_ptr (hash_index);
2825 if (entry_ptr->t == t
2826 && entry_ptr->mode == mode
2827 && entry_ptr->speed == speed
2828 && entry_ptr->alg != alg_unknown)
2830 cache_alg = entry_ptr->alg;
2832 if (cache_alg == alg_impossible)
2834 /* The cache tells us that it's impossible to synthesize
2835 multiplication by T within entry_ptr->cost. */
2836 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2837 /* COST_LIMIT is at least as restrictive as the one
2838 recorded in the hash table, in which case we have no
2839 hope of synthesizing a multiplication. Just
2840 return. */
2841 return;
2843 /* If we get here, COST_LIMIT is less restrictive than the
2844 one recorded in the hash table, so we may be able to
2845 synthesize a multiplication. Proceed as if we didn't
2846 have the cache entry. */
2848 else
2850 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2851 /* The cached algorithm shows that this multiplication
2852 requires more cost than COST_LIMIT. Just return. This
2853 way, we don't clobber this cache entry with
2854 alg_impossible but retain useful information. */
2855 return;
2857 cache_hit = true;
2859 switch (cache_alg)
2861 case alg_shift:
2862 goto do_alg_shift;
2864 case alg_add_t_m2:
2865 case alg_sub_t_m2:
2866 goto do_alg_addsub_t_m2;
2868 case alg_add_factor:
2869 case alg_sub_factor:
2870 goto do_alg_addsub_factor;
2872 case alg_add_t2_m:
2873 goto do_alg_add_t2_m;
2875 case alg_sub_t2_m:
2876 goto do_alg_sub_t2_m;
2878 default:
2879 gcc_unreachable ();
2884 /* If we have a group of zero bits at the low-order part of T, try
2885 multiplying by the remaining bits and then doing a shift. */
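  /* For example, T = 40 (binary 101000) has m = 3 trailing zero bits and
     q = 5, so we synthesize the multiplication by 5 (e.g. (x << 2) + x)
     and then shift the result left by 3.  */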
2887 if ((t & 1) == 0)
2889 do_alg_shift:
2890 m = ctz_or_zero (t); /* m = number of low zero bits */
2891 if (m < maxm)
2893 q = t >> m;
2894 /* The function expand_shift will choose between a shift and
2895 a sequence of additions, so the observed cost is given as
2896 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2897 op_cost = m * add_cost (speed, mode);
2898 if (shift_cost (speed, mode, m) < op_cost)
2899 op_cost = shift_cost (speed, mode, m);
2900 new_limit.cost = best_cost.cost - op_cost;
2901 new_limit.latency = best_cost.latency - op_cost;
2902 synth_mult (alg_in, q, &new_limit, mode);
2904 alg_in->cost.cost += op_cost;
2905 alg_in->cost.latency += op_cost;
2906 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2908 best_cost = alg_in->cost;
2909 std::swap (alg_in, best_alg);
2910 best_alg->log[best_alg->ops] = m;
2911 best_alg->op[best_alg->ops] = alg_shift;
2914 /* See if treating ORIG_T as a signed number yields a better
2915 sequence. Try this sequence only for a negative ORIG_T
2916 as it would be useless for a non-negative ORIG_T. */
2917 if ((HOST_WIDE_INT) orig_t < 0)
2919 /* Shift ORIG_T as follows because a right shift of a
2920 negative-valued signed type is implementation
2921 defined. */
2922 q = ~(~orig_t >> m);
2923 /* The function expand_shift will choose between a shift
2924 and a sequence of additions, so the observed cost is
2925 given as MIN (m * add_cost(speed, mode),
2926 shift_cost(speed, mode, m)). */
2927 op_cost = m * add_cost (speed, mode);
2928 if (shift_cost (speed, mode, m) < op_cost)
2929 op_cost = shift_cost (speed, mode, m);
2930 new_limit.cost = best_cost.cost - op_cost;
2931 new_limit.latency = best_cost.latency - op_cost;
2932 synth_mult (alg_in, q, &new_limit, mode);
2934 alg_in->cost.cost += op_cost;
2935 alg_in->cost.latency += op_cost;
2936 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2938 best_cost = alg_in->cost;
2939 std::swap (alg_in, best_alg);
2940 best_alg->log[best_alg->ops] = m;
2941 best_alg->op[best_alg->ops] = alg_shift;
2945 if (cache_hit)
2946 goto done;
2949 /* If we have an odd number, add or subtract one. */
2950 if ((t & 1) != 0)
2952 unsigned HOST_WIDE_INT w;
2954 do_alg_addsub_t_m2:
2955 for (w = 1; (w & t) != 0; w <<= 1)
2957       /* If T was -1, then W will be zero after the loop.  This is another
2958	  case where T ends with ...111.  Handling it via (T + 1) followed by
2959	  a subtraction produces slightly better code and makes algorithm
2960	  selection much faster than treating it like the ...0111 case
2961	  below.  */
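      /* For example, T = 7 ends in three 1 bits, so W becomes 8; the branch
	 below then synthesizes T + 1 = 8 (a single shift) and subtracts the
	 operand once, i.e. x * 7 = (x << 3) - x.  */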
2962 if (w == 0
2963 || (w > 2
2964 /* Reject the case where t is 3.
2965 Thus we prefer addition in that case. */
2966 && t != 3))
2968 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2970 op_cost = add_cost (speed, mode);
2971 new_limit.cost = best_cost.cost - op_cost;
2972 new_limit.latency = best_cost.latency - op_cost;
2973 synth_mult (alg_in, t + 1, &new_limit, mode);
2975 alg_in->cost.cost += op_cost;
2976 alg_in->cost.latency += op_cost;
2977 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2979 best_cost = alg_in->cost;
2980 std::swap (alg_in, best_alg);
2981 best_alg->log[best_alg->ops] = 0;
2982 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2985 else
2987 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2989 op_cost = add_cost (speed, mode);
2990 new_limit.cost = best_cost.cost - op_cost;
2991 new_limit.latency = best_cost.latency - op_cost;
2992 synth_mult (alg_in, t - 1, &new_limit, mode);
2994 alg_in->cost.cost += op_cost;
2995 alg_in->cost.latency += op_cost;
2996 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2998 best_cost = alg_in->cost;
2999 std::swap (alg_in, best_alg);
3000 best_alg->log[best_alg->ops] = 0;
3001 best_alg->op[best_alg->ops] = alg_add_t_m2;
3005 /* We may be able to calculate a * -7, a * -15, a * -31, etc
3006 quickly with a - a * n for some appropriate constant n. */
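      /* For example, for ORIG_T = -7 we have -ORIG_T + 1 = 8, so m = 3 and
	 the resulting sequence is a - (a << 3).  */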
3007 m = exact_log2 (-orig_t + 1);
3008 if (m >= 0 && m < maxm)
3010 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3011 /* If the target has a cheap shift-and-subtract insn use
3012 that in preference to a shift insn followed by a sub insn.
3013 Assume that the shift-and-sub is "atomic" with a latency
3014	  equal to its cost; otherwise assume that on superscalar
3015 hardware the shift may be executed concurrently with the
3016 earlier steps in the algorithm. */
3017 if (shiftsub1_cost (speed, mode, m) <= op_cost)
3019 op_cost = shiftsub1_cost (speed, mode, m);
3020 op_latency = op_cost;
3022 else
3023 op_latency = add_cost (speed, mode);
3025 new_limit.cost = best_cost.cost - op_cost;
3026 new_limit.latency = best_cost.latency - op_latency;
3027 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3028 &new_limit, mode);
3030 alg_in->cost.cost += op_cost;
3031 alg_in->cost.latency += op_latency;
3032 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3034 best_cost = alg_in->cost;
3035 std::swap (alg_in, best_alg);
3036 best_alg->log[best_alg->ops] = m;
3037 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3041 if (cache_hit)
3042 goto done;
3045 /* Look for factors of t of the form
3046 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3047    If we find such a factor, we can multiply by t using an algorithm that
3048    multiplies by q, shifts the result left by m, and adds or subtracts it to or from itself.
3050 We search for large factors first and loop down, even if large factors
3051 are less probable than small; if we find a large factor we will find a
3052 good sequence quickly, and therefore be able to prune (by decreasing
3053 COST_LIMIT) the search. */
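  /* For example, t = 45 factors as 3 * (2**4 - 1), so one candidate is
     u = (x << 1) + x (i.e. x * 3) followed by (u << 4) - u; whether it is
     chosen depends on the target's shift and add cost tables.  */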
3055 do_alg_addsub_factor:
3056 for (m = floor_log2 (t - 1); m >= 2; m--)
3058 unsigned HOST_WIDE_INT d;
3060 d = (HOST_WIDE_INT_1U << m) + 1;
3061 if (t % d == 0 && t > d && m < maxm
3062 && (!cache_hit || cache_alg == alg_add_factor))
3064 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3065 if (shiftadd_cost (speed, mode, m) <= op_cost)
3066 op_cost = shiftadd_cost (speed, mode, m);
3068 op_latency = op_cost;
3071 new_limit.cost = best_cost.cost - op_cost;
3072 new_limit.latency = best_cost.latency - op_latency;
3073 synth_mult (alg_in, t / d, &new_limit, mode);
3075 alg_in->cost.cost += op_cost;
3076 alg_in->cost.latency += op_latency;
3077 if (alg_in->cost.latency < op_cost)
3078 alg_in->cost.latency = op_cost;
3079 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3081 best_cost = alg_in->cost;
3082 std::swap (alg_in, best_alg);
3083 best_alg->log[best_alg->ops] = m;
3084 best_alg->op[best_alg->ops] = alg_add_factor;
3086 /* Other factors will have been taken care of in the recursion. */
3087 break;
3090 d = (HOST_WIDE_INT_1U << m) - 1;
3091 if (t % d == 0 && t > d && m < maxm
3092 && (!cache_hit || cache_alg == alg_sub_factor))
3094 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3095 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3096 op_cost = shiftsub0_cost (speed, mode, m);
3098 op_latency = op_cost;
3100 new_limit.cost = best_cost.cost - op_cost;
3101 new_limit.latency = best_cost.latency - op_latency;
3102 synth_mult (alg_in, t / d, &new_limit, mode);
3104 alg_in->cost.cost += op_cost;
3105 alg_in->cost.latency += op_latency;
3106 if (alg_in->cost.latency < op_cost)
3107 alg_in->cost.latency = op_cost;
3108 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3110 best_cost = alg_in->cost;
3111 std::swap (alg_in, best_alg);
3112 best_alg->log[best_alg->ops] = m;
3113 best_alg->op[best_alg->ops] = alg_sub_factor;
3115 break;
3118 if (cache_hit)
3119 goto done;
3121 /* Try shift-and-add (load effective address) instructions,
3122 i.e. do a*3, a*5, a*9. */
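  /* For example, a * 9 becomes (a << 3) + a, which many targets can do in a
     single scaled-index address calculation.  */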
3123 if ((t & 1) != 0)
3125 do_alg_add_t2_m:
3126 q = t - 1;
3127 m = ctz_hwi (q);
3128 if (q && m < maxm)
3130 op_cost = shiftadd_cost (speed, mode, m);
3131 new_limit.cost = best_cost.cost - op_cost;
3132 new_limit.latency = best_cost.latency - op_cost;
3133 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3135 alg_in->cost.cost += op_cost;
3136 alg_in->cost.latency += op_cost;
3137 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3139 best_cost = alg_in->cost;
3140 std::swap (alg_in, best_alg);
3141 best_alg->log[best_alg->ops] = m;
3142 best_alg->op[best_alg->ops] = alg_add_t2_m;
3145 if (cache_hit)
3146 goto done;
3148 do_alg_sub_t2_m:
3149 q = t + 1;
3150 m = ctz_hwi (q);
3151 if (q && m < maxm)
3153 op_cost = shiftsub0_cost (speed, mode, m);
3154 new_limit.cost = best_cost.cost - op_cost;
3155 new_limit.latency = best_cost.latency - op_cost;
3156 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3158 alg_in->cost.cost += op_cost;
3159 alg_in->cost.latency += op_cost;
3160 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3162 best_cost = alg_in->cost;
3163 std::swap (alg_in, best_alg);
3164 best_alg->log[best_alg->ops] = m;
3165 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3168 if (cache_hit)
3169 goto done;
3172 done:
3173 /* If best_cost has not decreased, we have not found any algorithm. */
3174 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3176 /* We failed to find an algorithm. Record alg_impossible for
3177 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3178 we are asked to find an algorithm for T within the same or
3179 lower COST_LIMIT, we can immediately return to the
3180 caller. */
3181 entry_ptr->t = t;
3182 entry_ptr->mode = mode;
3183 entry_ptr->speed = speed;
3184 entry_ptr->alg = alg_impossible;
3185 entry_ptr->cost = *cost_limit;
3186 return;
3189 /* Cache the result. */
3190 if (!cache_hit)
3192 entry_ptr->t = t;
3193 entry_ptr->mode = mode;
3194 entry_ptr->speed = speed;
3195 entry_ptr->alg = best_alg->op[best_alg->ops];
3196 entry_ptr->cost.cost = best_cost.cost;
3197 entry_ptr->cost.latency = best_cost.latency;
3200   /* If we are getting too long a sequence for `struct algorithm'
3201      to record, make this search fail.  */
3202 if (best_alg->ops == MAX_BITS_PER_WORD)
3203 return;
3205 /* Copy the algorithm from temporary space to the space at alg_out.
3206 We avoid using structure assignment because the majority of
3207 best_alg is normally undefined, and this is a critical function. */
3208 alg_out->ops = best_alg->ops + 1;
3209 alg_out->cost = best_cost;
3210 memcpy (alg_out->op, best_alg->op,
3211 alg_out->ops * sizeof *alg_out->op);
3212 memcpy (alg_out->log, best_alg->log,
3213 alg_out->ops * sizeof *alg_out->log);
3216 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3217 Try three variations:
3219 - a shift/add sequence based on VAL itself
3220 - a shift/add sequence based on -VAL, followed by a negation
3221 - a shift/add sequence based on VAL - 1, followed by an addition.
3223    Return true if the cheapest of these costs less than MULT_COST,
3224 describing the algorithm in *ALG and final fixup in *VARIANT. */
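/* For example, for VAL = -5 the candidates are a direct sequence for -5,
   (x << 2) + x followed by a negation (the -VAL variant), and a sequence
   for -6 followed by an addition; which one wins depends on the target's
   shift, add and negate costs.  */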
3226 bool
3227 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3228 struct algorithm *alg, enum mult_variant *variant,
3229 int mult_cost)
3231 struct algorithm alg2;
3232 struct mult_cost limit;
3233 int op_cost;
3234 bool speed = optimize_insn_for_speed_p ();
3236 /* Fail quickly for impossible bounds. */
3237 if (mult_cost < 0)
3238 return false;
3240 /* Ensure that mult_cost provides a reasonable upper bound.
3241 Any constant multiplication can be performed with less
3242 than 2 * bits additions. */
3243 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3244 if (mult_cost > op_cost)
3245 mult_cost = op_cost;
3247 *variant = basic_variant;
3248 limit.cost = mult_cost;
3249 limit.latency = mult_cost;
3250 synth_mult (alg, val, &limit, mode);
3252 /* This works only if the inverted value actually fits in an
3253      `unsigned int'.  */
3254 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3256 op_cost = neg_cost (speed, mode);
3257 if (MULT_COST_LESS (&alg->cost, mult_cost))
3259 limit.cost = alg->cost.cost - op_cost;
3260 limit.latency = alg->cost.latency - op_cost;
3262 else
3264 limit.cost = mult_cost - op_cost;
3265 limit.latency = mult_cost - op_cost;
3268 synth_mult (&alg2, -val, &limit, mode);
3269 alg2.cost.cost += op_cost;
3270 alg2.cost.latency += op_cost;
3271 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3272 *alg = alg2, *variant = negate_variant;
3275 /* This proves very useful for division-by-constant. */
3276 op_cost = add_cost (speed, mode);
3277 if (MULT_COST_LESS (&alg->cost, mult_cost))
3279 limit.cost = alg->cost.cost - op_cost;
3280 limit.latency = alg->cost.latency - op_cost;
3282 else
3284 limit.cost = mult_cost - op_cost;
3285 limit.latency = mult_cost - op_cost;
3288 if (val != HOST_WIDE_INT_MIN
3289 || GET_MODE_UNIT_PRECISION (mode) == HOST_BITS_PER_WIDE_INT)
3291 synth_mult (&alg2, val - HOST_WIDE_INT_1U, &limit, mode);
3292 alg2.cost.cost += op_cost;
3293 alg2.cost.latency += op_cost;
3294 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3295 *alg = alg2, *variant = add_variant;
3298 return MULT_COST_LESS (&alg->cost, mult_cost);
3301 /* A subroutine of expand_mult, used for constant multiplications.
3302 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3303 convenient. Use the shift/add sequence described by ALG and apply
3304 the final fixup specified by VARIANT. */
3306 static rtx
3307 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3308 rtx target, const struct algorithm *alg,
3309 enum mult_variant variant)
3311 unsigned HOST_WIDE_INT val_so_far;
3312 rtx_insn *insn;
3313 rtx accum, tem;
3314 int opno;
3315 machine_mode nmode;
3317 /* Avoid referencing memory over and over and invalid sharing
3318 on SUBREGs. */
3319 op0 = force_reg (mode, op0);
3321 /* ACCUM starts out either as OP0 or as a zero, depending on
3322 the first operation. */
3324 if (alg->op[0] == alg_zero)
3326 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3327 val_so_far = 0;
3329 else if (alg->op[0] == alg_m)
3331 accum = copy_to_mode_reg (mode, op0);
3332 val_so_far = 1;
3334 else
3335 gcc_unreachable ();
3337 for (opno = 1; opno < alg->ops; opno++)
3339 int log = alg->log[opno];
3340 rtx shift_subtarget = optimize ? 0 : accum;
3341 rtx add_target
3342 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3343 && !optimize)
3344 ? target : 0;
3345 rtx accum_target = optimize ? 0 : accum;
3346 rtx accum_inner;
3348 switch (alg->op[opno])
3350 case alg_shift:
3351 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3352 /* REG_EQUAL note will be attached to the following insn. */
3353 emit_move_insn (accum, tem);
3354 val_so_far <<= log;
3355 break;
3357 case alg_add_t_m2:
3358 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3359 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3360 add_target ? add_target : accum_target);
3361 val_so_far += HOST_WIDE_INT_1U << log;
3362 break;
3364 case alg_sub_t_m2:
3365 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3366 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3367 add_target ? add_target : accum_target);
3368 val_so_far -= HOST_WIDE_INT_1U << log;
3369 break;
3371 case alg_add_t2_m:
3372 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3373 log, shift_subtarget, 0);
3374 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3375 add_target ? add_target : accum_target);
3376 val_so_far = (val_so_far << log) + 1;
3377 break;
3379 case alg_sub_t2_m:
3380 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3381 log, shift_subtarget, 0);
3382 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3383 add_target ? add_target : accum_target);
3384 val_so_far = (val_so_far << log) - 1;
3385 break;
3387 case alg_add_factor:
3388 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3389 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3390 add_target ? add_target : accum_target);
3391 val_so_far += val_so_far << log;
3392 break;
3394 case alg_sub_factor:
3395 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3396 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3397 (add_target
3398 ? add_target : (optimize ? 0 : tem)));
3399 val_so_far = (val_so_far << log) - val_so_far;
3400 break;
3402 default:
3403 gcc_unreachable ();
3406 if (SCALAR_INT_MODE_P (mode))
3408 /* Write a REG_EQUAL note on the last insn so that we can cse
3409 multiplication sequences. Note that if ACCUM is a SUBREG,
3410 we've set the inner register and must properly indicate that. */
3411 tem = op0, nmode = mode;
3412 accum_inner = accum;
3413 if (GET_CODE (accum) == SUBREG)
3415 accum_inner = SUBREG_REG (accum);
3416 nmode = GET_MODE (accum_inner);
3417 tem = gen_lowpart (nmode, op0);
3420 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3421 In that case, only the low bits of accum would be guaranteed to
3422	     be equal to the content of the REG_EQUAL note; the upper bits
3423 can be anything. */
3424 if (!paradoxical_subreg_p (tem))
3426 insn = get_last_insn ();
3427 wide_int wval_so_far
3428 = wi::uhwi (val_so_far,
3429 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3430 rtx c = immed_wide_int_const (wval_so_far, nmode);
3431 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3432 accum_inner);
3437 if (variant == negate_variant)
3439 val_so_far = -val_so_far;
3440 accum = expand_unop (mode, neg_optab, accum, target, 0);
3442 else if (variant == add_variant)
3444 val_so_far = val_so_far + 1;
3445 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3448 /* Compare only the bits of val and val_so_far that are significant
3449 in the result mode, to avoid sign-/zero-extension confusion. */
3450 nmode = GET_MODE_INNER (mode);
3451 val &= GET_MODE_MASK (nmode);
3452 val_so_far &= GET_MODE_MASK (nmode);
3453 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3455 return accum;
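/* Illustrative sketch, not part of the expansion itself: one chain
   synth_mult can produce for a constant such as 10 is alg_m,
   alg_add_t2_m (log 2), alg_shift (log 1), which the loop above turns
   into straight-line RTL equivalent to

       accum = x;                  alg_m,         val_so_far = 1
       accum = (accum << 2) + x;   alg_add_t2_m,  val_so_far = 5
       accum = accum << 1;         alg_shift,     val_so_far = 10

   The REG_EQUAL note written above records accum == x * 10 so that
   later passes can CSE the whole sequence, and the final assertion
   checks that VAL_SO_FAR really reached VAL.  */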
3458 /* Perform a multiplication and return an rtx for the result.
3459 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3460 TARGET is a suggestion for where to store the result (an rtx).
3462 We check specially for a constant integer as OP1.
3463 If you want this check for OP0 as well, then before calling
3464 you should swap the two operands if OP0 would be constant. */
3467 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3468 int unsignedp, bool no_libcall)
3470 enum mult_variant variant;
3471 struct algorithm algorithm;
3472 rtx scalar_op1;
3473 int max_cost;
3474 bool speed = optimize_insn_for_speed_p ();
3475 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3477 if (CONSTANT_P (op0))
3478 std::swap (op0, op1);
3480 /* For vectors, there are several simplifications that can be made if
3481 all elements of the vector constant are identical. */
3482 scalar_op1 = unwrap_const_vec_duplicate (op1);
3484 if (INTEGRAL_MODE_P (mode))
3486 rtx fake_reg;
3487 HOST_WIDE_INT coeff;
3488 bool is_neg;
3489 int mode_bitsize;
3491 if (op1 == CONST0_RTX (mode))
3492 return op1;
3493 if (op1 == CONST1_RTX (mode))
3494 return op0;
3495 if (op1 == CONSTM1_RTX (mode))
3496 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3497 op0, target, 0);
3499 if (do_trapv)
3500 goto skip_synth;
3502 /* If mode is an integer vector mode, check if the backend supports
3503 vector lshift (by scalar or vector) at all. If not, we can't use
3504 a synthesized multiply. */
3505 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3506 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3507 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3508 goto skip_synth;
3510 /* These are the operations that are potentially turned into
3511 a sequence of shifts and additions. */
3512 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3514 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3515 less than or equal in size to `unsigned int' this doesn't matter.
3516 If the mode is larger than `unsigned int', then synth_mult works
3517 only if the constant value exactly fits in an `unsigned int' without
3518 any truncation. This means that multiplying by negative values does
3519 not work; results are off by 2^32 on a 32 bit machine. */
3520 if (CONST_INT_P (scalar_op1))
3522 coeff = INTVAL (scalar_op1);
3523 is_neg = coeff < 0;
3525 #if TARGET_SUPPORTS_WIDE_INT
3526 else if (CONST_WIDE_INT_P (scalar_op1))
3527 #else
3528 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3529 #endif
3531 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3532 /* Perfect power of 2 (other than 1, which is handled above). */
3533 if (shift > 0)
3534 return expand_shift (LSHIFT_EXPR, mode, op0,
3535 shift, target, unsignedp);
3536 else
3537 goto skip_synth;
3539 else
3540 goto skip_synth;
3542 /* We used to test optimize here, on the grounds that it's better to
3543 produce a smaller program when -O is not used. But this causes
3544 such a terrible slowdown sometimes that it seems better to always
3545 use synth_mult. */
3547 /* Special case powers of two. */
3548 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3549 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3550 return expand_shift (LSHIFT_EXPR, mode, op0,
3551 floor_log2 (coeff), target, unsignedp);
3553 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3555 /* Attempt to handle multiplication of DImode values by negative
3556 coefficients, by performing the multiplication by a positive
3557 multiplier and then inverting the result. */
3558 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3560 /* It's safe to use -coeff even for INT_MIN, as the
3561 result is interpreted as an unsigned coefficient.
3562 Exclude cost of op0 from max_cost to match the cost
3563 calculation of the synth_mult. */
3564 coeff = -(unsigned HOST_WIDE_INT) coeff;
3565 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3566 mode, speed)
3567 - neg_cost (speed, mode));
3568 if (max_cost <= 0)
3569 goto skip_synth;
3571 /* Special case powers of two. */
3572 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3574 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3575 floor_log2 (coeff), target, unsignedp);
3576 return expand_unop (mode, neg_optab, temp, target, 0);
3579 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3580 max_cost))
3582 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3583 &algorithm, variant);
3584 return expand_unop (mode, neg_optab, temp, target, 0);
3586 goto skip_synth;
3589 /* Exclude cost of op0 from max_cost to match the cost
3590 calculation of the synth_mult. */
3591 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3592 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3593 return expand_mult_const (mode, op0, coeff, target,
3594 &algorithm, variant);
3596 skip_synth:
3598 /* Expand x*2.0 as x+x. */
3599 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3600 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3602 op0 = force_reg (GET_MODE (op0), op0);
3603 return expand_binop (mode, add_optab, op0, op0,
3604 target, unsignedp,
3605 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3608 /* This used to use umul_optab if unsigned, but for non-widening multiply
3609 there is no difference between signed and unsigned. */
3610 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3611 op0, op1, target, unsignedp,
3612 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3613 gcc_assert (op0 || no_libcall);
3614 return op0;
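/* Illustrative sketch: for negative coefficients in modes wider than a
   HOST_WIDE_INT, the path above rewrites, say, x * -6 as -(x * 6),
   keeping the synthesized part in the unsigned domain that synth_mult
   reasons about:

       t      = (x << 1) + x;       x * 3
       t      = t << 1;             x * 6
       result = -t;                 x * -6

   and only does so when neg_cost plus the cost of the shift/add chain
   still beats the cost of a real multiply instruction.  */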
3617 /* Return a cost estimate for multiplying a register by the given
3618 COEFFicient in the given MODE and SPEED. */
3621 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3623 int max_cost;
3624 struct algorithm algorithm;
3625 enum mult_variant variant;
3627 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3628 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3629 mode, speed);
3630 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3631 return algorithm.cost.cost;
3632 else
3633 return max_cost;
3636 /* Perform a widening multiplication and return an rtx for the result.
3637 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3638 TARGET is a suggestion for where to store the result (an rtx).
3639 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3640 or smul_widen_optab.
3642 We check specially for a constant integer as OP1, comparing the
3643 cost of a widening multiply against the cost of a sequence of shifts
3644 and adds. */
3647 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3648 int unsignedp, optab this_optab)
3650 bool speed = optimize_insn_for_speed_p ();
3651 rtx cop1;
3653 if (CONST_INT_P (op1)
3654 && GET_MODE (op0) != VOIDmode
3655 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3656 this_optab == umul_widen_optab))
3657 && CONST_INT_P (cop1)
3658 && (INTVAL (cop1) >= 0
3659 || HWI_COMPUTABLE_MODE_P (mode)))
3661 HOST_WIDE_INT coeff = INTVAL (cop1);
3662 int max_cost;
3663 enum mult_variant variant;
3664 struct algorithm algorithm;
3666 if (coeff == 0)
3667 return CONST0_RTX (mode);
3669 /* Special case powers of two. */
3670 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3672 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3673 return expand_shift (LSHIFT_EXPR, mode, op0,
3674 floor_log2 (coeff), target, unsignedp);
3677 /* Exclude cost of op0 from max_cost to match the cost
3678 calculation of the synth_mult. */
3679 max_cost = mul_widen_cost (speed, mode);
3680 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3681 max_cost))
3683 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3684 return expand_mult_const (mode, op0, coeff, target,
3685 &algorithm, variant);
3688 return expand_binop (mode, this_optab, op0, op1, target,
3689 unsignedp, OPTAB_LIB_WIDEN);
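/* Illustrative sketch: a widening multiply by a power-of-two constant
   reduces, per the code above, to an extension followed by a shift.
   For an unsigned 32-bit to 64-bit widening multiply by 8 this is
   roughly

       uint64_t wide = (uint64_t) x;     convert_to_mode, zero-extend
       uint64_t res  = wide << 3;        expand_shift, floor_log2 (8)

   Other constants go through choose_mult_variant and the same
   shift/add machinery as expand_mult_const, applied to the widened
   operand, provided the synthesized sequence is cheaper than
   mul_widen_cost.  */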
3692 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3693 replace division by D, and put the least significant N bits of the result
3694 in *MULTIPLIER_PTR and return the most significant bit.
3696 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3697 needed precision is in PRECISION (should be <= N).
3699 PRECISION should be as small as possible so that this function can
3700 choose the multiplier more freely.
3702 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3703 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3705 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3706 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3708 unsigned HOST_WIDE_INT
3709 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3710 unsigned HOST_WIDE_INT *multiplier_ptr,
3711 int *post_shift_ptr, int *lgup_ptr)
3713 int lgup, post_shift;
3714 int pow, pow2;
3716 /* lgup = ceil(log2(divisor)); */
3717 lgup = ceil_log2 (d);
3719 gcc_assert (lgup <= n);
3721 pow = n + lgup;
3722 pow2 = n + lgup - precision;
3724 /* mlow = 2^(N + lgup)/d */
3725 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3726 wide_int mlow = wi::udiv_trunc (val, d);
3728 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3729 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3730 wide_int mhigh = wi::udiv_trunc (val, d);
3732 /* If precision == N, then mlow, mhigh exceed 2^N
3733 (but they do not exceed 2^(N+1)). */
3735 /* Reduce to lowest terms. */
3736 for (post_shift = lgup; post_shift > 0; post_shift--)
3738 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3739 HOST_BITS_PER_WIDE_INT);
3740 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3741 HOST_BITS_PER_WIDE_INT);
3742 if (ml_lo >= mh_lo)
3743 break;
3745 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3746 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3749 *post_shift_ptr = post_shift;
3750 *lgup_ptr = lgup;
3751 if (n < HOST_BITS_PER_WIDE_INT)
3753 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3754 *multiplier_ptr = mhigh.to_uhwi () & mask;
3755 return mhigh.to_uhwi () > mask;
3757 else
3759 *multiplier_ptr = mhigh.to_uhwi ();
3760 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
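/* Worked example (illustrative): for unsigned division by 7 in a
   32-bit mode, choose_multiplier (7, 32, 32, &ml, &post_shift, &lg)
   computes lgup = 3 and a 33-bit multiplier 0x124924925; it stores
   ml = 0x24924925, post_shift = 3, and returns 1 to flag the extra
   high bit.  The TRUNC_DIV code below then uses the N+1-bit recipe

       t1 = (uint32_t) (((uint64_t) x * 0x24924925u) >> 32);
       q  = (((x - t1) >> 1) + t1) >> 2;            q == x / 7

   whereas a return value of 0 means the multiplier fits in N bits and
   the quotient is simply the high half of x * ml shifted right by
   post_shift.  */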
3764 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3765 congruent to 1 (mod 2**N). */
3767 static unsigned HOST_WIDE_INT
3768 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3770 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3772 /* The algorithm notes that the choice y = x satisfies
3773 x*y == 1 mod 2^3, since x is assumed odd.
3774 Each iteration doubles the number of bits of significance in y. */
3776 unsigned HOST_WIDE_INT mask;
3777 unsigned HOST_WIDE_INT y = x;
3778 int nbit = 3;
3780 mask = (n == HOST_BITS_PER_WIDE_INT
3781 ? HOST_WIDE_INT_M1U
3782 : (HOST_WIDE_INT_1U << n) - 1);
3784 while (nbit < n)
3786 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3787 nbit *= 2;
3789 return y;
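/* Worked example (illustrative): invert_mod2n (3, 8) starts from
   y = 3, which already satisfies 3*3 == 9 == 1 (mod 2^3), and each
   iteration doubles the number of correct low bits:

       y = 3   * (2 - 3*3)   & 0xff  ==  235    3*235 == 1 (mod 2^6)
       y = 235 * (2 - 3*235) & 0xff  ==  171    3*171 == 513 == 1 (mod 2^8)

   so 171 is the inverse of 3 modulo 256.  EXACT_DIV_EXPR below relies
   on this to replace an exact division by an odd constant with a
   plain multiplication by the inverse.  */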
3792 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3793 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3794 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3795 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3796 become signed.
3798 The result is put in TARGET if that is convenient.
3800 MODE is the mode of operation. */
3803 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3804 rtx op1, rtx target, int unsignedp)
3806 rtx tem;
3807 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3809 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3810 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3811 tem = expand_and (mode, tem, op1, NULL_RTX);
3812 adj_operand
3813 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3814 adj_operand);
3816 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3817 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3818 tem = expand_and (mode, tem, op0, NULL_RTX);
3819 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3820 target);
3822 return target;
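/* Illustrative identity behind the adjustment above, for N-bit signed
   values A and B, writing uhi/shi for the high halves of the unsigned
   and signed N x N -> 2N products:

       shi (a, b) == uhi (a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0)
       uhi (a, b) == shi (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)

   The arithmetic right shift by N-1 yields an all-ones mask exactly
   when its operand is negative, so ANDing it with the other operand
   produces each conditional term without a branch.  */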
3825 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3827 static rtx
3828 extract_high_half (scalar_int_mode mode, rtx op)
3830 if (mode == word_mode)
3831 return gen_highpart (mode, op);
3833 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3835 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3836 GET_MODE_BITSIZE (mode), 0, 1);
3837 return convert_modes (mode, wider_mode, op, 0);
3840 /* Like expmed_mult_highpart, but only consider using a multiplication
3841 optab. OP1 is an rtx for the constant operand. */
3843 static rtx
3844 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3845 rtx target, int unsignedp, int max_cost)
3847 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3848 optab moptab;
3849 rtx tem;
3850 int size;
3851 bool speed = optimize_insn_for_speed_p ();
3853 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3855 size = GET_MODE_BITSIZE (mode);
3857 /* Firstly, try using a multiplication insn that only generates the needed
3858 high part of the product, and in the sign flavor of unsignedp. */
3859 if (mul_highpart_cost (speed, mode) < max_cost)
3861 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3862 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3863 unsignedp, OPTAB_DIRECT);
3864 if (tem)
3865 return tem;
3868 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3869 Need to adjust the result after the multiplication. */
3870 if (size - 1 < BITS_PER_WORD
3871 && (mul_highpart_cost (speed, mode)
3872 + 2 * shift_cost (speed, mode, size-1)
3873 + 4 * add_cost (speed, mode) < max_cost))
3875 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3876 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3877 unsignedp, OPTAB_DIRECT);
3878 if (tem)
3879 /* We used the wrong signedness. Adjust the result. */
3880 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3881 tem, unsignedp);
3884 /* Try widening multiplication. */
3885 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3886 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3887 && mul_widen_cost (speed, wider_mode) < max_cost)
3889 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3890 unsignedp, OPTAB_WIDEN);
3891 if (tem)
3892 return extract_high_half (mode, tem);
3895 /* Try widening the mode and perform a non-widening multiplication. */
3896 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3897 && size - 1 < BITS_PER_WORD
3898 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3899 < max_cost))
3901 rtx_insn *insns;
3902 rtx wop0, wop1;
3904 /* We need to widen the operands, for example to ensure the
3905 constant multiplier is correctly sign or zero extended.
3906 Use a sequence to clean up any instructions emitted by
3907 the conversions if things don't work out. */
3908 start_sequence ();
3909 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3910 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3911 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3912 unsignedp, OPTAB_WIDEN);
3913 insns = get_insns ();
3914 end_sequence ();
3916 if (tem)
3918 emit_insn (insns);
3919 return extract_high_half (mode, tem);
3923 /* Try widening multiplication of opposite signedness, and adjust. */
3924 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3925 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3926 && size - 1 < BITS_PER_WORD
3927 && (mul_widen_cost (speed, wider_mode)
3928 + 2 * shift_cost (speed, mode, size-1)
3929 + 4 * add_cost (speed, mode) < max_cost))
3931 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3932 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3933 if (tem != 0)
3935 tem = extract_high_half (mode, tem);
3936 /* We used the wrong signedness. Adjust the result. */
3937 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3938 target, unsignedp);
3942 return 0;
3945 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3946 putting the high half of the result in TARGET if that is convenient,
3947 and return where the result is. If the operation cannot be performed,
3948 0 is returned.
3950 MODE is the mode of operation and result.
3952 UNSIGNEDP nonzero means unsigned multiply.
3954 MAX_COST is the total allowed cost for the expanded RTL. */
3956 static rtx
3957 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3958 rtx target, int unsignedp, int max_cost)
3960 unsigned HOST_WIDE_INT cnst1;
3961 int extra_cost;
3962 bool sign_adjust = false;
3963 enum mult_variant variant;
3964 struct algorithm alg;
3965 rtx tem;
3966 bool speed = optimize_insn_for_speed_p ();
3968 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3969 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3971 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3973 /* We can't optimize modes wider than BITS_PER_WORD.
3974 ??? We might be able to perform double-word arithmetic if
3975 mode == word_mode, however all the cost calculations in
3976 synth_mult etc. assume single-word operations. */
3977 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3978 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3979 return expmed_mult_highpart_optab (mode, op0, op1, target,
3980 unsignedp, max_cost);
3982 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3984 /* Check whether we try to multiply by a negative constant. */
3985 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3987 sign_adjust = true;
3988 extra_cost += add_cost (speed, mode);
3991 /* See whether shift/add multiplication is cheap enough. */
3992 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3993 max_cost - extra_cost))
3995 /* See whether the specialized multiplication optabs are
3996 cheaper than the shift/add version. */
3997 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3998 alg.cost.cost + extra_cost);
3999 if (tem)
4000 return tem;
4002 tem = convert_to_mode (wider_mode, op0, unsignedp);
4003 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
4004 tem = extract_high_half (mode, tem);
4006 /* Adjust result for signedness. */
4007 if (sign_adjust)
4008 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
4010 return tem;
4012 return expmed_mult_highpart_optab (mode, op0, op1, target,
4013 unsignedp, max_cost);
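/* Illustrative sketch of the strategies above in C terms, for a
   32-bit mode where 64-bit arithmetic is available: the widening
   fallbacks all amount to

       uint32_t hi = (uint32_t) (((uint64_t) a * b) >> 32);

   i.e. widen, multiply, and keep the upper word (extract_high_half),
   while the shift/add path synthesizes a * CNST1 in the wider mode
   via expand_mult_const and then takes the high half, subtracting OP0
   once more when the constant is negative in a signed multiplication
   (the sign_adjust case).  */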
4017 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
4019 static rtx
4020 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4022 rtx result, temp, shift;
4023 rtx_code_label *label;
4024 int logd;
4025 int prec = GET_MODE_PRECISION (mode);
4027 logd = floor_log2 (d);
4028 result = gen_reg_rtx (mode);
4030 /* Avoid conditional branches when they're expensive. */
4031 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4032 && optimize_insn_for_speed_p ())
4034 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4035 mode, 0, -1);
4036 if (signmask)
4038 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4039 signmask = force_reg (mode, signmask);
4040 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4042 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4043 which instruction sequence to use. If logical right shifts
4044 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4045 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
4047 temp = gen_rtx_LSHIFTRT (mode, result, shift);
4048 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4049 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4050 > COSTS_N_INSNS (2)))
4052 temp = expand_binop (mode, xor_optab, op0, signmask,
4053 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4054 temp = expand_binop (mode, sub_optab, temp, signmask,
4055 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4056 temp = expand_binop (mode, and_optab, temp,
4057 gen_int_mode (masklow, mode),
4058 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4059 temp = expand_binop (mode, xor_optab, temp, signmask,
4060 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4061 temp = expand_binop (mode, sub_optab, temp, signmask,
4062 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4064 else
4066 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4067 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4068 signmask = force_reg (mode, signmask);
4070 temp = expand_binop (mode, add_optab, op0, signmask,
4071 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4072 temp = expand_binop (mode, and_optab, temp,
4073 gen_int_mode (masklow, mode),
4074 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4075 temp = expand_binop (mode, sub_optab, temp, signmask,
4076 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4078 return temp;
4082 /* Mask contains the mode's signbit and the significant bits of the
4083 modulus. By including the signbit in the operation, many targets
4084 can avoid an explicit compare operation in the following comparison
4085 against zero. */
4086 wide_int mask = wi::mask (logd, false, prec);
4087 mask = wi::set_bit (mask, prec - 1);
4089 temp = expand_binop (mode, and_optab, op0,
4090 immed_wide_int_const (mask, mode),
4091 result, 1, OPTAB_LIB_WIDEN);
4092 if (temp != result)
4093 emit_move_insn (result, temp);
4095 label = gen_label_rtx ();
4096 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4098 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4099 0, OPTAB_LIB_WIDEN);
4101 mask = wi::mask (logd, true, prec);
4102 temp = expand_binop (mode, ior_optab, temp,
4103 immed_wide_int_const (mask, mode),
4104 result, 1, OPTAB_LIB_WIDEN);
4105 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4106 0, OPTAB_LIB_WIDEN);
4107 if (temp != result)
4108 emit_move_insn (result, temp);
4109 emit_label (label);
4110 return result;
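/* Illustrative sketch of the branch-free sequences above, for x % 4
   on a 32-bit value (logd == 2, masklow == 3):

       sign = -(x < 0);                        emit_store_flag, LT, -1/0

       r = ((x ^ sign) - sign) & 3;            2 XORs, 2 SUBs, 1 AND
       r = (r ^ sign) - sign;

   or, when a logical right shift is cheap,

       bias = (unsigned) sign >> 30;           3 if x < 0, else 0
       r = ((x + bias) & 3) - bias;            LSHIFTRT, ADD, AND, SUB

   Both produce the C remainder, which takes the sign of X.  */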
4113 /* Expand signed division of OP0 by a power of two D in mode MODE.
4114 This routine is only called for positive values of D. */
4116 static rtx
4117 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4119 rtx temp;
4120 rtx_code_label *label;
4121 int logd;
4123 logd = floor_log2 (d);
4125 if (d == 2
4126 && BRANCH_COST (optimize_insn_for_speed_p (),
4127 false) >= 1)
4129 temp = gen_reg_rtx (mode);
4130 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4131 if (temp != NULL_RTX)
4133 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4134 0, OPTAB_LIB_WIDEN);
4135 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4139 if (HAVE_conditional_move
4140 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4142 rtx temp2;
4144 start_sequence ();
4145 temp2 = copy_to_mode_reg (mode, op0);
4146 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4147 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4148 temp = force_reg (mode, temp);
4150 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4151 temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
4152 temp, temp2, mode, 0);
4153 if (temp2)
4155 rtx_insn *seq = get_insns ();
4156 end_sequence ();
4157 emit_insn (seq);
4158 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4160 end_sequence ();
4163 if (BRANCH_COST (optimize_insn_for_speed_p (),
4164 false) >= 2)
4166 int ushift = GET_MODE_BITSIZE (mode) - logd;
4168 temp = gen_reg_rtx (mode);
4169 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4170 if (temp != NULL_RTX)
4172 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4173 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4174 > COSTS_N_INSNS (1))
4175 temp = expand_binop (mode, and_optab, temp,
4176 gen_int_mode (d - 1, mode),
4177 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4178 else
4179 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4180 ushift, NULL_RTX, 1);
4181 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4182 0, OPTAB_LIB_WIDEN);
4183 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4187 label = gen_label_rtx ();
4188 temp = copy_to_mode_reg (mode, op0);
4189 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4190 expand_inc (temp, gen_int_mode (d - 1, mode));
4191 emit_label (label);
4192 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
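/* Illustrative identity behind the sequences above: truncating signed
   division by 2^k is an arithmetic shift applied after biasing
   negative values by 2^k - 1.  For x / 8 on a 32-bit value:

       bias = (x >> 31) & 7;                   d - 1 if x < 0, else 0
       q = (x + bias) >> 3;                    rounds toward zero

   The variants above differ only in how the bias is materialized: a
   store-flag result added directly (d == 2), a conditional move, a
   mask or logical shift of the store-flag value, or a plain
   compare-and-jump around the increment.  */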
4195 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4196 if that is convenient, and returning where the result is.
4197 You may request either the quotient or the remainder as the result;
4198 specify REM_FLAG nonzero to get the remainder.
4200 CODE is the expression code for which kind of division this is;
4201 it controls how rounding is done. MODE is the machine mode to use.
4202 UNSIGNEDP nonzero means do unsigned division. */
4204 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4205 and then correct it by or'ing in missing high bits
4206 if result of ANDI is nonzero.
4207 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4208 This could optimize to a bfexts instruction.
4209 But C doesn't use these operations, so their optimizations are
4210 left for later. */
4211 /* ??? For modulo, we don't actually need the highpart of the first product,
4212 the low part will do nicely. And for small divisors, the second multiply
4213 can also be a low-part only multiply or even be completely left out.
4214 E.g. to calculate the remainder of a division by 3 with a 32 bit
4215 multiply, multiply with 0x55555556 and extract the upper two bits;
4216 the result is exact for inputs up to 0x1fffffff.
4217 The input range can be reduced by using cross-sum rules.
4218 For odd divisors >= 3, the following table gives right shift counts
4219 so that if a number is shifted by an integer multiple of the given
4220 amount, the remainder stays the same:
4221 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4222 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4223 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4224 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4225 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4227 Cross-sum rules for even numbers can be derived by leaving as many bits
4228 to the right alone as the divisor has zeros to the right.
4229 E.g. if x is an unsigned 32 bit number:
4230 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
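/* Illustrative check of the divide-by-3 example above, in 32-bit
   unsigned arithmetic: writing x = 3*q + r with x <= 0x1fffffff,

       x * 0x55555556 == q * 0x100000002 + r * 0x55555556
                      == 2*q + r * 0x55555556        (mod 2^32)

   and r * 0x55555556 is 0, 0x55555556 or 0xAAAAAAAC for r = 0, 1, 2,
   while 2*q (at most 0x15555554 here) is never large enough to push
   the sum across the next multiple of 0x40000000, so the top two bits
   of the low 32-bit product are exactly r.  */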
4234 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4235 rtx op0, rtx op1, rtx target, int unsignedp,
4236 enum optab_methods methods)
4238 machine_mode compute_mode;
4239 rtx tquotient;
4240 rtx quotient = 0, remainder = 0;
4241 rtx_insn *last;
4242 rtx_insn *insn;
4243 optab optab1, optab2;
4244 int op1_is_constant, op1_is_pow2 = 0;
4245 int max_cost, extra_cost;
4246 static HOST_WIDE_INT last_div_const = 0;
4247 bool speed = optimize_insn_for_speed_p ();
4249 op1_is_constant = CONST_INT_P (op1);
4250 if (op1_is_constant)
4252 wide_int ext_op1 = rtx_mode_t (op1, mode);
4253 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4254 || (! unsignedp
4255 && wi::popcount (wi::neg (ext_op1)) == 1));
4259 This is the structure of expand_divmod:
4261 First comes code to fix up the operands so we can perform the operations
4262 correctly and efficiently.
4264 Second comes a switch statement with code specific for each rounding mode.
4265 For some special operands this code emits all RTL for the desired
4266 operation, for other cases, it generates only a quotient and stores it in
4267 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4268 to indicate that it has not done anything.
4270 Last comes code that finishes the operation. If QUOTIENT is set and
4271 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4272 QUOTIENT is not set, it is computed using trunc rounding.
4274 We try to generate special code for division and remainder when OP1 is a
4275 constant. If |OP1| = 2**n we can use shifts and some other fast
4276 operations. For other values of OP1, we compute a carefully selected
4277 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4278 by m.
4280 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4281 half of the product. Different strategies for generating the product are
4282 implemented in expmed_mult_highpart.
4284 If what we actually want is the remainder, we generate that by another
4285 by-constant multiplication and a subtraction. */
4287 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4288 code below will malfunction if we are, so check here and handle
4289 the special case if so. */
4290 if (op1 == const1_rtx)
4291 return rem_flag ? const0_rtx : op0;
4293 /* When dividing by -1, we could get an overflow.
4294 negv_optab can handle overflows. */
4295 if (! unsignedp && op1 == constm1_rtx)
4297 if (rem_flag)
4298 return const0_rtx;
4299 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4300 ? negv_optab : neg_optab, op0, target, 0);
4303 if (target
4304 /* Don't use the function value register as a target
4305 since we have to read it as well as write it,
4306 and function-inlining gets confused by this. */
4307 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4308 /* Don't clobber an operand while doing a multi-step calculation. */
4309 || ((rem_flag || op1_is_constant)
4310 && (reg_mentioned_p (target, op0)
4311 || (MEM_P (op0) && MEM_P (target))))
4312 || reg_mentioned_p (target, op1)
4313 || (MEM_P (op1) && MEM_P (target))))
4314 target = 0;
4316 /* Get the mode in which to perform this computation. Normally it will
4317 be MODE, but sometimes we can't do the desired operation in MODE.
4318 If so, pick a wider mode in which we can do the operation. Convert
4319 to that mode at the start to avoid repeated conversions.
4321 First see what operations we need. These depend on the expression
4322 we are evaluating. (We assume that divxx3 insns exist under the
4323 same conditions that modxx3 insns and that these insns don't normally
4324 fail. If these assumptions are not correct, we may generate less
4325 efficient code in some cases.)
4327 Then see if we find a mode in which we can open-code that operation
4328 (either a division, modulus, or shift). Finally, check for the smallest
4329 mode for which we can do the operation with a library call. */
4331 /* We might want to refine this now that we have division-by-constant
4332 optimization. Since expmed_mult_highpart tries so many variants, it is
4333 not straightforward to generalize this. Maybe we should make an array
4334 of possible modes in init_expmed? Save this for GCC 2.7. */
4336 optab1 = (op1_is_pow2
4337 ? (unsignedp ? lshr_optab : ashr_optab)
4338 : (unsignedp ? udiv_optab : sdiv_optab));
4339 optab2 = (op1_is_pow2 ? optab1
4340 : (unsignedp ? udivmod_optab : sdivmod_optab));
4342 if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4344 FOR_EACH_MODE_FROM (compute_mode, mode)
4345 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4346 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4347 break;
4349 if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4350 FOR_EACH_MODE_FROM (compute_mode, mode)
4351 if (optab_libfunc (optab1, compute_mode)
4352 || optab_libfunc (optab2, compute_mode))
4353 break;
4355 else
4356 compute_mode = mode;
4358 /* If we still couldn't find a mode, use MODE, but expand_binop will
4359 probably die. */
4360 if (compute_mode == VOIDmode)
4361 compute_mode = mode;
4363 if (target && GET_MODE (target) == compute_mode)
4364 tquotient = target;
4365 else
4366 tquotient = gen_reg_rtx (compute_mode);
4368 #if 0
4369 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4370 (mode), and thereby get better code when OP1 is a constant. Do that
4371 later. It will require going over all usages of SIZE below. */
4372 size = GET_MODE_BITSIZE (mode);
4373 #endif
4375 /* Only deduct something for a REM if the last divide done was
4376 for a different constant. Then set the constant of the last
4377 divide. */
4378 max_cost = (unsignedp
4379 ? udiv_cost (speed, compute_mode)
4380 : sdiv_cost (speed, compute_mode));
4381 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4382 && INTVAL (op1) == last_div_const))
4383 max_cost -= (mul_cost (speed, compute_mode)
4384 + add_cost (speed, compute_mode));
4386 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4388 /* Now convert to the best mode to use. */
4389 if (compute_mode != mode)
4391 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4392 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4394 /* convert_modes may have placed op1 into a register, so we
4395 must recompute the following. */
4396 op1_is_constant = CONST_INT_P (op1);
4397 if (op1_is_constant)
4399 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4400 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4401 || (! unsignedp
4402 && wi::popcount (wi::neg (ext_op1)) == 1));
4404 else
4405 op1_is_pow2 = 0;
4408 /* If one of the operands is a volatile MEM, copy it into a register. */
4410 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4411 op0 = force_reg (compute_mode, op0);
4412 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4413 op1 = force_reg (compute_mode, op1);
4415 /* If we need the remainder or if OP1 is constant, we need to
4416 put OP0 in a register in case it has any queued subexpressions. */
4417 if (rem_flag || op1_is_constant)
4418 op0 = force_reg (compute_mode, op0);
4420 last = get_last_insn ();
4422 /* Promote floor rounding to trunc rounding for unsigned operations. */
4423 if (unsignedp)
4425 if (code == FLOOR_DIV_EXPR)
4426 code = TRUNC_DIV_EXPR;
4427 if (code == FLOOR_MOD_EXPR)
4428 code = TRUNC_MOD_EXPR;
4429 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4430 code = TRUNC_DIV_EXPR;
4433 if (op1 != const0_rtx)
4434 switch (code)
4436 case TRUNC_MOD_EXPR:
4437 case TRUNC_DIV_EXPR:
4438 if (op1_is_constant)
4440 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4441 int size = GET_MODE_BITSIZE (int_mode);
4442 if (unsignedp)
4444 unsigned HOST_WIDE_INT mh, ml;
4445 int pre_shift, post_shift;
4446 int dummy;
4447 wide_int wd = rtx_mode_t (op1, int_mode);
4448 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4450 if (wi::popcount (wd) == 1)
4452 pre_shift = floor_log2 (d);
4453 if (rem_flag)
4455 unsigned HOST_WIDE_INT mask
4456 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4457 remainder
4458 = expand_binop (int_mode, and_optab, op0,
4459 gen_int_mode (mask, int_mode),
4460 remainder, 1, methods);
4461 if (remainder)
4462 return gen_lowpart (mode, remainder);
4464 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4465 pre_shift, tquotient, 1);
4467 else if (size <= HOST_BITS_PER_WIDE_INT)
4469 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4471 /* Most significant bit of divisor is set; emit an scc
4472 insn. */
4473 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4474 int_mode, 1, 1);
4476 else
4478 /* Find a suitable multiplier and right shift count
4479 instead of multiplying with D. */
4481 mh = choose_multiplier (d, size, size,
4482 &ml, &post_shift, &dummy);
4484 /* If the suggested multiplier is more than SIZE bits,
4485 we can do better for even divisors, using an
4486 initial right shift. */
4487 if (mh != 0 && (d & 1) == 0)
4489 pre_shift = ctz_or_zero (d);
4490 mh = choose_multiplier (d >> pre_shift, size,
4491 size - pre_shift,
4492 &ml, &post_shift, &dummy);
4493 gcc_assert (!mh);
4495 else
4496 pre_shift = 0;
4498 if (mh != 0)
4500 rtx t1, t2, t3, t4;
4502 if (post_shift - 1 >= BITS_PER_WORD)
4503 goto fail1;
4505 extra_cost
4506 = (shift_cost (speed, int_mode, post_shift - 1)
4507 + shift_cost (speed, int_mode, 1)
4508 + 2 * add_cost (speed, int_mode));
4509 t1 = expmed_mult_highpart
4510 (int_mode, op0, gen_int_mode (ml, int_mode),
4511 NULL_RTX, 1, max_cost - extra_cost);
4512 if (t1 == 0)
4513 goto fail1;
4514 t2 = force_operand (gen_rtx_MINUS (int_mode,
4515 op0, t1),
4516 NULL_RTX);
4517 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4518 t2, 1, NULL_RTX, 1);
4519 t4 = force_operand (gen_rtx_PLUS (int_mode,
4520 t1, t3),
4521 NULL_RTX);
4522 quotient = expand_shift
4523 (RSHIFT_EXPR, int_mode, t4,
4524 post_shift - 1, tquotient, 1);
4526 else
4528 rtx t1, t2;
4530 if (pre_shift >= BITS_PER_WORD
4531 || post_shift >= BITS_PER_WORD)
4532 goto fail1;
4534 t1 = expand_shift
4535 (RSHIFT_EXPR, int_mode, op0,
4536 pre_shift, NULL_RTX, 1);
4537 extra_cost
4538 = (shift_cost (speed, int_mode, pre_shift)
4539 + shift_cost (speed, int_mode, post_shift));
4540 t2 = expmed_mult_highpart
4541 (int_mode, t1,
4542 gen_int_mode (ml, int_mode),
4543 NULL_RTX, 1, max_cost - extra_cost);
4544 if (t2 == 0)
4545 goto fail1;
4546 quotient = expand_shift
4547 (RSHIFT_EXPR, int_mode, t2,
4548 post_shift, tquotient, 1);
4552 else /* Too wide mode to use tricky code */
4553 break;
4555 insn = get_last_insn ();
4556 if (insn != last)
4557 set_dst_reg_note (insn, REG_EQUAL,
4558 gen_rtx_UDIV (int_mode, op0, op1),
4559 quotient);
4561 else /* TRUNC_DIV, signed */
4563 unsigned HOST_WIDE_INT ml;
4564 int lgup, post_shift;
4565 rtx mlr;
4566 HOST_WIDE_INT d = INTVAL (op1);
4567 unsigned HOST_WIDE_INT abs_d;
4569 /* Not prepared to handle division/remainder by
4570 0xffffffffffffffff8000000000000000 etc. */
4571 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4572 break;
4574 /* Since d might be INT_MIN, we have to cast to
4575 unsigned HOST_WIDE_INT before negating to avoid
4576 undefined signed overflow. */
4577 abs_d = (d >= 0
4578 ? (unsigned HOST_WIDE_INT) d
4579 : - (unsigned HOST_WIDE_INT) d);
4581 /* n rem d = n rem -d */
4582 if (rem_flag && d < 0)
4584 d = abs_d;
4585 op1 = gen_int_mode (abs_d, int_mode);
4588 if (d == 1)
4589 quotient = op0;
4590 else if (d == -1)
4591 quotient = expand_unop (int_mode, neg_optab, op0,
4592 tquotient, 0);
4593 else if (size <= HOST_BITS_PER_WIDE_INT
4594 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4596 /* This case is not handled correctly below. */
4597 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4598 int_mode, 1, 1);
4599 if (quotient == 0)
4600 goto fail1;
4602 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4603 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4604 && (rem_flag
4605 ? smod_pow2_cheap (speed, int_mode)
4606 : sdiv_pow2_cheap (speed, int_mode))
4607 /* We assume that cheap metric is true if the
4608 optab has an expander for this mode. */
4609 && ((optab_handler ((rem_flag ? smod_optab
4610 : sdiv_optab),
4611 int_mode)
4612 != CODE_FOR_nothing)
4613 || (optab_handler (sdivmod_optab, int_mode)
4614 != CODE_FOR_nothing)))
4616 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4618 if (rem_flag)
4620 remainder = expand_smod_pow2 (int_mode, op0, d);
4621 if (remainder)
4622 return gen_lowpart (mode, remainder);
4625 if (sdiv_pow2_cheap (speed, int_mode)
4626 && ((optab_handler (sdiv_optab, int_mode)
4627 != CODE_FOR_nothing)
4628 || (optab_handler (sdivmod_optab, int_mode)
4629 != CODE_FOR_nothing)))
4630 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4631 int_mode, op0,
4632 gen_int_mode (abs_d,
4633 int_mode),
4634 NULL_RTX, 0);
4635 else
4636 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4638 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4639 negate the quotient. */
4640 if (d < 0)
4642 insn = get_last_insn ();
4643 if (insn != last
4644 && abs_d < (HOST_WIDE_INT_1U
4645 << (HOST_BITS_PER_WIDE_INT - 1)))
4646 set_dst_reg_note (insn, REG_EQUAL,
4647 gen_rtx_DIV (int_mode, op0,
4648 gen_int_mode
4649 (abs_d,
4650 int_mode)),
4651 quotient);
4653 quotient = expand_unop (int_mode, neg_optab,
4654 quotient, quotient, 0);
4657 else if (size <= HOST_BITS_PER_WIDE_INT)
4659 choose_multiplier (abs_d, size, size - 1,
4660 &ml, &post_shift, &lgup);
4661 if (ml < HOST_WIDE_INT_1U << (size - 1))
4663 rtx t1, t2, t3;
4665 if (post_shift >= BITS_PER_WORD
4666 || size - 1 >= BITS_PER_WORD)
4667 goto fail1;
4669 extra_cost = (shift_cost (speed, int_mode, post_shift)
4670 + shift_cost (speed, int_mode, size - 1)
4671 + add_cost (speed, int_mode));
4672 t1 = expmed_mult_highpart
4673 (int_mode, op0, gen_int_mode (ml, int_mode),
4674 NULL_RTX, 0, max_cost - extra_cost);
4675 if (t1 == 0)
4676 goto fail1;
4677 t2 = expand_shift
4678 (RSHIFT_EXPR, int_mode, t1,
4679 post_shift, NULL_RTX, 0);
4680 t3 = expand_shift
4681 (RSHIFT_EXPR, int_mode, op0,
4682 size - 1, NULL_RTX, 0);
4683 if (d < 0)
4684 quotient
4685 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4686 tquotient);
4687 else
4688 quotient
4689 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4690 tquotient);
4692 else
4694 rtx t1, t2, t3, t4;
4696 if (post_shift >= BITS_PER_WORD
4697 || size - 1 >= BITS_PER_WORD)
4698 goto fail1;
4700 ml |= HOST_WIDE_INT_M1U << (size - 1);
4701 mlr = gen_int_mode (ml, int_mode);
4702 extra_cost = (shift_cost (speed, int_mode, post_shift)
4703 + shift_cost (speed, int_mode, size - 1)
4704 + 2 * add_cost (speed, int_mode));
4705 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4706 NULL_RTX, 0,
4707 max_cost - extra_cost);
4708 if (t1 == 0)
4709 goto fail1;
4710 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4711 NULL_RTX);
4712 t3 = expand_shift
4713 (RSHIFT_EXPR, int_mode, t2,
4714 post_shift, NULL_RTX, 0);
4715 t4 = expand_shift
4716 (RSHIFT_EXPR, int_mode, op0,
4717 size - 1, NULL_RTX, 0);
4718 if (d < 0)
4719 quotient
4720 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4721 tquotient);
4722 else
4723 quotient
4724 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4725 tquotient);
4728 else /* Too wide mode to use tricky code */
4729 break;
4731 insn = get_last_insn ();
4732 if (insn != last)
4733 set_dst_reg_note (insn, REG_EQUAL,
4734 gen_rtx_DIV (int_mode, op0, op1),
4735 quotient);
4737 break;
4739 fail1:
4740 delete_insns_since (last);
4741 break;
4743 case FLOOR_DIV_EXPR:
4744 case FLOOR_MOD_EXPR:
4745 /* We will come here only for signed operations. */
4746 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4748 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4749 int size = GET_MODE_BITSIZE (int_mode);
4750 unsigned HOST_WIDE_INT mh, ml;
4751 int pre_shift, lgup, post_shift;
4752 HOST_WIDE_INT d = INTVAL (op1);
4754 if (d > 0)
4756 /* We could just as easily deal with negative constants here,
4757 but it does not seem worth the trouble for GCC 2.6. */
4758 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4760 pre_shift = floor_log2 (d);
4761 if (rem_flag)
4763 unsigned HOST_WIDE_INT mask
4764 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4765 remainder = expand_binop
4766 (int_mode, and_optab, op0,
4767 gen_int_mode (mask, int_mode),
4768 remainder, 0, methods);
4769 if (remainder)
4770 return gen_lowpart (mode, remainder);
4772 quotient = expand_shift
4773 (RSHIFT_EXPR, int_mode, op0,
4774 pre_shift, tquotient, 0);
4776 else
4778 rtx t1, t2, t3, t4;
4780 mh = choose_multiplier (d, size, size - 1,
4781 &ml, &post_shift, &lgup);
4782 gcc_assert (!mh);
4784 if (post_shift < BITS_PER_WORD
4785 && size - 1 < BITS_PER_WORD)
4787 t1 = expand_shift
4788 (RSHIFT_EXPR, int_mode, op0,
4789 size - 1, NULL_RTX, 0);
4790 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4791 NULL_RTX, 0, OPTAB_WIDEN);
4792 extra_cost = (shift_cost (speed, int_mode, post_shift)
4793 + shift_cost (speed, int_mode, size - 1)
4794 + 2 * add_cost (speed, int_mode));
4795 t3 = expmed_mult_highpart
4796 (int_mode, t2, gen_int_mode (ml, int_mode),
4797 NULL_RTX, 1, max_cost - extra_cost);
4798 if (t3 != 0)
4800 t4 = expand_shift
4801 (RSHIFT_EXPR, int_mode, t3,
4802 post_shift, NULL_RTX, 1);
4803 quotient = expand_binop (int_mode, xor_optab,
4804 t4, t1, tquotient, 0,
4805 OPTAB_WIDEN);
4810 else
4812 rtx nsign, t1, t2, t3, t4;
4813 t1 = force_operand (gen_rtx_PLUS (int_mode,
4814 op0, constm1_rtx), NULL_RTX);
4815 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4816 0, OPTAB_WIDEN);
4817 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4818 size - 1, NULL_RTX, 0);
4819 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4820 NULL_RTX);
4821 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4822 NULL_RTX, 0);
4823 if (t4)
4825 rtx t5;
4826 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4827 NULL_RTX, 0);
4828 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4829 tquotient);
4834 if (quotient != 0)
4835 break;
4836 delete_insns_since (last);
4838 /* Try using an instruction that produces both the quotient and
4839 remainder, using truncation. We can easily compensate the quotient
4840 or remainder to get floor rounding, once we have the remainder.
4841 Notice that we compute also the final remainder value here,
4842 and return the result right away. */
4843 if (target == 0 || GET_MODE (target) != compute_mode)
4844 target = gen_reg_rtx (compute_mode);
4846 if (rem_flag)
4848 remainder
4849 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4850 quotient = gen_reg_rtx (compute_mode);
4852 else
4854 quotient
4855 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4856 remainder = gen_reg_rtx (compute_mode);
4859 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4860 quotient, remainder, 0))
4862 /* This could be computed with a branch-less sequence.
4863 Save that for later. */
4864 rtx tem;
4865 rtx_code_label *label = gen_label_rtx ();
4866 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4867 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4868 NULL_RTX, 0, OPTAB_WIDEN);
4869 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4870 expand_dec (quotient, const1_rtx);
4871 expand_inc (remainder, op1);
4872 emit_label (label);
4873 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4876 /* No luck with division elimination or divmod. Have to do it
4877 by conditionally adjusting op0 *and* the result. */
4879 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4880 rtx adjusted_op0;
4881 rtx tem;
4883 quotient = gen_reg_rtx (compute_mode);
4884 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4885 label1 = gen_label_rtx ();
4886 label2 = gen_label_rtx ();
4887 label3 = gen_label_rtx ();
4888 label4 = gen_label_rtx ();
4889 label5 = gen_label_rtx ();
4890 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4891 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4892 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4893 quotient, 0, methods);
4894 if (tem != quotient)
4895 emit_move_insn (quotient, tem);
4896 emit_jump_insn (targetm.gen_jump (label5));
4897 emit_barrier ();
4898 emit_label (label1);
4899 expand_inc (adjusted_op0, const1_rtx);
4900 emit_jump_insn (targetm.gen_jump (label4));
4901 emit_barrier ();
4902 emit_label (label2);
4903 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4904 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4905 quotient, 0, methods);
4906 if (tem != quotient)
4907 emit_move_insn (quotient, tem);
4908 emit_jump_insn (targetm.gen_jump (label5));
4909 emit_barrier ();
4910 emit_label (label3);
4911 expand_dec (adjusted_op0, const1_rtx);
4912 emit_label (label4);
4913 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4914 quotient, 0, methods);
4915 if (tem != quotient)
4916 emit_move_insn (quotient, tem);
4917 expand_dec (quotient, const1_rtx);
4918 emit_label (label5);
4920 break;
4922 case CEIL_DIV_EXPR:
4923 case CEIL_MOD_EXPR:
4924 if (unsignedp)
4926 if (op1_is_constant
4927 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4928 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4929 || INTVAL (op1) >= 0))
4931 scalar_int_mode int_mode
4932 = as_a <scalar_int_mode> (compute_mode);
4933 rtx t1, t2, t3;
4934 unsigned HOST_WIDE_INT d = INTVAL (op1);
4935 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4936 floor_log2 (d), tquotient, 1);
4937 t2 = expand_binop (int_mode, and_optab, op0,
4938 gen_int_mode (d - 1, int_mode),
4939 NULL_RTX, 1, methods);
4940 t3 = gen_reg_rtx (int_mode);
4941 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4942 if (t3 == 0)
4944 rtx_code_label *lab;
4945 lab = gen_label_rtx ();
4946 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4947 expand_inc (t1, const1_rtx);
4948 emit_label (lab);
4949 quotient = t1;
4951 else
4952 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4953 tquotient);
4954 break;
4957 /* Try using an instruction that produces both the quotient and
4958 remainder, using truncation. We can easily compensate the
4959 quotient or remainder to get ceiling rounding, once we have the
4960 remainder. Notice that we compute also the final remainder
4961 value here, and return the result right away. */
4962 if (target == 0 || GET_MODE (target) != compute_mode)
4963 target = gen_reg_rtx (compute_mode);
4965 if (rem_flag)
4967 remainder = (REG_P (target)
4968 ? target : gen_reg_rtx (compute_mode));
4969 quotient = gen_reg_rtx (compute_mode);
4971 else
4973 quotient = (REG_P (target)
4974 ? target : gen_reg_rtx (compute_mode));
4975 remainder = gen_reg_rtx (compute_mode);
4978 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4979 remainder, 1))
4981 /* This could be computed with a branch-less sequence.
4982 Save that for later. */
4983 rtx_code_label *label = gen_label_rtx ();
4984 do_cmp_and_jump (remainder, const0_rtx, EQ,
4985 compute_mode, label);
4986 expand_inc (quotient, const1_rtx);
4987 expand_dec (remainder, op1);
4988 emit_label (label);
4989 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4992 /* No luck with division elimination or divmod. Have to do it
4993 by conditionally adjusting op0 *and* the result. */
4995 rtx_code_label *label1, *label2;
4996 rtx adjusted_op0, tem;
4998 quotient = gen_reg_rtx (compute_mode);
4999 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5000 label1 = gen_label_rtx ();
5001 label2 = gen_label_rtx ();
5002 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
5003 compute_mode, label1);
5004 emit_move_insn (quotient, const0_rtx);
5005 emit_jump_insn (targetm.gen_jump (label2));
5006 emit_barrier ();
5007 emit_label (label1);
5008 expand_dec (adjusted_op0, const1_rtx);
5009 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
5010 quotient, 1, methods);
5011 if (tem != quotient)
5012 emit_move_insn (quotient, tem);
5013 expand_inc (quotient, const1_rtx);
5014 emit_label (label2);
5017 else /* signed */
5019 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
5020 && INTVAL (op1) >= 0)
5022 /* This is extremely similar to the code for the unsigned case
5023 above. For 2.7 we should merge these variants, but for
5024 2.6.1 I don't want to touch the code for unsigned since that
5025 gets used in C. The signed case will only be used by other
5026 languages (Ada). */
5028 rtx t1, t2, t3;
5029 unsigned HOST_WIDE_INT d = INTVAL (op1);
5030 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5031 floor_log2 (d), tquotient, 0);
5032 t2 = expand_binop (compute_mode, and_optab, op0,
5033 gen_int_mode (d - 1, compute_mode),
5034 NULL_RTX, 1, methods);
5035 t3 = gen_reg_rtx (compute_mode);
5036 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5037 compute_mode, 1, 1);
5038 if (t3 == 0)
5040 rtx_code_label *lab;
5041 lab = gen_label_rtx ();
5042 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5043 expand_inc (t1, const1_rtx);
5044 emit_label (lab);
5045 quotient = t1;
5047 else
5048 quotient = force_operand (gen_rtx_PLUS (compute_mode,
5049 t1, t3),
5050 tquotient);
5051 break;
5054 /* Try using an instruction that produces both the quotient and
5055 remainder, using truncation. We can easily compensate the
5056 quotient or remainder to get ceiling rounding, once we have the
5057 remainder. Notice that we compute also the final remainder
5058 value here, and return the result right away. */
5059 if (target == 0 || GET_MODE (target) != compute_mode)
5060 target = gen_reg_rtx (compute_mode);
5061 if (rem_flag)
5063 remainder = (REG_P (target)
5064 ? target : gen_reg_rtx (compute_mode));
5065 quotient = gen_reg_rtx (compute_mode);
5067 else
5069 quotient = (REG_P (target)
5070 ? target : gen_reg_rtx (compute_mode));
5071 remainder = gen_reg_rtx (compute_mode);
5074 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5075 remainder, 0))
5077 /* This could be computed with a branch-less sequence.
5078 Save that for later. */
5079 rtx tem;
5080 rtx_code_label *label = gen_label_rtx ();
5081 do_cmp_and_jump (remainder, const0_rtx, EQ,
5082 compute_mode, label);
5083 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5084 NULL_RTX, 0, OPTAB_WIDEN);
5085 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5086 expand_inc (quotient, const1_rtx);
5087 expand_dec (remainder, op1);
5088 emit_label (label);
5089 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5092 /* No luck with division elimination or divmod. Have to do it
5093 by conditionally adjusting op0 *and* the result. */
5095 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5096 rtx adjusted_op0;
5097 rtx tem;
5099 quotient = gen_reg_rtx (compute_mode);
5100 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5101 label1 = gen_label_rtx ();
5102 label2 = gen_label_rtx ();
5103 label3 = gen_label_rtx ();
5104 label4 = gen_label_rtx ();
5105 label5 = gen_label_rtx ();
5106 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5107 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5108 compute_mode, label1);
5109 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5110 quotient, 0, methods);
5111 if (tem != quotient)
5112 emit_move_insn (quotient, tem);
5113 emit_jump_insn (targetm.gen_jump (label5));
5114 emit_barrier ();
5115 emit_label (label1);
5116 expand_dec (adjusted_op0, const1_rtx);
5117 emit_jump_insn (targetm.gen_jump (label4));
5118 emit_barrier ();
5119 emit_label (label2);
5120 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5121 compute_mode, label3);
5122 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5123 quotient, 0, methods);
5124 if (tem != quotient)
5125 emit_move_insn (quotient, tem);
5126 emit_jump_insn (targetm.gen_jump (label5));
5127 emit_barrier ();
5128 emit_label (label3);
5129 expand_inc (adjusted_op0, const1_rtx);
5130 emit_label (label4);
5131 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5132 quotient, 0, methods);
5133 if (tem != quotient)
5134 emit_move_insn (quotient, tem);
5135 expand_inc (quotient, const1_rtx);
5136 emit_label (label5);
5139 break;
5141 case EXACT_DIV_EXPR:
5142 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5144 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5145 int size = GET_MODE_BITSIZE (int_mode);
5146 HOST_WIDE_INT d = INTVAL (op1);
5147 unsigned HOST_WIDE_INT ml;
5148 int pre_shift;
5149 rtx t1;
5151 pre_shift = ctz_or_zero (d);
5152 ml = invert_mod2n (d >> pre_shift, size);
5153 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5154 pre_shift, NULL_RTX, unsignedp);
5155 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5156 NULL_RTX, 1);
5158 insn = get_last_insn ();
5159 set_dst_reg_note (insn, REG_EQUAL,
5160 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5161 int_mode, op0, op1),
5162 quotient);
5164 break;
5166 case ROUND_DIV_EXPR:
5167 case ROUND_MOD_EXPR:
5168 if (unsignedp)
5170 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5171 rtx tem;
5172 rtx_code_label *label;
5173 label = gen_label_rtx ();
5174 quotient = gen_reg_rtx (int_mode);
5175 remainder = gen_reg_rtx (int_mode);
5176 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5178 rtx tem;
5179 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5180 quotient, 1, methods);
5181 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5182 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5183 remainder, 1, methods);
5185 tem = plus_constant (int_mode, op1, -1);
5186 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5187 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5188 expand_inc (quotient, const1_rtx);
5189 expand_dec (remainder, op1);
5190 emit_label (label);
5192 else
5194 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5195 int size = GET_MODE_BITSIZE (int_mode);
5196 rtx abs_rem, abs_op1, tem, mask;
5197 rtx_code_label *label;
5198 label = gen_label_rtx ();
5199 quotient = gen_reg_rtx (int_mode);
5200 remainder = gen_reg_rtx (int_mode);
5201 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5203 rtx tem;
5204 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5205 quotient, 0, methods);
5206 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5207 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5208 remainder, 0, methods);
5210 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5211 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5212 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5213 1, NULL_RTX, 1);
5214 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
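/* The branch above skips the adjustment when 2 * |remainder| < |op1|,
   so the quotient is rounded away from zero exactly when the remainder
   is at least half the divisor.  The code below then applies the
   correct sign without further branches: mask = (op0 ^ op1) >> (size - 1)
   is -1 when the operands have opposite signs and 0 otherwise, so
   (mask ^ 1) - mask evaluates to -1 or +1 respectively and is added to
   the quotient, while (mask ^ op1) - mask evaluates to -op1 or op1 and
   is subtracted from the remainder.  */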
5215 tem = expand_binop (int_mode, xor_optab, op0, op1,
5216 NULL_RTX, 0, OPTAB_WIDEN);
5217 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5218 size - 1, NULL_RTX, 0);
5219 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5220 NULL_RTX, 0, OPTAB_WIDEN);
5221 tem = expand_binop (int_mode, sub_optab, tem, mask,
5222 NULL_RTX, 0, OPTAB_WIDEN);
5223 expand_inc (quotient, tem);
5224 tem = expand_binop (int_mode, xor_optab, mask, op1,
5225 NULL_RTX, 0, OPTAB_WIDEN);
5226 tem = expand_binop (int_mode, sub_optab, tem, mask,
5227 NULL_RTX, 0, OPTAB_WIDEN);
5228 expand_dec (remainder, tem);
5229 emit_label (label);
5231 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5233 default:
5234 gcc_unreachable ();
5237 if (quotient == 0)
5239 if (target && GET_MODE (target) != compute_mode)
5240 target = 0;
5242 if (rem_flag)
5244 /* Try to produce the remainder without producing the quotient.
5245 If we seem to have a divmod pattern that does not require widening,
5246 don't try widening here. We should really have a WIDEN argument
5247 to expand_twoval_binop, since what we'd really like to do here is
5248 1) try a mod insn in compute_mode
5249 2) try a divmod insn in compute_mode
5250 3) try a div insn in compute_mode and multiply-subtract to get
5251 remainder
5252 4) try the same things with widening allowed. */
5253 remainder
5254 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5255 op0, op1, target,
5256 unsignedp,
5257 ((optab_handler (optab2, compute_mode)
5258 != CODE_FOR_nothing)
5259 ? OPTAB_DIRECT : OPTAB_WIDEN));
5260 if (remainder == 0)
5262 /* No luck there. Can we do remainder and divide at once
5263 without a library call? */
5264 remainder = gen_reg_rtx (compute_mode);
5265 if (! expand_twoval_binop ((unsignedp
5266 ? udivmod_optab
5267 : sdivmod_optab),
5268 op0, op1,
5269 NULL_RTX, remainder, unsignedp))
5270 remainder = 0;
5273 if (remainder)
5274 return gen_lowpart (mode, remainder);
5277 /* Produce the quotient. Try a quotient insn, but not a library call.
5278 If we have a divmod in this mode, use it in preference to widening
5279 the div (for this test we assume it will not fail). Note that optab2
5280 is set to the one of the two optabs that the call below will use. */
5281 quotient
5282 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5283 op0, op1, rem_flag ? NULL_RTX : target,
5284 unsignedp,
5285 ((optab_handler (optab2, compute_mode)
5286 != CODE_FOR_nothing)
5287 ? OPTAB_DIRECT : OPTAB_WIDEN));
5289 if (quotient == 0)
5291 /* No luck there. Try a quotient-and-remainder insn,
5292 keeping the quotient alone. */
5293 quotient = gen_reg_rtx (compute_mode);
5294 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5295 op0, op1,
5296 quotient, NULL_RTX, unsignedp))
5298 quotient = 0;
5299 if (! rem_flag)
5300 /* Still no luck. If we are not computing the remainder,
5301 use a library call for the quotient. */
5302 quotient = sign_expand_binop (compute_mode,
5303 udiv_optab, sdiv_optab,
5304 op0, op1, target,
5305 unsignedp, methods);
5310 if (rem_flag)
5312 if (target && GET_MODE (target) != compute_mode)
5313 target = 0;
5315 if (quotient == 0)
5317 /* No divide instruction either. Use library for remainder. */
5318 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5319 op0, op1, target,
5320 unsignedp, methods);
5321 /* No remainder function. Try a quotient-and-remainder
5322 function, keeping the remainder. */
5323 if (!remainder
5324 && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5326 remainder = gen_reg_rtx (compute_mode);
5327 if (!expand_twoval_binop_libfunc
5328 (unsignedp ? udivmod_optab : sdivmod_optab,
5329 op0, op1,
5330 NULL_RTX, remainder,
5331 unsignedp ? UMOD : MOD))
5332 remainder = NULL_RTX;
5335 else
5337 /* We divided. Now finish doing X - Y * (X / Y). */
5338 remainder = expand_mult (compute_mode, quotient, op1,
5339 NULL_RTX, unsignedp);
5340 remainder = expand_binop (compute_mode, sub_optab, op0,
5341 remainder, target, unsignedp,
5342 methods);
5346 if (methods != OPTAB_LIB_WIDEN
5347 && (rem_flag ? remainder : quotient) == NULL_RTX)
5348 return NULL_RTX;
5350 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5353 /* Return a tree node with data type TYPE, describing the value of X.
5354 Usually this is a VAR_DECL, if there is no obvious better choice.
5355 X may be an expression; however, we only support those expressions
5356 generated by loop.c. */
5358 tree
5359 make_tree (tree type, rtx x)
5361 tree t;
5363 switch (GET_CODE (x))
5365 case CONST_INT:
5366 case CONST_WIDE_INT:
5367 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5368 return t;
5370 case CONST_DOUBLE:
5371 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5372 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5373 t = wide_int_to_tree (type,
5374 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5375 HOST_BITS_PER_WIDE_INT * 2));
5376 else
5377 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5379 return t;
5381 case CONST_VECTOR:
5383 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5384 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5385 tree itype = TREE_TYPE (type);
5387 /* Build a tree with vector elements. */
5388 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5389 unsigned int count = elts.encoded_nelts ();
5390 for (unsigned int i = 0; i < count; ++i)
5392 rtx elt = CONST_VECTOR_ELT (x, i);
5393 elts.quick_push (make_tree (itype, elt));
5396 return elts.build ();
5399 case PLUS:
5400 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5401 make_tree (type, XEXP (x, 1)));
5403 case MINUS:
5404 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5405 make_tree (type, XEXP (x, 1)));
5407 case NEG:
5408 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5410 case MULT:
5411 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5412 make_tree (type, XEXP (x, 1)));
5414 case ASHIFT:
5415 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5416 make_tree (type, XEXP (x, 1)));
5418 case LSHIFTRT:
5419 t = unsigned_type_for (type);
5420 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5421 make_tree (t, XEXP (x, 0)),
5422 make_tree (type, XEXP (x, 1))));
5424 case ASHIFTRT:
5425 t = signed_type_for (type);
5426 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5427 make_tree (t, XEXP (x, 0)),
5428 make_tree (type, XEXP (x, 1))));
5430 case DIV:
5431 if (TREE_CODE (type) != REAL_TYPE)
5432 t = signed_type_for (type);
5433 else
5434 t = type;
5436 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5437 make_tree (t, XEXP (x, 0)),
5438 make_tree (t, XEXP (x, 1))));
5439 case UDIV:
5440 t = unsigned_type_for (type);
5441 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5442 make_tree (t, XEXP (x, 0)),
5443 make_tree (t, XEXP (x, 1))));
5445 case SIGN_EXTEND:
5446 case ZERO_EXTEND:
5447 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5448 GET_CODE (x) == ZERO_EXTEND);
5449 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5451 case CONST:
5452 return make_tree (type, XEXP (x, 0));
5454 case SYMBOL_REF:
5455 t = SYMBOL_REF_DECL (x);
5456 if (t)
5457 return fold_convert (type, build_fold_addr_expr (t));
5458 /* fall through. */
5460 default:
5461 if (CONST_POLY_INT_P (x))
5462 return wide_int_to_tree (type, const_poly_int_value (x));
5464 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5466 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5467 address mode to pointer mode. */
5468 if (POINTER_TYPE_P (type))
5469 x = convert_memory_address_addr_space
5470 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5472 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5473 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5474 t->decl_with_rtl.rtl = x;
5476 return t;
5480 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5481 and returning TARGET.
5483 If TARGET is 0, a pseudo-register or constant is returned. */
5486 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5488 rtx tem = 0;
5490 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5491 tem = simplify_binary_operation (AND, mode, op0, op1);
5492 if (tem == 0)
5493 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5495 if (target == 0)
5496 target = tem;
5497 else if (tem != target)
5498 emit_move_insn (target, tem);
5499 return target;
5502 /* Helper function for emit_store_flag. */
5504 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5505 machine_mode mode, machine_mode compare_mode,
5506 int unsignedp, rtx x, rtx y, int normalizep,
5507 machine_mode target_mode)
5509 class expand_operand ops[4];
5510 rtx op0, comparison, subtarget;
5511 rtx_insn *last;
5512 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5513 scalar_int_mode int_target_mode;
5515 last = get_last_insn ();
5516 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5517 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5518 if (!x || !y)
5520 delete_insns_since (last);
5521 return NULL_RTX;
5524 if (target_mode == VOIDmode)
5525 int_target_mode = result_mode;
5526 else
5527 int_target_mode = as_a <scalar_int_mode> (target_mode);
5528 if (!target)
5529 target = gen_reg_rtx (int_target_mode);
5531 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5533 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5534 create_fixed_operand (&ops[1], comparison);
5535 create_fixed_operand (&ops[2], x);
5536 create_fixed_operand (&ops[3], y);
5537 if (!maybe_expand_insn (icode, 4, ops))
5539 delete_insns_since (last);
5540 return NULL_RTX;
5542 subtarget = ops[0].value;
5544 /* If we are converting to a wider mode, first convert to
5545 INT_TARGET_MODE, then normalize. This produces better combining
5546 opportunities on machines that have a SIGN_EXTRACT when we are
5547 testing a single bit. This mostly benefits the 68k.
5549 If STORE_FLAG_VALUE does not have the sign bit set when
5550 interpreted in MODE, we can do this conversion as unsigned, which
5551 is usually more efficient. */
5552 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5554 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5555 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5557 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5558 convert_move (target, subtarget, unsignedp);
5560 op0 = target;
5561 result_mode = int_target_mode;
5563 else
5564 op0 = subtarget;
5566 /* If we want to keep subexpressions around, don't reuse our last
5567 target. */
5568 if (optimize)
5569 subtarget = 0;
5571 /* Now normalize to the proper value in MODE. Sometimes we don't
5572 have to do anything. */
5573 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5575 /* STORE_FLAG_VALUE might be the most negative number, so write
5576 the comparison this way to avoid a compile-time warning. */
5577 else if (- normalizep == STORE_FLAG_VALUE)
5578 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5580 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5581 it hard to use a value of just the sign bit due to ANSI integer
5582 constant typing rules. */
5583 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5584 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5585 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5586 normalizep == 1);
5587 else
5589 gcc_assert (STORE_FLAG_VALUE & 1);
5591 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5592 if (normalizep == -1)
5593 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5596 /* If we were converting to a smaller mode, do the conversion now. */
5597 if (int_target_mode != result_mode)
5599 convert_move (target, op0, 0);
5600 return target;
5602 else
5603 return op0;
5607 /* A subroutine of emit_store_flag only including "tricks" that do not
5608 need a recursive call. These are kept separate to avoid infinite
5609 loops. */
5611 static rtx
5612 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5613 machine_mode mode, int unsignedp, int normalizep,
5614 machine_mode target_mode)
5616 rtx subtarget;
5617 enum insn_code icode;
5618 machine_mode compare_mode;
5619 enum mode_class mclass;
5620 enum rtx_code scode;
5622 if (unsignedp)
5623 code = unsigned_condition (code);
5624 scode = swap_condition (code);
5626 /* If one operand is constant, make it the second one. Only do this
5627 if the other operand is not constant as well. */
5629 if (swap_commutative_operands_p (op0, op1))
5631 std::swap (op0, op1);
5632 code = swap_condition (code);
5635 if (mode == VOIDmode)
5636 mode = GET_MODE (op0);
5638 if (CONST_SCALAR_INT_P (op1))
5639 canonicalize_comparison (mode, &code, &op1);
5641 /* For some comparisons with 1 and -1, we can convert this to
5642 comparisons with zero. This will often produce more opportunities for
5643 store-flag insns. */
5645 switch (code)
5647 case LT:
5648 if (op1 == const1_rtx)
5649 op1 = const0_rtx, code = LE;
5650 break;
5651 case LE:
5652 if (op1 == constm1_rtx)
5653 op1 = const0_rtx, code = LT;
5654 break;
5655 case GE:
5656 if (op1 == const1_rtx)
5657 op1 = const0_rtx, code = GT;
5658 break;
5659 case GT:
5660 if (op1 == constm1_rtx)
5661 op1 = const0_rtx, code = GE;
5662 break;
5663 case GEU:
5664 if (op1 == const1_rtx)
5665 op1 = const0_rtx, code = NE;
5666 break;
5667 case LTU:
5668 if (op1 == const1_rtx)
5669 op1 = const0_rtx, code = EQ;
5670 break;
5671 default:
5672 break;
5675 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5676 complement of A (for GE) and shifting the sign bit to the low bit. */
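/* For example, in a 32-bit mode a 0/1-normalized "A < 0" becomes the
   logical shift (unsigned) A >> 31 and "A >= 0" becomes (~A) >> 31,
   while for normalizep == -1 an arithmetic shift by 31 produces the
   0/-1 result directly.  */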
5677 scalar_int_mode int_mode;
5678 if (op1 == const0_rtx && (code == LT || code == GE)
5679 && is_int_mode (mode, &int_mode)
5680 && (normalizep || STORE_FLAG_VALUE == 1
5681 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5683 scalar_int_mode int_target_mode;
5684 subtarget = target;
5686 if (!target)
5687 int_target_mode = int_mode;
5688 else
5690 /* If the result is to be wider than OP0, it is best to convert it
5691 first. If it is to be narrower, it is *incorrect* to convert it
5692 first. */
5693 int_target_mode = as_a <scalar_int_mode> (target_mode);
5694 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5696 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5697 int_mode = int_target_mode;
5701 if (int_target_mode != int_mode)
5702 subtarget = 0;
5704 if (code == GE)
5705 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5706 ((STORE_FLAG_VALUE == 1 || normalizep)
5707 ? 0 : subtarget), 0);
5709 if (STORE_FLAG_VALUE == 1 || normalizep)
5710 /* If we are supposed to produce a 0/1 value, we want to do
5711 a logical shift from the sign bit to the low-order bit; for
5712 a -1/0 value, we do an arithmetic shift. */
5713 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5714 GET_MODE_BITSIZE (int_mode) - 1,
5715 subtarget, normalizep != -1);
5717 if (int_mode != int_target_mode)
5718 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5720 return op0;
5723 /* Next try expanding this via the backend's cstore<mode>4. */
5724 mclass = GET_MODE_CLASS (mode);
5725 FOR_EACH_WIDER_MODE_FROM (compare_mode, mode)
5727 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5728 icode = optab_handler (cstore_optab, optab_mode);
5729 if (icode != CODE_FOR_nothing)
5731 do_pending_stack_adjust ();
5732 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5733 unsignedp, op0, op1, normalizep, target_mode);
5734 if (tem)
5735 return tem;
5737 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5739 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5740 unsignedp, op1, op0, normalizep, target_mode);
5741 if (tem)
5742 return tem;
5744 break;
5748 /* If we are comparing a double-word integer with zero or -1, we can
5749 convert the comparison into one involving a single word. */
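/* For example, with 32-bit words and a 64-bit operand X split into
   words HI and LO: "X == 0" becomes "(LO | HI) == 0", "X == -1"
   becomes "(LO & HI) == -1", and "X < 0" or "X >= 0" only needs to
   test the sign of HI.  */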
5750 if (is_int_mode (mode, &int_mode)
5751 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5752 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5754 rtx tem;
5755 if ((code == EQ || code == NE)
5756 && (op1 == const0_rtx || op1 == constm1_rtx))
5758 rtx op00, op01;
5760 /* Do a logical OR or AND of the two words and compare the
5761 result. */
5762 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5763 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5764 tem = expand_binop (word_mode,
5765 op1 == const0_rtx ? ior_optab : and_optab,
5766 op00, op01, NULL_RTX, unsignedp,
5767 OPTAB_DIRECT);
5769 if (tem != 0)
5770 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5771 unsignedp, normalizep);
5773 else if ((code == LT || code == GE) && op1 == const0_rtx)
5775 rtx op0h;
5777 /* If testing the sign bit, can just test on high word. */
5778 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5779 subreg_highpart_offset (word_mode,
5780 int_mode));
5781 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5782 unsignedp, normalizep);
5784 else
5785 tem = NULL_RTX;
5787 if (tem)
5789 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5790 return tem;
5791 if (!target)
5792 target = gen_reg_rtx (target_mode);
5794 convert_move (target, tem,
5795 !val_signbit_known_set_p (word_mode,
5796 (normalizep ? normalizep
5797 : STORE_FLAG_VALUE)));
5798 return target;
5802 return 0;
5805 /* Subroutine of emit_store_flag that handles cases in which the operands
5806 are scalar integers. SUBTARGET is the target to use for temporary
5807 operations and TRUEVAL is the value to store when the condition is
5808 true. All other arguments are as for emit_store_flag. */
5811 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5812 rtx op1, scalar_int_mode mode, int unsignedp,
5813 int normalizep, rtx trueval)
5815 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5816 rtx_insn *last = get_last_insn ();
5818 /* If this is an equality comparison of integers, we can try to exclusive-or
5819 (or subtract) the two operands and use a recursive call to try the
5820 comparison with zero. Don't do any of these cases if branches are
5821 very cheap. */
5823 if ((code == EQ || code == NE) && op1 != const0_rtx)
5825 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5826 OPTAB_WIDEN);
5828 if (tem == 0)
5829 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5830 OPTAB_WIDEN);
5831 if (tem != 0)
5832 tem = emit_store_flag (target, code, tem, const0_rtx,
5833 mode, unsignedp, normalizep);
5834 if (tem != 0)
5835 return tem;
5837 delete_insns_since (last);
5840 /* For integer comparisons, try the reverse comparison. However, for
5841 small X, if we would have to extend anyway, implementing "X != 0"
5842 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5843 rtx_code rcode = reverse_condition (code);
5844 if (can_compare_p (rcode, mode, ccp_store_flag)
5845 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5846 && code == NE
5847 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5848 && op1 == const0_rtx))
5850 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5851 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5853 /* Again, for the reverse comparison, use either an addition or a XOR. */
5854 if (want_add
5855 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5856 optimize_insn_for_speed_p ()) == 0)
5858 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5859 STORE_FLAG_VALUE, target_mode);
5860 if (tem != 0)
5861 tem = expand_binop (target_mode, add_optab, tem,
5862 gen_int_mode (normalizep, target_mode),
5863 target, 0, OPTAB_WIDEN);
5864 if (tem != 0)
5865 return tem;
5867 else if (!want_add
5868 && rtx_cost (trueval, mode, XOR, 1,
5869 optimize_insn_for_speed_p ()) == 0)
5871 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5872 normalizep, target_mode);
5873 if (tem != 0)
5874 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5875 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5876 if (tem != 0)
5877 return tem;
5880 delete_insns_since (last);
5883 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5884 the constant zero. Reject all other comparisons at this point. Only
5885 do LE and GT if branches are expensive since they are expensive on
5886 2-operand machines. */
5888 if (op1 != const0_rtx
5889 || (code != EQ && code != NE
5890 && (BRANCH_COST (optimize_insn_for_speed_p (),
5891 false) <= 1 || (code != LE && code != GT))))
5892 return 0;
5894 /* Try to put the result of the comparison in the sign bit. Assume we can't
5895 do the necessary operation below. */
5897 rtx tem = 0;
5899 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5900 the sign bit set. */
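/* A quick check of the identity: A == 0 gives 0 | -1 = -1 (sign bit
   set), A > 0 ORs two non-negative values (sign bit clear), and A < 0
   keeps its own sign bit (set).  */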
5902 if (code == LE)
5904 /* This is destructive, so SUBTARGET can't be OP0. */
5905 if (rtx_equal_p (subtarget, op0))
5906 subtarget = 0;
5908 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5909 OPTAB_WIDEN);
5910 if (tem)
5911 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5912 OPTAB_WIDEN);
5915 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5916 number of bits in the mode of OP0, minus one. */
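/* That is, subtract A from its own sign mask: for A > 0 the result is
   0 - A, which is negative (sign bit set); for A == 0 it is 0; and for
   A < 0 it is -1 - A, which is non-negative (sign bit clear).  */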
5918 if (code == GT)
5920 if (rtx_equal_p (subtarget, op0))
5921 subtarget = 0;
5923 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5924 GET_MODE_BITSIZE (mode) - 1,
5925 subtarget, 0);
5926 if (tem)
5927 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5928 OPTAB_WIDEN);
5931 if (code == EQ || code == NE)
5933 /* For EQ or NE, one way to do the comparison is to apply an operation
5934 that converts the operand into a positive number if it is nonzero
5935 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5936 for NE we negate. This puts the result in the sign bit. Then we
5937 normalize with a shift, if needed.
5939 Two operations that can do the above actions are ABS and FFS, so try
5940 them. If that doesn't work, and MODE is smaller than a full word,
5941 we can use zero-extension to the wider mode (an unsigned conversion)
5942 as the operation. */
5944 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5945 that is compensated by the subsequent overflow when subtracting
5946 one / negating. */
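/* Concretely, for NE the value -abs (A) has the sign bit set exactly
   when A != 0, and for EQ the value abs (A) - 1 has the sign bit set
   exactly when A == 0.  The INT_MIN case still comes out right:
   abs (INT_MIN) wraps to INT_MIN, so negating it keeps the sign bit
   set (correct for NE) and subtracting one clears it (correct for
   EQ).  */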
5948 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5949 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5950 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5951 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5952 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5954 tem = convert_modes (word_mode, mode, op0, 1);
5955 mode = word_mode;
5958 if (tem != 0)
5960 if (code == EQ)
5961 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5962 0, OPTAB_WIDEN);
5963 else
5964 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5967 /* If we couldn't do it that way, for NE we can "or" the two's complement
5968 of the value with itself. For EQ, we take the one's complement of
5969 that "or", which is an extra insn, so we only handle EQ if branches
5970 are expensive. */
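/* (-A | A) has the sign bit set for every nonzero A: if A > 0 then -A
   is negative, if A < 0 then A itself is, and INT_MIN is its own
   negation; only A == 0 yields 0 | 0 = 0.  */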
5972 if (tem == 0
5973 && (code == NE
5974 || BRANCH_COST (optimize_insn_for_speed_p (),
5975 false) > 1))
5977 if (rtx_equal_p (subtarget, op0))
5978 subtarget = 0;
5980 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5981 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5982 OPTAB_WIDEN);
5984 if (tem && code == EQ)
5985 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5989 if (tem && normalizep)
5990 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5991 GET_MODE_BITSIZE (mode) - 1,
5992 subtarget, normalizep == 1);
5994 if (tem)
5996 if (!target)
5998 else if (GET_MODE (tem) != target_mode)
6000 convert_move (target, tem, 0);
6001 tem = target;
6003 else if (!subtarget)
6005 emit_move_insn (target, tem);
6006 tem = target;
6009 else
6010 delete_insns_since (last);
6012 return tem;
6015 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
6016 and storing in TARGET. Normally return TARGET.
6017 Return 0 if that cannot be done.
6019 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
6020 it is VOIDmode, they cannot both be CONST_INT.
6022 UNSIGNEDP is for the case where we have to widen the operands
6023 to perform the operation. It says to use zero-extension.
6025 NORMALIZEP is 1 if we should convert the result to be either zero
6026 or one. NORMALIZEP is -1 if we should convert the result to be
6027 either zero or -1. If NORMALIZEP is zero, the result will be left
6028 "raw" out of the scc insn. */
6031 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6032 machine_mode mode, int unsignedp, int normalizep)
6034 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6035 enum rtx_code rcode;
6036 rtx subtarget;
6037 rtx tem, trueval;
6038 rtx_insn *last;
6040 /* If we compare constants, we shouldn't use a store-flag operation,
6041 but a constant load. We can get there via the vanilla route that
6042 usually generates a compare-branch sequence, but will in this case
6043 fold the comparison to a constant, and thus elide the branch. */
6044 if (CONSTANT_P (op0) && CONSTANT_P (op1))
6045 return NULL_RTX;
6047 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6048 target_mode);
6049 if (tem)
6050 return tem;
6052 /* If we reached here, we can't do this with a scc insn; however, there
6053 are some comparisons that can be done in other ways. Don't do any
6054 of these cases if branches are very cheap. */
6055 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6056 return 0;
6058 /* See what we need to return. We can only return a 1, -1, or the
6059 sign bit. */
6061 if (normalizep == 0)
6063 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6064 normalizep = STORE_FLAG_VALUE;
6066 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6068 else
6069 return 0;
6072 last = get_last_insn ();
6074 /* If optimizing, use different pseudo registers for each insn, instead
6075 of reusing the same pseudo. This leads to better CSE, but slows
6076 down the compiler, since there are more pseudos. */
6077 subtarget = (!optimize
6078 && (target_mode == mode)) ? target : NULL_RTX;
6079 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6081 /* For floating-point comparisons, try the reverse comparison or try
6082 changing the "orderedness" of the comparison. */
6083 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6085 enum rtx_code first_code;
6086 bool and_them;
6088 rcode = reverse_condition_maybe_unordered (code);
6089 if (can_compare_p (rcode, mode, ccp_store_flag)
6090 && (code == ORDERED || code == UNORDERED
6091 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6092 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6094 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6095 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6097 /* For the reverse comparison, use either an addition or a XOR. */
6098 if (want_add
6099 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6100 optimize_insn_for_speed_p ()) == 0)
6102 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6103 STORE_FLAG_VALUE, target_mode);
6104 if (tem)
6105 return expand_binop (target_mode, add_optab, tem,
6106 gen_int_mode (normalizep, target_mode),
6107 target, 0, OPTAB_WIDEN);
6109 else if (!want_add
6110 && rtx_cost (trueval, mode, XOR, 1,
6111 optimize_insn_for_speed_p ()) == 0)
6113 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6114 normalizep, target_mode);
6115 if (tem)
6116 return expand_binop (target_mode, xor_optab, tem, trueval,
6117 target, INTVAL (trueval) >= 0,
6118 OPTAB_WIDEN);
6122 delete_insns_since (last);
6124 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6125 if (code == ORDERED || code == UNORDERED)
6126 return 0;
6128 and_them = split_comparison (code, mode, &first_code, &code);
6130 /* If there are no NaNs, the first comparison should always fall through.
6131 Effectively change the comparison to the other one. */
6132 if (!HONOR_NANS (mode))
6134 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6135 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6136 target_mode);
6139 if (!HAVE_conditional_move)
6140 return 0;
6142 /* Do not turn a trapping comparison into a non-trapping one. */
6143 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6144 && flag_trapping_math)
6145 return 0;
6147 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6148 conditional move. */
6149 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6150 normalizep, target_mode);
6151 if (tem == 0)
6152 return 0;
6154 if (and_them)
6155 tem = emit_conditional_move (target, { code, op0, op1, mode },
6156 tem, const0_rtx, GET_MODE (tem), 0);
6157 else
6158 tem = emit_conditional_move (target, { code, op0, op1, mode },
6159 trueval, tem, GET_MODE (tem), 0);
6161 if (tem == 0)
6162 delete_insns_since (last);
6163 return tem;
6166 /* The remaining tricks only apply to integer comparisons. */
6168 scalar_int_mode int_mode;
6169 if (is_int_mode (mode, &int_mode))
6170 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6171 unsignedp, normalizep, trueval);
6173 return 0;
6176 /* Like emit_store_flag, but always succeeds. */
6179 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6180 machine_mode mode, int unsignedp, int normalizep)
6182 rtx tem;
6183 rtx_code_label *label;
6184 rtx trueval, falseval;
6186 /* First see if emit_store_flag can do the job. */
6187 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6188 if (tem != 0)
6189 return tem;
6191 /* If one operand is constant, make it the second one. Only do this
6192 if the other operand is not constant as well. */
6193 if (swap_commutative_operands_p (op0, op1))
6195 std::swap (op0, op1);
6196 code = swap_condition (code);
6199 if (mode == VOIDmode)
6200 mode = GET_MODE (op0);
6202 if (!target)
6203 target = gen_reg_rtx (word_mode);
6205 /* If this failed, we have to do this with set/compare/jump/set code.
6206 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6207 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6208 if (code == NE
6209 && GET_MODE_CLASS (mode) == MODE_INT
6210 && REG_P (target)
6211 && op0 == target
6212 && op1 == const0_rtx)
6214 label = gen_label_rtx ();
6215 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6216 NULL_RTX, NULL, label,
6217 profile_probability::uninitialized ());
6218 emit_move_insn (target, trueval);
6219 emit_label (label);
6220 return target;
6223 if (!REG_P (target)
6224 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6225 target = gen_reg_rtx (GET_MODE (target));
6227 /* Jump in the right direction if the target cannot implement CODE
6228 but can jump on its reverse condition. */
6229 falseval = const0_rtx;
6230 if (! can_compare_p (code, mode, ccp_jump)
6231 && (! FLOAT_MODE_P (mode)
6232 || code == ORDERED || code == UNORDERED
6233 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6234 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6236 enum rtx_code rcode;
6237 if (FLOAT_MODE_P (mode))
6238 rcode = reverse_condition_maybe_unordered (code);
6239 else
6240 rcode = reverse_condition (code);
6242 /* Canonicalize to UNORDERED for the libcall. */
6243 if (can_compare_p (rcode, mode, ccp_jump)
6244 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6246 falseval = trueval;
6247 trueval = const0_rtx;
6248 code = rcode;
6252 emit_move_insn (target, trueval);
6253 label = gen_label_rtx ();
6254 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6255 label, profile_probability::uninitialized ());
6257 emit_move_insn (target, falseval);
6258 emit_label (label);
6260 return target;
6263 /* Helper function for canonicalize_cmp_for_target. Swap between inclusive
6264 and exclusive ranges in order to create an equivalent comparison. See
6265 canonicalize_cmp_for_target for the possible cases. */
6267 static enum rtx_code
6268 equivalent_cmp_code (enum rtx_code code)
6270 switch (code)
6272 case GT:
6273 return GE;
6274 case GE:
6275 return GT;
6276 case LT:
6277 return LE;
6278 case LE:
6279 return LT;
6280 case GTU:
6281 return GEU;
6282 case GEU:
6283 return GTU;
6284 case LTU:
6285 return LEU;
6286 case LEU:
6287 return LTU;
6289 default:
6290 return code;
6294 /* Choose the more appropriate immediate in scalar integer comparisons. The
6295 purpose of this is to end up with an immediate which can be loaded into a
6296 register in fewer moves, if possible.
6298 For each integer comparison there exists an equivalent choice:
6299 i) a > b or a >= b + 1
6300 ii) a <= b or a < b + 1
6301 iii) a >= b or a > b - 1
6302 iv) a < b or a <= b - 1
6304 MODE is the mode of the first operand.
6305 CODE points to the comparison code.
6306 IMM points to the rtx containing the immediate. *IMM must satisfy
6307 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6308 on exit. */
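/* A hypothetical example: on a target whose compare or move-immediate
   patterns can encode 0x1000 directly but need an extra instruction to
   materialize 0xfff, this rewrites "x > 0xfff" as the equivalent
   "x >= 0x1000", because the insn_cost comparison below finds the
   adjusted constant cheaper to load.  */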
6310 void
6311 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6313 if (!SCALAR_INT_MODE_P (mode))
6314 return;
6316 int to_add = 0;
6317 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6319 /* Extract the immediate value from the rtx. */
6320 wide_int imm_val = rtx_mode_t (*imm, mode);
6322 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6323 to_add = 1;
6324 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6325 to_add = -1;
6326 else
6327 return;
6329 /* Check for overflow/underflow in the case of signed values and
6330 wrapping around in the case of unsigned values. If any occur,
6331 cancel the optimization. */
6332 wi::overflow_type overflow = wi::OVF_NONE;
6333 wide_int imm_modif;
6335 if (to_add == 1)
6336 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6337 else
6338 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6340 if (overflow)
6341 return;
6343 /* The following creates a pseudo; if we cannot do that, bail out. */
6344 if (!can_create_pseudo_p ())
6345 return;
6347 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6348 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6350 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6351 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6353 /* Update the immediate and the code. */
6354 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6356 *code = equivalent_cmp_code (*code);
6357 *imm = new_imm;
6363 /* Perform possibly multi-word comparison and conditional jump to LABEL
6364 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6365 now a thin wrapper around do_compare_rtx_and_jump. */
6367 static void
6368 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6369 rtx_code_label *label)
6371 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6372 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6373 NULL, label, profile_probability::uninitialized ());