gcc/expmed.cc

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2024 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* Work around tree-optimization/91825.  */
  22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "backend.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "predict.h"
  32 #include "memmodel.h"
  33 #include "tm_p.h"
  34 #include "optabs.h"
  35 #include "expmed.h"
  36 #include "regs.h"
  37 #include "emit-rtl.h"
  38 #include "diagnostic-core.h"
  39 #include "fold-const.h"
  40 #include "stor-layout.h"
  41 #include "dojump.h"
  42 #include "explow.h"
  43 #include "expr.h"
  44 #include "langhooks.h"
  45 #include "tree-vector-builder.h"
  46 #include "recog.h"
  47
     /* The default instance of the expmed target state.  When
        SWITCHABLE_TARGET is nonzero, this_target_expmed is a pointer that
        initially refers to this default instance.  */
  48 struct target_expmed default_target_expmed;
  49 #if SWITCHABLE_TARGET
  50 struct target_expmed *this_target_expmed = &default_target_expmed;
  51 #endif
  52
     /* Prototypes for file-local (static) helper functions, declared ahead
        of their use.  */
  53 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
  54                                       unsigned HOST_WIDE_INT,
  55                                       unsigned HOST_WIDE_INT,
  56                                       poly_uint64, poly_uint64,
  57                                       machine_mode, rtx, bool, bool);
  58 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
  59                                    unsigned HOST_WIDE_INT,
  60                                    unsigned HOST_WIDE_INT,
  61                                    poly_uint64, poly_uint64,
  62                                    rtx, scalar_int_mode, bool);
  63 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
  64                                      unsigned HOST_WIDE_INT,
  65                                      unsigned HOST_WIDE_INT,
  66                                      rtx, scalar_int_mode, bool);
  67 static void store_split_bit_field (rtx, opt_scalar_int_mode,
  68                                    unsigned HOST_WIDE_INT,
  69                                    unsigned HOST_WIDE_INT,
  70                                    poly_uint64, poly_uint64,
  71                                    rtx, scalar_int_mode, bool);
  72 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
  73                                        unsigned HOST_WIDE_INT,
  74                                        unsigned HOST_WIDE_INT, int, rtx,
  75                                        machine_mode, machine_mode, bool, bool);
  76 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
  77                                     unsigned HOST_WIDE_INT,
  78                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  79 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
  80                                       unsigned HOST_WIDE_INT,
  81                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  82 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  83 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
  84                                     unsigned HOST_WIDE_INT,
  85                                     unsigned HOST_WIDE_INT, int, bool);
  86 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  87 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  88 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  89
  90 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  91    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  92    The mask is truncated if necessary to the width of mode MODE.  The
  93    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.
        The bits are produced by wi::shifted_mask at the precision of MODE
        and the result is wrapped into an rtx constant of mode MODE by
        immed_wide_int_const.  */
  94
  95 static inline rtx
  96 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
  97 {
  98   return immed_wide_int_const
  99     (wi::shifted_mask (bitpos, bitsize, complement,
 100                        GET_MODE_PRECISION (mode)), mode);
 101 }
 102
 103 /* Test whether a value is zero or a power of two.  X appears twice in
        the expansion, so the argument is evaluated twice.  */
 104 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
 105   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
 106
     /* Scratch rtx expressions used by init_expmed and its helpers to query
        the cost of basic operations.  init_expmed builds every expression
        once around the register REG; the helpers then change the modes in
        place with PUT_MODE and hand the expressions to set_src_cost.
        POW2[M] holds the constant 1 << M and CINT[M] the constant M; both
        arrays are filled by init_expmed for 1 <= M < MAX_BITS_PER_WORD, and
        entry 0 is left null by its memset.  The shapes noted on the fields
        below are the ones init_expmed creates.  */
 107 struct init_expmed_rtl
 108 {
 109   rtx reg;          /* The register operand shared by all expressions.  */
 110   rtx plus;         /* PLUS (reg, reg).  */
 111   rtx neg;          /* NEG (reg).  */
 112   rtx mult;         /* MULT (reg, reg).  */
 113   rtx sdiv;         /* DIV (reg, reg).  */
 114   rtx udiv;         /* UDIV (reg, reg).  */
 115   rtx sdiv_32;      /* DIV (reg, pow2[5]).  */
 116   rtx smod_32;      /* MOD (reg, pow2[5]).  */
 117   rtx wide_mult;    /* MULT (zext, zext).  */
 118   rtx wide_lshr;    /* LSHIFTRT (wide_mult, <shift count>).  */
 119   rtx wide_trunc;   /* TRUNCATE (wide_lshr).  */
 120   rtx shift;        /* ASHIFT (reg, <shift count>).  */
 121   rtx shift_mult;   /* MULT (reg, <power of two>).  */
 122   rtx shift_add;    /* PLUS (shift_mult, reg).  */
 123   rtx shift_sub0;   /* MINUS (shift_mult, reg).  */
 124   rtx shift_sub1;   /* MINUS (reg, shift_mult).  */
 125   rtx zext;         /* ZERO_EXTEND (reg).  */
 126   rtx trunc;        /* TRUNCATE (reg).  */
 127
 128   rtx pow2[MAX_BITS_PER_WORD];
 129   rtx cint[MAX_BITS_PER_WORD];
 130 };
 131
     /* Record the cost of converting a value from FROM_MODE to TO_MODE for
        the SPEED setting, using the scratch expressions in ALL.  The cost
        recorded is that of ALL->trunc when TO_MODE's (adjusted) precision is
        smaller than FROM_MODE's, and that of ALL->zext otherwise.  ALL->reg
        is temporarily given FROM_MODE and is set to TO_MODE again before
        returning.  */
 132 static void
 133 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
 134                       scalar_int_mode from_mode, bool speed)
 135 {
 136   int to_size, from_size;
 137   rtx which;
 138
 139   to_size = GET_MODE_PRECISION (to_mode);
 140   from_size = GET_MODE_PRECISION (from_mode);
 141
 142   /* Most partial integers have a precision less than the "full"
 143      integer it requires for storage.  In case one doesn't, for
 144      comparison purposes here, reduce the bit size by one in that
 145      case.  */
 146   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 147       && pow2p_hwi (to_size))
 148     to_size --;
 149   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 150       && pow2p_hwi (from_size))
 151     from_size --;
 152
 153   /* Assume cost of zero-extend and sign-extend is the same.  */
 154   which = (to_size < from_size ? all->trunc : all->zext);
 155
       /* The operand register must have the source mode while the
          conversion expression is being costed.  */
 156   PUT_MODE (all->reg, from_mode);
 157   set_convert_cost (to_mode, from_mode, speed,
 158                     set_src_cost (which, to_mode, speed));
 159   /* Restore all->reg's mode.  */
 160   PUT_MODE (all->reg, to_mode);
 161 }
 162
     /* Fill in the cost tables for MODE under the SPEED setting, using the
        scratch expressions in ALL: add, negate, multiply, signed and unsigned
        divide, whether signed division and modulus by a power of two count
        as cheap, and the shift, shift-add and shift-subtract costs for each
        shift count M with 0 <= M < MIN (MAX_BITS_PER_WORD, unit bitsize of
        MODE).  If MODE is a scalar integer mode, also record the conversion
        costs from every mode in [MIN_MODE_INT, MAX_MODE_INT] and, for
        MODE_INT modes that have a wider mode, the widening-multiply and
        highpart-multiply costs.  */
 163 static void
 164 init_expmed_one_mode (struct init_expmed_rtl *all,
 165                       machine_mode mode, int speed)
 166 {
 167   int m, n, mode_bitsize;
 168   machine_mode mode_from;
 169
 170   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 171
       /* Give the scratch expressions mode MODE.  wide_mult and wide_lshr
          are not touched here; they are given the wider mode below.  */
 172   PUT_MODE (all->reg, mode);
 173   PUT_MODE (all->plus, mode);
 174   PUT_MODE (all->neg, mode);
 175   PUT_MODE (all->mult, mode);
 176   PUT_MODE (all->sdiv, mode);
 177   PUT_MODE (all->udiv, mode);
 178   PUT_MODE (all->sdiv_32, mode);
 179   PUT_MODE (all->smod_32, mode);
 180   PUT_MODE (all->wide_trunc, mode);
 181   PUT_MODE (all->shift, mode);
 182   PUT_MODE (all->shift_mult, mode);
 183   PUT_MODE (all->shift_add, mode);
 184   PUT_MODE (all->shift_sub0, mode);
 185   PUT_MODE (all->shift_sub1, mode);
 186   PUT_MODE (all->zext, mode);
 187   PUT_MODE (all->trunc, mode);
 188
 189   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 190   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 191   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 192   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 193   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 194
       /* Signed division (modulus) by the constant in sdiv_32 (smod_32)
          counts as cheap when it costs at most two (four) additions.  */
 195   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 196                                      <= 2 * add_cost (speed, mode)));
 197   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 198                                      <= 4 * add_cost (speed, mode)));
 199
       /* Shift count 0: a plain shift is given cost 0, and the shift-add
          and shift-subtract forms are given the cost of an addition.  */
 200   set_shift_cost (speed, mode, 0, 0);
 201   {
 202     int cost = add_cost (speed, mode);
 203     set_shiftadd_cost (speed, mode, 0, cost);
 204     set_shiftsub0_cost (speed, mode, 0, cost);
 205     set_shiftsub1_cost (speed, mode, 0, cost);
 206   }
 207
 208   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 209   for (m = 1; m < n; m++)
 210     {
           /* Plug shift count M into SHIFT and the multiplier pow2[M] into
              SHIFT_MULT, before costing them and the shift_add, shift_sub0
              and shift_sub1 expressions.  */
 211       XEXP (all->shift, 1) = all->cint[m];
 212       XEXP (all->shift_mult, 1) = all->pow2[m];
 213
 214       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 215       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 216                                                        speed));
 217       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 218                                                         speed));
 219       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 220                                                         speed));
 221     }
 222
       /* Conversion and widening-multiply costs are only recorded for
          scalar integer modes.  */
 223   scalar_int_mode int_mode_to;
 224   if (is_a <scalar_int_mode> (mode, &int_mode_to))
 225     {
 226       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 227            mode_from = (machine_mode)(mode_from + 1))
 228         init_expmed_one_conv (all, int_mode_to,
 229                               as_a <scalar_int_mode> (mode_from), speed);
 230
 231       scalar_int_mode wider_mode;
 232       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
 233           && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
 234         {
               /* The operand register keeps MODE while the extension, the
                  multiplication and the right shift are given WIDER_MODE.
                  The shift count of wide_lshr is the unit bitsize of
                  MODE.  */
 235           PUT_MODE (all->reg, mode);
 236           PUT_MODE (all->zext, wider_mode);
 237           PUT_MODE (all->wide_mult, wider_mode);
 238           PUT_MODE (all->wide_lshr, wider_mode);
 239           XEXP (all->wide_lshr, 1)
 240             = gen_int_shift_amount (wider_mode, mode_bitsize);
 241
 242           set_mul_widen_cost (speed, wider_mode,
 243                               set_src_cost (all->wide_mult, wider_mode, speed));
 244           set_mul_highpart_cost (speed, int_mode_to,
 245                                  set_src_cost (all->wide_trunc,
 246                                                int_mode_to, speed));
 247         }
 248     }
 249 }
 250
     /* Initialize the operation cost tables.  Builds the scratch expressions
        of struct init_expmed_rtl in QImode, then for both values of SPEED
        (0 and 1) records the cost of zero and calls init_expmed_one_mode for
        every mode from MIN_MODE_INT to MAX_MODE_INT and, when the respective
        MIN_MODE_* is not VOIDmode, for the MODE_PARTIAL_INT and
        MODE_VECTOR_INT ranges as well.  Afterwards the algorithm hash
        entries are zeroed if the hash was already in use (otherwise it is
        just marked as used), default_rtl_profile is called and the scratch
        rtxes are freed.  */
 251 void
 252 init_expmed (void)
 253 {
 254   struct init_expmed_rtl all;
 255   machine_mode mode = QImode;
 256   int m, speed;
 257
 258   memset (&all, 0, sizeof all);
       /* The loop starts at 1, so pow2[0] and cint[0] stay null.  */
 259   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 260     {
 261       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 262       all.cint[m] = GEN_INT (m);
 263     }
 264
 265   /* Avoid using hard regs in ways which may be unsupported.  */
 266   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
       /* Build each expression once in QImode; init_expmed_one_mode changes
          the modes in place and replaces operand 1 of wide_lshr, shift and
          shift_mult, for which all.reg is only a placeholder here.  */
 267   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 268   all.neg = gen_rtx_NEG (mode, all.reg);
 269   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 270   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 271   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 272   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 273   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 274   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 275   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 276   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 277   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 278   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 279   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 280   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 281   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 282   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 283   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 284
 285   for (speed = 0; speed < 2; speed++)
 286     {
 287       crtl->maybe_hot_insn_p = speed;
           /* NOTE(review): MODE is QImode only on the first pass; on the
              second pass it holds whatever the loops below left in it.
              Confirm that the mode does not matter when costing
              const0_rtx.  */
 288       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 289
 290       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 291            mode = (machine_mode)(mode + 1))
 292         init_expmed_one_mode (&all, mode, speed);
 293
 294       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 295         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 296              mode = (machine_mode)(mode + 1))
 297           init_expmed_one_mode (&all, mode, speed);
 298
 299       if (MIN_MODE_VECTOR_INT != VOIDmode)
 300         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 301              mode = (machine_mode)(mode + 1))
 302           init_expmed_one_mode (&all, mode, speed);
 303     }
 304
       /* If the algorithm hash was already in use, zero all of its entries;
          otherwise just mark it as used.  */
 305   if (alg_hash_used_p ())
 306     {
 307       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 308       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 309     }
 310   else
 311     set_alg_hash_used_p (true);
 312   default_rtl_profile ();
 313
       /* Free the scratch expressions in the reverse of the order in which
          they were created above.  The pow2 and cint constants are not
          freed here.  */
 314   ggc_free (all.trunc);
 315   ggc_free (all.shift_sub1);
 316   ggc_free (all.shift_sub0);
 317   ggc_free (all.shift_add);
 318   ggc_free (all.shift_mult);
 319   ggc_free (all.shift);
 320   ggc_free (all.wide_trunc);
 321   ggc_free (all.wide_lshr);
 322   ggc_free (all.wide_mult);
 323   ggc_free (all.zext);
 324   ggc_free (all.smod_32);
 325   ggc_free (all.sdiv_32);
 326   ggc_free (all.udiv);
 327   ggc_free (all.sdiv);
 328   ggc_free (all.mult);
 329   ggc_free (all.neg);
 330   ggc_free (all.plus);
 331   ggc_free (all.reg);
 332 }
 333
 334 /* Return an rtx representing minus the value of X.
 335    MODE is the intended mode of the result,
 336    useful if X is a CONST_INT.
        The negation is first attempted with simplify_unary_operation; if
        that returns null, it is expanded with expand_unop using
        neg_optab.  */
 337
 338 rtx
 339 negate_rtx (machine_mode mode, rtx x)
 340 {
 341   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 342
 343   if (result == 0)
 344     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 345
 346   return result;
 347 }
 348
 349 /* Whether reverse storage order is supported on the target.  The value
        starts at -1 (not yet checked) and is set to 0 or 1 by
        check_reverse_storage_order_support.  */
 350 static int reverse_storage_order_supported = -1;
 351
 352 /* Check whether reverse storage order is supported on the target.
        Sets reverse_storage_order_supported to 0 and reports the feature as
        unimplemented through sorry when BYTES_BIG_ENDIAN differs from
        WORDS_BIG_ENDIAN; otherwise sets it to 1.  */
 353
 354 static void
 355 check_reverse_storage_order_support (void)
 356 {
 357   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 358     {
 359       reverse_storage_order_supported = 0;
 360       sorry ("reverse scalar storage order");
 361     }
 362   else
 363     reverse_storage_order_supported = 1;
 364 }
 365
 366 /* Whether reverse FP storage order is supported on the target.  The
        value starts at -1 (not yet checked) and is set to 0 or 1 by
        check_reverse_float_storage_order_support.  */
 367 static int reverse_float_storage_order_supported = -1;
 368
 369 /* Check whether reverse FP storage order is supported on the target.
        Sets reverse_float_storage_order_supported to 0 and reports the
        feature as unimplemented through sorry when FLOAT_WORDS_BIG_ENDIAN
        differs from WORDS_BIG_ENDIAN; otherwise sets it to 1.  */
 370
 371 static void
 372 check_reverse_float_storage_order_support (void)
 373 {
 374   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 375     {
 376       reverse_float_storage_order_supported = 0;
 377       sorry ("reverse floating-point scalar storage order");
 378     }
 379   else
 380     reverse_float_storage_order_supported = 1;
 381 }
 382
 383 /* Return an rtx representing value of X with reverse storage order.
 384    MODE is the intended mode of the result,
 385    useful if X is a CONST_INT.
        X is returned unchanged for QImode.  Complex modes are handled by
        flipping the real and imaginary parts separately.  Modes that are
        not scalar integer modes are byte-swapped in the integer mode of the
        same precision; if no such mode exists or the target does not
        support it, the case is reported through sorry and X is returned
        unchanged.  */
 386
 387 rtx
 388 flip_storage_order (machine_mode mode, rtx x)
 389 {
 390   scalar_int_mode int_mode;
 391   rtx result;
 392
 393   if (mode == QImode)
 394     return x;
 395
 396   if (COMPLEX_MODE_P (mode))
 397     {
 398       rtx real = read_complex_part (x, false);
 399       rtx imag = read_complex_part (x, true);
 400
 401       real = flip_storage_order (GET_MODE_INNER (mode), real);
 402       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 403
 404       return gen_rtx_CONCAT (mode, real, imag);
 405     }
 406
       /* Run the target support check the first time through; the flag is
          negative until it has been checked.  */
 407   if (UNLIKELY (reverse_storage_order_supported < 0))
 408     check_reverse_storage_order_support ();
 409
 410   if (!is_a <scalar_int_mode> (mode, &int_mode))
 411     {
 412       if (FLOAT_MODE_P (mode)
 413           && UNLIKELY (reverse_float_storage_order_supported < 0))
 414         check_reverse_float_storage_order_support ();
 415
           /* Find an integer mode with the same precision as MODE in which
              the swap can be performed.  */
 416       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
 417           || !targetm.scalar_mode_supported_p (int_mode))
 418         {
 419           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 420           return x;
 421         }
 422       x = gen_lowpart (int_mode, x);
 423     }
 424
       /* Try to simplify the byte swap first; otherwise expand it through
          bswap_optab.  */
 425   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 426   if (result == 0)
 427     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 428
       /* Convert back if the swap was done in a substitute integer mode.  */
 429   if (int_mode != mode)
 430     result = gen_lowpart (mode, result);
 431
 432   return result;
 433 }
 434
 435 /* If MODE is set, adjust bitfield memory MEM so that it points to the
 436    first unit of mode MODE that contains a bitfield of size BITSIZE at
 437    bit position BITNUM.  If MODE is not set, return a BLKmode reference
 438    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
 439    of the field within the new memory.  */
 440
 441 static rtx
 442 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
 443                       unsigned HOST_WIDE_INT bitsize,
 444                       unsigned HOST_WIDE_INT bitnum,
 445                       unsigned HOST_WIDE_INT *new_bitnum)
 446 {
 447   scalar_int_mode imode;
 448   if (mode.exists (&imode))
 449     {
           /* *NEW_BITNUM is the position within the UNIT-bit unit that
              contains BITNUM; OFFSET is the byte offset of that unit.  */
 450       unsigned int unit = GET_MODE_BITSIZE (imode);
 451       *new_bitnum = bitnum % unit;
 452       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 453       return adjust_bitfield_address (mem, imode, offset);
 454     }
 455   else
 456     {
           /* Start at the byte containing BITNUM.  SIZE is the number of
              bytes needed to hold *NEW_BITNUM + BITSIZE bits, rounded
              up.  */
 457       *new_bitnum = bitnum % BITS_PER_UNIT;
 458       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 459       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 460                             / BITS_PER_UNIT);
 461       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
 462     }
 463 }
 464
 465 /* The caller wants to perform insertion or extraction PATTERN on a
 466    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 467    BITREGION_START and BITREGION_END are as for store_bit_field
 468    and FIELDMODE is the natural mode of the field.
 469
 470    Search for a mode that is compatible with the memory access
 471    restrictions and (where applicable) with a register insertion or
 472    extraction.  Return the new memory on success, storing the adjusted
 473    bit position in *NEW_BITNUM.  Return null otherwise.  */
 474
 475 static rtx
 476 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 477                               rtx op0, HOST_WIDE_INT bitsize,
 478                               HOST_WIDE_INT bitnum,
 479                               poly_uint64 bitregion_start,
 480                               poly_uint64 bitregion_end,
 481                               machine_mode fieldmode,
 482                               unsigned HOST_WIDE_INT *new_bitnum)
 483 {
       /* The iterator yields candidate modes given the field position, the
          bit region, and the alignment and volatility of OP0.  */
 484   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 485                                 bitregion_end, MEM_ALIGN (op0),
 486                                 MEM_VOLATILE_P (op0));
 487   scalar_int_mode best_mode;
 488   if (iter.next_mode (&best_mode))
 489     {
 490       /* We can use a memory in BEST_MODE.  See whether this is true for
 491          any wider modes.  All other things being equal, we prefer to
 492          use the widest mode possible because it tends to expose more
 493          CSE opportunities.  */
 494       if (!iter.prefer_smaller_modes ())
 495         {
 496           /* Limit the search to the mode required by the corresponding
 497              register insertion or extraction instruction, if any.  */
 498           scalar_int_mode limit_mode = word_mode;
 499           extraction_insn insn;
 500           if (get_best_reg_extraction_insn (&insn, pattern,
 501                                             GET_MODE_BITSIZE (best_mode),
 502                                             fieldmode))
 503             limit_mode = insn.field_mode;
 504
               /* Keep taking modes from the iterator while they are no
                  larger than LIMIT_MODE; the last one accepted wins.  */
 505           scalar_int_mode wider_mode;
 506           while (iter.next_mode (&wider_mode)
 507                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 508             best_mode = wider_mode;
 509         }
 510       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 511                                    new_bitnum);
 512     }
 513   return NULL_RTX;
 514 }
 515
 516 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 517    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 518    offset is then BITNUM / BITS_PER_UNIT.
        When BYTES_BIG_ENDIAN, BITNUM must be a multiple of BITS_PER_UNIT and
        the field must end either at the end of STRUCT_MODE or on a multiple
        of the natural register size in bits.  Otherwise BITNUM must be a
        multiple of the natural register size in bits.  */
 519
 520 static bool
 521 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
 522                      machine_mode struct_mode)
 523 {
 524   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
 525   if (BYTES_BIG_ENDIAN)
 526     return (multiple_p (bitnum, BITS_PER_UNIT)
 527             && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
 528                 || multiple_p (bitnum + bitsize,
 529                                regsize * BITS_PER_UNIT)));
 530   else
 531     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
 532 }
 533
 534 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 535    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 536    Return false if the access would touch memory outside the range
 537    BITREGION_START to BITREGION_END for conformance to the C++ memory
 538    model.  */
 539
 540 static bool
 541 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 542                             unsigned HOST_WIDE_INT bitnum,
 543                             scalar_int_mode fieldmode,
 544                             poly_uint64 bitregion_start,
 545                             poly_uint64 bitregion_end)
 546 {
 547   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 548
 549   /* -fstrict-volatile-bitfields must be enabled and we must have a
 550      volatile MEM.  */
 551   if (!MEM_P (op0)
 552       || !MEM_VOLATILE_P (op0)
 553       || flag_strict_volatile_bitfields <= 0)
 554     return false;
 555
 556   /* The bit size must not be larger than the field mode, and
 557      the field mode must not be larger than a word.  */
 558   if (bitsize > modesize || modesize > BITS_PER_WORD)
 559     return false;
 560
 561   /* Check for cases of unaligned fields that must be split.  */
 562   if (bitnum % modesize + bitsize > modesize)
 563     return false;
 564
 565   /* The memory must be sufficiently aligned for a MODESIZE access.
 566      This condition guarantees that the memory access will not
 567      touch anything after the end of the structure.  */
 568   if (MEM_ALIGN (op0) < modesize)
 569     return false;
 570
 571   /* Check for cases where the C++ memory model applies.  A zero
          BITREGION_END skips the check.  Otherwise the MODESIZE-bit unit
          that contains the field, which starts at bit
          BITNUM - BITNUM % MODESIZE, must lie within the bit region.  */
 572   if (maybe_ne (bitregion_end, 0U)
 573       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
 574           || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
 575                        bitregion_end)))
 576     return false;
 577
 578   return true;
 579 }
 580
 581 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
 582    bit number BITNUM can be treated as a simple value of mode MODE.
 583    Store the byte offset in *BYTENUM if so.
        This requires BITNUM to be a multiple of BITS_PER_UNIT, BITSIZE to
        equal the bitsize of MODE, and either that unaligned access in MODE
        is not slow for OP0's alignment or that both BITNUM and OP0's
        alignment are sufficient for MODE.  *BYTENUM is written by the
        multiple_p check on BITNUM, so callers should rely on it only when
        true is returned.  */
 584
 585 static bool
 586 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
 587                        machine_mode mode, poly_uint64 *bytenum)
 588 {
 589   return (MEM_P (op0)
 590           && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
 591           && known_eq (bitsize, GET_MODE_BITSIZE (mode))
 592           && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
 593               || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
 594                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 595 }
 596 \f
 597 /* Try to use instruction INSV to store VALUE into a field of OP0.
 598    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
 599    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
 600    are as for store_bit_field.
        Return true if the insertion was expanded.  Return false otherwise;
        when the expansion itself fails, the insns emitted since entry are
        deleted first.  */
 601
 602 static bool
 603 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 604                             opt_scalar_int_mode op0_mode,
 605                             unsigned HOST_WIDE_INT bitsize,
 606                             unsigned HOST_WIDE_INT bitnum,
 607                             rtx value, scalar_int_mode value_mode)
 608 {
 609   class expand_operand ops[4];
 610   rtx value1;
 611   rtx xop0 = op0;
       /* Remember the current last insn so that everything emitted here can
          be deleted if the insertion cannot be expanded.  */
 612   rtx_insn *last = get_last_insn ();
       /* Set when the insertion is done on a temporary whose value must be
          moved to OP0 afterwards.  */
 613   bool copy_back = false;
 614
       /* The field must be non-empty and fit in the mode the instruction
          operates on.  */
 615   scalar_int_mode op_mode = insv->field_mode;
 616   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 617   if (bitsize == 0 || bitsize > unit)
 618     return false;
 619
 620   if (MEM_P (xop0))
 621     /* Get a reference to the first byte of the field.  */
 622     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 623                                  &bitnum);
 624   else
 625     {
 626       /* Convert from counting within OP0 to counting in OP_MODE.  */
 627       if (BYTES_BIG_ENDIAN)
 628         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
 629
 630       /* If xop0 is a register, we need it in OP_MODE
 631          to make it acceptable to the format of insv.  */
 632       if (GET_CODE (xop0) == SUBREG)
 633         {
 634           /* If such a SUBREG can't be created, give up.  */
 635           if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
 636                                 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
 637             return false;
 638           /* We can't just change the mode, because this might clobber op0,
 639              and we will need the original value of op0 if insv fails.  */
 640           xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
 641                                  SUBREG_BYTE (xop0));
 642         }
 643       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 644         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 645     }
 646
 647   /* If the destination is a paradoxical subreg such that we need a
 648      truncate to the inner mode, perform the insertion on a temporary and
 649      truncate the result to the original destination.  Note that we can't
 650      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 651      X) 0)) is (reg:N X).  */
 652   if (GET_CODE (xop0) == SUBREG
 653       && REG_P (SUBREG_REG (xop0))
 654       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 655                                          op_mode))
 656     {
 657       rtx tem = gen_reg_rtx (op_mode);
 658       emit_move_insn (tem, xop0);
 659       xop0 = tem;
 660       copy_back = true;
 661     }
 662
 663   /* There is a similar overflow check at the start of store_bit_field_1,
 664      but it only handles the case where the field lies completely
 665      outside the register.  The field can also lie only partially in
 666      the register, and for that partial overflow we need to adjust
 667      BITSIZE here.  Without this adjustment, pr48335-2.c breaks on
 668      big-endian targets that support a bit-insert instruction, such as
 669      arm and aarch64.  */
 670   if (bitsize + bitnum > unit && bitnum < unit)
 671     {
 672       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 673                "destination object, data truncated into %wu-bit",
 674                bitsize, unit - bitnum);
 675       bitsize = unit - bitnum;
 676     }
 677
 678   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 679      "backwards" from the size of the unit we are inserting into.
 680      Otherwise, we count bits from the most significant on a
 681      BYTES/BITS_BIG_ENDIAN machine.  */
 682
 683   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 684     bitnum = unit - bitsize - bitnum;
 685
 686   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 687   value1 = value;
 688   if (value_mode != op_mode)
 689     {
 690       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
 691         {
 692           rtx tmp;
 693           /* Optimization: Don't bother really extending VALUE
 694              if it has all the bits we will actually use.  However,
 695              if we must narrow it, be sure we do it correctly.  */
 696
 697           if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
 698             {
 699               tmp = simplify_subreg (op_mode, value1, value_mode, 0);
 700               if (! tmp)
 701                 tmp = simplify_gen_subreg (op_mode,
 702                                            force_reg (value_mode, value1),
 703                                            value_mode, 0);
 704             }
 705           else
 706             {
 707               if (targetm.mode_rep_extended (op_mode, value_mode) != UNKNOWN)
 708                 tmp = simplify_gen_unary (TRUNCATE, op_mode,
 709                                           value1, value_mode);
 710               else
 711                 {
 712                   tmp = gen_lowpart_if_possible (op_mode, value1);
 713                   if (! tmp)
 714                     tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
 715                 }
 716             }
 717           value1 = tmp;
 718         }
 719       else if (CONST_INT_P (value))
 720         value1 = gen_int_mode (INTVAL (value), op_mode);
 721       else
 722         /* Parse phase is supposed to make VALUE's data type
 723            match that of the component reference, which is a type
 724            at least as wide as the field; so VALUE should have
 725            a mode that corresponds to that type.  */
 726         gcc_assert (CONSTANT_P (value));
 727     }
 728
       /* Operands of the insertion: destination, field width, field
          position and the value in OP_MODE.  */
 729   create_fixed_operand (&ops[0], xop0);
 730   create_integer_operand (&ops[1], bitsize);
 731   create_integer_operand (&ops[2], bitnum);
 732   create_input_operand (&ops[3], value1, op_mode);
 733   if (maybe_expand_insn (insv->icode, 4, ops))
 734     {
 735       if (copy_back)
 736         convert_move (op0, xop0, true);
 737       return true;
 738     }
       /* The insertion could not be expanded: discard what was emitted.  */
 739   delete_insns_since (last);
 740   return false;
 741 }
 742
 743 /* A subroutine of store_bit_field, with the same arguments.  Return true
 744    if the operation could be implemented.
 745
 746    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 747    no other way of implementing the operation.  If FALLBACK_P is false,
 748    return false instead.
 749
 750    If UNDEFINED_P is true then STR_RTX is undefined and may be set using
 751    a subreg instead.  */
 752
 753 static bool
 754 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
 755                    poly_uint64 bitregion_start, poly_uint64 bitregion_end,
 756                    machine_mode fieldmode,
 757                    rtx value, bool reverse, bool fallback_p, bool undefined_p)
 758 {
 759   rtx op0 = str_rtx;
 760
 761   while (GET_CODE (op0) == SUBREG)
 762     {
 763       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
 764       op0 = SUBREG_REG (op0);
 765     }
 766
 767   /* No action is needed if the target is a register and if the field
 768      lies completely outside that register.  This can occur if the source
 769      code contains an out-of-bounds access to a small array.  */
 770   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
 771     return true;
 772
 773   /* Use vec_set patterns for inserting parts of vectors whenever
 774      available.  */
 775   machine_mode outermode = GET_MODE (op0);
 776   scalar_mode innermode = GET_MODE_INNER (outermode);
 777   poly_uint64 pos;
 778   if (VECTOR_MODE_P (outermode)
 779       && !MEM_P (op0)
 780       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
 781       && fieldmode == innermode
 782       && known_eq (bitsize, GET_MODE_PRECISION (innermode))
 783       && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
 784     {
 785       class expand_operand ops[3];
 786       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 787
 788       create_fixed_operand (&ops[0], op0);
 789       create_input_operand (&ops[1], value, innermode);
 790       create_integer_operand (&ops[2], pos);
 791       if (maybe_expand_insn (icode, 3, ops))
 792         return true;
 793     }
 794
 795   /* If the target is a register, overwriting the entire object, or storing
 796      a full-word or multi-word field can be done with just a SUBREG.  */
 797   if (!MEM_P (op0)
 798       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
 799     {
 800       /* Use the subreg machinery either to narrow OP0 to the required
 801          words or to cope with mode punning between equal-sized modes.
 802          In the latter case, use subreg on the rhs side, not lhs.  */
 803       rtx sub;
 804       poly_uint64 bytenum;
 805       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
 806       if (known_eq (bitnum, 0U)
 807           && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
 808         {
 809           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 810           if (sub)
 811             {
 812               if (reverse)
 813                 sub = flip_storage_order (GET_MODE (op0), sub);
 814               emit_move_insn (op0, sub);
 815               return true;
 816             }
 817         }
 818       else if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
 819                && (undefined_p
 820                    || (multiple_p (bitnum, regsize * BITS_PER_UNIT)
 821                        && multiple_p (bitsize, regsize * BITS_PER_UNIT)))
 822                && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
 823         {
 824           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), bytenum);
 825           if (sub)
 826             {
 827               if (reverse)
 828                 value = flip_storage_order (fieldmode, value);
 829               emit_move_insn (sub, value);
 830               return true;
 831             }
 832         }
 833     }
 834
 835   /* If the target is memory, storing any naturally aligned field can be
 836      done with a simple store.  For targets that support fast unaligned
 837      memory, any naturally sized, unit aligned field can be done directly.  */
 838   poly_uint64 bytenum;
 839   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
 840     {
 841       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
 842       if (reverse)
 843         value = flip_storage_order (fieldmode, value);
 844       emit_move_insn (op0, value);
 845       return true;
 846     }
 847
 848   /* It's possible we'll need to handle other cases here for
 849      polynomial bitnum and bitsize.  */
 850
 851   /* From here on we need to be looking at a fixed-size insertion.  */
 852   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
 853   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
 854
 855   /* Make sure we are playing with integral modes.  Pun with subregs
 856      if we aren't.  This must come after the entire register case above,
 857      since that case is valid for any mode.  The following cases are only
 858      valid for integral modes.  */
 859   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
 860   scalar_int_mode imode;
 861   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
 862     {
 863       if (MEM_P (op0))
 864         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
 865                                             0, MEM_SIZE (op0));
 866       else if (!op0_mode.exists ())
 867         {
 868           if (ibitnum == 0
 869               && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
 870               && MEM_P (value)
 871               && !reverse)
 872             {
 873               value = adjust_address (value, GET_MODE (op0), 0);
 874               emit_move_insn (op0, value);
 875               return true;
 876             }
 877           if (!fallback_p)
 878             return false;
 879           rtx temp = assign_stack_temp (GET_MODE (op0),
 880                                         GET_MODE_SIZE (GET_MODE (op0)));
 881           emit_move_insn (temp, op0);
 882           store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
 883                              reverse, fallback_p, undefined_p);
 884           emit_move_insn (op0, temp);
 885           return true;
 886         }
 887       else
 888         op0 = gen_lowpart (op0_mode.require (), op0);
 889     }
 890
 891   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
 892                                    bitregion_start, bitregion_end,
 893                                    fieldmode, value, reverse, fallback_p);
 894 }
 895
 896 /* Subroutine of store_bit_field_1, with the same arguments, except
 897    that BITSIZE and BITNUM are constant.  Handle cases specific to
 898    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
 899    otherwise OP0 is a BLKmode MEM.

        Return true if the operation could be implemented.  As for
        store_bit_field_1, FALLBACK_P selects whether store_fixed_bit_field
        or store_split_bit_field may be used as a last resort.  */
 900
 901 static bool
 902 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
 903                           unsigned HOST_WIDE_INT bitsize,
 904                           unsigned HOST_WIDE_INT bitnum,
 905                           poly_uint64 bitregion_start,
 906                           poly_uint64 bitregion_end,
 907                           machine_mode fieldmode,
 908                           rtx value, bool reverse, bool fallback_p)
 909 {
 910   /* Storing an lsb-aligned field in a register
 911      can be done with a movstrict instruction.  */
 912
 913   if (!MEM_P (op0)
 914       && !reverse
 915       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
 916       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
 917       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 918     {
 919       class expand_operand ops[2];
 920       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 921       rtx arg0 = op0;
 922       unsigned HOST_WIDE_INT subreg_off;
 923
 924       if (GET_CODE (arg0) == SUBREG)
 925         {
 926           /* Else we've got some float mode source being extracted into
 927              a different float mode destination -- this combination of
 928              subregs results in Severe Tire Damage.  */
 929           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 930                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 931                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 932           arg0 = SUBREG_REG (arg0);
 933         }
 934
 935       subreg_off = bitnum / BITS_PER_UNIT;
 936       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
 937           /* STRICT_LOW_PART must have a non-paradoxical subreg as
 938              operand.  */
 939           && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
 940         {
 941           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 942
 943           create_fixed_operand (&ops[0], arg0);
 944           /* Shrink the source operand to FIELDMODE.  */
 945           create_convert_operand_to (&ops[1], value, fieldmode, false);
 946           if (maybe_expand_insn (icode, 2, ops))
 947             return true;
 948         }
 949     }
 950
 951   /* Handle fields bigger than a word.  */
 952
 953   if (bitsize > BITS_PER_WORD)
 954     {
 955       /* Here we transfer the words of the field
 956          in the order least significant first.
 957          This is because the most significant word is the one which may
 958          be less than full.
 959          However, only do that if the value is not BLKmode.  */
 960
 961       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 962       const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 963       rtx_insn *last;
 964
 965       /* This is the mode we must force value to, so that there will be enough
 966          subwords to extract.  Note that fieldmode will often (always?) be
 967          VOIDmode, because that is what store_field uses to indicate that this
 968          is a bit field, but passing VOIDmode to operand_subword_force
 969          is not allowed.
 970
 971          The mode must be fixed-size, since insertions into variable-sized
 972          objects are meant to be handled before calling this function.  */
 973       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
 974       if (value_mode == VOIDmode)
 975         value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
 976
           /* Remember where we started so that a partially emitted sequence
              can be discarded if one of the word stores fails.  */
 977       last = get_last_insn ();
 978       for (int i = 0; i < nwords; i++)
 979         {
 980           /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
 981              except maybe for the last iteration.  */
 982           const unsigned HOST_WIDE_INT new_bitsize
 983             = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 984           /* Bit offset from the starting bit number in the target.  */
 985           const unsigned int bit_offset
 986             = backwards ^ reverse
 987               ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
 988               : i * BITS_PER_WORD;
 989           /* Starting word number in the value.  */
 990           const unsigned int wordnum
 991             = backwards
 992               ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
 993               : i;
 994           /* The chunk of the value in word_mode.  We use bit-field extraction
 995              in BLKmode to handle unaligned memory references and to shift the
 996              last chunk right on big-endian machines if need be.  */
 997           rtx value_word
 998             = fieldmode == BLKmode
 999               ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
1000                                    1, NULL_RTX, word_mode, word_mode, false,
1001                                    NULL)
1002               : operand_subword_force (value, wordnum, value_mode);
1003
1004           if (!store_bit_field_1 (op0, new_bitsize,
1005                                   bitnum + bit_offset,
1006                                   bitregion_start, bitregion_end,
1007                                   word_mode,
1008                                   value_word, reverse, fallback_p, false))
1009             {
1010               delete_insns_since (last);
1011               return false;
1012             }
1013         }
1014       return true;
1015     }
1016
1017   /* If VALUE has a floating-point or complex mode, access it as an
1018      integer of the corresponding size.  This can occur on a machine
1019      with 64 bit registers that uses SFmode for float.  It can also
1020      occur for unaligned float or complex fields.  ORIG_VALUE keeps the
          unconverted value for the register retry further down.  */
1021   rtx orig_value = value;
1022   scalar_int_mode value_mode;
1023   if (GET_MODE (value) == VOIDmode)
1024     /* By this point we've dealt with values that are bigger than a word,
1025        so word_mode is a conservatively correct choice.  */
1026     value_mode = word_mode;
1027   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1028     {
1029       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1030       value = gen_reg_rtx (value_mode);
1031       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1032     }
1033
1034   /* If OP0 is a multi-word register, narrow it to the affected word.
1035      If the field spans two words, defer to store_split_bit_field.
1036      Don't do this if op0 is a single hard register wider than word
1037      such as a float or vector register.  */
1038   if (!MEM_P (op0)
1039       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1040       && (!REG_P (op0)
1041           || !HARD_REGISTER_P (op0)
1042           || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1043     {
1044       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1045         {
1046           if (!fallback_p)
1047             return false;
1048
1049           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1050                                  bitregion_start, bitregion_end,
1051                                  value, value_mode, reverse);
1052           return true;
1053         }
1054       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1055                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1056       gcc_assert (op0);
1057       op0_mode = word_mode;
1058       bitnum %= BITS_PER_WORD;
1059     }
1060
1061   /* From here on we can assume that the field to be stored in fits
1062      within a word.  If the destination is a register, it too fits
1063      in a word.  */
1064
1065   extraction_insn insv;
1066   if (!MEM_P (op0)
1067       && !reverse
1068       && get_best_reg_extraction_insn (&insv, EP_insv,
1069                                        GET_MODE_BITSIZE (op0_mode.require ()),
1070                                        fieldmode)
1071       && store_bit_field_using_insv (&insv, op0, op0_mode,
1072                                      bitsize, bitnum, value, value_mode))
1073     return true;
1074
1075   /* If OP0 is a memory, try copying it to a register and seeing if a
1076      cheap register alternative is available.  */
1077   if (MEM_P (op0) && !reverse)
1078     {
1079       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1080                                         fieldmode)
1081           && store_bit_field_using_insv (&insv, op0, op0_mode,
1082                                          bitsize, bitnum, value, value_mode))
1083         return true;
1084
1085       rtx_insn *last = get_last_insn ();
1086
1087       /* Try loading part of OP0 into a register, inserting the bitfield
1088          into that, and then copying the result back to OP0.  */
1089       unsigned HOST_WIDE_INT bitpos;
1090       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1091                                                bitregion_start, bitregion_end,
1092                                                fieldmode, &bitpos);
1093       if (xop0)
1094         {
1095           rtx tempreg = copy_to_reg (xop0);
               /* Retry on the register copy with ORIG_VALUE and with
                  FALLBACK_P false, so that a failure is reported back here.
                  The insns emitted since LAST are then deleted and we fall
                  through to the generic code below.  */
1096           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1097                                  bitregion_start, bitregion_end,
1098                                  fieldmode, orig_value, reverse, false, false))
1099             {
1100               emit_move_insn (xop0, tempreg);
1101               return true;
1102             }
1103           delete_insns_since (last);
1104         }
1105     }
1106
1107   if (!fallback_p)
1108     return false;
1109
1110   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1111                          bitregion_end, value, value_mode, reverse);
1112   return true;
1113 }
1114
1115 /* Generate code to store value from rtx VALUE
1116    into a bit-field within structure STR_RTX
1117    containing BITSIZE bits starting at bit BITNUM.
1118
1119    BITREGION_START is bitpos of the first bitfield in this region.
1120    BITREGION_END is the bitpos of the ending bitfield in this region.
1121    These two fields are 0, if the C++ memory model does not apply,
1122    or we are not interested in keeping track of bitfield regions.
1123
1124    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1125
1126    If REVERSE is true, the store is to be done in reverse order.
1127
1128    If UNDEFINED_P is true then STR_RTX is currently undefined.  */
1129
1130 void
1131 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1132                  poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1133                  machine_mode fieldmode,
1134                  rtx value, bool reverse, bool undefined_p)
1135 {
1136   /* Handle -fstrict-volatile-bitfields in the cases where it applies.
          This path needs constant BITSIZE and BITNUM and a scalar integer
          FIELDMODE.  */
1137   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1138   scalar_int_mode int_mode;
1139   if (bitsize.is_constant (&ibitsize)
1140       && bitnum.is_constant (&ibitnum)
1141       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1142       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1143                                      bitregion_start, bitregion_end))
1144     {
1145       /* Storing of a full word can be done with a simple store.
1146          We know here that the field can be accessed with one single
1147          instruction.  For targets that support unaligned memory,
1148          an unaligned access may be necessary.  */
1149       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1150         {
1151           str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1152                                              ibitnum / BITS_PER_UNIT);
1153           if (reverse)
1154             value = flip_storage_order (int_mode, value);
1155           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1156           emit_move_insn (str_rtx, value);
1157         }
1158       else
1159         {
1160           rtx temp;
1161
               /* Narrow STR_RTX to an INT_MODE access (IBITNUM is updated to
                  match), load it into a register, insert the field there and
                  store the register back.  */
1162           str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1163                                           ibitnum, &ibitnum);
1164           gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1165           temp = copy_to_reg (str_rtx);
               /* FALLBACK_P is true, so failure is not expected.  */
1166           if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1167                                   int_mode, value, reverse, true, undefined_p))
1168             gcc_unreachable ();
1169
1170           emit_move_insn (str_rtx, temp);
1171         }
1172
1173       return;
1174     }
1175
1176   /* Under the C++11 memory model, we must not touch bits outside the
1177      bit region.  Adjust the address to start at the beginning of the
1178      bit region.  */
1179   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1180     {
1181       scalar_int_mode best_mode;
1182       machine_mode addr_mode = VOIDmode;
1183
           /* Rebase BITNUM and the region bounds so that they are relative
              to the start of the region, which becomes the new address.
              SIZE is the number of bytes from there to the end of the
              field, rounded up.  */
1184       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1185       bitnum -= bitregion_start;
1186       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1187       bitregion_end -= bitregion_start;
1188       bitregion_start = 0;
           /* Use the mode chosen by get_best_mode for the adjusted MEM when
              the field is constant-sized and such a mode exists; otherwise
              ADDR_MODE stays VOIDmode.  */
1189       if (bitsize.is_constant (&ibitsize)
1190           && bitnum.is_constant (&ibitnum)
1191           && get_best_mode (ibitsize, ibitnum,
1192                             bitregion_start, bitregion_end,
1193                             MEM_ALIGN (str_rtx), INT_MAX,
1194                             MEM_VOLATILE_P (str_rtx), &best_mode))
1195         addr_mode = best_mode;
1196       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1197                                               offset, size);
1198     }
1199
       /* FALLBACK_P is true, so failure is not expected.  */
1200   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1201                           bitregion_start, bitregion_end,
1202                           fieldmode, value, reverse, true, undefined_p))
1203     gcc_unreachable ();
1204 }
1205 \f
1206 /* Use shifts and boolean operations to store VALUE into a bit field of
1207    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1208    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1209    the mode of VALUE.
1210
1211    If REVERSE is true, the store is to be done in reverse order.

        For a MEM, BITREGION_START and BITREGION_END are passed to
        get_best_mode when choosing the access mode.  If no mode is found
        the store is handed to store_split_bit_field.  */
1212
1213 static void
1214 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1215                        unsigned HOST_WIDE_INT bitsize,
1216                        unsigned HOST_WIDE_INT bitnum,
1217                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1218                        rtx value, scalar_int_mode value_mode, bool reverse)
1219 {
1220   /* There is a case not handled here:
1221      a structure with a known alignment of just a halfword
1222      and a field split across two aligned halfwords within the structure.
1223      Or likewise a structure with a known alignment of just a byte
1224      and a field split across two bytes.
1225      Such cases are not supposed to be able to occur.  */
1226
1227   scalar_int_mode best_mode;
1228   if (MEM_P (op0))
1229     {
           /* Limit the access width to a word, and to OP0's own mode if
              that is narrower.  */
1230       unsigned int max_bitsize = BITS_PER_WORD;
1231       scalar_int_mode imode;
1232       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1233         max_bitsize = GET_MODE_BITSIZE (imode);
1234
1235       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1236                           MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1237                           &best_mode))
1238         {
1239           /* The only way this should occur is if the field spans word
1240              boundaries.  */
1241           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1242                                  bitregion_start, bitregion_end,
1243                                  value, value_mode, reverse);
1244           return;
1245         }
1246
           /* Narrow OP0 to a BEST_MODE access; BITNUM is updated to be
              relative to the narrowed MEM.  */
1247       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1248     }
1249   else
         /* A non-MEM destination is operated on in its own mode.  */
1250     best_mode = op0_mode.require ();
1251
1252   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1253                            value, value_mode, reverse);
1254 }
1255
1256 /* Helper function for store_fixed_bit_field, stores
1257    the bit field always using MODE, which is the mode of OP0.  The other
1258    arguments are as for store_fixed_bit_field.

        The store is done as OP0 = (OP0 & ~field_mask) | (VALUE << BITNUM),
        with the AND or the IOR omitted when a constant VALUE makes it
        unnecessary.  */
1259
1260 static void
1261 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1262                          unsigned HOST_WIDE_INT bitsize,
1263                          unsigned HOST_WIDE_INT bitnum,
1264                          rtx value, scalar_int_mode value_mode, bool reverse)
1265 {
1266   rtx temp;
       /* Set when a constant VALUE, truncated to BITSIZE bits, is all zeros
          (so the IOR below can be skipped) or all ones (so the AND below
          can be skipped).  */
1267   int all_zero = 0;
1268   int all_one = 0;
1269
1270   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1271      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1272
1273   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1274     /* BITNUM is the distance between our msb
1275        and that of the containing datum.
1276        Convert it to the distance from the lsb.  */
1277     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1278
1279   /* Now BITNUM is always the distance between our lsb
1280      and that of OP0.  */
1281
1282   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1283      we must first convert its mode to MODE.  */
1284
1285   if (CONST_INT_P (value))
1286     {
1287       unsigned HOST_WIDE_INT v = UINTVAL (value);
1288
           /* Truncate the constant to the field width.  The shift is only
              done when BITSIZE is less than the width of a HOST_WIDE_INT.  */
1289       if (bitsize < HOST_BITS_PER_WIDE_INT)
1290         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1291
1292       if (v == 0)
1293         all_zero = 1;
1294       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1295                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1296                || (bitsize == HOST_BITS_PER_WIDE_INT
1297                    && v == HOST_WIDE_INT_M1U))
1298         all_one = 1;
1299
1300       value = lshift_value (mode, v, bitnum);
1301     }
1302   else
1303     {
           /* Masking VALUE is only needed if VALUE_MODE is not exactly
              BITSIZE bits wide and the field does not end at the most
              significant bit of MODE.  */
1304       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1305                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1306
1307       if (value_mode != mode)
1308         value = convert_to_mode (mode, value, 1);
1309
1310       if (must_and)
1311         value = expand_binop (mode, and_optab, value,
1312                               mask_rtx (mode, 0, bitsize, 0),
1313                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1314       if (bitnum > 0)
1315         value = expand_shift (LSHIFT_EXPR, mode, value,
1316                               bitnum, NULL_RTX, 1);
1317     }
1318
1319   if (reverse)
1320     value = flip_storage_order (mode, value);
1321
1322   /* Now clear the chosen bits in OP0, except that if VALUE is a constant
1323      with all bits of the field set we need not bother.  */
1324   /* We keep the intermediates in registers to allow CSE to combine
1325      consecutive bitfield assignments.  */
1326
1327   temp = force_reg (mode, op0);
1328
1329   if (! all_one)
1330     {
1331       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1332       if (reverse)
1333         mask = flip_storage_order (mode, mask);
1334       temp = expand_binop (mode, and_optab, temp, mask,
1335                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1336       temp = force_reg (mode, temp);
1337     }
1338
1339   /* Now logical-or VALUE into OP0, unless it is zero.  */
1340
1341   if (! all_zero)
1342     {
1343       temp = expand_binop (mode, ior_optab, temp, value,
1344                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1345       temp = force_reg (mode, temp);
1346     }
1347
       /* Write the result back unless it is already in OP0 itself.  */
1348   if (op0 != temp)
1349     {
1350       op0 = copy_rtx (op0);
1351       emit_move_insn (op0, temp);
1352     }
1353 }
1354 \f
1355 /* Store a bit field that is split across multiple accessible memory objects.
1356
1357    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1358    BITSIZE is the field width; BITPOS the position of its first bit
1359    (within the word).
1360    VALUE is the value to store, which has mode VALUE_MODE.
1361    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1362    a BLKmode MEM.
1363
1364    If REVERSE is true, the store is to be done in reverse order.
1365
1366    This does not yet handle fields wider than BITS_PER_WORD.

        The field is stored piece by piece with store_fixed_bit_field.  Each
        piece lies within one UNIT-bit chunk of OP0 and is at most
        BITS_PER_WORD bits wide.  */
1367
1368 static void
1369 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1370                        unsigned HOST_WIDE_INT bitsize,
1371                        unsigned HOST_WIDE_INT bitpos,
1372                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1373                        rtx value, scalar_int_mode value_mode, bool reverse)
1374 {
1375   unsigned int unit, total_bits, bitsdone = 0;
1376
1377   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1378      much at a time.  */
1379   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1380     unit = BITS_PER_WORD;
1381   else
1382     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1383
1384   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1385      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1386      again, and we will mutually recurse forever.  */
1387   if (MEM_P (op0) && op0_mode.exists ())
1388     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1389
1390   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1391      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1392      that VALUE might be a floating-point constant.  */
1393   if (CONSTANT_P (value) && !CONST_INT_P (value))
1394     {
1395       rtx word = gen_lowpart_common (word_mode, value);
1396
1397       if (word && (value != word))
1398         value = word;
1399       else
1400         value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1401       value_mode = word_mode;
1402     }
1403
1404   total_bits = GET_MODE_BITSIZE (value_mode);
1405
       /* Each iteration stores THISSIZE bits; BITSDONE counts the bits of
          the field stored so far.  */
1406   while (bitsdone < bitsize)
1407     {
1408       unsigned HOST_WIDE_INT thissize;
1409       unsigned HOST_WIDE_INT thispos;
1410       unsigned HOST_WIDE_INT offset;
1411       rtx part;
1412
           /* OFFSET is the index of the UNIT-bit chunk holding the next bit
              to store and THISPOS is that bit's position within the chunk.  */
1413       offset = (bitpos + bitsdone) / unit;
1414       thispos = (bitpos + bitsdone) % unit;
1415
1416       /* When region of bytes we can touch is restricted, decrease
1417          UNIT close to the end of the region as needed.  If op0 is a REG
1418          or SUBREG of REG, don't do this, as there can't be data races
1419          on a register and we can expand shorter code in some cases.  */
1420       if (maybe_ne (bitregion_end, 0U)
1421           && unit > BITS_PER_UNIT
1422           && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1423           && !REG_P (op0)
1424           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1425         {
               /* Retry this piece with a smaller unit; BITSDONE is unchanged
                  so OFFSET and THISPOS are recomputed.  */
1426           unit = unit / 2;
1427           continue;
1428         }
1429
1430       /* THISSIZE must not overrun a word boundary.  Otherwise,
1431          store_fixed_bit_field will call us again, and we will mutually
1432          recurse forever.  */
1433       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1434       thissize = MIN (thissize, unit - thispos);
1435
           /* NOTE(review): THISSIZE can be as large as BITS_PER_WORD.  The
              constant masks below shift HOST_WIDE_INT_1 by THISSIZE, which
              presumably relies on THISSIZE being smaller than
              HOST_BITS_PER_WIDE_INT here -- confirm.  */
1436       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1437         {
1438           /* Fetch successively less significant portions.  */
1439           if (CONST_INT_P (value))
1440             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1441                              >> (bitsize - bitsdone - thissize))
1442                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1443           /* Likewise, but the source is little-endian.  */
1444           else if (reverse)
1445             part = extract_fixed_bit_field (word_mode, value, value_mode,
1446                                             thissize,
1447                                             bitsize - bitsdone - thissize,
1448                                             NULL_RTX, 1, false);
1449           else
1450             /* The args are chosen so that the last part includes the
1451                lsb.  Give extract_fixed_bit_field the value it needs (with
1452                endianness compensation) to fetch the piece we want.  */
1453             part = extract_fixed_bit_field (word_mode, value, value_mode,
1454                                             thissize,
1455                                             total_bits - bitsize + bitsdone,
1456                                             NULL_RTX, 1, false);
1457         }
1458       else
1459         {
1460           /* Fetch successively more significant portions.  */
1461           if (CONST_INT_P (value))
1462             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1463                              >> bitsdone)
1464                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1465           /* Likewise, but the source is big-endian.  */
1466           else if (reverse)
1467             part = extract_fixed_bit_field (word_mode, value, value_mode,
1468                                             thissize,
1469                                             total_bits - bitsdone - thissize,
1470                                             NULL_RTX, 1, false);
1471           else
1472             part = extract_fixed_bit_field (word_mode, value, value_mode,
1473                                             thissize, bitsdone, NULL_RTX,
1474                                             1, false);
1475         }
1476
1477       /* If OP0 is a register, then handle OFFSET here.  */
1478       rtx op0_piece = op0;
1479       opt_scalar_int_mode op0_piece_mode = op0_mode;
1480       if (SUBREG_P (op0) || REG_P (op0))
1481         {
1482           scalar_int_mode imode;
               /* A register narrower than a word has only one chunk, so a
                  nonzero OFFSET lies outside it.  Otherwise select the word
                  of OP0 that contains the chunk.  */
1483           if (op0_mode.exists (&imode)
1484               && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1485             {
1486               if (offset)
1487                 op0_piece = const0_rtx;
1488             }
1489           else
1490             {
1491               op0_piece = operand_subword_force (op0,
1492                                                  offset * unit / BITS_PER_WORD,
1493                                                  GET_MODE (op0));
1494               op0_piece_mode = word_mode;
1495             }
               /* Reduce OFFSET to a chunk index within the selected word.  */
1496           offset &= BITS_PER_WORD / unit - 1;
1497         }
1498
1499       /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is const0_rtx,
1500          it is just an out-of-bounds access.  Ignore it.  */
1501       if (op0_piece != const0_rtx)
1502         store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1503                                offset * unit + thispos, bitregion_start,
1504                                bitregion_end, part, word_mode, reverse);
1505       bitsdone += thissize;
1506     }
1507 }
1508 \f
1509 /* A subroutine of extract_bit_field_1 that converts return value X
1510    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1511    to extract_bit_field.  */
1512
1513 static rtx
1514 convert_extracted_bit_field (rtx x, machine_mode mode,
1515                              machine_mode tmode, bool unsignedp)
1516 {
1517   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1518     return x;
1519
1520   /* If the x mode is not a scalar integral, first convert to the
1521      integer mode of that size and then access it as a floating-point
1522      value via a SUBREG.  */
1523   if (!SCALAR_INT_MODE_P (tmode))
1524     {
1525       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1526       x = convert_to_mode (int_mode, x, unsignedp);
1527       x = force_reg (int_mode, x);
1528       return gen_lowpart (tmode, x);
1529     }
1530
1531   return convert_to_mode (tmode, x, unsignedp);
1532 }
1533
1534 /* Try to use an ext(z)v pattern to extract a field from OP0.
1535    Return the extracted value on success, otherwise return null.
1536    EXTV describes the extraction instruction to use.  If OP0_MODE
1537    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1538    The other arguments are as for extract_bit_field.  */
1539
1540 static rtx
1541 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1542                               opt_scalar_int_mode op0_mode,
1543                               unsigned HOST_WIDE_INT bitsize,
1544                               unsigned HOST_WIDE_INT bitnum,
1545                               int unsignedp, rtx target,
1546                               machine_mode mode, machine_mode tmode)
1547 {
1548   class expand_operand ops[4];
1549   rtx spec_target = target;
1550   rtx spec_target_subreg = 0;
1551   scalar_int_mode ext_mode = extv->field_mode;
1552   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1553
1554   if (bitsize == 0 || unit < bitsize)
1555     return NULL_RTX;
1556
1557   if (MEM_P (op0))
1558     /* Get a reference to the first byte of the field.  */
1559     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1560                                 &bitnum);
1561   else
1562     {
1563       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1564       if (BYTES_BIG_ENDIAN)
1565         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1566
1567       /* If op0 is a register, we need it in EXT_MODE to make it
1568          acceptable to the format of ext(z)v.  */
1569       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1570         return NULL_RTX;
1571       if (REG_P (op0) && op0_mode.require () != ext_mode)
1572         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1573     }
1574
1575   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1576      "backwards" from the size of the unit we are extracting from.
1577      Otherwise, we count bits from the most significant on a
1578      BYTES/BITS_BIG_ENDIAN machine.  */
1579
1580   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1581     bitnum = unit - bitsize - bitnum;
1582
1583   if (target == 0)
1584     target = spec_target = gen_reg_rtx (tmode);
1585
1586   if (GET_MODE (target) != ext_mode)
1587     {
1588       rtx temp;
1589       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1590          between the mode of the extraction (word_mode) and the target
1591          mode.  Instead, create a temporary and use convert_move to set
1592          the target.  */
1593       if (REG_P (target)
1594           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1595           && (temp = gen_lowpart_if_possible (ext_mode, target)))
1596         {
1597           target = temp;
1598           if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1599             spec_target_subreg = target;
1600         }
1601       else
1602         target = gen_reg_rtx (ext_mode);
1603     }
1604
1605   create_output_operand (&ops[0], target, ext_mode);
1606   create_fixed_operand (&ops[1], op0);
1607   create_integer_operand (&ops[2], bitsize);
1608   create_integer_operand (&ops[3], bitnum);
1609   if (maybe_expand_insn (extv->icode, 4, ops))
1610     {
1611       target = ops[0].value;
1612       if (target == spec_target)
1613         return target;
1614       if (target == spec_target_subreg)
1615         return spec_target;
1616       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1617     }
1618   return NULL_RTX;
1619 }
1620
1621 /* See whether it would be valid to extract the part of OP0 with
1622    mode OP0_MODE described by BITNUM and BITSIZE into a value of
1623    mode MODE using a subreg operation.
1624    Return the subreg if so, otherwise return null.  */
1625
1626 static rtx
1627 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1628                              machine_mode op0_mode,
1629                              poly_uint64 bitsize, poly_uint64 bitnum)
1630 {
1631   poly_uint64 bytenum;
1632   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1633       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1634       && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1635       && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1636     return simplify_gen_subreg (mode, op0, op0_mode, bytenum);
1637   return NULL_RTX;
1638 }
1639
1640 /* A subroutine of extract_bit_field, with the same arguments.
1641    If UNSIGNEDP is -1, the result need not be sign or zero extended.
1642    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1643    if we can find no other means of implementing the operation.
1644    if FALLBACK_P is false, return NULL instead.  */
1645
1646 static rtx
1647 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1648                      int unsignedp, rtx target, machine_mode mode,
1649                      machine_mode tmode, bool reverse, bool fallback_p,
1650                      rtx *alt_rtl)
1651 {
1652   rtx op0 = str_rtx;
1653   machine_mode mode1;
1654
1655   if (tmode == VOIDmode)
1656     tmode = mode;
1657
1658   while (GET_CODE (op0) == SUBREG)
1659     {
1660       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1661       op0 = SUBREG_REG (op0);
1662     }
1663
1664   /* If we have an out-of-bounds access to a register, just return an
1665      uninitialized register of the required mode.  This can occur if the
1666      source code contains an out-of-bounds access to a small array.  */
1667   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1668     return gen_reg_rtx (tmode);
1669
1670   if (REG_P (op0)
1671       && mode == GET_MODE (op0)
1672       && known_eq (bitnum, 0U)
1673       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1674     {
1675       if (reverse)
1676         op0 = flip_storage_order (mode, op0);
1677       /* We're trying to extract a full register from itself.  */
1678       return op0;
1679     }
1680
1681   /* First try to check for vector from vector extractions.  */
1682   if (VECTOR_MODE_P (GET_MODE (op0))
1683       && !MEM_P (op0)
1684       && VECTOR_MODE_P (tmode)
1685       && known_eq (bitsize, GET_MODE_PRECISION (tmode))
1686       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1687     {
1688       machine_mode new_mode = GET_MODE (op0);
1689       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1690         {
1691           scalar_mode inner_mode = GET_MODE_INNER (tmode);
1692           poly_uint64 nunits;
1693           if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1694                            GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1695               || !related_vector_mode (tmode, inner_mode,
1696                                        nunits).exists (&new_mode)
1697               || maybe_ne (GET_MODE_SIZE (new_mode),
1698                            GET_MODE_SIZE (GET_MODE (op0))))
1699             new_mode = VOIDmode;
1700         }
1701       poly_uint64 pos;
1702       if (new_mode != VOIDmode
1703           && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1704               != CODE_FOR_nothing)
1705           && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1706         {
1707           class expand_operand ops[3];
1708           machine_mode outermode = new_mode;
1709           machine_mode innermode = tmode;
1710           enum insn_code icode
1711             = convert_optab_handler (vec_extract_optab, outermode, innermode);
1712
1713           if (new_mode != GET_MODE (op0))
1714             op0 = gen_lowpart (new_mode, op0);
1715           create_output_operand (&ops[0], target, innermode);
1716           ops[0].target = 1;
1717           create_input_operand (&ops[1], op0, outermode);
1718           create_integer_operand (&ops[2], pos);
1719           if (maybe_expand_insn (icode, 3, ops))
1720             {
1721               if (alt_rtl && ops[0].target)
1722                 *alt_rtl = target;
1723               target = ops[0].value;
1724               if (GET_MODE (target) != mode)
1725                 return gen_lowpart (tmode, target);
1726               return target;
1727             }
1728         }
1729     }
1730
1731   /* See if we can get a better vector mode before extracting.  */
1732   if (VECTOR_MODE_P (GET_MODE (op0))
1733       && !MEM_P (op0)
1734       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1735     {
1736       machine_mode new_mode;
1737
1738       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1739         new_mode = MIN_MODE_VECTOR_FLOAT;
1740       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1741         new_mode = MIN_MODE_VECTOR_FRACT;
1742       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1743         new_mode = MIN_MODE_VECTOR_UFRACT;
1744       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1745         new_mode = MIN_MODE_VECTOR_ACCUM;
1746       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1747         new_mode = MIN_MODE_VECTOR_UACCUM;
1748       else
1749         new_mode = MIN_MODE_VECTOR_INT;
1750
1751       FOR_EACH_MODE_FROM (new_mode, new_mode)
1752         if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1753             && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1754             && known_eq (bitsize, GET_MODE_UNIT_PRECISION (new_mode))
1755             && multiple_p (bitnum, GET_MODE_UNIT_PRECISION (new_mode))
1756             && targetm.vector_mode_supported_p (new_mode)
1757             && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1758           break;
1759       if (new_mode != VOIDmode)
1760         op0 = gen_lowpart (new_mode, op0);
1761     }
1762
1763   /* Use vec_extract patterns for extracting parts of vectors whenever
1764      available.  If that fails, see whether the current modes and bitregion
1765      give a natural subreg.  */
1766   machine_mode outermode = GET_MODE (op0);
1767   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1768     {
1769       scalar_mode innermode = GET_MODE_INNER (outermode);
1770
1771       enum insn_code icode
1772         = convert_optab_handler (vec_extract_optab, outermode, innermode);
1773
1774       poly_uint64 pos;
1775       if (icode != CODE_FOR_nothing
1776           && known_eq (bitsize, GET_MODE_PRECISION (innermode))
1777           && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
1778         {
1779           class expand_operand ops[3];
1780
1781           create_output_operand (&ops[0], target,
1782                                  insn_data[icode].operand[0].mode);
1783           ops[0].target = 1;
1784           create_input_operand (&ops[1], op0, outermode);
1785           create_integer_operand (&ops[2], pos);
1786           if (maybe_expand_insn (icode, 3, ops))
1787             {
1788               if (alt_rtl && ops[0].target)
1789                 *alt_rtl = target;
1790               target = ops[0].value;
1791               if (GET_MODE (target) != mode)
1792                 return gen_lowpart (tmode, target);
1793               return target;
1794             }
1795         }
1796       /* Using subregs is useful if we're extracting one register vector
1797          from a multi-register vector.  extract_bit_field_as_subreg checks
1798          for valid bitsize and bitnum, so we don't need to do that here.  */
1799       if (VECTOR_MODE_P (mode))
1800         {
1801           rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
1802                                                  bitsize, bitnum);
1803           if (sub)
1804             return sub;
1805         }
1806     }
1807
1808   /* Make sure we are playing with integral modes.  Pun with subregs
1809      if we aren't.  */
1810   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1811   scalar_int_mode imode;
1812   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1813     {
1814       if (MEM_P (op0))
1815         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1816                                             0, MEM_SIZE (op0));
1817       else if (op0_mode.exists (&imode))
1818         {
1819           op0 = gen_lowpart (imode, op0);
1820
1821           /* If we got a SUBREG, force it into a register since we
1822              aren't going to be able to do another SUBREG on it.  */
1823           if (GET_CODE (op0) == SUBREG)
1824             op0 = force_reg (imode, op0);
1825         }
1826       else
1827         {
1828           poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1829           rtx mem = assign_stack_temp (GET_MODE (op0), size);
1830           emit_move_insn (mem, op0);
1831           op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1832         }
1833     }
1834
1835   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1836      If that's wrong, the solution is to test for it and set TARGET to 0
1837      if needed.  */
1838
1839   /* Get the mode of the field to use for atomic access or subreg
1840      conversion.  */
1841   if (!SCALAR_INT_MODE_P (tmode)
1842       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1843     mode1 = mode;
1844   gcc_assert (mode1 != BLKmode);
1845
1846   /* Extraction of a full MODE1 value can be done with a subreg as long
1847      as the least significant bit of the value is the least significant
1848      bit of either OP0 or a word of OP0.  */
1849   if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
1850     {
1851       rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
1852                                              bitsize, bitnum);
1853       if (sub)
1854         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1855     }
1856
1857   /* Extraction of a full MODE1 value can be done with a load as long as
1858      the field is on a byte boundary and is sufficiently aligned.  */
1859   poly_uint64 bytenum;
1860   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1861     {
1862       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1863       if (reverse)
1864         op0 = flip_storage_order (mode1, op0);
1865       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1866     }
1867
1868   /* If we have a memory source and a non-constant bit offset, restrict
1869      the memory to the referenced bytes.  This is a worst-case fallback
1870      but is useful for things like vector booleans.  */
1871   if (MEM_P (op0) && !bitnum.is_constant ())
1872     {
1873       bytenum = bits_to_bytes_round_down (bitnum);
1874       bitnum = num_trailing_bits (bitnum);
1875       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1876       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1877       op0_mode = opt_scalar_int_mode ();
1878     }
1879
1880   /* It's possible we'll need to handle other cases here for
1881      polynomial bitnum and bitsize.  */
1882
1883   /* From here on we need to be looking at a fixed-size insertion.  */
1884   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1885                                      bitnum.to_constant (), unsignedp,
1886                                      target, mode, tmode, reverse, fallback_p);
1887 }
1888
1889 /* Subroutine of extract_bit_field_1, with the same arguments, except
1890    that BITSIZE and BITNUM are constant.  Handle cases specific to
1891    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1892    otherwise OP0 is a BLKmode MEM.  */
1893
1894 static rtx
1895 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1896                             unsigned HOST_WIDE_INT bitsize,
1897                             unsigned HOST_WIDE_INT bitnum, int unsignedp,
1898                             rtx target, machine_mode mode, machine_mode tmode,
1899                             bool reverse, bool fallback_p)
1900 {
1901   /* Handle fields bigger than a word.  */
1902
1903   if (bitsize > BITS_PER_WORD)
1904     {
1905       /* Here we transfer the words of the field
1906          in the order least significant first.
1907          This is because the most significant word is the one which may
1908          be less than full.  */
1909
1910       const bool backwards = WORDS_BIG_ENDIAN;
1911       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1912       unsigned int i;
1913       rtx_insn *last;
1914
1915       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1916         target = gen_reg_rtx (mode);
1917
1918       /* In case we're about to clobber a base register or something
1919          (see gcc.c-torture/execute/20040625-1.c).   */
1920       if (reg_mentioned_p (target, op0))
1921         target = gen_reg_rtx (mode);
1922
1923       /* Indicate for flow that the entire target reg is being set.  */
1924       emit_clobber (target);
1925
1926       /* The mode must be fixed-size, since extract_bit_field_1 handles
1927          extractions from variable-sized objects before calling this
1928          function.  */
1929       unsigned int target_size
1930         = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1931       last = get_last_insn ();
1932       for (i = 0; i < nwords; i++)
1933         {
1934           /* If I is 0, use the low-order word in both field and target;
1935              if I is 1, use the next to lowest word; and so on.  */
1936           /* Word number in TARGET to use.  */
1937           unsigned int wordnum
1938             = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1939           /* Offset from start of field in OP0.  */
1940           unsigned int bit_offset = (backwards ^ reverse
1941                                      ? MAX ((int) bitsize - ((int) i + 1)
1942                                             * BITS_PER_WORD,
1943                                             0)
1944                                      : (int) i * BITS_PER_WORD);
1945           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1946           rtx result_part
1947             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1948                                              bitsize - i * BITS_PER_WORD),
1949                                    bitnum + bit_offset,
1950                                    (unsignedp ? 1 : -1), target_part,
1951                                    mode, word_mode, reverse, fallback_p, NULL);
1952
1953           gcc_assert (target_part);
1954           if (!result_part)
1955             {
1956               delete_insns_since (last);
1957               return NULL;
1958             }
1959
1960           if (result_part != target_part)
1961             emit_move_insn (target_part, result_part);
1962         }
1963
1964       if (unsignedp)
1965         {
1966           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1967              need to be zero'd out.  */
1968           if (target_size > nwords * UNITS_PER_WORD)
1969             {
1970               unsigned int i, total_words;
1971
1972               total_words = target_size / UNITS_PER_WORD;
1973               for (i = nwords; i < total_words; i++)
1974                 emit_move_insn
1975                   (operand_subword (target,
1976                                     backwards ? total_words - i - 1 : i,
1977                                     1, VOIDmode),
1978                    const0_rtx);
1979             }
1980           return target;
1981         }
1982
1983       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1984       target = expand_shift (LSHIFT_EXPR, mode, target,
1985                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1986       return expand_shift (RSHIFT_EXPR, mode, target,
1987                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1988     }
1989
1990   /* If OP0 is a multi-word register, narrow it to the affected word.
1991      If the region spans two words, defer to extract_split_bit_field.  */
1992   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1993     {
1994       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1995         {
1996           if (!fallback_p)
1997             return NULL_RTX;
1998           target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1999                                             unsignedp, reverse);
2000           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
2001         }
2002       /* If OP0 is a hard register, copy it to a pseudo before calling
2003          simplify_gen_subreg.  */
2004       if (REG_P (op0) && HARD_REGISTER_P (op0))
2005         op0 = copy_to_reg (op0);
2006       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
2007                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
2008       op0_mode = word_mode;
2009       bitnum %= BITS_PER_WORD;
2010     }
2011
2012   /* From here on we know the desired field is smaller than a word.
2013      If OP0 is a register, it too fits within a word.  */
2014   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
2015   extraction_insn extv;
2016   if (!MEM_P (op0)
2017       && !reverse
2018       /* ??? We could limit the structure size to the part of OP0 that
2019          contains the field, with appropriate checks for endianness
2020          and TARGET_TRULY_NOOP_TRUNCATION.  */
2021       && get_best_reg_extraction_insn (&extv, pattern,
2022                                        GET_MODE_BITSIZE (op0_mode.require ()),
2023                                        tmode))
2024     {
2025       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2026                                                  bitsize, bitnum,
2027                                                  unsignedp, target, mode,
2028                                                  tmode);
2029       if (result)
2030         return result;
2031     }
2032
2033   /* If OP0 is a memory, try copying it to a register and seeing if a
2034      cheap register alternative is available.  */
2035   if (MEM_P (op0) & !reverse)
2036     {
2037       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2038                                         tmode))
2039         {
2040           rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2041                                                      bitsize, bitnum,
2042                                                      unsignedp, target, mode,
2043                                                      tmode);
2044           if (result)
2045             return result;
2046         }
2047
2048       rtx_insn *last = get_last_insn ();
2049
2050       /* Try loading part of OP0 into a register and extracting the
2051          bitfield from that.  */
2052       unsigned HOST_WIDE_INT bitpos;
2053       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2054                                                0, 0, tmode, &bitpos);
2055       if (xop0)
2056         {
2057           xop0 = copy_to_reg (xop0);
2058           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2059                                             unsignedp, target,
2060                                             mode, tmode, reverse, false, NULL);
2061           if (result)
2062             return result;
2063           delete_insns_since (last);
2064         }
2065     }
2066
2067   if (!fallback_p)
2068     return NULL;
2069
2070   /* Find a correspondingly-sized integer field, so we can apply
2071      shifts and masks to it.  */
2072   scalar_int_mode int_mode;
2073   if (!int_mode_for_mode (tmode).exists (&int_mode))
2074     /* If this fails, we should probably push op0 out to memory and then
2075        do a load.  */
2076     int_mode = int_mode_for_mode (mode).require ();
2077
2078   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2079                                     bitnum, target, unsignedp, reverse);
2080
2081   /* Complex values must be reversed piecewise, so we need to undo the global
2082      reversal, convert to the complex mode and reverse again.  */
2083   if (reverse && COMPLEX_MODE_P (tmode))
2084     {
2085       target = flip_storage_order (int_mode, target);
2086       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2087       target = flip_storage_order (tmode, target);
2088     }
2089   else
2090     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2091
2092   return target;
2093 }
2094
2095 /* Generate code to extract a byte-field from STR_RTX
2096    containing BITSIZE bits, starting at BITNUM,
2097    and put it in TARGET if possible (if TARGET is nonzero).
2098    Regardless of TARGET, we return the rtx for where the value is placed.
2099
2100    STR_RTX is the structure containing the byte (a REG or MEM).
2101    UNSIGNEDP is nonzero if this is an unsigned bit field.
2102    MODE is the natural mode of the field value once extracted.
2103    TMODE is the mode the caller would like the value to have;
2104    but the value may be returned with type MODE instead.
2105
2106    If REVERSE is true, the extraction is to be done in reverse order.
2107
2108    If a TARGET is specified and we can store in it at no extra cost,
2109    we do so, and return TARGET.
2110    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2111    if they are equally easy.
2112
2113    If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2114    then *ALT_RTL is set to TARGET (before legitimziation).  */
2115
2116 rtx
2117 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2118                    int unsignedp, rtx target, machine_mode mode,
2119                    machine_mode tmode, bool reverse, rtx *alt_rtl)
2120 {
2121   machine_mode mode1;
2122
2123   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2124   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2125     mode1 = GET_MODE (str_rtx);
2126   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2127     mode1 = GET_MODE (target);
2128   else
2129     mode1 = tmode;
2130
2131   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2132   scalar_int_mode int_mode;
2133   if (bitsize.is_constant (&ibitsize)
2134       && bitnum.is_constant (&ibitnum)
2135       && is_a <scalar_int_mode> (mode1, &int_mode)
2136       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2137                                      int_mode, 0, 0))
2138     {
2139       /* Extraction of a full INT_MODE value can be done with a simple load.
2140          We know here that the field can be accessed with one single
2141          instruction.  For targets that support unaligned memory,
2142          an unaligned access may be necessary.  */
2143       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2144         {
2145           rtx result = adjust_bitfield_address (str_rtx, int_mode,
2146                                                 ibitnum / BITS_PER_UNIT);
2147           if (reverse)
2148             result = flip_storage_order (int_mode, result);
2149           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2150           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2151         }
2152
2153       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2154                                       &ibitnum);
2155       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2156       str_rtx = copy_to_reg (str_rtx);
2157       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2158                                   target, mode, tmode, reverse, true, alt_rtl);
2159     }
2160
2161   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2162                               target, mode, tmode, reverse, true, alt_rtl);
2163 }
2164 \f
2165 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2166    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2167    otherwise OP0 is a BLKmode MEM.
2168
2169    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2170    If REVERSE is true, the extraction is to be done in reverse order.
2171
2172    If TARGET is nonzero, attempts to store the value there
2173    and return TARGET, but this is not guaranteed.
2174    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2175
2176 static rtx
2177 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2178                          opt_scalar_int_mode op0_mode,
2179                          unsigned HOST_WIDE_INT bitsize,
2180                          unsigned HOST_WIDE_INT bitnum, rtx target,
2181                          int unsignedp, bool reverse)
2182 {
2183   scalar_int_mode mode;
2184   if (MEM_P (op0))
2185     {
2186       if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2187                           BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2188         /* The only way this should occur is if the field spans word
2189            boundaries.  */
2190         return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2191                                         unsignedp, reverse);
2192
2193       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2194     }
2195   else
2196     mode = op0_mode.require ();
2197
2198   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2199                                     target, unsignedp, reverse);
2200 }
2201
2202 /* Helper function for extract_fixed_bit_field, extracts
2203    the bit field always using MODE, which is the mode of OP0.
2204    If UNSIGNEDP is -1, the result need not be sign or zero extended.
2205    The other arguments are as for extract_fixed_bit_field.  */
2206
2207 static rtx
2208 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2209                            unsigned HOST_WIDE_INT bitsize,
2210                            unsigned HOST_WIDE_INT bitnum, rtx target,
2211                            int unsignedp, bool reverse)
2212 {
2213   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2214      for invalid input, such as extract equivalent of f5 from
2215      gcc.dg/pr48335-2.c.  */
2216
2217   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2218     /* BITNUM is the distance between our msb and that of OP0.
2219        Convert it to the distance from the lsb.  */
2220     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2221
2222   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2223      We have reduced the big-endian case to the little-endian case.  */
2224   if (reverse)
2225     op0 = flip_storage_order (mode, op0);
2226
2227   if (unsignedp)
2228     {
2229       if (bitnum)
2230         {
2231           /* If the field does not already start at the lsb,
2232              shift it so it does.  */
2233           /* Maybe propagate the target for the shift.  */
2234           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2235           if (tmode != mode)
2236             subtarget = 0;
2237           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2238         }
2239       /* Convert the value to the desired mode.  TMODE must also be a
2240          scalar integer for this conversion to make sense, since we
2241          shouldn't reinterpret the bits.  */
2242       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2243       if (mode != new_mode)
2244         op0 = convert_to_mode (new_mode, op0, 1);
2245
2246       /* Unless the msb of the field used to be the msb when we shifted,
2247          mask out the upper bits.  */
2248
2249       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize
2250           && unsignedp != -1)
2251         return expand_binop (new_mode, and_optab, op0,
2252                              mask_rtx (new_mode, 0, bitsize, 0),
2253                              target, 1, OPTAB_LIB_WIDEN);
2254       return op0;
2255     }
2256
2257   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2258      then arithmetic-shift its lsb to the lsb of the word.  */
2259   op0 = force_reg (mode, op0);
2260
2261   /* Find the narrowest integer mode that contains the field.  */
2262
2263   opt_scalar_int_mode mode_iter;
2264   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2265     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2266       break;
2267
2268   mode = mode_iter.require ();
2269   op0 = convert_to_mode (mode, op0, 0);
2270
2271   if (mode != tmode)
2272     target = 0;
2273
2274   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2275     {
2276       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2277       /* Maybe propagate the target for the shift.  */
2278       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2279       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2280     }
2281
2282   return expand_shift (RSHIFT_EXPR, mode, op0,
2283                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2284 }
2285
2286 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2287    VALUE << BITPOS.  */
2288
2289 static rtx
2290 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2291               int bitpos)
2292 {
2293   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2294 }
2295 \f
2296 /* Extract a bit field that is split across two words
2297    and return an RTX for the result.
2298
2299    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2300    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2301    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2302    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2303    a BLKmode MEM.
2304
2305    If REVERSE is true, the extraction is to be done in reverse order.  */
2306
2307 static rtx
2308 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2309                          unsigned HOST_WIDE_INT bitsize,
2310                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2311                          bool reverse)
2312 {
2313   unsigned int unit;
2314   unsigned int bitsdone = 0;
2315   rtx result = NULL_RTX;
2316   int first = 1;
2317
2318   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2319      much at a time.  */
2320   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2321     unit = BITS_PER_WORD;
2322   else
2323     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2324
2325   while (bitsdone < bitsize)
2326     {
2327       unsigned HOST_WIDE_INT thissize;
2328       rtx part;
2329       unsigned HOST_WIDE_INT thispos;
2330       unsigned HOST_WIDE_INT offset;
2331
2332       offset = (bitpos + bitsdone) / unit;
2333       thispos = (bitpos + bitsdone) % unit;
2334
2335       /* THISSIZE must not overrun a word boundary.  Otherwise,
2336          extract_fixed_bit_field will call us again, and we will mutually
2337          recurse forever.  */
2338       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2339       thissize = MIN (thissize, unit - thispos);
2340
2341       /* If OP0 is a register, then handle OFFSET here.  */
2342       rtx op0_piece = op0;
2343       opt_scalar_int_mode op0_piece_mode = op0_mode;
2344       if (SUBREG_P (op0) || REG_P (op0))
2345         {
2346           op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2347           op0_piece_mode = word_mode;
2348           offset = 0;
2349         }
2350
2351       /* Extract the parts in bit-counting order,
2352          whose meaning is determined by BYTES_PER_UNIT.
2353          OFFSET is in UNITs, and UNIT is in bits.  */
2354       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2355                                       thissize, offset * unit + thispos,
2356                                       0, 1, reverse);
2357       bitsdone += thissize;
2358
2359       /* Shift this part into place for the result.  */
2360       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2361         {
2362           if (bitsize != bitsdone)
2363             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2364                                  bitsize - bitsdone, 0, 1);
2365         }
2366       else
2367         {
2368           if (bitsdone != thissize)
2369             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2370                                  bitsdone - thissize, 0, 1);
2371         }
2372
2373       if (first)
2374         result = part;
2375       else
2376         /* Combine the parts with bitwise or.  This works
2377            because we extracted each part as an unsigned bit field.  */
2378         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2379                                OPTAB_LIB_WIDEN);
2380
2381       first = 0;
2382     }
2383
2384   /* Unsigned bit field: we are done.  */
2385   if (unsignedp)
2386     return result;
2387   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2388   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2389                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2390   return expand_shift (RSHIFT_EXPR, word_mode, result,
2391                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2392 }
2393 \f
2394 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2395    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2396    MODE, fill the upper bits with zeros.  Fail if the layout of either
2397    mode is unknown (as for CC modes) or if the extraction would involve
2398    unprofitable mode punning.  Return the value on success, otherwise
2399    return null.
2400
2401    This is different from gen_lowpart* in these respects:
2402
2403      - the returned value must always be considered an rvalue
2404
2405      - when MODE is wider than SRC_MODE, the extraction involves
2406        a zero extension
2407
2408      - when MODE is smaller than SRC_MODE, the extraction involves
2409        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2410
2411    In other words, this routine performs a computation, whereas the
2412    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2413    operations.  */
2414
2415 rtx
2416 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2417 {
2418   scalar_int_mode int_mode, src_int_mode;
2419
2420   if (mode == src_mode)
2421     return src;
2422
2423   if (CONSTANT_P (src))
2424     {
2425       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2426          fails, it will happily create (subreg (symbol_ref)) or similar
2427          invalid SUBREGs.  */
2428       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2429       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2430       if (ret)
2431         return ret;
2432
2433       if (GET_MODE (src) == VOIDmode
2434           || !validate_subreg (mode, src_mode, src, byte))
2435         return NULL_RTX;
2436
2437       src = force_reg (GET_MODE (src), src);
2438       return gen_rtx_SUBREG (mode, src, byte);
2439     }
2440
2441   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2442     return NULL_RTX;
2443
2444   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2445       && targetm.modes_tieable_p (mode, src_mode))
2446     {
2447       rtx x = gen_lowpart_common (mode, src);
2448       if (x)
2449         return x;
2450     }
2451
2452   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2453       || !int_mode_for_mode (mode).exists (&int_mode))
2454     return NULL_RTX;
2455
2456   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2457     return NULL_RTX;
2458   if (!targetm.modes_tieable_p (int_mode, mode))
2459     return NULL_RTX;
2460
2461   src = gen_lowpart (src_int_mode, src);
2462   if (!validate_subreg (int_mode, src_int_mode, src,
2463                         subreg_lowpart_offset (int_mode, src_int_mode)))
2464     return NULL_RTX;
2465
2466   src = convert_modes (int_mode, src_int_mode, src, true);
2467   src = gen_lowpart (mode, src);
2468   return src;
2469 }
2470 \f
2471 /* Add INC into TARGET.  */
2472
2473 void
2474 expand_inc (rtx target, rtx inc)
2475 {
2476   rtx value = expand_binop (GET_MODE (target), add_optab,
2477                             target, inc,
2478                             target, 0, OPTAB_LIB_WIDEN);
2479   if (value != target)
2480     emit_move_insn (target, value);
2481 }
2482
2483 /* Subtract DEC from TARGET.  */
2484
2485 void
2486 expand_dec (rtx target, rtx dec)
2487 {
2488   rtx value = expand_binop (GET_MODE (target), sub_optab,
2489                             target, dec,
2490                             target, 0, OPTAB_LIB_WIDEN);
2491   if (value != target)
2492     emit_move_insn (target, value);
2493 }
2494 \f
2495 /* Output a shift instruction for expression code CODE,
2496    with SHIFTED being the rtx for the value to shift,
2497    and AMOUNT the rtx for the amount to shift by.
2498    Store the result in the rtx TARGET, if that is convenient.
2499    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2500    Return the rtx for where the value is.
2501    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2502    in which case 0 is returned.  */
2503
2504 static rtx
2505 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2506                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2507 {
2508   rtx op1, temp = 0;
2509   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2510   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2511   optab lshift_optab = ashl_optab;
2512   optab rshift_arith_optab = ashr_optab;
2513   optab rshift_uns_optab = lshr_optab;
2514   optab lrotate_optab = rotl_optab;
2515   optab rrotate_optab = rotr_optab;
2516   machine_mode op1_mode;
2517   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2518   int attempt;
2519   bool speed = optimize_insn_for_speed_p ();
2520
2521   op1 = amount;
2522   op1_mode = GET_MODE (op1);
2523
2524   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2525      shift amount is a vector, use the vector/vector shift patterns.  */
2526   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2527     {
2528       lshift_optab = vashl_optab;
2529       rshift_arith_optab = vashr_optab;
2530       rshift_uns_optab = vlshr_optab;
2531       lrotate_optab = vrotl_optab;
2532       rrotate_optab = vrotr_optab;
2533     }
2534
2535   /* Previously detected shift-counts computed by NEGATE_EXPR
2536      and shifted in the other direction; but that does not work
2537      on all machines.  */
2538
2539   if (SHIFT_COUNT_TRUNCATED)
2540     {
2541       if (CONST_INT_P (op1)
2542           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2543               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2544         op1 = gen_int_shift_amount (mode,
2545                                     (unsigned HOST_WIDE_INT) INTVAL (op1)
2546                                     % GET_MODE_BITSIZE (scalar_mode));
2547       else if (GET_CODE (op1) == SUBREG
2548                && subreg_lowpart_p (op1)
2549                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2550                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2551         op1 = SUBREG_REG (op1);
2552     }
2553
2554   /* Canonicalize rotates by constant amount.  We may canonicalize
2555      to reduce the immediate or if the ISA can rotate by constants
2556      in only on direction.  */
2557   if (rotate && reverse_rotate_by_imm_p (scalar_mode, left, op1))
2558     {
2559       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2560                                          - INTVAL (op1)));
2561       left = !left;
2562       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2563     }
2564
2565   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2566      Note that this is not the case for bigger values.  For instance a rotation
2567      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2568      0x04030201 (bswapsi).  */
2569   if (rotate
2570       && CONST_INT_P (op1)
2571       && INTVAL (op1) == BITS_PER_UNIT
2572       && GET_MODE_SIZE (scalar_mode) == 2
2573       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2574     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2575
2576   if (op1 == const0_rtx)
2577     return shifted;
2578
2579   /* Check whether its cheaper to implement a left shift by a constant
2580      bit count by a sequence of additions.  */
2581   if (code == LSHIFT_EXPR
2582       && CONST_INT_P (op1)
2583       && INTVAL (op1) > 0
2584       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2585       && INTVAL (op1) < MAX_BITS_PER_WORD
2586       && (shift_cost (speed, mode, INTVAL (op1))
2587           > INTVAL (op1) * add_cost (speed, mode))
2588       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2589     {
2590       int i;
2591       for (i = 0; i < INTVAL (op1); i++)
2592         {
2593           temp = force_reg (mode, shifted);
2594           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2595                                   unsignedp, OPTAB_LIB_WIDEN);
2596         }
2597       return shifted;
2598     }
2599
2600   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2601     {
2602       enum optab_methods methods;
2603
2604       if (attempt == 0)
2605         methods = OPTAB_DIRECT;
2606       else if (attempt == 1)
2607         methods = OPTAB_WIDEN;
2608       else
2609         methods = OPTAB_LIB_WIDEN;
2610
2611       if (rotate)
2612         {
2613           /* Widening does not work for rotation.  */
2614           if (methods == OPTAB_WIDEN)
2615             continue;
2616           else if (methods == OPTAB_LIB_WIDEN)
2617             {
2618               /* If we have been unable to open-code this by a rotation,
2619                  do it as the IOR of two shifts.  I.e., to rotate A
2620                  by N bits, compute
2621                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2622                  where C is the bitsize of A.
2623
2624                  It is theoretically possible that the target machine might
2625                  not be able to perform either shift and hence we would
2626                  be making two libcalls rather than just the one for the
2627                  shift (similarly if IOR could not be done).  We will allow
2628                  this extremely unlikely lossage to avoid complicating the
2629                  code below.  */
2630
2631               rtx subtarget = target == shifted ? 0 : target;
2632               rtx new_amount, other_amount;
2633               rtx temp1;
2634
2635               new_amount = op1;
2636               if (op1 == const0_rtx)
2637                 return shifted;
2638               else if (CONST_INT_P (op1))
2639                 other_amount = gen_int_shift_amount
2640                   (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2641               else
2642                 {
2643                   other_amount
2644                     = simplify_gen_unary (NEG, GET_MODE (op1),
2645                                           op1, GET_MODE (op1));
2646                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2647                   other_amount
2648                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2649                                            gen_int_mode (mask, GET_MODE (op1)));
2650                 }
2651
2652               shifted = force_reg (mode, shifted);
2653
2654               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2655                                      mode, shifted, new_amount, 0, 1);
2656               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2657                                       mode, shifted, other_amount,
2658                                       subtarget, 1);
2659               return expand_binop (mode, ior_optab, temp, temp1, target,
2660                                    unsignedp, methods);
2661             }
2662
2663           temp = expand_binop (mode,
2664                                left ? lrotate_optab : rrotate_optab,
2665                                shifted, op1, target, unsignedp, methods);
2666         }
2667       else if (unsignedp)
2668         temp = expand_binop (mode,
2669                              left ? lshift_optab : rshift_uns_optab,
2670                              shifted, op1, target, unsignedp, methods);
2671
2672       /* Do arithmetic shifts.
2673          Also, if we are going to widen the operand, we can just as well
2674          use an arithmetic right-shift instead of a logical one.  */
2675       if (temp == 0 && ! rotate
2676           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2677         {
2678           enum optab_methods methods1 = methods;
2679
2680           /* If trying to widen a log shift to an arithmetic shift,
2681              don't accept an arithmetic shift of the same size.  */
2682           if (unsignedp)
2683             methods1 = OPTAB_MUST_WIDEN;
2684
2685           /* Arithmetic shift */
2686
2687           temp = expand_binop (mode,
2688                                left ? lshift_optab : rshift_arith_optab,
2689                                shifted, op1, target, unsignedp, methods1);
2690         }
2691
2692       /* We used to try extzv here for logical right shifts, but that was
2693          only useful for one machine, the VAX, and caused poor code
2694          generation there for lshrdi3, so the code was deleted and a
2695          define_expand for lshrsi3 was added to vax.md.  */
2696     }
2697
2698   gcc_assert (temp != NULL_RTX || may_fail);
2699   return temp;
2700 }
2701
2702 /* Output a shift instruction for expression code CODE,
2703    with SHIFTED being the rtx for the value to shift,
2704    and AMOUNT the amount to shift by.
2705    Store the result in the rtx TARGET, if that is convenient.
2706    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2707    Return the rtx for where the value is.  */
2708
2709 rtx
2710 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2711               poly_int64 amount, rtx target, int unsignedp)
2712 {
2713   return expand_shift_1 (code, mode, shifted,
2714                          gen_int_shift_amount (mode, amount),
2715                          target, unsignedp);
2716 }
2717
2718 /* Likewise, but return 0 if that cannot be done.  */
2719
2720 rtx
2721 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2722                     int amount, rtx target, int unsignedp)
2723 {
2724   return expand_shift_1 (code, mode,
2725                          shifted, GEN_INT (amount), target, unsignedp, true);
2726 }
2727
2728 /* Output a shift instruction for expression code CODE,
2729    with SHIFTED being the rtx for the value to shift,
2730    and AMOUNT the tree for the amount to shift by.
2731    Store the result in the rtx TARGET, if that is convenient.
2732    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2733    Return the rtx for where the value is.  */
2734
2735 rtx
2736 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2737                        tree amount, rtx target, int unsignedp)
2738 {
2739   return expand_shift_1 (code, mode,
2740                          shifted, expand_normal (amount), target, unsignedp);
2741 }
2742
2743 \f
2744 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2745                         const struct mult_cost *, machine_mode mode);
2746 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2747                               const struct algorithm *, enum mult_variant);
2748 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2749 static rtx extract_high_half (scalar_int_mode, rtx);
2750 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2751 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2752                                        int, int);
/* Compute the best algorithm for multiplying by T and store it in *ALG_OUT.
   The algorithm must cost less than COST_LIMIT.
   If ALG_OUT->cost >= COST_LIMIT, no algorithm was found and all
   other fields of *ALG_OUT are undefined.
   MODE is the machine mode of the multiplication.  */
2758
2759 static void
2760 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2761             const struct mult_cost *cost_limit, machine_mode mode)
2762 {
2763   int m;
2764   struct algorithm *alg_in, *best_alg;
2765   struct mult_cost best_cost;
2766   struct mult_cost new_limit;
2767   int op_cost, op_latency;
2768   unsigned HOST_WIDE_INT orig_t = t;
2769   unsigned HOST_WIDE_INT q;
2770   int maxm, hash_index;
2771   bool cache_hit = false;
2772   enum alg_code cache_alg = alg_zero;
2773   bool speed = optimize_insn_for_speed_p ();
2774   scalar_int_mode imode;
2775   struct alg_hash_entry *entry_ptr;
2776
2777   /* Indicate that no algorithm is yet found.  If no algorithm
2778      is found, this value will be returned and indicate failure.  */
2779   alg_out->cost.cost = cost_limit->cost + 1;
2780   alg_out->cost.latency = cost_limit->latency + 1;
2781
2782   if (cost_limit->cost < 0
2783       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2784     return;
2785
2786   /* Be prepared for vector modes.  */
2787   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2788
2789   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2790
2791   /* Restrict the bits of "t" to the multiplication's mode.  */
2792   t &= GET_MODE_MASK (imode);
2793
2794   /* t == 1 can be done in zero cost.  */
2795   if (t == 1)
2796     {
2797       alg_out->ops = 1;
2798       alg_out->cost.cost = 0;
2799       alg_out->cost.latency = 0;
2800       alg_out->op[0] = alg_m;
2801       return;
2802     }
2803
2804   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2805      fail now.  */
2806   if (t == 0)
2807     {
2808       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2809         return;
2810       else
2811         {
2812           alg_out->ops = 1;
2813           alg_out->cost.cost = zero_cost (speed);
2814           alg_out->cost.latency = zero_cost (speed);
2815           alg_out->op[0] = alg_zero;
2816           return;
2817         }
2818     }
2819
2820   /* We'll be needing a couple extra algorithm structures now.  */
2821
2822   alg_in = XALLOCA (struct algorithm);
2823   best_alg = XALLOCA (struct algorithm);
2824   best_cost = *cost_limit;
2825
2826   /* Compute the hash index.  */
2827   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2828
2829   /* See if we already know what to do for T.  */
2830   entry_ptr = alg_hash_entry_ptr (hash_index);
2831   if (entry_ptr->t == t
2832       && entry_ptr->mode == mode
2833       && entry_ptr->speed == speed
2834       && entry_ptr->alg != alg_unknown)
2835     {
2836       cache_alg = entry_ptr->alg;
2837
2838       if (cache_alg == alg_impossible)
2839         {
2840           /* The cache tells us that it's impossible to synthesize
2841              multiplication by T within entry_ptr->cost.  */
2842           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2843             /* COST_LIMIT is at least as restrictive as the one
2844                recorded in the hash table, in which case we have no
2845                hope of synthesizing a multiplication.  Just
2846                return.  */
2847             return;
2848
2849           /* If we get here, COST_LIMIT is less restrictive than the
2850              one recorded in the hash table, so we may be able to
2851              synthesize a multiplication.  Proceed as if we didn't
2852              have the cache entry.  */
2853         }
2854       else
2855         {
2856           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2857             /* The cached algorithm shows that this multiplication
2858                requires more cost than COST_LIMIT.  Just return.  This
2859                way, we don't clobber this cache entry with
2860                alg_impossible but retain useful information.  */
2861             return;
2862
2863           cache_hit = true;
2864
2865           switch (cache_alg)
2866             {
2867             case alg_shift:
2868               goto do_alg_shift;
2869
2870             case alg_add_t_m2:
2871             case alg_sub_t_m2:
2872               goto do_alg_addsub_t_m2;
2873
2874             case alg_add_factor:
2875             case alg_sub_factor:
2876               goto do_alg_addsub_factor;
2877
2878             case alg_add_t2_m:
2879               goto do_alg_add_t2_m;
2880
2881             case alg_sub_t2_m:
2882               goto do_alg_sub_t2_m;
2883
2884             default:
2885               gcc_unreachable ();
2886             }
2887         }
2888     }
2889
2890   /* If we have a group of zero bits at the low-order part of T, try
2891      multiplying by the remaining bits and then doing a shift.  */
2892
2893   if ((t & 1) == 0)
2894     {
2895     do_alg_shift:
2896       m = ctz_or_zero (t); /* m = number of low zero bits */
2897       if (m < maxm)
2898         {
2899           q = t >> m;
2900           /* The function expand_shift will choose between a shift and
2901              a sequence of additions, so the observed cost is given as
2902              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2903           op_cost = m * add_cost (speed, mode);
2904           if (shift_cost (speed, mode, m) < op_cost)
2905             op_cost = shift_cost (speed, mode, m);
2906           new_limit.cost = best_cost.cost - op_cost;
2907           new_limit.latency = best_cost.latency - op_cost;
2908           synth_mult (alg_in, q, &new_limit, mode);
2909
2910           alg_in->cost.cost += op_cost;
2911           alg_in->cost.latency += op_cost;
2912           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2913             {
2914               best_cost = alg_in->cost;
2915               std::swap (alg_in, best_alg);
2916               best_alg->log[best_alg->ops] = m;
2917               best_alg->op[best_alg->ops] = alg_shift;
2918             }
2919
2920           /* See if treating ORIG_T as a signed number yields a better
2921              sequence.  Try this sequence only for a negative ORIG_T
2922              as it would be useless for a non-negative ORIG_T.  */
2923           if ((HOST_WIDE_INT) orig_t < 0)
2924             {
2925               /* Shift ORIG_T as follows because a right shift of a
2926                  negative-valued signed type is implementation
2927                  defined.  */
2928               q = ~(~orig_t >> m);
2929               /* The function expand_shift will choose between a shift
2930                  and a sequence of additions, so the observed cost is
2931                  given as MIN (m * add_cost(speed, mode),
2932                  shift_cost(speed, mode, m)).  */
2933               op_cost = m * add_cost (speed, mode);
2934               if (shift_cost (speed, mode, m) < op_cost)
2935                 op_cost = shift_cost (speed, mode, m);
2936               new_limit.cost = best_cost.cost - op_cost;
2937               new_limit.latency = best_cost.latency - op_cost;
2938               synth_mult (alg_in, q, &new_limit, mode);
2939
2940               alg_in->cost.cost += op_cost;
2941               alg_in->cost.latency += op_cost;
2942               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2943                 {
2944                   best_cost = alg_in->cost;
2945                   std::swap (alg_in, best_alg);
2946                   best_alg->log[best_alg->ops] = m;
2947                   best_alg->op[best_alg->ops] = alg_shift;
2948                 }
2949             }
2950         }
2951       if (cache_hit)
2952         goto done;
2953     }
2954
2955   /* If we have an odd number, add or subtract one.  */
2956   if ((t & 1) != 0)
2957     {
2958       unsigned HOST_WIDE_INT w;
2959
2960     do_alg_addsub_t_m2:
2961       for (w = 1; (w & t) != 0; w <<= 1)
2962         ;
2963       /* If T was -1, then W will be zero after the loop.  This is another
2964          case where T ends with ...111.  Handling this with (T + 1) and
2965          subtract 1 produces slightly better code and results in algorithm
2966          selection much faster than treating it like the ...0111 case
2967          below.  */
2968       if (w == 0
2969           || (w > 2
2970               /* Reject the case where t is 3.
2971                  Thus we prefer addition in that case.  */
2972               && t != 3))
2973         {
2974           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2975
2976           op_cost = add_cost (speed, mode);
2977           new_limit.cost = best_cost.cost - op_cost;
2978           new_limit.latency = best_cost.latency - op_cost;
2979           synth_mult (alg_in, t + 1, &new_limit, mode);
2980
2981           alg_in->cost.cost += op_cost;
2982           alg_in->cost.latency += op_cost;
2983           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2984             {
2985               best_cost = alg_in->cost;
2986               std::swap (alg_in, best_alg);
2987               best_alg->log[best_alg->ops] = 0;
2988               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2989             }
2990         }
2991       else
2992         {
2993           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2994
2995           op_cost = add_cost (speed, mode);
2996           new_limit.cost = best_cost.cost - op_cost;
2997           new_limit.latency = best_cost.latency - op_cost;
2998           synth_mult (alg_in, t - 1, &new_limit, mode);
2999
3000           alg_in->cost.cost += op_cost;
3001           alg_in->cost.latency += op_cost;
3002           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3003             {
3004               best_cost = alg_in->cost;
3005               std::swap (alg_in, best_alg);
3006               best_alg->log[best_alg->ops] = 0;
3007               best_alg->op[best_alg->ops] = alg_add_t_m2;
3008             }
3009         }
3010
3011       /* We may be able to calculate a * -7, a * -15, a * -31, etc
3012          quickly with a - a * n for some appropriate constant n.  */
3013       m = exact_log2 (-orig_t + 1);
3014       if (m >= 0 && m < maxm)
3015         {
3016           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3017           /* If the target has a cheap shift-and-subtract insn use
3018              that in preference to a shift insn followed by a sub insn.
3019              Assume that the shift-and-sub is "atomic" with a latency
             equal to its cost, otherwise assume that on superscalar
3021              hardware the shift may be executed concurrently with the
3022              earlier steps in the algorithm.  */
3023           if (shiftsub1_cost (speed, mode, m) <= op_cost)
3024             {
3025               op_cost = shiftsub1_cost (speed, mode, m);
3026               op_latency = op_cost;
3027             }
3028           else
3029             op_latency = add_cost (speed, mode);
3030
3031           new_limit.cost = best_cost.cost - op_cost;
3032           new_limit.latency = best_cost.latency - op_latency;
3033           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3034                       &new_limit, mode);
3035
3036           alg_in->cost.cost += op_cost;
3037           alg_in->cost.latency += op_latency;
3038           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3039             {
3040               best_cost = alg_in->cost;
3041               std::swap (alg_in, best_alg);
3042               best_alg->log[best_alg->ops] = m;
3043               best_alg->op[best_alg->ops] = alg_sub_t_m2;
3044             }
3045         }
3046
3047       if (cache_hit)
3048         goto done;
3049     }
3050
3051   /* Look for factors of t of the form
3052      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3053      If we find such a factor, we can multiply by t using an algorithm that
3054      multiplies by q, shift the result by m and add/subtract it to itself.
3055
3056      We search for large factors first and loop down, even if large factors
3057      are less probable than small; if we find a large factor we will find a
3058      good sequence quickly, and therefore be able to prune (by decreasing
3059      COST_LIMIT) the search.  */
3060
3061  do_alg_addsub_factor:
3062   for (m = floor_log2 (t - 1); m >= 2; m--)
3063     {
3064       unsigned HOST_WIDE_INT d;
3065
3066       d = (HOST_WIDE_INT_1U << m) + 1;
3067       if (t % d == 0 && t > d && m < maxm
3068           && (!cache_hit || cache_alg == alg_add_factor))
3069         {
3070           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3071           if (shiftadd_cost (speed, mode, m) <= op_cost)
3072             op_cost = shiftadd_cost (speed, mode, m);
3073
3074           op_latency = op_cost;
3075
3076
3077           new_limit.cost = best_cost.cost - op_cost;
3078           new_limit.latency = best_cost.latency - op_latency;
3079           synth_mult (alg_in, t / d, &new_limit, mode);
3080
3081           alg_in->cost.cost += op_cost;
3082           alg_in->cost.latency += op_latency;
3083           if (alg_in->cost.latency < op_cost)
3084             alg_in->cost.latency = op_cost;
3085           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3086             {
3087               best_cost = alg_in->cost;
3088               std::swap (alg_in, best_alg);
3089               best_alg->log[best_alg->ops] = m;
3090               best_alg->op[best_alg->ops] = alg_add_factor;
3091             }
3092           /* Other factors will have been taken care of in the recursion.  */
3093           break;
3094         }
3095
3096       d = (HOST_WIDE_INT_1U << m) - 1;
3097       if (t % d == 0 && t > d && m < maxm
3098           && (!cache_hit || cache_alg == alg_sub_factor))
3099         {
3100           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3101           if (shiftsub0_cost (speed, mode, m) <= op_cost)
3102             op_cost = shiftsub0_cost (speed, mode, m);
3103
3104           op_latency = op_cost;
3105
3106           new_limit.cost = best_cost.cost - op_cost;
3107           new_limit.latency = best_cost.latency - op_latency;
3108           synth_mult (alg_in, t / d, &new_limit, mode);
3109
3110           alg_in->cost.cost += op_cost;
3111           alg_in->cost.latency += op_latency;
3112           if (alg_in->cost.latency < op_cost)
3113             alg_in->cost.latency = op_cost;
3114           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3115             {
3116               best_cost = alg_in->cost;
3117               std::swap (alg_in, best_alg);
3118               best_alg->log[best_alg->ops] = m;
3119               best_alg->op[best_alg->ops] = alg_sub_factor;
3120             }
3121           break;
3122         }
3123     }
3124   if (cache_hit)
3125     goto done;
3126
3127   /* Try shift-and-add (load effective address) instructions,
3128      i.e. do a*3, a*5, a*9.  */
3129   if ((t & 1) != 0)
3130     {
3131     do_alg_add_t2_m:
3132       q = t - 1;
3133       m = ctz_hwi (q);
3134       if (q && m < maxm)
3135         {
3136           op_cost = shiftadd_cost (speed, mode, m);
3137           new_limit.cost = best_cost.cost - op_cost;
3138           new_limit.latency = best_cost.latency - op_cost;
3139           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3140
3141           alg_in->cost.cost += op_cost;
3142           alg_in->cost.latency += op_cost;
3143           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3144             {
3145               best_cost = alg_in->cost;
3146               std::swap (alg_in, best_alg);
3147               best_alg->log[best_alg->ops] = m;
3148               best_alg->op[best_alg->ops] = alg_add_t2_m;
3149             }
3150         }
3151       if (cache_hit)
3152         goto done;
3153
3154     do_alg_sub_t2_m:
3155       q = t + 1;
3156       m = ctz_hwi (q);
3157       if (q && m < maxm)
3158         {
3159           op_cost = shiftsub0_cost (speed, mode, m);
3160           new_limit.cost = best_cost.cost - op_cost;
3161           new_limit.latency = best_cost.latency - op_cost;
3162           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3163
3164           alg_in->cost.cost += op_cost;
3165           alg_in->cost.latency += op_cost;
3166           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3167             {
3168               best_cost = alg_in->cost;
3169               std::swap (alg_in, best_alg);
3170               best_alg->log[best_alg->ops] = m;
3171               best_alg->op[best_alg->ops] = alg_sub_t2_m;
3172             }
3173         }
3174       if (cache_hit)
3175         goto done;
3176     }
3177
3178  done:
3179   /* If best_cost has not decreased, we have not found any algorithm.  */
3180   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3181     {
3182       /* We failed to find an algorithm.  Record alg_impossible for
3183          this case (that is, <T, MODE, COST_LIMIT>) so that next time
3184          we are asked to find an algorithm for T within the same or
3185          lower COST_LIMIT, we can immediately return to the
3186          caller.  */
3187       entry_ptr->t = t;
3188       entry_ptr->mode = mode;
3189       entry_ptr->speed = speed;
3190       entry_ptr->alg = alg_impossible;
3191       entry_ptr->cost = *cost_limit;
3192       return;
3193     }
3194
3195   /* Cache the result.  */
3196   if (!cache_hit)
3197     {
3198       entry_ptr->t = t;
3199       entry_ptr->mode = mode;
3200       entry_ptr->speed = speed;
3201       entry_ptr->alg = best_alg->op[best_alg->ops];
3202       entry_ptr->cost.cost = best_cost.cost;
3203       entry_ptr->cost.latency = best_cost.latency;
3204     }
3205
3206   /* If we are getting a too long sequence for `struct algorithm'
3207      to record, make this search fail.  */
3208   if (best_alg->ops == MAX_BITS_PER_WORD)
3209     return;
3210
3211   /* Copy the algorithm from temporary space to the space at alg_out.
3212      We avoid using structure assignment because the majority of
3213      best_alg is normally undefined, and this is a critical function.  */
3214   alg_out->ops = best_alg->ops + 1;
3215   alg_out->cost = best_cost;
3216   memcpy (alg_out->op, best_alg->op,
3217           alg_out->ops * sizeof *alg_out->op);
3218   memcpy (alg_out->log, best_alg->log,
3219           alg_out->ops * sizeof *alg_out->log);
3220 }
3221 \f
3222 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3223    Try three variations:
3224
3225        - a shift/add sequence based on VAL itself
3226        - a shift/add sequence based on -VAL, followed by a negation
3227        - a shift/add sequence based on VAL - 1, followed by an addition.
3228
3229    Return true if the cheapest of these cost less than MULT_COST,
3230    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3231
3232 bool
3233 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3234                      struct algorithm *alg, enum mult_variant *variant,
3235                      int mult_cost)
3236 {
3237   struct algorithm alg2;
3238   struct mult_cost limit;
3239   int op_cost;
3240   bool speed = optimize_insn_for_speed_p ();
3241
3242   /* Fail quickly for impossible bounds.  */
3243   if (mult_cost < 0)
3244     return false;
3245
3246   /* Ensure that mult_cost provides a reasonable upper bound.
3247      Any constant multiplication can be performed with less
3248      than 2 * bits additions.  */
3249   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3250   if (mult_cost > op_cost)
3251     mult_cost = op_cost;
3252
3253   *variant = basic_variant;
3254   limit.cost = mult_cost;
3255   limit.latency = mult_cost;
3256   synth_mult (alg, val, &limit, mode);
3257
3258   /* This works only if the inverted value actually fits in an
3259      `unsigned int' */
3260   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3261     {
3262       op_cost = neg_cost (speed, mode);
3263       if (MULT_COST_LESS (&alg->cost, mult_cost))
3264         {
3265           limit.cost = alg->cost.cost - op_cost;
3266           limit.latency = alg->cost.latency - op_cost;
3267         }
3268       else
3269         {
3270           limit.cost = mult_cost - op_cost;
3271           limit.latency = mult_cost - op_cost;
3272         }
3273
3274       synth_mult (&alg2, -val, &limit, mode);
3275       alg2.cost.cost += op_cost;
3276       alg2.cost.latency += op_cost;
3277       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3278         *alg = alg2, *variant = negate_variant;
3279     }
3280
3281   /* This proves very useful for division-by-constant.  */
3282   op_cost = add_cost (speed, mode);
3283   if (MULT_COST_LESS (&alg->cost, mult_cost))
3284     {
3285       limit.cost = alg->cost.cost - op_cost;
3286       limit.latency = alg->cost.latency - op_cost;
3287     }
3288   else
3289     {
3290       limit.cost = mult_cost - op_cost;
3291       limit.latency = mult_cost - op_cost;
3292     }
3293
3294   if (val != HOST_WIDE_INT_MIN
3295       || GET_MODE_UNIT_PRECISION (mode) == HOST_BITS_PER_WIDE_INT)
3296     {
3297       synth_mult (&alg2, val - HOST_WIDE_INT_1U, &limit, mode);
3298       alg2.cost.cost += op_cost;
3299       alg2.cost.latency += op_cost;
3300       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3301         *alg = alg2, *variant = add_variant;
3302     }
3303
3304   return MULT_COST_LESS (&alg->cost, mult_cost);
3305 }
3306
3307 /* A subroutine of expand_mult, used for constant multiplications.
3308    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3309    convenient.  Use the shift/add sequence described by ALG and apply
3310    the final fixup specified by VARIANT.  */
3311
3312 static rtx
3313 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3314                    rtx target, const struct algorithm *alg,
3315                    enum mult_variant variant)
3316 {
3317   unsigned HOST_WIDE_INT val_so_far;
3318   rtx_insn *insn;
3319   rtx accum, tem;
3320   int opno;
3321   machine_mode nmode;
3322
3323   /* Avoid referencing memory over and over and invalid sharing
3324      on SUBREGs.  */
3325   op0 = force_reg (mode, op0);
3326
3327   /* ACCUM starts out either as OP0 or as a zero, depending on
3328      the first operation.  */
3329
3330   if (alg->op[0] == alg_zero)
3331     {
3332       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3333       val_so_far = 0;
3334     }
3335   else if (alg->op[0] == alg_m)
3336     {
3337       accum = copy_to_mode_reg (mode, op0);
3338       val_so_far = 1;
3339     }
3340   else
3341     gcc_unreachable ();
3342
3343   for (opno = 1; opno < alg->ops; opno++)
3344     {
3345       int log = alg->log[opno];
3346       rtx shift_subtarget = optimize ? 0 : accum;
3347       rtx add_target
3348         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3349            && !optimize)
3350           ? target : 0;
3351       rtx accum_target = optimize ? 0 : accum;
3352       rtx accum_inner;
3353
3354       switch (alg->op[opno])
3355         {
3356         case alg_shift:
3357           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3358           /* REG_EQUAL note will be attached to the following insn.  */
3359           emit_move_insn (accum, tem);
3360           val_so_far <<= log;
3361           break;
3362
3363         case alg_add_t_m2:
3364           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3365           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3366                                  add_target ? add_target : accum_target);
3367           val_so_far += HOST_WIDE_INT_1U << log;
3368           break;
3369
3370         case alg_sub_t_m2:
3371           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3372           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3373                                  add_target ? add_target : accum_target);
3374           val_so_far -= HOST_WIDE_INT_1U << log;
3375           break;
3376
3377         case alg_add_t2_m:
3378           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3379                                 log, shift_subtarget, 0);
3380           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3381                                  add_target ? add_target : accum_target);
3382           val_so_far = (val_so_far << log) + 1;
3383           break;
3384
3385         case alg_sub_t2_m:
3386           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3387                                 log, shift_subtarget, 0);
3388           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3389                                  add_target ? add_target : accum_target);
3390           val_so_far = (val_so_far << log) - 1;
3391           break;
3392
3393         case alg_add_factor:
3394           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3395           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3396                                  add_target ? add_target : accum_target);
3397           val_so_far += val_so_far << log;
3398           break;
3399
3400         case alg_sub_factor:
3401           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3402           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3403                                  (add_target
3404                                   ? add_target : (optimize ? 0 : tem)));
3405           val_so_far = (val_so_far << log) - val_so_far;
3406           break;
3407
3408         default:
3409           gcc_unreachable ();
3410         }
3411
3412       if (SCALAR_INT_MODE_P (mode))
3413         {
3414           /* Write a REG_EQUAL note on the last insn so that we can cse
3415              multiplication sequences.  Note that if ACCUM is a SUBREG,
3416              we've set the inner register and must properly indicate that.  */
3417           tem = op0, nmode = mode;
3418           accum_inner = accum;
3419           if (GET_CODE (accum) == SUBREG)
3420             {
3421               accum_inner = SUBREG_REG (accum);
3422               nmode = GET_MODE (accum_inner);
3423               tem = gen_lowpart (nmode, op0);
3424             }
3425
3426           /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3427              In that case, only the low bits of accum would be guaranteed to
3428              be equal to the content of the REG_EQUAL note, the upper bits
3429              can be anything.  */
3430           if (!paradoxical_subreg_p (tem))
3431             {
3432               insn = get_last_insn ();
3433               wide_int wval_so_far
3434                 = wi::uhwi (val_so_far,
3435                             GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3436               rtx c = immed_wide_int_const (wval_so_far, nmode);
3437               set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3438                                 accum_inner);
3439             }
3440         }
3441     }
3442
3443   if (variant == negate_variant)
3444     {
3445       val_so_far = -val_so_far;
3446       accum = expand_unop (mode, neg_optab, accum, target, 0);
3447     }
3448   else if (variant == add_variant)
3449     {
3450       val_so_far = val_so_far + 1;
3451       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3452     }
3453
3454   /* Compare only the bits of val and val_so_far that are significant
3455      in the result mode, to avoid sign-/zero-extension confusion.  */
3456   nmode = GET_MODE_INNER (mode);
3457   val &= GET_MODE_MASK (nmode);
3458   val_so_far &= GET_MODE_MASK (nmode);
3459   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3460
3461   return accum;
3462 }
3463
3464 /* Perform a multiplication and return an rtx for the result.
3465    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3466    TARGET is a suggestion for where to store the result (an rtx).
3467
3468    We check specially for a constant integer as OP1.
3469    If you want this check for OP0 as well, then before calling
3470    you should swap the two operands if OP0 would be constant.  */
3471
3472 rtx
3473 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3474              int unsignedp, bool no_libcall)
3475 {
3476   enum mult_variant variant;
3477   struct algorithm algorithm;
3478   rtx scalar_op1;
3479   int max_cost;
3480   bool speed = optimize_insn_for_speed_p ();
3481   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3482
3483   if (CONSTANT_P (op0))
3484     std::swap (op0, op1);
3485
3486   /* For vectors, there are several simplifications that can be made if
3487      all elements of the vector constant are identical.  */
3488   scalar_op1 = unwrap_const_vec_duplicate (op1);
3489
3490   if (INTEGRAL_MODE_P (mode))
3491     {
3492       rtx fake_reg;
3493       HOST_WIDE_INT coeff;
3494       bool is_neg;
3495       int mode_bitsize;
3496
3497       if (op1 == CONST0_RTX (mode))
3498         return op1;
3499       if (op1 == CONST1_RTX (mode))
3500         return op0;
3501       if (op1 == CONSTM1_RTX (mode))
3502         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3503                             op0, target, 0);
3504
3505       if (do_trapv)
3506         goto skip_synth;
3507
3508       /* If mode is integer vector mode, check if the backend supports
3509          vector lshift (by scalar or vector) at all.  If not, we can't use
3510          synthetized multiply.  */
3511       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3512           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3513           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3514         goto skip_synth;
3515
3516       /* These are the operations that are potentially turned into
3517          a sequence of shifts and additions.  */
3518       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3519
3520       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3521          less than or equal in size to `unsigned int' this doesn't matter.
3522          If the mode is larger than `unsigned int', then synth_mult works
3523          only if the constant value exactly fits in an `unsigned int' without
3524          any truncation.  This means that multiplying by negative values does
3525          not work; results are off by 2^32 on a 32 bit machine.  */
3526       if (CONST_INT_P (scalar_op1))
3527         {
3528           coeff = INTVAL (scalar_op1);
3529           is_neg = coeff < 0;
3530         }
3531 #if TARGET_SUPPORTS_WIDE_INT
3532       else if (CONST_WIDE_INT_P (scalar_op1))
3533 #else
3534       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3535 #endif
3536         {
3537           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3538           /* Perfect power of 2 (other than 1, which is handled above).  */
3539           if (shift > 0)
3540             return expand_shift (LSHIFT_EXPR, mode, op0,
3541                                  shift, target, unsignedp);
3542           else
3543             goto skip_synth;
3544         }
3545       else
3546         goto skip_synth;
3547
3548       /* We used to test optimize here, on the grounds that it's better to
3549          produce a smaller program when -O is not used.  But this causes
3550          such a terrible slowdown sometimes that it seems better to always
3551          use synth_mult.  */
3552
3553       /* Special case powers of two.  */
3554       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3555           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3556         return expand_shift (LSHIFT_EXPR, mode, op0,
3557                              floor_log2 (coeff), target, unsignedp);
3558
3559       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3560
3561       /* Attempt to handle multiplication of DImode values by negative
3562          coefficients, by performing the multiplication by a positive
3563          multiplier and then inverting the result.  */
3564       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3565         {
3566           /* Its safe to use -coeff even for INT_MIN, as the
3567              result is interpreted as an unsigned coefficient.
3568              Exclude cost of op0 from max_cost to match the cost
3569              calculation of the synth_mult.  */
3570           coeff = -(unsigned HOST_WIDE_INT) coeff;
3571           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3572                                     mode, speed)
3573                       - neg_cost (speed, mode));
3574           if (max_cost <= 0)
3575             goto skip_synth;
3576
3577           /* Special case powers of two.  */
3578           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3579             {
3580               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3581                                        floor_log2 (coeff), target, unsignedp);
3582               return expand_unop (mode, neg_optab, temp, target, 0);
3583             }
3584
3585           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3586                                    max_cost))
3587             {
3588               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3589                                             &algorithm, variant);
3590               return expand_unop (mode, neg_optab, temp, target, 0);
3591             }
3592           goto skip_synth;
3593         }
3594
3595       /* Exclude cost of op0 from max_cost to match the cost
3596          calculation of the synth_mult.  */
3597       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3598       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3599         return expand_mult_const (mode, op0, coeff, target,
3600                                   &algorithm, variant);
3601     }
3602  skip_synth:
3603
3604   /* Expand x*2.0 as x+x.  */
3605   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3606       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3607     {
3608       op0 = force_reg (GET_MODE (op0), op0);
3609       return expand_binop (mode, add_optab, op0, op0,
3610                            target, unsignedp,
3611                            no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3612     }
3613
3614   /* This used to use umul_optab if unsigned, but for non-widening multiply
3615      there is no difference between signed and unsigned.  */
3616   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3617                       op0, op1, target, unsignedp,
3618                       no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3619   gcc_assert (op0 || no_libcall);
3620   return op0;
3621 }
3622
3623 /* Return a cost estimate for multiplying a register by the given
3624    COEFFicient in the given MODE and SPEED.  */
3625
3626 int
3627 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3628 {
3629   int max_cost;
3630   struct algorithm algorithm;
3631   enum mult_variant variant;
3632
3633   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3634   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3635                            mode, speed);
3636   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3637     return algorithm.cost.cost;
3638   else
3639     return max_cost;
3640 }
3641
3642 /* Perform a widening multiplication and return an rtx for the result.
3643    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3644    TARGET is a suggestion for where to store the result (an rtx).
3645    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3646    or smul_widen_optab.
3647
3648    We check specially for a constant integer as OP1, comparing the
3649    cost of a widening multiply against the cost of a sequence of shifts
3650    and adds.  */
3651
3652 rtx
3653 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3654                       int unsignedp, optab this_optab)
3655 {
3656   bool speed = optimize_insn_for_speed_p ();
3657   rtx cop1;
3658
3659   if (CONST_INT_P (op1)
3660       && GET_MODE (op0) != VOIDmode
3661       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3662                                 this_optab == umul_widen_optab))
3663       && CONST_INT_P (cop1)
3664       && (INTVAL (cop1) >= 0
3665           || HWI_COMPUTABLE_MODE_P (mode)))
3666     {
3667       HOST_WIDE_INT coeff = INTVAL (cop1);
3668       int max_cost;
3669       enum mult_variant variant;
3670       struct algorithm algorithm;
3671
3672       if (coeff == 0)
3673         return CONST0_RTX (mode);
3674
3675       /* Special case powers of two.  */
3676       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3677         {
3678           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3679           return expand_shift (LSHIFT_EXPR, mode, op0,
3680                                floor_log2 (coeff), target, unsignedp);
3681         }
3682
3683       /* Exclude cost of op0 from max_cost to match the cost
3684          calculation of the synth_mult.  */
3685       max_cost = mul_widen_cost (speed, mode);
3686       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3687                                max_cost))
3688         {
3689           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3690           return expand_mult_const (mode, op0, coeff, target,
3691                                     &algorithm, variant);
3692         }
3693     }
3694   return expand_binop (mode, this_optab, op0, op1, target,
3695                        unsignedp, OPTAB_LIB_WIDEN);
3696 }
3697 \f
3698 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3699    replace division by D, and put the least significant N bits of the result
3700    in *MULTIPLIER_PTR and return the most significant bit.
3701
3702    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3703    needed precision is in PRECISION (should be <= N).
3704
3705    PRECISION should be as small as possible so this function can choose
3706    multiplier more freely.
3707
3708    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3709    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3710
3711    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3712    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3713
3714 unsigned HOST_WIDE_INT
3715 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3716                    unsigned HOST_WIDE_INT *multiplier_ptr,
3717                    int *post_shift_ptr, int *lgup_ptr)
3718 {
3719   int lgup, post_shift;
3720   int pow, pow2;
3721
3722   /* lgup = ceil(log2(divisor)); */
3723   lgup = ceil_log2 (d);
3724
3725   gcc_assert (lgup <= n);
3726
3727   pow = n + lgup;
3728   pow2 = n + lgup - precision;
3729
3730   /* mlow = 2^(N + lgup)/d */
3731   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3732   wide_int mlow = wi::udiv_trunc (val, d);
3733
3734   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3735   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3736   wide_int mhigh = wi::udiv_trunc (val, d);
3737
3738   /* If precision == N, then mlow, mhigh exceed 2^N
3739      (but they do not exceed 2^(N+1)).  */
3740
3741   /* Reduce to lowest terms.  */
3742   for (post_shift = lgup; post_shift > 0; post_shift--)
3743     {
3744       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3745                                                        HOST_BITS_PER_WIDE_INT);
3746       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3747                                                        HOST_BITS_PER_WIDE_INT);
3748       if (ml_lo >= mh_lo)
3749         break;
3750
3751       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3752       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3753     }
3754
3755   *post_shift_ptr = post_shift;
3756   *lgup_ptr = lgup;
3757   if (n < HOST_BITS_PER_WIDE_INT)
3758     {
3759       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3760       *multiplier_ptr = mhigh.to_uhwi () & mask;
3761       return mhigh.to_uhwi () > mask;
3762     }
3763   else
3764     {
3765       *multiplier_ptr = mhigh.to_uhwi ();
3766       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3767     }
3768 }
3769
/* Return the multiplicative inverse of X modulo 2**N, i.e. the Y for
   which X * Y is congruent to 1 (mod 2**N).  X is assumed to be
   odd.  */

static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT x, int n)
{
  /* Mask selecting the low N bits; a shift by the full width has to be
     avoided when N is the whole HOST_WIDE_INT.  */
  unsigned HOST_WIDE_INT low_bits_mask = HOST_WIDE_INT_M1U;
  if (n != HOST_BITS_PER_WIDE_INT)
    low_bits_mask = (HOST_WIDE_INT_1U << n) - 1;

  /* Since X is odd, X * X == 1 (mod 2^3), so X itself is an inverse
     that is good to 3 bits.  Every refinement step doubles the number
     of significant bits in the inverse.  */
  unsigned HOST_WIDE_INT inverse = x;
  for (int good_bits = 3; good_bits < n; good_bits *= 2)
    inverse = (inverse * (2 - x * inverse)) & low_bits_mask;

  return inverse;
}
3797
3798 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3799    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3800    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3801    to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3802    become signed.
3803
3804    The result is put in TARGET if that is convenient.
3805
3806    MODE is the mode of operation.
        
        Two correction terms are combined with ADJ_OPERAND: OP1 masked by the
        sign of OP0, and OP0 masked by the sign of OP1.  Both terms are added
        when UNSIGNEDP is nonzero and subtracted when it is zero.  The rtx
        holding the final value is returned.  */
3807
3808 rtx
3809 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3810                              rtx op1, rtx target, int unsignedp)
3811 {
3812   rtx tem;
       /* Going from signed to unsigned adds the correction terms; going from
          unsigned to signed subtracts them.  */
3813   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3814
       /* Shift OP0 right by all but one bit.  NOTE(review): the trailing 0
          presumably selects an arithmetic shift, giving all-ones for a
          negative OP0 and zero otherwise -- confirm against expand_shift.
          ANDing with OP1 then yields OP1 or zero.  */
3815   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3816                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3817   tem = expand_and (mode, tem, op1, NULL_RTX);
3818   adj_operand
3819     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3820                      adj_operand);
3821
       /* Same again with the operands' roles swapped; this second step is
          the one that is offered TARGET as its destination.  */
3822   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3823                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3824   tem = expand_and (mode, tem, op0, NULL_RTX);
3825   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3826                           target);
3827
3828   return target;
3829 }
3830
3831 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.
        When MODE is word_mode the high part is taken with gen_highpart.
        Otherwise OP is shifted right by the bit size of MODE in the next
        wider mode and the result is converted back to MODE.
        NOTE(review): OP is presumably a value in the mode returned by
        GET_MODE_WIDER_MODE (MODE) -- confirm against the callers.  */
3832
3833 static rtx
3834 extract_high_half (scalar_int_mode mode, rtx op)
3835 {
3836   if (mode == word_mode)
3837     return gen_highpart (mode, op);
3838
3839   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3840
       /* Move the upper half down into the low bits, then narrow.  */
3841   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3842                      GET_MODE_BITSIZE (mode), 0, 1);
3843   return convert_modes (mode, wider_mode, op, 0);
3844 }
3845
3846 /* Like expmed_mult_highpart, but only consider using a multiplication
3847    optab.  OP1 is an rtx for the constant operand.
        
        MODE, OP0, TARGET, UNSIGNEDP and MAX_COST are as for
        expmed_mult_highpart.  Five strategies are tried in order, each only
        if its estimated cost is below MAX_COST:
          1. a highpart multiply of the requested signedness;
          2. a highpart multiply of the opposite signedness, then adjusted;
          3. a widening multiply of the requested signedness;
          4. a plain multiply in the wider mode;
          5. a widening multiply of the opposite signedness, then adjusted.
        The first one that succeeds supplies the result.  Return 0 if none
        of them could be used.  */
3848
3849 static rtx
3850 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3851                             rtx target, int unsignedp, int max_cost)
3852 {
       /* The constant expressed in MODE.  Every strategy uses this except
          the plain wider-mode multiply, which converts OP1 itself.  */
3853   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3854   optab moptab;
3855   rtx tem;
3856   int size;
3857   bool speed = optimize_insn_for_speed_p ();
3858
3859   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3860
3861   size = GET_MODE_BITSIZE (mode);
3862
3863   /* Firstly, try using a multiplication insn that only generates the needed
3864      high part of the product, and in the sign flavor of unsignedp.  */
3865   if (mul_highpart_cost (speed, mode) < max_cost)
3866     {
3867       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3868       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3869                           unsignedp, OPTAB_DIRECT);
3870       if (tem)
3871         return tem;
3872     }
3873
3874   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3875      Need to adjust the result after the multiplication.
          The extra cost terms presumably model expand_mult_highpart_adjust,
          which emits two shifts, two ANDs and two additions/subtractions.
          NOTE(review): UNSIGNEDP is passed to expand_binop here although
          MOPTAB is the opposite flavor, whereas the last strategy below
          passes !UNSIGNEDP; confirm that the difference is intended.  */
3876   if (size - 1 < BITS_PER_WORD
3877       && (mul_highpart_cost (speed, mode)
3878           + 2 * shift_cost (speed, mode, size-1)
3879           + 4 * add_cost (speed, mode) < max_cost))
3880     {
3881       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3882       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3883                           unsignedp, OPTAB_DIRECT);
3884       if (tem)
3885         /* We used the wrong signedness.  Adjust the result.  */
3886         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3887                                             tem, unsignedp);
3888     }
3889
3890   /* Try widening multiplication.  */
3891   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3892   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3893       && mul_widen_cost (speed, wider_mode) < max_cost)
3894     {
3895       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3896                           unsignedp, OPTAB_WIDEN);
3897       if (tem)
3898         return extract_high_half (mode, tem);
3899     }
3900
3901   /* Try widening the mode and perform a non-widening multiplication.  */
3902   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3903       && size - 1 < BITS_PER_WORD
3904       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3905           < max_cost))
3906     {
3907       rtx_insn *insns;
3908       rtx wop0, wop1;
3909
3910       /* We need to widen the operands, for example to ensure the
3911          constant multiplier is correctly sign or zero extended.
3912          Use a sequence to clean-up any instructions emitted by
3913          the conversions if things don't work out.  */
3914       start_sequence ();
3915       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3916       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3917       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3918                           unsignedp, OPTAB_WIDEN);
3919       insns = get_insns ();
3920       end_sequence ();
3921
           /* Only emit the collected insns if the multiply succeeded;
              otherwise they are simply dropped.  */
3922       if (tem)
3923         {
3924           emit_insn (insns);
3925           return extract_high_half (mode, tem);
3926         }
3927     }
3928
3929   /* Try widening multiplication of opposite signedness, and adjust.  */
3930   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3931   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3932       && size - 1 < BITS_PER_WORD
3933       && (mul_widen_cost (speed, wider_mode)
3934           + 2 * shift_cost (speed, mode, size-1)
3935           + 4 * add_cost (speed, mode) < max_cost))
3936     {
3937       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3938                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3939       if (tem != 0)
3940         {
3941           tem = extract_high_half (mode, tem);
3942           /* We used the wrong signedness.  Adjust the result.  */
3943           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3944                                               target, unsignedp);
3945         }
3946     }
3947
       /* No strategy was both available and cheap enough.  */
3948   return 0;
3949 }
3950
3951 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3952    putting the high half of the result in TARGET if that is convenient,
3953    and return where the result is.  If the operation cannot be performed,
3954    0 is returned.
3955
3956    MODE is the mode of operation and result.
3957
3958    UNSIGNEDP nonzero means unsigned multiply.
3959
3960    MAX_COST is the total allowed cost for the expanded RTL.
        
        A shift/add sequence in the next wider mode is considered first.  If
        one fits the budget, the multiplication optabs are tried with that
        sequence's cost as their limit, and the sequence itself is emitted
        only if they fail.  If no sequence fits, the optabs are tried with
        MAX_COST as the limit.  */
3961
3962 static rtx
3963 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3964                       rtx target, int unsignedp, int max_cost)
3965 {
3966   unsigned HOST_WIDE_INT cnst1;
3967   int extra_cost;
3968   bool sign_adjust = false;
3969   enum mult_variant variant;
3970   struct algorithm alg;
3971   rtx tem;
3972   bool speed = optimize_insn_for_speed_p ();
3973
3974   /* The constant is handled as a HOST_WIDE_INT below, so MODE must
          satisfy HWI_COMPUTABLE_MODE_P.  */
3975   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3976
       /* The constant reduced to the bits of MODE, i.e. read as unsigned.  */
3977   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3978
3979   /* We can't optimize modes wider than BITS_PER_WORD.
3980      ??? We might be able to perform double-word arithmetic if
3981      mode == word_mode, however all the cost calculations in
3982      synth_mult etc. assume single-word operations.  */
3983   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3984   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3985     return expmed_mult_highpart_optab (mode, op0, op1, target,
3986                                        unsignedp, max_cost);
3987
       /* Cost of the work done besides the wider-mode multiply itself:
          a shift by the mode size minus one, plus an addition when the
          sign has to be corrected below.  */
3988   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3989
3990   /* Check whether we try to multiply by a negative constant.  */
3991   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3992     {
3993       sign_adjust = true;
3994       extra_cost += add_cost (speed, mode);
3995     }
3996
3997   /* See whether shift/add multiplication is cheap enough.  */
3998   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3999                            max_cost - extra_cost))
4000     {
4001       /* See whether the specialized multiplication optabs are
4002          cheaper than the shift/add version.  */
4003       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
4004                                         alg.cost.cost + extra_cost);
4005       if (tem)
4006         return tem;
4007
           /* Otherwise multiply in the wider mode using the chosen
              shift/add algorithm and keep the upper half.  */
4008       tem = convert_to_mode (wider_mode, op0, unsignedp);
4009       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
4010       tem = extract_high_half (mode, tem);
4011
4012       /* Adjust result for signedness.  CNST1 was taken as unsigned, so
              for a negative constant it is too large by 2**bitsize (MODE),
              which adds OP0 to the high half; subtract it again.  */
4013       if (sign_adjust)
4014         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
4015
4016       return tem;
4017     }
4018   return expmed_mult_highpart_optab (mode, op0, op1, target,
4019                                      unsignedp, max_cost);
4020 }
4021
4022
4023 /* Expand signed modulus of OP0 by a power of two D in mode MODE.
        Return an rtx holding the remainder.
        
        When branches are costly and we optimize for speed, a branch-free
        sequence built around a sign mask of OP0 is tried first.  Otherwise,
        or if no sign mask could be produced, a sequence with one
        conditional jump is emitted.  */
4024
4025 static rtx
4026 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4027 {
4028   rtx result, temp, shift;
4029   rtx_code_label *label;
4030   int logd;
4031   int prec = GET_MODE_PRECISION (mode);
4032
4033   logd = floor_log2 (d);
4034   result = gen_reg_rtx (mode);
4035
4036   /* Avoid conditional branches when they're expensive.  */
4037   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4038       && optimize_insn_for_speed_p ())
4039     {
           /* NOTE(review): SIGNMASK is presumably -1 when OP0 is negative
              and 0 otherwise, as requested by the final -1 argument --
              confirm against emit_store_flag.  */
4040       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4041                                       mode, 0, -1);
4042       if (signmask)
4043         {
               /* MASKLOW has the low LOGD bits set, i.e. D - 1.  */
4044           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4045           signmask = force_reg (mode, signmask);
4046           shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4047
4048           /* Use the rtx_cost of a LSHIFTRT instruction to determine
4049              which instruction sequence to use.  If logical right shifts
4050              are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4051              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
4052
               /* This rtx is built only to query its cost; it is never
                  emitted, and TEMP is overwritten in both arms below.  */
4053           temp = gen_rtx_LSHIFTRT (mode, result, shift);
4054           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4055               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4056                   > COSTS_N_INSNS (2)))
4057             {
                   /* (OP0 ^ SIGNMASK) - SIGNMASK conditionally negates OP0.
                      Mask the low bits of that, then apply the same
                      conditional negation again to restore the sign.  */
4058               temp = expand_binop (mode, xor_optab, op0, signmask,
4059                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4060               temp = expand_binop (mode, sub_optab, temp, signmask,
4061                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4062               temp = expand_binop (mode, and_optab, temp,
4063                                    gen_int_mode (masklow, mode),
4064                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4065               temp = expand_binop (mode, xor_optab, temp, signmask,
4066                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4067               temp = expand_binop (mode, sub_optab, temp, signmask,
4068                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4069             }
4070           else
4071             {
                   /* Shifting SIGNMASK right logically leaves its low LOGD
                      bits, i.e. D - 1 for a negative OP0 and 0 otherwise.
                      Add that bias, mask, and subtract the bias again.  */
4072               signmask = expand_binop (mode, lshr_optab, signmask, shift,
4073                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
4074               signmask = force_reg (mode, signmask);
4075
4076               temp = expand_binop (mode, add_optab, op0, signmask,
4077                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4078               temp = expand_binop (mode, and_optab, temp,
4079                                    gen_int_mode (masklow, mode),
4080                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4081               temp = expand_binop (mode, sub_optab, temp, signmask,
4082                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4083             }
4084           return temp;
4085         }
4086     }
4087
4088   /* Mask contains the mode's signbit and the significant bits of the
4089      modulus.  By including the signbit in the operation, many targets
4090      can avoid an explicit compare operation in the following comparison
4091      against zero.  */
4092   wide_int mask = wi::mask (logd, false, prec);
4093   mask = wi::set_bit (mask, prec - 1);
4094
4095   temp = expand_binop (mode, and_optab, op0,
4096                        immed_wide_int_const (mask, mode),
4097                        result, 1, OPTAB_LIB_WIDEN);
4098   if (temp != result)
4099     emit_move_insn (result, temp);
4100
       /* A nonnegative RESULT is already the remainder; skip the fixup.  */
4101   label = gen_label_rtx ();
4102   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4103
       /* OP0 was negative: compute ((RESULT - 1) | MASK) + 1 with a new
          MASK.  NOTE(review): wi::mask with a true second argument
          presumably yields the complement of the low LOGD bits, so that
          the OR sign-extends the remainder -- confirm.  The decrement and
          increment around the OR make low bits of zero produce zero.  */
4104   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4105                        0, OPTAB_LIB_WIDEN);
4106
4107   mask = wi::mask (logd, true, prec);
4108   temp = expand_binop (mode, ior_optab, temp,
4109                        immed_wide_int_const (mask, mode),
4110                        result, 1, OPTAB_LIB_WIDEN);
4111   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4112                        0, OPTAB_LIB_WIDEN);
4113   if (temp != result)
4114     emit_move_insn (result, temp);
4115   emit_label (label);
4116   return result;
4117 }
4118
4119 /* Expand signed division of OP0 by a power of two D in mode MODE.
4120    This routine is only called for positive values of D.
        Return an rtx holding the quotient.
        
        Every strategy below adds a bias to OP0 when it is negative (D - 1,
        which is 1 in the D == 2 case) and then shifts right by log2 (D).
        The strategies differ only in how the conditional bias is formed:
        a 0/1 store-flag value, a conditional move, a sign mask reduced to
        D - 1, or, as the last resort, a conditional jump.  */
4121
4122 static rtx
4123 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4124 {
4125   rtx temp;
4126   rtx_code_label *label;
4127   int logd;
4128
4129   logd = floor_log2 (d);
4130
       /* For D == 2 the bias is 1.  NOTE(review): the store-flag value is
          presumably 1 for a negative OP0 and 0 otherwise, per the final
          argument 1 -- confirm against emit_store_flag.  */
4131   if (d == 2
4132       && BRANCH_COST (optimize_insn_for_speed_p (),
4133                       false) >= 1)
4134     {
4135       temp = gen_reg_rtx (mode);
4136       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4137       if (temp != NULL_RTX)
4138         {
4139           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4140                                0, OPTAB_LIB_WIDEN);
4141           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4142         }
4143     }
4144
4145   if (HAVE_conditional_move
4146       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4147     {
4148       rtx temp2;
4149
           /* Build the insns in a sequence so that they are only emitted
              if the conditional move can be generated.  */
4150       start_sequence ();
4151       temp2 = copy_to_mode_reg (mode, op0);
4152       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4153                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
4154       temp = force_reg (mode, temp);
4155
4156       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4157       temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
4158                                      temp, temp2, mode, 0);
4159       if (temp2)
4160         {
4161           rtx_insn *seq = get_insns ();
4162           end_sequence ();
4163           emit_insn (seq);
4164           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4165         }
           /* The conditional move failed; close the sequence without
              emitting anything from it.  */
4166       end_sequence ();
4167     }
4168
4169   if (BRANCH_COST (optimize_insn_for_speed_p (),
4170                    false) >= 2)
4171     {
4172       int ushift = GET_MODE_BITSIZE (mode) - logd;
4173
4174       temp = gen_reg_rtx (mode);
4175       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4176       if (temp != NULL_RTX)
4177         {
               /* Reduce the sign mask to the bias: either AND it with
                  D - 1, or shift it right by USHIFT so that only its low
                  LOGD bits remain.  The shift is used only for modes
                  narrower than a word and when it costs at most one
                  insn.  */
4178           if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4179               || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4180               > COSTS_N_INSNS (1))
4181             temp = expand_binop (mode, and_optab, temp,
4182                                  gen_int_mode (d - 1, mode),
4183                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
4184           else
4185             temp = expand_shift (RSHIFT_EXPR, mode, temp,
4186                                  ushift, NULL_RTX, 1);
4187           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4188                                0, OPTAB_LIB_WIDEN);
4189           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4190         }
4191     }
4192
       /* Fallback: jump over the addition of D - 1 when the copy of OP0 is
          nonnegative, then shift.  */
4193   label = gen_label_rtx ();
4194   temp = copy_to_mode_reg (mode, op0);
4195   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4196   expand_inc (temp, gen_int_mode (d - 1, mode));
4197   emit_label (label);
4198   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4199 }
4200 \f
4201 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4202    if that is convenient, and returning where the result is.
4203    You may request either the quotient or the remainder as the result;
4204    specify REM_FLAG nonzero to get the remainder.
4205
4206    CODE is the expression code for which kind of division this is;
4207    it controls how rounding is done.  MODE is the machine mode to use.
4208    UNSIGNEDP nonzero means do unsigned division.  */
4209
4210 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4211    and then correct it by or'ing in missing high bits
4212    if result of ANDI is nonzero.
4213    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4214    This could optimize to a bfexts instruction.
4215    But C doesn't use these operations, so their optimizations are
4216    left for later.  */
4217 /* ??? For modulo, we don't actually need the highpart of the first product,
4218    the low part will do nicely.  And for small divisors, the second multiply
4219    can also be a low-part only multiply or even be completely left out.
4220    E.g. to calculate the remainder of a division by 3 with a 32 bit
4221    multiply, multiply with 0x55555556 and extract the upper two bits;
4222    the result is exact for inputs up to 0x1fffffff.
4223    The input range can be reduced by using cross-sum rules.
4224    For odd divisors >= 3, the following table gives right shift counts
4225    so that if a number is shifted by an integer multiple of the given
4226    amount, the remainder stays the same:
4227    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4228    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4229    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4230    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4231    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4232
4233    Cross-sum rules for even numbers can be derived by leaving as many bits
4234    to the right alone as the divisor has zeros to the right.
4235    E.g. if x is an unsigned 32 bit number:
4236    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4237    */
4238
4239 rtx
4240 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4241                rtx op0, rtx op1, rtx target, int unsignedp,
4242                enum optab_methods methods)
4243 {
4244   machine_mode compute_mode;
4245   rtx tquotient;
4246   rtx quotient = 0, remainder = 0;
4247   rtx_insn *last;
4248   rtx_insn *insn;
4249   optab optab1, optab2;
4250   int op1_is_constant, op1_is_pow2 = 0;
4251   int max_cost, extra_cost;
4252   static HOST_WIDE_INT last_div_const = 0;
4253   bool speed = optimize_insn_for_speed_p ();
4254
4255   op1_is_constant = CONST_INT_P (op1);
4256   if (op1_is_constant)
4257     {
4258       wide_int ext_op1 = rtx_mode_t (op1, mode);
4259       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4260                      || (! unsignedp
4261                          && wi::popcount (wi::neg (ext_op1)) == 1));
4262     }
4263
4264   /*
4265      This is the structure of expand_divmod:
4266
4267      First comes code to fix up the operands so we can perform the operations
4268      correctly and efficiently.
4269
4270      Second comes a switch statement with code specific for each rounding mode.
4271      For some special operands this code emits all RTL for the desired
4272      operation, for other cases, it generates only a quotient and stores it in
4273      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4274      to indicate that it has not done anything.
4275
4276      Last comes code that finishes the operation.  If QUOTIENT is set and
4277      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4278      QUOTIENT is not set, it is computed using trunc rounding.
4279
4280      We try to generate special code for division and remainder when OP1 is a
4281      constant.  If |OP1| = 2**n we can use shifts and some other fast
4282      operations.  For other values of OP1, we compute a carefully selected
4283      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4284      by m.
4285
4286      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4287      half of the product.  Different strategies for generating the product are
4288      implemented in expmed_mult_highpart.
4289
4290      If what we actually want is the remainder, we generate that by another
4291      by-constant multiplication and a subtraction.  */
4292
4293   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4294      code below will malfunction if we are, so check here and handle
4295      the special case if so.  */
4296   if (op1 == const1_rtx)
4297     return rem_flag ? const0_rtx : op0;
4298
4299     /* When dividing by -1, we could get an overflow.
4300      negv_optab can handle overflows.  */
4301   if (! unsignedp && op1 == constm1_rtx)
4302     {
4303       if (rem_flag)
4304         return const0_rtx;
4305       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4306                           ? negv_optab : neg_optab, op0, target, 0);
4307     }
4308
4309   if (target
4310       /* Don't use the function value register as a target
4311          since we have to read it as well as write it,
4312          and function-inlining gets confused by this.  */
4313       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4314           /* Don't clobber an operand while doing a multi-step calculation.  */
4315           || ((rem_flag || op1_is_constant)
4316               && (reg_mentioned_p (target, op0)
4317                   || (MEM_P (op0) && MEM_P (target))))
4318           || reg_mentioned_p (target, op1)
4319           || (MEM_P (op1) && MEM_P (target))))
4320     target = 0;
4321
4322   /* Get the mode in which to perform this computation.  Normally it will
4323      be MODE, but sometimes we can't do the desired operation in MODE.
4324      If so, pick a wider mode in which we can do the operation.  Convert
4325      to that mode at the start to avoid repeated conversions.
4326
4327      First see what operations we need.  These depend on the expression
4328      we are evaluating.  (We assume that divxx3 insns exist under the
4329      same conditions that modxx3 insns and that these insns don't normally
4330      fail.  If these assumptions are not correct, we may generate less
4331      efficient code in some cases.)
4332
4333      Then see if we find a mode in which we can open-code that operation
4334      (either a division, modulus, or shift).  Finally, check for the smallest
4335      mode for which we can do the operation with a library call.  */
4336
4337   /* We might want to refine this now that we have division-by-constant
4338      optimization.  Since expmed_mult_highpart tries so many variants, it is
4339      not straightforward to generalize this.  Maybe we should make an array
4340      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4341
4342   optab1 = (op1_is_pow2
4343             ? (unsignedp ? lshr_optab : ashr_optab)
4344             : (unsignedp ? udiv_optab : sdiv_optab));
4345   optab2 = (op1_is_pow2 ? optab1
4346             : (unsignedp ? udivmod_optab : sdivmod_optab));
4347
4348   if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4349     {
4350       FOR_EACH_MODE_FROM (compute_mode, mode)
4351       if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4352           || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4353         break;
4354
4355       if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4356         FOR_EACH_MODE_FROM (compute_mode, mode)
4357           if (optab_libfunc (optab1, compute_mode)
4358               || optab_libfunc (optab2, compute_mode))
4359             break;
4360     }
4361   else
4362     compute_mode = mode;
4363
4364   /* If we still couldn't find a mode, use MODE, but expand_binop will
4365      probably die.  */
4366   if (compute_mode == VOIDmode)
4367     compute_mode = mode;
4368
4369   if (target && GET_MODE (target) == compute_mode)
4370     tquotient = target;
4371   else
4372     tquotient = gen_reg_rtx (compute_mode);
4373
4374 #if 0
4375   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4376      (mode), and thereby get better code when OP1 is a constant.  Do that
4377      later.  It will require going over all usages of SIZE below.  */
4378   size = GET_MODE_BITSIZE (mode);
4379 #endif
4380
4381   /* Only deduct something for a REM if the last divide done was
4382      for a different constant.   Then set the constant of the last
4383      divide.  */
4384   max_cost = (unsignedp
4385               ? udiv_cost (speed, compute_mode)
4386               : sdiv_cost (speed, compute_mode));
4387   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4388                      && INTVAL (op1) == last_div_const))
4389     max_cost -= (mul_cost (speed, compute_mode)
4390                  + add_cost (speed, compute_mode));
4391
4392   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4393
4394   /* Now convert to the best mode to use.  */
4395   if (compute_mode != mode)
4396     {
4397       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4398       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4399
4400       /* convert_modes may have placed op1 into a register, so we
4401          must recompute the following.  */
4402       op1_is_constant = CONST_INT_P (op1);
4403       if (op1_is_constant)
4404         {
4405           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4406           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4407                          || (! unsignedp
4408                              && wi::popcount (wi::neg (ext_op1)) == 1));
4409         }
4410       else
4411         op1_is_pow2 = 0;
4412     }
4413
4414   /* If one of the operands is a volatile MEM, copy it into a register.  */
4415
4416   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4417     op0 = force_reg (compute_mode, op0);
4418   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4419     op1 = force_reg (compute_mode, op1);
4420
4421   /* If we need the remainder or if OP1 is constant, we need to
4422      put OP0 in a register in case it has any queued subexpressions.  */
4423   if (rem_flag || op1_is_constant)
4424     op0 = force_reg (compute_mode, op0);
4425
4426   last = get_last_insn ();
4427
4428   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4429   if (unsignedp)
4430     {
4431       if (code == FLOOR_DIV_EXPR)
4432         code = TRUNC_DIV_EXPR;
4433       if (code == FLOOR_MOD_EXPR)
4434         code = TRUNC_MOD_EXPR;
4435       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4436         code = TRUNC_DIV_EXPR;
4437     }
4438
4439   if (op1 != const0_rtx)
4440     switch (code)
4441       {
4442       case TRUNC_MOD_EXPR:
4443       case TRUNC_DIV_EXPR:
4444         if (op1_is_constant)
4445           {
4446             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4447             int size = GET_MODE_BITSIZE (int_mode);
4448             if (unsignedp)
4449               {
4450                 unsigned HOST_WIDE_INT mh, ml;
4451                 int pre_shift, post_shift;
4452                 int dummy;
4453                 wide_int wd = rtx_mode_t (op1, int_mode);
4454                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4455
4456                 if (wi::popcount (wd) == 1)
4457                   {
4458                     pre_shift = floor_log2 (d);
4459                     if (rem_flag)
4460                       {
4461                         unsigned HOST_WIDE_INT mask
4462                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4463                         remainder
4464                           = expand_binop (int_mode, and_optab, op0,
4465                                           gen_int_mode (mask, int_mode),
4466                                           remainder, 1, methods);
4467                         if (remainder)
4468                           return gen_lowpart (mode, remainder);
4469                       }
4470                     quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4471                                              pre_shift, tquotient, 1);
4472                   }
4473                 else if (size <= HOST_BITS_PER_WIDE_INT)
4474                   {
4475                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4476                       {
4477                         /* Most significant bit of divisor is set; emit an scc
4478                            insn.  */
4479                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4480                                                           int_mode, 1, 1);
4481                       }
4482                     else
4483                       {
4484                         /* Find a suitable multiplier and right shift count
4485                            instead of multiplying with D.  */
4486
4487                         mh = choose_multiplier (d, size, size,
4488                                                 &ml, &post_shift, &dummy);
4489
4490                         /* If the suggested multiplier is more than SIZE bits,
4491                            we can do better for even divisors, using an
4492                            initial right shift.  */
4493                         if (mh != 0 && (d & 1) == 0)
4494                           {
4495                             pre_shift = ctz_or_zero (d);
4496                             mh = choose_multiplier (d >> pre_shift, size,
4497                                                     size - pre_shift,
4498                                                     &ml, &post_shift, &dummy);
4499                             gcc_assert (!mh);
4500                           }
4501                         else
4502                           pre_shift = 0;
4503
4504                         if (mh != 0)
4505                           {
4506                             rtx t1, t2, t3, t4;
4507
4508                             if (post_shift - 1 >= BITS_PER_WORD)
4509                               goto fail1;
4510
4511                             extra_cost
4512                               = (shift_cost (speed, int_mode, post_shift - 1)
4513                                  + shift_cost (speed, int_mode, 1)
4514                                  + 2 * add_cost (speed, int_mode));
4515                             t1 = expmed_mult_highpart
4516                               (int_mode, op0, gen_int_mode (ml, int_mode),
4517                                NULL_RTX, 1, max_cost - extra_cost);
4518                             if (t1 == 0)
4519                               goto fail1;
4520                             t2 = force_operand (gen_rtx_MINUS (int_mode,
4521                                                                op0, t1),
4522                                                 NULL_RTX);
4523                             t3 = expand_shift (RSHIFT_EXPR, int_mode,
4524                                                t2, 1, NULL_RTX, 1);
4525                             t4 = force_operand (gen_rtx_PLUS (int_mode,
4526                                                               t1, t3),
4527                                                 NULL_RTX);
4528                             quotient = expand_shift
4529                               (RSHIFT_EXPR, int_mode, t4,
4530                                post_shift - 1, tquotient, 1);
4531                           }
4532                         else
4533                           {
4534                             rtx t1, t2;
4535
4536                             if (pre_shift >= BITS_PER_WORD
4537                                 || post_shift >= BITS_PER_WORD)
4538                               goto fail1;
4539
4540                             t1 = expand_shift
4541                               (RSHIFT_EXPR, int_mode, op0,
4542                                pre_shift, NULL_RTX, 1);
4543                             extra_cost
4544                               = (shift_cost (speed, int_mode, pre_shift)
4545                                  + shift_cost (speed, int_mode, post_shift));
4546                             t2 = expmed_mult_highpart
4547                               (int_mode, t1,
4548                                gen_int_mode (ml, int_mode),
4549                                NULL_RTX, 1, max_cost - extra_cost);
4550                             if (t2 == 0)
4551                               goto fail1;
4552                             quotient = expand_shift
4553                               (RSHIFT_EXPR, int_mode, t2,
4554                                post_shift, tquotient, 1);
4555                           }
4556                       }
4557                   }
4558                 else            /* Too wide mode to use tricky code */
4559                   break;
4560
4561                 insn = get_last_insn ();
4562                 if (insn != last)
4563                   set_dst_reg_note (insn, REG_EQUAL,
4564                                     gen_rtx_UDIV (int_mode, op0, op1),
4565                                     quotient);
4566               }
4567             else                /* TRUNC_DIV, signed */
4568               {
4569                 unsigned HOST_WIDE_INT ml;
4570                 int lgup, post_shift;
4571                 rtx mlr;
4572                 HOST_WIDE_INT d = INTVAL (op1);
4573                 unsigned HOST_WIDE_INT abs_d;
4574
4575                 /* Not prepared to handle division/remainder by
4576                    0xffffffffffffffff8000000000000000 etc.  */
4577                 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4578                   break;
4579
4580                 /* Since d might be INT_MIN, we have to cast to
4581                    unsigned HOST_WIDE_INT before negating to avoid
4582                    undefined signed overflow.  */
4583                 abs_d = (d >= 0
4584                          ? (unsigned HOST_WIDE_INT) d
4585                          : - (unsigned HOST_WIDE_INT) d);
4586
4587                 /* n rem d = n rem -d */
4588                 if (rem_flag && d < 0)
4589                   {
4590                     d = abs_d;
4591                     op1 = gen_int_mode (abs_d, int_mode);
4592                   }
4593
4594                 if (d == 1)
4595                   quotient = op0;
4596                 else if (d == -1)
4597                   quotient = expand_unop (int_mode, neg_optab, op0,
4598                                           tquotient, 0);
4599                 else if (size <= HOST_BITS_PER_WIDE_INT
4600                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4601                   {
4602                     /* This case is not handled correctly below.  */
4603                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4604                                                 int_mode, 1, 1);
4605                     if (quotient == 0)
4606                       goto fail1;
4607                   }
4608                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4609                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4610                          && (rem_flag
4611                              ? smod_pow2_cheap (speed, int_mode)
4612                              : sdiv_pow2_cheap (speed, int_mode))
4613                          /* We assume that cheap metric is true if the
4614                             optab has an expander for this mode.  */
4615                          && ((optab_handler ((rem_flag ? smod_optab
4616                                               : sdiv_optab),
4617                                              int_mode)
4618                               != CODE_FOR_nothing)
4619                              || (optab_handler (sdivmod_optab, int_mode)
4620                                  != CODE_FOR_nothing)))
4621                   ;
4622                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4623                   {
4624                     if (rem_flag)
4625                       {
4626                         remainder = expand_smod_pow2 (int_mode, op0, d);
4627                         if (remainder)
4628                           return gen_lowpart (mode, remainder);
4629                       }
4630
4631                     if (sdiv_pow2_cheap (speed, int_mode)
4632                         && ((optab_handler (sdiv_optab, int_mode)
4633                              != CODE_FOR_nothing)
4634                             || (optab_handler (sdivmod_optab, int_mode)
4635                                 != CODE_FOR_nothing)))
4636                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4637                                                 int_mode, op0,
4638                                                 gen_int_mode (abs_d,
4639                                                               int_mode),
4640                                                 NULL_RTX, 0);
4641                     else
4642                       quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4643
4644                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4645                        negate the quotient.  */
4646                     if (d < 0)
4647                       {
4648                         insn = get_last_insn ();
4649                         if (insn != last
4650                             && abs_d < (HOST_WIDE_INT_1U
4651                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4652                           set_dst_reg_note (insn, REG_EQUAL,
4653                                             gen_rtx_DIV (int_mode, op0,
4654                                                          gen_int_mode
4655                                                            (abs_d,
4656                                                             int_mode)),
4657                                             quotient);
4658
4659                         quotient = expand_unop (int_mode, neg_optab,
4660                                                 quotient, quotient, 0);
4661                       }
4662                   }
4663                 else if (size <= HOST_BITS_PER_WIDE_INT)
4664                   {
4665                     choose_multiplier (abs_d, size, size - 1,
4666                                        &ml, &post_shift, &lgup);
4667                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4668                       {
4669                         rtx t1, t2, t3;
4670
4671                         if (post_shift >= BITS_PER_WORD
4672                             || size - 1 >= BITS_PER_WORD)
4673                           goto fail1;
4674
4675                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4676                                       + shift_cost (speed, int_mode, size - 1)
4677                                       + add_cost (speed, int_mode));
4678                         t1 = expmed_mult_highpart
4679                           (int_mode, op0, gen_int_mode (ml, int_mode),
4680                            NULL_RTX, 0, max_cost - extra_cost);
4681                         if (t1 == 0)
4682                           goto fail1;
4683                         t2 = expand_shift
4684                           (RSHIFT_EXPR, int_mode, t1,
4685                            post_shift, NULL_RTX, 0);
4686                         t3 = expand_shift
4687                           (RSHIFT_EXPR, int_mode, op0,
4688                            size - 1, NULL_RTX, 0);
4689                         if (d < 0)
4690                           quotient
4691                             = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4692                                              tquotient);
4693                         else
4694                           quotient
4695                             = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4696                                              tquotient);
4697                       }
4698                     else
4699                       {
4700                         rtx t1, t2, t3, t4;
4701
4702                         if (post_shift >= BITS_PER_WORD
4703                             || size - 1 >= BITS_PER_WORD)
4704                           goto fail1;
4705
4706                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4707                         mlr = gen_int_mode (ml, int_mode);
4708                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4709                                       + shift_cost (speed, int_mode, size - 1)
4710                                       + 2 * add_cost (speed, int_mode));
4711                         t1 = expmed_mult_highpart (int_mode, op0, mlr,
4712                                                    NULL_RTX, 0,
4713                                                    max_cost - extra_cost);
4714                         if (t1 == 0)
4715                           goto fail1;
4716                         t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4717                                             NULL_RTX);
4718                         t3 = expand_shift
4719                           (RSHIFT_EXPR, int_mode, t2,
4720                            post_shift, NULL_RTX, 0);
4721                         t4 = expand_shift
4722                           (RSHIFT_EXPR, int_mode, op0,
4723                            size - 1, NULL_RTX, 0);
4724                         if (d < 0)
4725                           quotient
4726                             = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4727                                              tquotient);
4728                         else
4729                           quotient
4730                             = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4731                                              tquotient);
4732                       }
4733                   }
4734                 else            /* Too wide mode to use tricky code */
4735                   break;
4736
4737                 insn = get_last_insn ();
4738                 if (insn != last)
4739                   set_dst_reg_note (insn, REG_EQUAL,
4740                                     gen_rtx_DIV (int_mode, op0, op1),
4741                                     quotient);
4742               }
4743             break;
4744           }
4745       fail1:
4746         delete_insns_since (last);
4747         break;
4748
4749       case FLOOR_DIV_EXPR:
4750       case FLOOR_MOD_EXPR:
4751       /* We will come here only for signed operations.  */
4752         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4753           {
4754             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4755             int size = GET_MODE_BITSIZE (int_mode);
4756             unsigned HOST_WIDE_INT mh, ml;
4757             int pre_shift, lgup, post_shift;
4758             HOST_WIDE_INT d = INTVAL (op1);
4759
4760             if (d > 0)
4761               {
4762                 /* We could just as easily deal with negative constants here,
4763                    but it does not seem worth the trouble for GCC 2.6.  */
4764                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4765                   {
4766                     pre_shift = floor_log2 (d);
4767                     if (rem_flag)
4768                       {
4769                         unsigned HOST_WIDE_INT mask
4770                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4771                         remainder = expand_binop
4772                           (int_mode, and_optab, op0,
4773                            gen_int_mode (mask, int_mode),
4774                            remainder, 0, methods);
4775                         if (remainder)
4776                           return gen_lowpart (mode, remainder);
4777                       }
4778                     quotient = expand_shift
4779                       (RSHIFT_EXPR, int_mode, op0,
4780                        pre_shift, tquotient, 0);
4781                   }
4782                 else
4783                   {
4784                     rtx t1, t2, t3, t4;
4785
4786                     mh = choose_multiplier (d, size, size - 1,
4787                                             &ml, &post_shift, &lgup);
4788                     gcc_assert (!mh);
4789
4790                     if (post_shift < BITS_PER_WORD
4791                         && size - 1 < BITS_PER_WORD)
4792                       {
4793                         t1 = expand_shift
4794                           (RSHIFT_EXPR, int_mode, op0,
4795                            size - 1, NULL_RTX, 0);
4796                         t2 = expand_binop (int_mode, xor_optab, op0, t1,
4797                                            NULL_RTX, 0, OPTAB_WIDEN);
4798                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4799                                       + shift_cost (speed, int_mode, size - 1)
4800                                       + 2 * add_cost (speed, int_mode));
4801                         t3 = expmed_mult_highpart
4802                           (int_mode, t2, gen_int_mode (ml, int_mode),
4803                            NULL_RTX, 1, max_cost - extra_cost);
4804                         if (t3 != 0)
4805                           {
4806                             t4 = expand_shift
4807                               (RSHIFT_EXPR, int_mode, t3,
4808                                post_shift, NULL_RTX, 1);
4809                             quotient = expand_binop (int_mode, xor_optab,
4810                                                      t4, t1, tquotient, 0,
4811                                                      OPTAB_WIDEN);
4812                           }
4813                       }
4814                   }
4815               }
4816             else
4817               {
4818                 rtx nsign, t1, t2, t3, t4;
4819                 t1 = force_operand (gen_rtx_PLUS (int_mode,
4820                                                   op0, constm1_rtx), NULL_RTX);
4821                 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4822                                    0, OPTAB_WIDEN);
4823                 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4824                                       size - 1, NULL_RTX, 0);
4825                 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4826                                     NULL_RTX);
4827                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4828                                     NULL_RTX, 0);
4829                 if (t4)
4830                   {
4831                     rtx t5;
4832                     t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4833                                       NULL_RTX, 0);
4834                     quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4835                                               tquotient);
4836                   }
4837               }
4838           }
4839
4840         if (quotient != 0)
4841           break;
4842         delete_insns_since (last);
4843
4844         /* Try using an instruction that produces both the quotient and
4845            remainder, using truncation.  We can easily compensate the quotient
4846            or remainder to get floor rounding, once we have the remainder.
4847            Notice that we also compute the final remainder value here,
4848            and return the result right away.  */
4849         if (target == 0 || GET_MODE (target) != compute_mode)
4850           target = gen_reg_rtx (compute_mode);
4851
4852         if (rem_flag)
4853           {
4854             remainder
4855               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4856             quotient = gen_reg_rtx (compute_mode);
4857           }
4858         else
4859           {
4860             quotient
4861               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4862             remainder = gen_reg_rtx (compute_mode);
4863           }
4864
4865         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4866                                  quotient, remainder, 0))
4867           {
4868             /* This could be computed with a branch-less sequence.
4869                Save that for later.  */
4870             rtx tem;
4871             rtx_code_label *label = gen_label_rtx ();
4872             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4873             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4874                                 NULL_RTX, 0, OPTAB_WIDEN);
4875             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4876             expand_dec (quotient, const1_rtx);
4877             expand_inc (remainder, op1);
4878             emit_label (label);
4879             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4880           }
4881
4882         /* No luck with division elimination or divmod.  Have to do it
4883            by conditionally adjusting op0 *and* the result.  */
4884         {
4885           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4886           rtx adjusted_op0;
4887           rtx tem;
4888
4889           quotient = gen_reg_rtx (compute_mode);
4890           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4891           label1 = gen_label_rtx ();
4892           label2 = gen_label_rtx ();
4893           label3 = gen_label_rtx ();
4894           label4 = gen_label_rtx ();
4895           label5 = gen_label_rtx ();
4896           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4897           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4898           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4899                               quotient, 0, methods);
4900           if (tem != quotient)
4901             emit_move_insn (quotient, tem);
4902           emit_jump_insn (targetm.gen_jump (label5));
4903           emit_barrier ();
4904           emit_label (label1);
4905           expand_inc (adjusted_op0, const1_rtx);
4906           emit_jump_insn (targetm.gen_jump (label4));
4907           emit_barrier ();
4908           emit_label (label2);
4909           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4910           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4911                               quotient, 0, methods);
4912           if (tem != quotient)
4913             emit_move_insn (quotient, tem);
4914           emit_jump_insn (targetm.gen_jump (label5));
4915           emit_barrier ();
4916           emit_label (label3);
4917           expand_dec (adjusted_op0, const1_rtx);
4918           emit_label (label4);
4919           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4920                               quotient, 0, methods);
4921           if (tem != quotient)
4922             emit_move_insn (quotient, tem);
4923           expand_dec (quotient, const1_rtx);
4924           emit_label (label5);
4925         }
4926         break;
4927
4928       case CEIL_DIV_EXPR:
4929       case CEIL_MOD_EXPR:
4930         if (unsignedp)
4931           {
4932             if (op1_is_constant
4933                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4934                 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4935                     || INTVAL (op1) >= 0))
4936               {
4937                 scalar_int_mode int_mode
4938                   = as_a <scalar_int_mode> (compute_mode);
4939                 rtx t1, t2, t3;
4940                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4941                 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4942                                    floor_log2 (d), tquotient, 1);
4943                 t2 = expand_binop (int_mode, and_optab, op0,
4944                                    gen_int_mode (d - 1, int_mode),
4945                                    NULL_RTX, 1, methods);
4946                 t3 = gen_reg_rtx (int_mode);
4947                 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4948                 if (t3 == 0)
4949                   {
4950                     rtx_code_label *lab;
4951                     lab = gen_label_rtx ();
4952                     do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4953                     expand_inc (t1, const1_rtx);
4954                     emit_label (lab);
4955                     quotient = t1;
4956                   }
4957                 else
4958                   quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4959                                             tquotient);
4960                 break;
4961               }
4962
4963             /* Try using an instruction that produces both the quotient and
4964                remainder, using truncation.  We can easily compensate the
4965                quotient or remainder to get ceiling rounding, once we have the
4966                remainder.  Notice that we also compute the final remainder
4967                value here, and return the result right away.  */
4968             if (target == 0 || GET_MODE (target) != compute_mode)
4969               target = gen_reg_rtx (compute_mode);
4970
4971             if (rem_flag)
4972               {
4973                 remainder = (REG_P (target)
4974                              ? target : gen_reg_rtx (compute_mode));
4975                 quotient = gen_reg_rtx (compute_mode);
4976               }
4977             else
4978               {
4979                 quotient = (REG_P (target)
4980                             ? target : gen_reg_rtx (compute_mode));
4981                 remainder = gen_reg_rtx (compute_mode);
4982               }
4983
4984             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4985                                      remainder, 1))
4986               {
4987                 /* This could be computed with a branch-less sequence.
4988                    Save that for later.  */
4989                 rtx_code_label *label = gen_label_rtx ();
4990                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4991                                  compute_mode, label);
4992                 expand_inc (quotient, const1_rtx);
4993                 expand_dec (remainder, op1);
4994                 emit_label (label);
4995                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4996               }
4997
4998             /* No luck with division elimination or divmod.  Have to do it
4999                by conditionally adjusting op0 *and* the result.  */
5000             {
5001               rtx_code_label *label1, *label2;
5002               rtx adjusted_op0, tem;
5003
5004               quotient = gen_reg_rtx (compute_mode);
5005               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5006               label1 = gen_label_rtx ();
5007               label2 = gen_label_rtx ();
5008               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
5009                                compute_mode, label1);
5010               emit_move_insn  (quotient, const0_rtx);
5011               emit_jump_insn (targetm.gen_jump (label2));
5012               emit_barrier ();
5013               emit_label (label1);
5014               expand_dec (adjusted_op0, const1_rtx);
5015               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
5016                                   quotient, 1, methods);
5017               if (tem != quotient)
5018                 emit_move_insn (quotient, tem);
5019               expand_inc (quotient, const1_rtx);
5020               emit_label (label2);
5021             }
5022           }
5023         else /* signed */
5024           {
5025             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
5026                 && INTVAL (op1) >= 0)
5027               {
5028                 /* This is extremely similar to the code for the unsigned case
5029                    above.  For 2.7 we should merge these variants, but for
5030                    2.6.1 I don't want to touch the code for unsigned since that
5031                    gets used in C.  The signed case will only be used by other
5032                    languages (Ada).  */
5033
5034                 rtx t1, t2, t3;
5035                 unsigned HOST_WIDE_INT d = INTVAL (op1);
5036                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5037                                    floor_log2 (d), tquotient, 0);
5038                 t2 = expand_binop (compute_mode, and_optab, op0,
5039                                    gen_int_mode (d - 1, compute_mode),
5040                                    NULL_RTX, 1, methods);
5041                 t3 = gen_reg_rtx (compute_mode);
5042                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5043                                       compute_mode, 1, 1);
5044                 if (t3 == 0)
5045                   {
5046                     rtx_code_label *lab;
5047                     lab = gen_label_rtx ();
5048                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5049                     expand_inc (t1, const1_rtx);
5050                     emit_label (lab);
5051                     quotient = t1;
5052                   }
5053                 else
5054                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
5055                                                           t1, t3),
5056                                             tquotient);
5057                 break;
5058               }
5059
5060             /* Try using an instruction that produces both the quotient and
5061                remainder, using truncation.  We can easily compensate the
5062                quotient or remainder to get ceiling rounding, once we have the
5063                remainder.  Notice that we also compute the final remainder
5064                value here, and return the result right away.  */
5065             if (target == 0 || GET_MODE (target) != compute_mode)
5066               target = gen_reg_rtx (compute_mode);
5067             if (rem_flag)
5068               {
5069                 remainder= (REG_P (target)
5070                             ? target : gen_reg_rtx (compute_mode));
5071                 quotient = gen_reg_rtx (compute_mode);
5072               }
5073             else
5074               {
5075                 quotient = (REG_P (target)
5076                             ? target : gen_reg_rtx (compute_mode));
5077                 remainder = gen_reg_rtx (compute_mode);
5078               }
5079
5080             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5081                                      remainder, 0))
5082               {
5083                 /* This could be computed with a branch-less sequence.
5084                    Save that for later.  */
5085                 rtx tem;
5086                 rtx_code_label *label = gen_label_rtx ();
5087                 do_cmp_and_jump (remainder, const0_rtx, EQ,
5088                                  compute_mode, label);
5089                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5090                                     NULL_RTX, 0, OPTAB_WIDEN);
5091                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5092                 expand_inc (quotient, const1_rtx);
5093                 expand_dec (remainder, op1);
5094                 emit_label (label);
5095                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5096               }
5097
5098             /* No luck with division elimination or divmod.  Have to do it
5099                by conditionally adjusting op0 *and* the result.  */
5100             {
5101               rtx_code_label *label1, *label2, *label3, *label4, *label5;
5102               rtx adjusted_op0;
5103               rtx tem;
5104
5105               quotient = gen_reg_rtx (compute_mode);
5106               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5107               label1 = gen_label_rtx ();
5108               label2 = gen_label_rtx ();
5109               label3 = gen_label_rtx ();
5110               label4 = gen_label_rtx ();
5111               label5 = gen_label_rtx ();
5112               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5113               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5114                                compute_mode, label1);
5115               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5116                                   quotient, 0, methods);
5117               if (tem != quotient)
5118                 emit_move_insn (quotient, tem);
5119               emit_jump_insn (targetm.gen_jump (label5));
5120               emit_barrier ();
5121               emit_label (label1);
5122               expand_dec (adjusted_op0, const1_rtx);
5123               emit_jump_insn (targetm.gen_jump (label4));
5124               emit_barrier ();
5125               emit_label (label2);
5126               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5127                                compute_mode, label3);
5128               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5129                                   quotient, 0, methods);
5130               if (tem != quotient)
5131                 emit_move_insn (quotient, tem);
5132               emit_jump_insn (targetm.gen_jump (label5));
5133               emit_barrier ();
5134               emit_label (label3);
5135               expand_inc (adjusted_op0, const1_rtx);
5136               emit_label (label4);
5137               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5138                                   quotient, 0, methods);
5139               if (tem != quotient)
5140                 emit_move_insn (quotient, tem);
5141               expand_inc (quotient, const1_rtx);
5142               emit_label (label5);
5143             }
5144           }
5145         break;
5146
5147       case EXACT_DIV_EXPR:
5148         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5149           {
5150             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5151             int size = GET_MODE_BITSIZE (int_mode);
5152             HOST_WIDE_INT d = INTVAL (op1);
5153             unsigned HOST_WIDE_INT ml;
5154             int pre_shift;
5155             rtx t1;
5156
5157             pre_shift = ctz_or_zero (d);
5158             ml = invert_mod2n (d >> pre_shift, size);
5159             t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5160                                pre_shift, NULL_RTX, unsignedp);
5161             quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5162                                     NULL_RTX, 1);
5163
5164             insn = get_last_insn ();
5165             set_dst_reg_note (insn, REG_EQUAL,
5166                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5167                                               int_mode, op0, op1),
5168                               quotient);
5169           }
5170         break;
5171
5172       case ROUND_DIV_EXPR:
5173       case ROUND_MOD_EXPR:
5174         if (unsignedp)
5175           {
5176             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5177             rtx tem;
5178             rtx_code_label *label;
5179             label = gen_label_rtx ();
5180             quotient = gen_reg_rtx (int_mode);
5181             remainder = gen_reg_rtx (int_mode);
5182             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5183               {
5184                 rtx tem;
5185                 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5186                                          quotient, 1, methods);
5187                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5188                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5189                                           remainder, 1, methods);
5190               }
5191             tem = plus_constant (int_mode, op1, -1);
5192             tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5193             do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5194             expand_inc (quotient, const1_rtx);
5195             expand_dec (remainder, op1);
5196             emit_label (label);
5197           }
5198         else
5199           {
5200             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5201             int size = GET_MODE_BITSIZE (int_mode);
5202             rtx abs_rem, abs_op1, tem, mask;
5203             rtx_code_label *label;
5204             label = gen_label_rtx ();
5205             quotient = gen_reg_rtx (int_mode);
5206             remainder = gen_reg_rtx (int_mode);
5207             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5208               {
5209                 rtx tem;
5210                 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5211                                          quotient, 0, methods);
5212                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5213                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5214                                           remainder, 0, methods);
5215               }
5216             abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5217             abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5218             tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5219                                 1, NULL_RTX, 1);
5220             do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5221             tem = expand_binop (int_mode, xor_optab, op0, op1,
5222                                 NULL_RTX, 0, OPTAB_WIDEN);
5223             mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5224                                  size - 1, NULL_RTX, 0);
5225             tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5226                                 NULL_RTX, 0, OPTAB_WIDEN);
5227             tem = expand_binop (int_mode, sub_optab, tem, mask,
5228                                 NULL_RTX, 0, OPTAB_WIDEN);
5229             expand_inc (quotient, tem);
5230             tem = expand_binop (int_mode, xor_optab, mask, op1,
5231                                 NULL_RTX, 0, OPTAB_WIDEN);
5232             tem = expand_binop (int_mode, sub_optab, tem, mask,
5233                                 NULL_RTX, 0, OPTAB_WIDEN);
5234             expand_dec (remainder, tem);
5235             emit_label (label);
5236           }
5237         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5238
5239       default:
5240         gcc_unreachable ();
5241       }
5242
5243   if (quotient == 0)
5244     {
5245       if (target && GET_MODE (target) != compute_mode)
5246         target = 0;
5247
5248       if (rem_flag)
5249         {
5250           /* Try to produce the remainder without producing the quotient.
5251              If we seem to have a divmod pattern that does not require widening,
5252              don't try widening here.  We should really have a WIDEN argument
5253              to expand_twoval_binop, since what we'd really like to do here is
5254              1) try a mod insn in compute_mode
5255              2) try a divmod insn in compute_mode
5256              3) try a div insn in compute_mode and multiply-subtract to get
5257                 remainder
5258              4) try the same things with widening allowed.  */
5259           remainder
5260             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5261                                  op0, op1, target,
5262                                  unsignedp,
5263                                  ((optab_handler (optab2, compute_mode)
5264                                    != CODE_FOR_nothing)
5265                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5266           if (remainder == 0)
5267             {
5268               /* No luck there.  Can we do remainder and divide at once
5269                  without a library call?  */
5270               remainder = gen_reg_rtx (compute_mode);
5271               if (! expand_twoval_binop ((unsignedp
5272                                           ? udivmod_optab
5273                                           : sdivmod_optab),
5274                                          op0, op1,
5275                                          NULL_RTX, remainder, unsignedp))
5276                 remainder = 0;
5277             }
5278
5279           if (remainder)
5280             return gen_lowpart (mode, remainder);
5281         }
5282
5283       /* Produce the quotient.  Try a quotient insn, but not a library call.
5284          If we have a divmod in this mode, use it in preference to widening
5285          the div (for this test we assume it will not fail). Note that optab2
5286          is set to the one of the two optabs that the call below will use.  */
5287       quotient
5288         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5289                              op0, op1, rem_flag ? NULL_RTX : target,
5290                              unsignedp,
5291                              ((optab_handler (optab2, compute_mode)
5292                                != CODE_FOR_nothing)
5293                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5294
5295       if (quotient == 0)
5296         {
5297           /* No luck there.  Try a quotient-and-remainder insn,
5298              keeping the quotient alone.  */
5299           quotient = gen_reg_rtx (compute_mode);
5300           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5301                                      op0, op1,
5302                                      quotient, NULL_RTX, unsignedp))
5303             {
5304               quotient = 0;
5305               if (! rem_flag)
5306                 /* Still no luck.  If we are not computing the remainder,
5307                    use a library call for the quotient.  */
5308                 quotient = sign_expand_binop (compute_mode,
5309                                               udiv_optab, sdiv_optab,
5310                                               op0, op1, target,
5311                                               unsignedp, methods);
5312             }
5313         }
5314     }
5315
5316   if (rem_flag)
5317     {
5318       if (target && GET_MODE (target) != compute_mode)
5319         target = 0;
5320
5321       if (quotient == 0)
5322         {
5323           /* No divide instruction either.  Use library for remainder.  */
5324           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5325                                          op0, op1, target,
5326                                          unsignedp, methods);
5327           /* No remainder function.  Try a quotient-and-remainder
5328              function, keeping the remainder.  */
5329           if (!remainder
5330               && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5331             {
5332               remainder = gen_reg_rtx (compute_mode);
5333               if (!expand_twoval_binop_libfunc
5334                   (unsignedp ? udivmod_optab : sdivmod_optab,
5335                    op0, op1,
5336                    NULL_RTX, remainder,
5337                    unsignedp ? UMOD : MOD))
5338                 remainder = NULL_RTX;
5339             }
5340         }
5341       else
5342         {
5343           /* We divided.  Now finish doing X - Y * (X / Y).  */
5344           remainder = expand_mult (compute_mode, quotient, op1,
5345                                    NULL_RTX, unsignedp);
5346           remainder = expand_binop (compute_mode, sub_optab, op0,
5347                                     remainder, target, unsignedp,
5348                                     methods);
5349         }
5350     }
5351
5352   if (methods != OPTAB_LIB_WIDEN
5353       && (rem_flag ? remainder : quotient) == NULL_RTX)
5354     return NULL_RTX;
5355
5356   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5357 }
5358 \f
5359 /* Return a tree node with data type TYPE, describing the value of X.
5360    Usually this is an VAR_DECL, if there is no obvious better choice.
5361    X may be an expression, however we only support those expressions
5362    generated by loop.c.  */
5363
5364 tree
5365 make_tree (tree type, rtx x)
5366 {
5367   tree t;
5368
5369   switch (GET_CODE (x))
5370     {
5371     case CONST_INT:
5372     case CONST_WIDE_INT:
5373       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5374       return t;
5375
5376     case CONST_DOUBLE:
5377       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5378       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5379         t = wide_int_to_tree (type,
5380                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5381                                                     HOST_BITS_PER_WIDE_INT * 2));
5382       else
5383         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5384
5385       return t;
5386
5387     case CONST_VECTOR:
5388       {
5389         unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5390         unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5391         tree itype = TREE_TYPE (type);
5392
5393         /* Build a tree with vector elements.  */
5394         tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5395         unsigned int count = elts.encoded_nelts ();
5396         for (unsigned int i = 0; i < count; ++i)
5397           {
5398             rtx elt = CONST_VECTOR_ELT (x, i);
5399             elts.quick_push (make_tree (itype, elt));
5400           }
5401
5402         return elts.build ();
5403       }
5404
5405     case PLUS:
5406       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5407                           make_tree (type, XEXP (x, 1)));
5408
5409     case MINUS:
5410       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5411                           make_tree (type, XEXP (x, 1)));
5412
5413     case NEG:
5414       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5415
5416     case MULT:
5417       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5418                           make_tree (type, XEXP (x, 1)));
5419
5420     case ASHIFT:
5421       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5422                           make_tree (type, XEXP (x, 1)));
5423
5424     case LSHIFTRT:
5425       t = unsigned_type_for (type);
5426       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5427                                          make_tree (t, XEXP (x, 0)),
5428                                          make_tree (type, XEXP (x, 1))));
5429
5430     case ASHIFTRT:
5431       t = signed_type_for (type);
5432       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5433                                          make_tree (t, XEXP (x, 0)),
5434                                          make_tree (type, XEXP (x, 1))));
5435
5436     case DIV:
5437       if (TREE_CODE (type) != REAL_TYPE)
5438         t = signed_type_for (type);
5439       else
5440         t = type;
5441
5442       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5443                                          make_tree (t, XEXP (x, 0)),
5444                                          make_tree (t, XEXP (x, 1))));
5445     case UDIV:
5446       t = unsigned_type_for (type);
5447       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5448                                          make_tree (t, XEXP (x, 0)),
5449                                          make_tree (t, XEXP (x, 1))));
5450
5451     case SIGN_EXTEND:
5452     case ZERO_EXTEND:
5453       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5454                                           GET_CODE (x) == ZERO_EXTEND);
5455       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5456
5457     case CONST:
5458       return make_tree (type, XEXP (x, 0));
5459
5460     case SYMBOL_REF:
5461       t = SYMBOL_REF_DECL (x);
5462       if (t)
5463         return fold_convert (type, build_fold_addr_expr (t));
5464       /* fall through.  */
5465
5466     default:
5467       if (CONST_POLY_INT_P (x))
5468         return wide_int_to_tree (t, const_poly_int_value (x));
5469
5470       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5471
5472       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5473          address mode to pointer mode.  */
5474       if (POINTER_TYPE_P (type))
5475         x = convert_memory_address_addr_space
5476           (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5477
5478       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5479          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5480       t->decl_with_rtl.rtl = x;
5481
5482       return t;
5483     }
5484 }
5485 \f
5486 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5487    and returning TARGET.
5488
5489    If TARGET is 0, a pseudo-register or constant is returned.  */
5490
5491 rtx
5492 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5493 {
5494   rtx tem = 0;
5495
5496   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5497     tem = simplify_binary_operation (AND, mode, op0, op1);
5498   if (tem == 0)
5499     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5500
5501   if (target == 0)
5502     target = tem;
5503   else if (tem != target)
5504     emit_move_insn (target, tem);
5505   return target;
5506 }
5507
5508 /* Helper function for emit_store_flag.  */
5509 rtx
5510 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5511              machine_mode mode, machine_mode compare_mode,
5512              int unsignedp, rtx x, rtx y, int normalizep,
5513              machine_mode target_mode)
5514 {
5515   class expand_operand ops[4];
5516   rtx op0, comparison, subtarget;
5517   rtx_insn *last;
5518   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5519   scalar_int_mode int_target_mode;
5520
5521   last = get_last_insn ();
5522   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5523   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5524   if (!x || !y)
5525     {
5526       delete_insns_since (last);
5527       return NULL_RTX;
5528     }
5529
5530   if (target_mode == VOIDmode)
5531     int_target_mode = result_mode;
5532   else
5533     int_target_mode = as_a <scalar_int_mode> (target_mode);
5534   if (!target)
5535     target = gen_reg_rtx (int_target_mode);
5536
5537   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5538
5539   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5540   create_fixed_operand (&ops[1], comparison);
5541   create_fixed_operand (&ops[2], x);
5542   create_fixed_operand (&ops[3], y);
5543   if (!maybe_expand_insn (icode, 4, ops))
5544     {
5545       delete_insns_since (last);
5546       return NULL_RTX;
5547     }
5548   subtarget = ops[0].value;
5549
5550   /* If we are converting to a wider mode, first convert to
5551      INT_TARGET_MODE, then normalize.  This produces better combining
5552      opportunities on machines that have a SIGN_EXTRACT when we are
5553      testing a single bit.  This mostly benefits the 68k.
5554
5555      If STORE_FLAG_VALUE does not have the sign bit set when
5556      interpreted in MODE, we can do this conversion as unsigned, which
5557      is usually more efficient.  */
5558   if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5559     {
5560       gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5561                   || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5562
5563       bool unsignedp = (STORE_FLAG_VALUE >= 0);
5564       convert_move (target, subtarget, unsignedp);
5565
5566       op0 = target;
5567       result_mode = int_target_mode;
5568     }
5569   else
5570     op0 = subtarget;
5571
5572   /* If we want to keep subexpressions around, don't reuse our last
5573      target.  */
5574   if (optimize)
5575     subtarget = 0;
5576
5577   /* Now normalize to the proper value in MODE.  Sometimes we don't
5578      have to do anything.  */
5579   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5580     ;
5581   /* STORE_FLAG_VALUE might be the most negative number, so write
5582      the comparison this way to avoid a compiler-time warning.  */
5583   else if (- normalizep == STORE_FLAG_VALUE)
5584     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5585
5586   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5587      it hard to use a value of just the sign bit due to ANSI integer
5588      constant typing rules.  */
5589   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5590     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5591                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5592                         normalizep == 1);
5593   else
5594     {
5595       gcc_assert (STORE_FLAG_VALUE & 1);
5596
5597       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5598       if (normalizep == -1)
5599         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5600     }
5601
5602   /* If we were converting to a smaller mode, do the conversion now.  */
5603   if (int_target_mode != result_mode)
5604     {
5605       convert_move (target, op0, 0);
5606       return target;
5607     }
5608   else
5609     return op0;
5610 }
5611
5612
5613 /* A subroutine of emit_store_flag only including "tricks" that do not
5614    need a recursive call.  These are kept separate to avoid infinite
5615    loops.  */
5616
5617 static rtx
5618 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5619                    machine_mode mode, int unsignedp, int normalizep,
5620                    machine_mode target_mode)
5621 {
5622   rtx subtarget;
5623   enum insn_code icode;
5624   machine_mode compare_mode;
5625   enum mode_class mclass;
5626   enum rtx_code scode;
5627
5628   if (unsignedp)
5629     code = unsigned_condition (code);
5630   scode = swap_condition (code);
5631
5632   /* If one operand is constant, make it the second one.  Only do this
5633      if the other operand is not constant as well.  */
5634
5635   if (swap_commutative_operands_p (op0, op1))
5636     {
5637       std::swap (op0, op1);
5638       code = swap_condition (code);
5639     }
5640
5641   if (mode == VOIDmode)
5642     mode = GET_MODE (op0);
5643
5644   if (CONST_SCALAR_INT_P (op1))
5645     canonicalize_comparison (mode, &code, &op1);
5646
5647   /* For some comparisons with 1 and -1, we can convert this to
5648      comparisons with zero.  This will often produce more opportunities for
5649      store-flag insns.  */
5650
5651   switch (code)
5652     {
5653     case LT:
5654       if (op1 == const1_rtx)
5655         op1 = const0_rtx, code = LE;
5656       break;
5657     case LE:
5658       if (op1 == constm1_rtx)
5659         op1 = const0_rtx, code = LT;
5660       break;
5661     case GE:
5662       if (op1 == const1_rtx)
5663         op1 = const0_rtx, code = GT;
5664       break;
5665     case GT:
5666       if (op1 == constm1_rtx)
5667         op1 = const0_rtx, code = GE;
5668       break;
5669     case GEU:
5670       if (op1 == const1_rtx)
5671         op1 = const0_rtx, code = NE;
5672       break;
5673     case LTU:
5674       if (op1 == const1_rtx)
5675         op1 = const0_rtx, code = EQ;
5676       break;
5677     default:
5678       break;
5679     }
5680
5681   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5682      complement of A (for GE) and shifting the sign bit to the low bit.  */
5683   scalar_int_mode int_mode;
5684   if (op1 == const0_rtx && (code == LT || code == GE)
5685       && is_int_mode (mode, &int_mode)
5686       && (normalizep || STORE_FLAG_VALUE == 1
5687           || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5688     {
5689       scalar_int_mode int_target_mode;
5690       subtarget = target;
5691
5692       if (!target)
5693         int_target_mode = int_mode;
5694       else
5695         {
5696           /* If the result is to be wider than OP0, it is best to convert it
5697              first.  If it is to be narrower, it is *incorrect* to convert it
5698              first.  */
5699           int_target_mode = as_a <scalar_int_mode> (target_mode);
5700           if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5701             {
5702               op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5703               int_mode = int_target_mode;
5704             }
5705         }
5706
5707       if (int_target_mode != int_mode)
5708         subtarget = 0;
5709
5710       if (code == GE)
5711         op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5712                            ((STORE_FLAG_VALUE == 1 || normalizep)
5713                             ? 0 : subtarget), 0);
5714
5715       if (STORE_FLAG_VALUE == 1 || normalizep)
5716         /* If we are supposed to produce a 0/1 value, we want to do
5717            a logical shift from the sign bit to the low-order bit; for
5718            a -1/0 value, we do an arithmetic shift.  */
5719         op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5720                             GET_MODE_BITSIZE (int_mode) - 1,
5721                             subtarget, normalizep != -1);
5722
5723       if (int_mode != int_target_mode)
5724         op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5725
5726       return op0;
5727     }
5728
5729   /* Next try expanding this via the backend's cstore<mode>4.  */
5730   mclass = GET_MODE_CLASS (mode);
5731   FOR_EACH_WIDER_MODE_FROM (compare_mode, mode)
5732     {
5733      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5734      icode = optab_handler (cstore_optab, optab_mode);
5735      if (icode != CODE_FOR_nothing)
5736         {
5737           do_pending_stack_adjust ();
5738           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5739                                  unsignedp, op0, op1, normalizep, target_mode);
5740           if (tem)
5741             return tem;
5742
5743           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5744             {
5745               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5746                                  unsignedp, op1, op0, normalizep, target_mode);
5747               if (tem)
5748                 return tem;
5749             }
5750           break;
5751         }
5752     }
5753
5754   /* If we are comparing a double-word integer with zero or -1, we can
5755      convert the comparison into one involving a single word.  */
5756   if (is_int_mode (mode, &int_mode)
5757       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5758       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5759     {
5760       rtx tem;
5761       if ((code == EQ || code == NE)
5762           && (op1 == const0_rtx || op1 == constm1_rtx))
5763         {
5764           rtx op00, op01;
5765
5766           /* Do a logical OR or AND of the two words and compare the
5767              result.  */
5768           op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5769           op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5770           tem = expand_binop (word_mode,
5771                               op1 == const0_rtx ? ior_optab : and_optab,
5772                               op00, op01, NULL_RTX, unsignedp,
5773                               OPTAB_DIRECT);
5774
5775           if (tem != 0)
5776             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5777                                    unsignedp, normalizep);
5778         }
5779       else if ((code == LT || code == GE) && op1 == const0_rtx)
5780         {
5781           rtx op0h;
5782
5783           /* If testing the sign bit, can just test on high word.  */
5784           op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5785                                       subreg_highpart_offset (word_mode,
5786                                                               int_mode));
5787           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5788                                  unsignedp, normalizep);
5789         }
5790       else
5791         tem = NULL_RTX;
5792
5793       if (tem)
5794         {
5795           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5796             return tem;
5797           if (!target)
5798             target = gen_reg_rtx (target_mode);
5799
5800           convert_move (target, tem,
5801                         !val_signbit_known_set_p (word_mode,
5802                                                   (normalizep ? normalizep
5803                                                    : STORE_FLAG_VALUE)));
5804           return target;
5805         }
5806     }
5807
5808   return 0;
5809 }
5810
5811 /* Subroutine of emit_store_flag that handles cases in which the operands
5812    are scalar integers.  SUBTARGET is the target to use for temporary
5813    operations and TRUEVAL is the value to store when the condition is
5814    true.  All other arguments are as for emit_store_flag.  */
5815
5816 rtx
5817 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5818                      rtx op1, scalar_int_mode mode, int unsignedp,
5819                      int normalizep, rtx trueval)
5820 {
5821   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5822   rtx_insn *last = get_last_insn ();
5823
5824   /* If this is an equality comparison of integers, we can try to exclusive-or
5825      (or subtract) the two operands and use a recursive call to try the
5826      comparison with zero.  Don't do any of these cases if branches are
5827      very cheap.  */
5828
5829   if ((code == EQ || code == NE) && op1 != const0_rtx)
5830     {
5831       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5832                               OPTAB_WIDEN);
5833
5834       if (tem == 0)
5835         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5836                             OPTAB_WIDEN);
5837       if (tem != 0)
5838         tem = emit_store_flag (target, code, tem, const0_rtx,
5839                                mode, unsignedp, normalizep);
5840       if (tem != 0)
5841         return tem;
5842
5843       delete_insns_since (last);
5844     }
5845
5846   /* For integer comparisons, try the reverse comparison.  However, for
5847      small X and if we'd have anyway to extend, implementing "X != 0"
5848      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5849   rtx_code rcode = reverse_condition (code);
5850   if (can_compare_p (rcode, mode, ccp_store_flag)
5851       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5852             && code == NE
5853             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5854             && op1 == const0_rtx))
5855     {
5856       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5857                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5858
5859       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5860       if (want_add
5861           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5862                        optimize_insn_for_speed_p ()) == 0)
5863         {
5864           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5865                                        STORE_FLAG_VALUE, target_mode);
5866           if (tem != 0)
5867             tem = expand_binop (target_mode, add_optab, tem,
5868                                 gen_int_mode (normalizep, target_mode),
5869                                 target, 0, OPTAB_WIDEN);
5870           if (tem != 0)
5871             return tem;
5872         }
5873       else if (!want_add
5874                && rtx_cost (trueval, mode, XOR, 1,
5875                             optimize_insn_for_speed_p ()) == 0)
5876         {
5877           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5878                                        normalizep, target_mode);
5879           if (tem != 0)
5880             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5881                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5882           if (tem != 0)
5883             return tem;
5884         }
5885
5886       delete_insns_since (last);
5887     }
5888
5889   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5890      the constant zero.  Reject all other comparisons at this point.  Only
5891      do LE and GT if branches are expensive since they are expensive on
5892      2-operand machines.  */
5893
5894   if (op1 != const0_rtx
5895       || (code != EQ && code != NE
5896           && (BRANCH_COST (optimize_insn_for_speed_p (),
5897                            false) <= 1 || (code != LE && code != GT))))
5898     return 0;
5899
5900   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5901      do the necessary operation below.  */
5902
5903   rtx tem = 0;
5904
5905   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5906      the sign bit set.  */
5907
5908   if (code == LE)
5909     {
5910       /* This is destructive, so SUBTARGET can't be OP0.  */
5911       if (rtx_equal_p (subtarget, op0))
5912         subtarget = 0;
5913
5914       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5915                           OPTAB_WIDEN);
5916       if (tem)
5917         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5918                             OPTAB_WIDEN);
5919     }
5920
5921   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5922      number of bits in the mode of OP0, minus one.  */
5923
5924   if (code == GT)
5925     {
5926       if (rtx_equal_p (subtarget, op0))
5927         subtarget = 0;
5928
5929       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5930                                 GET_MODE_BITSIZE (mode) - 1,
5931                                 subtarget, 0);
5932       if (tem)
5933         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5934                             OPTAB_WIDEN);
5935     }
5936
5937   if (code == EQ || code == NE)
5938     {
5939       /* For EQ or NE, one way to do the comparison is to apply an operation
5940          that converts the operand into a positive number if it is nonzero
5941          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5942          for NE we negate.  This puts the result in the sign bit.  Then we
5943          normalize with a shift, if needed.
5944
5945          Two operations that can do the above actions are ABS and FFS, so try
5946          them.  If that doesn't work, and MODE is smaller than a full word,
5947          we can use zero-extension to the wider mode (an unsigned conversion)
5948          as the operation.  */
5949
5950       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5951          that is compensated by the subsequent overflow when subtracting
5952          one / negating.  */
5953
5954       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5955         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5956       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5957         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5958       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5959         {
5960           tem = convert_modes (word_mode, mode, op0, 1);
5961           mode = word_mode;
5962         }
5963
5964       if (tem != 0)
5965         {
5966           if (code == EQ)
5967             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5968                                 0, OPTAB_WIDEN);
5969           else
5970             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5971         }
5972
5973       /* If we couldn't do it that way, for NE we can "or" the two's complement
5974          of the value with itself.  For EQ, we take the one's complement of
5975          that "or", which is an extra insn, so we only handle EQ if branches
5976          are expensive.  */
5977
5978       if (tem == 0
5979           && (code == NE
5980               || BRANCH_COST (optimize_insn_for_speed_p (),
5981                               false) > 1))
5982         {
5983           if (rtx_equal_p (subtarget, op0))
5984             subtarget = 0;
5985
5986           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5987           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5988                               OPTAB_WIDEN);
5989
5990           if (tem && code == EQ)
5991             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5992         }
5993     }
5994
5995   if (tem && normalizep)
5996     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5997                               GET_MODE_BITSIZE (mode) - 1,
5998                               subtarget, normalizep == 1);
5999
6000   if (tem)
6001     {
6002       if (!target)
6003         ;
6004       else if (GET_MODE (tem) != target_mode)
6005         {
6006           convert_move (target, tem, 0);
6007           tem = target;
6008         }
6009       else if (!subtarget)
6010         {
6011           emit_move_insn (target, tem);
6012           tem = target;
6013         }
6014     }
6015   else
6016     delete_insns_since (last);
6017
6018   return tem;
6019 }
6020
6021 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
6022    and storing in TARGET.  Normally return TARGET.
6023    Return 0 if that cannot be done.
6024
6025    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
6026    it is VOIDmode, they cannot both be CONST_INT.
6027
6028    UNSIGNEDP is for the case where we have to widen the operands
6029    to perform the operation.  It says to use zero-extension.
6030
6031    NORMALIZEP is 1 if we should convert the result to be either zero
6032    or one.  Normalize is -1 if we should convert the result to be
6033    either zero or -1.  If NORMALIZEP is zero, the result will be left
6034    "raw" out of the scc insn.  */
6035
6036 rtx
6037 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6038                  machine_mode mode, int unsignedp, int normalizep)
6039 {
6040   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6041   enum rtx_code rcode;
6042   rtx subtarget;
6043   rtx tem, trueval;
6044   rtx_insn *last;
6045
6046   /* If we compare constants, we shouldn't use a store-flag operation,
6047      but a constant load.  We can get there via the vanilla route that
6048      usually generates a compare-branch sequence, but will in this case
6049      fold the comparison to a constant, and thus elide the branch.  */
6050   if (CONSTANT_P (op0) && CONSTANT_P (op1))
6051     return NULL_RTX;
6052
6053   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6054                            target_mode);
6055   if (tem)
6056     return tem;
6057
6058   /* If we reached here, we can't do this with a scc insn, however there
6059      are some comparisons that can be done in other ways.  Don't do any
6060      of these cases if branches are very cheap.  */
6061   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6062     return 0;
6063
6064   /* See what we need to return.  We can only return a 1, -1, or the
6065      sign bit.  */
6066
6067   if (normalizep == 0)
6068     {
6069       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6070         normalizep = STORE_FLAG_VALUE;
6071
6072       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6073         ;
6074       else
6075         return 0;
6076     }
6077
6078   last = get_last_insn ();
6079
6080   /* If optimizing, use different pseudo registers for each insn, instead
6081      of reusing the same pseudo.  This leads to better CSE, but slows
6082      down the compiler, since there are more pseudos.  */
6083   subtarget = (!optimize
6084                && (target_mode == mode)) ? target : NULL_RTX;
6085   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6086
6087   /* For floating-point comparisons, try the reverse comparison or try
6088      changing the "orderedness" of the comparison.  */
6089   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6090     {
6091       enum rtx_code first_code;
6092       bool and_them;
6093
6094       rcode = reverse_condition_maybe_unordered (code);
6095       if (can_compare_p (rcode, mode, ccp_store_flag)
6096           && (code == ORDERED || code == UNORDERED
6097               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6098               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6099         {
6100           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6101                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6102
6103           /* For the reverse comparison, use either an addition or a XOR.  */
6104           if (want_add
6105               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6106                            optimize_insn_for_speed_p ()) == 0)
6107             {
6108               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6109                                        STORE_FLAG_VALUE, target_mode);
6110               if (tem)
6111                 return expand_binop (target_mode, add_optab, tem,
6112                                      gen_int_mode (normalizep, target_mode),
6113                                      target, 0, OPTAB_WIDEN);
6114             }
6115           else if (!want_add
6116                    && rtx_cost (trueval, mode, XOR, 1,
6117                                 optimize_insn_for_speed_p ()) == 0)
6118             {
6119               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6120                                        normalizep, target_mode);
6121               if (tem)
6122                 return expand_binop (target_mode, xor_optab, tem, trueval,
6123                                      target, INTVAL (trueval) >= 0,
6124                                      OPTAB_WIDEN);
6125             }
6126         }
6127
6128       delete_insns_since (last);
6129
6130       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6131       if (code == ORDERED || code == UNORDERED)
6132         return 0;
6133
6134       and_them = split_comparison (code, mode, &first_code, &code);
6135
6136       /* If there are no NaNs, the first comparison should always fall through.
6137          Effectively change the comparison to the other one.  */
6138       if (!HONOR_NANS (mode))
6139         {
6140           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6141           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6142                                     target_mode);
6143         }
6144
6145       if (!HAVE_conditional_move)
6146         return 0;
6147
6148       /* Do not turn a trapping comparison into a non-trapping one.  */
6149       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6150           && flag_trapping_math)
6151         return 0;
6152
6153       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6154          conditional move.  */
6155       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6156                                normalizep, target_mode);
6157       if (tem == 0)
6158         return 0;
6159
6160       if (and_them)
6161         tem = emit_conditional_move (target, { code, op0, op1, mode },
6162                                      tem, const0_rtx, GET_MODE (tem), 0);
6163       else
6164         tem = emit_conditional_move (target, { code, op0, op1, mode },
6165                                      trueval, tem, GET_MODE (tem), 0);
6166
6167       if (tem == 0)
6168         delete_insns_since (last);
6169       return tem;
6170     }
6171
6172   /* The remaining tricks only apply to integer comparisons.  */
6173
6174   scalar_int_mode int_mode;
6175   if (is_int_mode (mode, &int_mode))
6176     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6177                                 unsignedp, normalizep, trueval);
6178
6179   return 0;
6180 }
6181
6182 /* Like emit_store_flag, but always succeeds.  */
6183
6184 rtx
6185 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6186                        machine_mode mode, int unsignedp, int normalizep)
6187 {
6188   rtx tem;
6189   rtx_code_label *label;
6190   rtx trueval, falseval;
6191
6192   /* First see if emit_store_flag can do the job.  */
6193   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6194   if (tem != 0)
6195     return tem;
6196
6197   /* If one operand is constant, make it the second one.  Only do this
6198      if the other operand is not constant as well.  */
6199   if (swap_commutative_operands_p (op0, op1))
6200     {
6201       std::swap (op0, op1);
6202       code = swap_condition (code);
6203     }
6204
6205   if (mode == VOIDmode)
6206     mode = GET_MODE (op0);
6207
6208   if (!target)
6209     target = gen_reg_rtx (word_mode);
6210
6211   /* If this failed, we have to do this with set/compare/jump/set code.
6212      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6213   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6214   if (code == NE
6215       && GET_MODE_CLASS (mode) == MODE_INT
6216       && REG_P (target)
6217       && op0 == target
6218       && op1 == const0_rtx)
6219     {
6220       label = gen_label_rtx ();
6221       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6222                                NULL_RTX, NULL, label,
6223                                profile_probability::uninitialized ());
6224       emit_move_insn (target, trueval);
6225       emit_label (label);
6226       return target;
6227     }
6228
6229   if (!REG_P (target)
6230       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6231     target = gen_reg_rtx (GET_MODE (target));
6232
6233   /* Jump in the right direction if the target cannot implement CODE
6234      but can jump on its reverse condition.  */
6235   falseval = const0_rtx;
6236   if (! can_compare_p (code, mode, ccp_jump)
6237       && (! FLOAT_MODE_P (mode)
6238           || code == ORDERED || code == UNORDERED
6239           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6240           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6241     {
6242       enum rtx_code rcode;
6243       if (FLOAT_MODE_P (mode))
6244         rcode = reverse_condition_maybe_unordered (code);
6245       else
6246         rcode = reverse_condition (code);
6247
6248       /* Canonicalize to UNORDERED for the libcall.  */
6249       if (can_compare_p (rcode, mode, ccp_jump)
6250           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6251         {
6252           falseval = trueval;
6253           trueval = const0_rtx;
6254           code = rcode;
6255         }
6256     }
6257
6258   emit_move_insn (target, trueval);
6259   label = gen_label_rtx ();
6260   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6261                            label, profile_probability::uninitialized ());
6262
6263   emit_move_insn (target, falseval);
6264   emit_label (label);
6265
6266   return target;
6267 }
6268
6269 /* Helper function for canonicalize_cmp_for_target.  Swap between inclusive
6270    and exclusive ranges in order to create an equivalent comparison.  See
6271    canonicalize_cmp_for_target for the possible cases.  */
6272
6273 static enum rtx_code
6274 equivalent_cmp_code (enum rtx_code code)
6275 {
6276   switch (code)
6277     {
6278     case GT:
6279       return GE;
6280     case GE:
6281       return GT;
6282     case LT:
6283       return LE;
6284     case LE:
6285       return LT;
6286     case GTU:
6287       return GEU;
6288     case GEU:
6289       return GTU;
6290     case LTU:
6291       return LEU;
6292     case LEU:
6293       return LTU;
6294
6295     default:
6296       return code;
6297     }
6298 }
6299
6300 /* Choose the more appropiate immediate in scalar integer comparisons.  The
6301    purpose of this is to end up with an immediate which can be loaded into a
6302    register in fewer moves, if possible.
6303
6304    For each integer comparison there exists an equivalent choice:
6305      i)   a >  b or a >= b + 1
6306      ii)  a <= b or a <  b + 1
6307      iii) a >= b or a >  b - 1
6308      iv)  a <  b or a <= b - 1
6309
6310    MODE is the mode of the first operand.
6311    CODE points to the comparison code.
6312    IMM points to the rtx containing the immediate.  *IMM must satisfy
6313    CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6314    on exit.  */
6315
6316 void
6317 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6318 {
6319   if (!SCALAR_INT_MODE_P (mode))
6320     return;
6321
6322   int to_add = 0;
6323   enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6324
6325   /* Extract the immediate value from the rtx.  */
6326   wide_int imm_val = rtx_mode_t (*imm, mode);
6327
6328   if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6329     to_add = 1;
6330   else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6331     to_add = -1;
6332   else
6333     return;
6334
6335   /* Check for overflow/underflow in the case of signed values and
6336      wrapping around in the case of unsigned values.  If any occur
6337      cancel the optimization.  */
6338   wi::overflow_type overflow = wi::OVF_NONE;
6339   wide_int imm_modif;
6340
6341   if (to_add == 1)
6342     imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6343   else
6344     imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6345
6346   if (overflow)
6347     return;
6348
6349   /* The following creates a pseudo; if we cannot do that, bail out.  */
6350   if (!can_create_pseudo_p ())
6351     return;
6352
6353   rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6354   rtx new_imm = immed_wide_int_const (imm_modif, mode);
6355
6356   rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6357   rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6358
6359   /* Update the immediate and the code.  */
6360   if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6361     {
6362       *code = equivalent_cmp_code (*code);
6363       *imm = new_imm;
6364     }
6365 }
6366
6367
6368 \f
6369 /* Perform possibly multi-word comparison and conditional jump to LABEL
6370    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6371    now a thin wrapper around do_compare_rtx_and_jump.  */
6372
6373 static void
6374 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6375                  rtx_code_label *label)
6376 {
6377   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6378   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6379                            NULL, label, profile_probability::uninitialized ());
6380 }