gcc/expmed.cc
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2024 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "optabs.h"
35 #include "expmed.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
46 #include "recog.h"
48 struct target_expmed default_target_expmed;
49 #if SWITCHABLE_TARGET
50 struct target_expmed *this_target_expmed = &default_target_expmed;
51 #endif
53 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 poly_uint64, poly_uint64,
57 machine_mode, rtx, bool, bool);
58 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT,
61 poly_uint64, poly_uint64,
62 rtx, scalar_int_mode, bool);
63 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
64 unsigned HOST_WIDE_INT,
65 unsigned HOST_WIDE_INT,
66 rtx, scalar_int_mode, bool);
67 static void store_split_bit_field (rtx, opt_scalar_int_mode,
68 unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT,
70 poly_uint64, poly_uint64,
71 rtx, scalar_int_mode, bool);
72 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
73 unsigned HOST_WIDE_INT,
74 unsigned HOST_WIDE_INT, int, rtx,
75 machine_mode, machine_mode, bool, bool);
76 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
77 unsigned HOST_WIDE_INT,
78 unsigned HOST_WIDE_INT, rtx, int, bool);
79 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
80 unsigned HOST_WIDE_INT,
81 unsigned HOST_WIDE_INT, rtx, int, bool);
82 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
83 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
84 unsigned HOST_WIDE_INT,
85 unsigned HOST_WIDE_INT, int, bool);
86 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
87 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
90 /* Return a constant integer mask value of mode MODE with BITSIZE ones
91 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
92 The mask is truncated if necessary to the width of mode MODE. The
93 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
95 static inline rtx
96 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
98 return immed_wide_int_const
99 (wi::shifted_mask (bitpos, bitsize, complement,
100 GET_MODE_PRECISION (mode)), mode);
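/* For example, with 32-bit SImode, mask_rtx (SImode, 8, 4, false) is the
   constant 0xf00 (four ones starting at bit 8), and with COMPLEMENT set
   it is 0xfffff0ff, the complement truncated to the precision of the
   mode.  */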
103 /* Test whether a value is zero or a power of two. */
104 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
105 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
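/* For example, 8 & 7 == 0 and 0 & (0 - 1) == 0, so 8 and 0 both satisfy
   the test, while 6 & 5 == 4, so 6 does not: clearing the lowest set bit
   of a power of two (or of zero) leaves nothing behind.  */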
107 struct init_expmed_rtl
109 rtx reg;
110 rtx plus;
111 rtx neg;
112 rtx mult;
113 rtx sdiv;
114 rtx udiv;
115 rtx sdiv_32;
116 rtx smod_32;
117 rtx wide_mult;
118 rtx wide_lshr;
119 rtx wide_trunc;
120 rtx shift;
121 rtx shift_mult;
122 rtx shift_add;
123 rtx shift_sub0;
124 rtx shift_sub1;
125 rtx zext;
126 rtx trunc;
128 rtx pow2[MAX_BITS_PER_WORD];
129 rtx cint[MAX_BITS_PER_WORD];
132 static void
133 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
134 scalar_int_mode from_mode, bool speed)
136 int to_size, from_size;
137 rtx which;
139 to_size = GET_MODE_PRECISION (to_mode);
140 from_size = GET_MODE_PRECISION (from_mode);
142 /* Most partial integer modes have a precision less than that of the
143 "full" integer mode required for their storage. For any mode that
144 does not, reduce its bit size by one here, for comparison purposes
145 only. */
146 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
147 && pow2p_hwi (to_size))
148 to_size --;
149 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
150 && pow2p_hwi (from_size))
151 from_size --;
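/* For example, if a target gave a partial mode such as PSImode the full
   32-bit precision of SImode, the adjustment above would make PSImode
   compare as narrower, so a PSImode-to-SImode conversion is still costed
   as an extension rather than a truncation.  */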
153 /* Assume cost of zero-extend and sign-extend is the same. */
154 which = (to_size < from_size ? all->trunc : all->zext);
156 PUT_MODE (all->reg, from_mode);
157 set_convert_cost (to_mode, from_mode, speed,
158 set_src_cost (which, to_mode, speed));
159 /* Restore all->reg's mode. */
160 PUT_MODE (all->reg, to_mode);
163 static void
164 init_expmed_one_mode (struct init_expmed_rtl *all,
165 machine_mode mode, int speed)
167 int m, n, mode_bitsize;
168 machine_mode mode_from;
170 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
172 PUT_MODE (all->reg, mode);
173 PUT_MODE (all->plus, mode);
174 PUT_MODE (all->neg, mode);
175 PUT_MODE (all->mult, mode);
176 PUT_MODE (all->sdiv, mode);
177 PUT_MODE (all->udiv, mode);
178 PUT_MODE (all->sdiv_32, mode);
179 PUT_MODE (all->smod_32, mode);
180 PUT_MODE (all->wide_trunc, mode);
181 PUT_MODE (all->shift, mode);
182 PUT_MODE (all->shift_mult, mode);
183 PUT_MODE (all->shift_add, mode);
184 PUT_MODE (all->shift_sub0, mode);
185 PUT_MODE (all->shift_sub1, mode);
186 PUT_MODE (all->zext, mode);
187 PUT_MODE (all->trunc, mode);
189 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
190 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
191 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
192 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
193 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
195 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
196 <= 2 * add_cost (speed, mode)));
197 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
198 <= 4 * add_cost (speed, mode)));
200 set_shift_cost (speed, mode, 0, 0);
202 int cost = add_cost (speed, mode);
203 set_shiftadd_cost (speed, mode, 0, cost);
204 set_shiftsub0_cost (speed, mode, 0, cost);
205 set_shiftsub1_cost (speed, mode, 0, cost);
208 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
209 for (m = 1; m < n; m++)
211 XEXP (all->shift, 1) = all->cint[m];
212 XEXP (all->shift_mult, 1) = all->pow2[m];
214 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
215 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
216 speed));
217 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
218 speed));
219 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
220 speed));
223 scalar_int_mode int_mode_to;
224 if (is_a <scalar_int_mode> (mode, &int_mode_to))
226 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
227 mode_from = (machine_mode)(mode_from + 1))
228 init_expmed_one_conv (all, int_mode_to,
229 as_a <scalar_int_mode> (mode_from), speed);
231 scalar_int_mode wider_mode;
232 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
233 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
235 PUT_MODE (all->reg, mode);
236 PUT_MODE (all->zext, wider_mode);
237 PUT_MODE (all->wide_mult, wider_mode);
238 PUT_MODE (all->wide_lshr, wider_mode);
239 XEXP (all->wide_lshr, 1)
240 = gen_int_shift_amount (wider_mode, mode_bitsize);
242 set_mul_widen_cost (speed, wider_mode,
243 set_src_cost (all->wide_mult, wider_mode, speed));
244 set_mul_highpart_cost (speed, int_mode_to,
245 set_src_cost (all->wide_trunc,
246 int_mode_to, speed));
251 void
252 init_expmed (void)
254 struct init_expmed_rtl all;
255 machine_mode mode = QImode;
256 int m, speed;
258 memset (&all, 0, sizeof all);
259 for (m = 1; m < MAX_BITS_PER_WORD; m++)
261 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
262 all.cint[m] = GEN_INT (m);
265 /* Avoid using hard regs in ways which may be unsupported. */
266 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
267 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
268 all.neg = gen_rtx_NEG (mode, all.reg);
269 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
270 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
271 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
272 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
273 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
274 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
275 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
276 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
277 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
278 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
279 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
280 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
281 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
282 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
283 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
285 for (speed = 0; speed < 2; speed++)
287 crtl->maybe_hot_insn_p = speed;
288 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
290 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
291 mode = (machine_mode)(mode + 1))
292 init_expmed_one_mode (&all, mode, speed);
294 if (MIN_MODE_PARTIAL_INT != VOIDmode)
295 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
296 mode = (machine_mode)(mode + 1))
297 init_expmed_one_mode (&all, mode, speed);
299 if (MIN_MODE_VECTOR_INT != VOIDmode)
300 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
301 mode = (machine_mode)(mode + 1))
302 init_expmed_one_mode (&all, mode, speed);
305 if (alg_hash_used_p ())
307 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
308 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
310 else
311 set_alg_hash_used_p (true);
312 default_rtl_profile ();
314 ggc_free (all.trunc);
315 ggc_free (all.shift_sub1);
316 ggc_free (all.shift_sub0);
317 ggc_free (all.shift_add);
318 ggc_free (all.shift_mult);
319 ggc_free (all.shift);
320 ggc_free (all.wide_trunc);
321 ggc_free (all.wide_lshr);
322 ggc_free (all.wide_mult);
323 ggc_free (all.zext);
324 ggc_free (all.smod_32);
325 ggc_free (all.sdiv_32);
326 ggc_free (all.udiv);
327 ggc_free (all.sdiv);
328 ggc_free (all.mult);
329 ggc_free (all.neg);
330 ggc_free (all.plus);
331 ggc_free (all.reg);
334 /* Return an rtx representing minus the value of X.
335 MODE is the intended mode of the result,
336 useful if X is a CONST_INT. */
338 rtx
339 negate_rtx (machine_mode mode, rtx x)
341 rtx result = simplify_unary_operation (NEG, mode, x, mode);
343 if (result == 0)
344 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
346 return result;
349 /* Whether reverse storage order is supported on the target. */
350 static int reverse_storage_order_supported = -1;
352 /* Check whether reverse storage order is supported on the target. */
354 static void
355 check_reverse_storage_order_support (void)
357 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
359 reverse_storage_order_supported = 0;
360 sorry ("reverse scalar storage order");
362 else
363 reverse_storage_order_supported = 1;
366 /* Whether reverse FP storage order is supported on the target. */
367 static int reverse_float_storage_order_supported = -1;
369 /* Check whether reverse FP storage order is supported on the target. */
371 static void
372 check_reverse_float_storage_order_support (void)
374 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
376 reverse_float_storage_order_supported = 0;
377 sorry ("reverse floating-point scalar storage order");
379 else
380 reverse_float_storage_order_supported = 1;
383 /* Return an rtx representing value of X with reverse storage order.
384 MODE is the intended mode of the result,
385 useful if X is a CONST_INT. */
387 rtx
388 flip_storage_order (machine_mode mode, rtx x)
390 scalar_int_mode int_mode;
391 rtx result;
393 if (mode == QImode)
394 return x;
396 if (COMPLEX_MODE_P (mode))
398 rtx real = read_complex_part (x, false);
399 rtx imag = read_complex_part (x, true);
401 real = flip_storage_order (GET_MODE_INNER (mode), real);
402 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
404 return gen_rtx_CONCAT (mode, real, imag);
407 if (UNLIKELY (reverse_storage_order_supported < 0))
408 check_reverse_storage_order_support ();
410 if (!is_a <scalar_int_mode> (mode, &int_mode))
412 if (FLOAT_MODE_P (mode)
413 && UNLIKELY (reverse_float_storage_order_supported < 0))
414 check_reverse_float_storage_order_support ();
416 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
417 || !targetm.scalar_mode_supported_p (int_mode))
419 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
420 return x;
422 x = gen_lowpart (int_mode, x);
425 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
426 if (result == 0)
427 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
429 if (int_mode != mode)
430 result = gen_lowpart (mode, result);
432 return result;
435 /* If MODE is set, adjust bitfield memory MEM so that it points to the
436 first unit of mode MODE that contains a bitfield of size BITSIZE at
437 bit position BITNUM. If MODE is not set, return a BLKmode reference
438 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
439 of the field within the new memory. */
441 static rtx
442 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
443 unsigned HOST_WIDE_INT bitsize,
444 unsigned HOST_WIDE_INT bitnum,
445 unsigned HOST_WIDE_INT *new_bitnum)
447 scalar_int_mode imode;
448 if (mode.exists (&imode))
450 unsigned int unit = GET_MODE_BITSIZE (imode);
451 *new_bitnum = bitnum % unit;
452 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
453 return adjust_bitfield_address (mem, imode, offset);
455 else
457 *new_bitnum = bitnum % BITS_PER_UNIT;
458 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
459 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
460 / BITS_PER_UNIT);
461 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
465 /* The caller wants to perform insertion or extraction PATTERN on a
466 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
467 BITREGION_START and BITREGION_END are as for store_bit_field
468 and FIELDMODE is the natural mode of the field.
470 Search for a mode that is compatible with the memory access
471 restrictions and (where applicable) with a register insertion or
472 extraction. Return the new memory on success, storing the adjusted
473 bit position in *NEW_BITNUM. Return null otherwise. */
475 static rtx
476 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
477 rtx op0, HOST_WIDE_INT bitsize,
478 HOST_WIDE_INT bitnum,
479 poly_uint64 bitregion_start,
480 poly_uint64 bitregion_end,
481 machine_mode fieldmode,
482 unsigned HOST_WIDE_INT *new_bitnum)
484 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
485 bitregion_end, MEM_ALIGN (op0),
486 MEM_VOLATILE_P (op0));
487 scalar_int_mode best_mode;
488 if (iter.next_mode (&best_mode))
490 /* We can use a memory in BEST_MODE. See whether this is true for
491 any wider modes. All other things being equal, we prefer to
492 use the widest mode possible because it tends to expose more
493 CSE opportunities. */
494 if (!iter.prefer_smaller_modes ())
496 /* Limit the search to the mode required by the corresponding
497 register insertion or extraction instruction, if any. */
498 scalar_int_mode limit_mode = word_mode;
499 extraction_insn insn;
500 if (get_best_reg_extraction_insn (&insn, pattern,
501 GET_MODE_BITSIZE (best_mode),
502 fieldmode))
503 limit_mode = insn.field_mode;
505 scalar_int_mode wider_mode;
506 while (iter.next_mode (&wider_mode)
507 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
508 best_mode = wider_mode;
510 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
511 new_bitnum);
513 return NULL_RTX;
516 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
517 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
518 offset is then BITNUM / BITS_PER_UNIT. */
520 static bool
521 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
522 machine_mode struct_mode)
524 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
525 if (BYTES_BIG_ENDIAN)
526 return (multiple_p (bitnum, BITS_PER_UNIT)
527 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
528 || multiple_p (bitnum + bitsize,
529 regsize * BITS_PER_UNIT)));
530 else
531 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
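/* For example, with 64-bit DImode and an 8-byte REGMODE_NATURAL_SIZE, a
   32-bit field at bit 0 is a lowpart on a little-endian target, whereas
   on a big-endian target the lowpart is the 32-bit field at bit 32, the
   one that ends at GET_MODE_BITSIZE (DImode).  */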
534 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
535 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
536 Return false if the access would touch memory outside the range
537 BITREGION_START to BITREGION_END for conformance to the C++ memory
538 model. */
540 static bool
541 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
542 unsigned HOST_WIDE_INT bitnum,
543 scalar_int_mode fieldmode,
544 poly_uint64 bitregion_start,
545 poly_uint64 bitregion_end)
547 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
549 /* -fstrict-volatile-bitfields must be enabled and we must have a
550 volatile MEM. */
551 if (!MEM_P (op0)
552 || !MEM_VOLATILE_P (op0)
553 || flag_strict_volatile_bitfields <= 0)
554 return false;
556 /* The bit size must not be larger than the field mode, and
557 the field mode must not be larger than a word. */
558 if (bitsize > modesize || modesize > BITS_PER_WORD)
559 return false;
561 /* Check for cases of unaligned fields that must be split. */
562 if (bitnum % modesize + bitsize > modesize)
563 return false;
565 /* The memory must be sufficiently aligned for a MODESIZE access.
566 This condition guarantees that the memory access will not
567 touch anything after the end of the structure. */
568 if (MEM_ALIGN (op0) < modesize)
569 return false;
571 /* Check for cases where the C++ memory model applies. */
572 if (maybe_ne (bitregion_end, 0U)
573 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
574 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
575 bitregion_end)))
576 return false;
578 return true;
581 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
582 bit number BITNUM can be treated as a simple value of mode MODE.
583 Store the byte offset in *BYTENUM if so. */
585 static bool
586 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
587 machine_mode mode, poly_uint64 *bytenum)
589 return (MEM_P (op0)
590 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
591 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
592 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
593 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
594 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
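/* For example, a 32-bit SImode field starting at bit 64 of a BLKmode MEM
   is a simple SImode value at byte offset 8, provided unaligned SImode
   accesses are not slow on the target or the MEM is sufficiently
   aligned.  */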
597 /* Try to use instruction INSV to store VALUE into a field of OP0.
598 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
599 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
600 are as for store_bit_field. */
602 static bool
603 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
604 opt_scalar_int_mode op0_mode,
605 unsigned HOST_WIDE_INT bitsize,
606 unsigned HOST_WIDE_INT bitnum,
607 rtx value, scalar_int_mode value_mode)
609 class expand_operand ops[4];
610 rtx value1;
611 rtx xop0 = op0;
612 rtx_insn *last = get_last_insn ();
613 bool copy_back = false;
615 scalar_int_mode op_mode = insv->field_mode;
616 unsigned int unit = GET_MODE_BITSIZE (op_mode);
617 if (bitsize == 0 || bitsize > unit)
618 return false;
620 if (MEM_P (xop0))
621 /* Get a reference to the first byte of the field. */
622 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
623 &bitnum);
624 else
626 /* Convert from counting within OP0 to counting in OP_MODE. */
627 if (BYTES_BIG_ENDIAN)
628 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
630 /* If xop0 is a register, we need it in OP_MODE
631 to make it acceptable to the format of insv. */
632 if (GET_CODE (xop0) == SUBREG)
634 /* If such a SUBREG can't be created, give up. */
635 if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
636 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
637 return false;
638 /* We can't just change the mode, because this might clobber op0,
639 and we will need the original value of op0 if insv fails. */
640 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
641 SUBREG_BYTE (xop0));
643 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
644 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
647 /* If the destination is a paradoxical subreg such that we need a
648 truncate to the inner mode, perform the insertion on a temporary and
649 truncate the result to the original destination. Note that we can't
650 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
651 X) 0)) is (reg:N X). */
652 if (GET_CODE (xop0) == SUBREG
653 && REG_P (SUBREG_REG (xop0))
654 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
655 op_mode))
657 rtx tem = gen_reg_rtx (op_mode);
658 emit_move_insn (tem, xop0);
659 xop0 = tem;
660 copy_back = true;
663 /* There is a similar overflow check at the start of store_bit_field_1,
664 but it only handles the case where the field lies completely outside
665 the register. There are also cases where the field lies only partly
666 in the register, and we need to adjust BITSIZE for that partial
667 overflow. Without this fix, pr48335-2.c on big-endian targets would
668 break on architectures that have bit-insert instructions, such as arm
669 and aarch64. */
670 if (bitsize + bitnum > unit && bitnum < unit)
672 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
673 "destination object, data truncated into %wu-bit",
674 bitsize, unit - bitnum);
675 bitsize = unit - bitnum;
678 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
679 "backwards" from the size of the unit we are inserting into.
680 Otherwise, we count bits from the most significant on a
681 BYTES/BITS_BIG_ENDIAN machine. */
683 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
684 bitnum = unit - bitsize - bitnum;
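/* For example, for a 32-bit OP_MODE, an 8-bit field at bit 4 is
   renumbered to bit 32 - 8 - 4 = 20 when the insv pattern counts bits
   from the other end.  */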
686 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
687 value1 = value;
688 if (value_mode != op_mode)
690 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
692 rtx tmp;
693 /* Optimization: Don't bother really extending VALUE
694 if it has all the bits we will actually use. However,
695 if we must narrow it, be sure we do it correctly. */
697 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
699 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
700 if (! tmp)
701 tmp = simplify_gen_subreg (op_mode,
702 force_reg (value_mode, value1),
703 value_mode, 0);
705 else
707 tmp = gen_lowpart_if_possible (op_mode, value1);
708 if (! tmp)
709 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
711 value1 = tmp;
713 else if (CONST_INT_P (value))
714 value1 = gen_int_mode (INTVAL (value), op_mode);
715 else
716 /* Parse phase is supposed to make VALUE's data type
717 match that of the component reference, which is a type
718 at least as wide as the field; so VALUE should have
719 a mode that corresponds to that type. */
720 gcc_assert (CONSTANT_P (value));
723 create_fixed_operand (&ops[0], xop0);
724 create_integer_operand (&ops[1], bitsize);
725 create_integer_operand (&ops[2], bitnum);
726 create_input_operand (&ops[3], value1, op_mode);
727 if (maybe_expand_insn (insv->icode, 4, ops))
729 if (copy_back)
730 convert_move (op0, xop0, true);
731 return true;
733 delete_insns_since (last);
734 return false;
737 /* A subroutine of store_bit_field, with the same arguments. Return true
738 if the operation could be implemented.
740 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
741 no other way of implementing the operation. If FALLBACK_P is false,
742 return false instead.
744 If UNDEFINED_P is true then STR_RTX is undefined and may be set using
745 a subreg instead. */
747 static bool
748 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
749 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
750 machine_mode fieldmode,
751 rtx value, bool reverse, bool fallback_p, bool undefined_p)
753 rtx op0 = str_rtx;
755 while (GET_CODE (op0) == SUBREG)
757 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
758 op0 = SUBREG_REG (op0);
761 /* No action is needed if the target is a register and if the field
762 lies completely outside that register. This can occur if the source
763 code contains an out-of-bounds access to a small array. */
764 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
765 return true;
767 /* Use vec_set patterns for inserting parts of vectors whenever
768 available. */
769 machine_mode outermode = GET_MODE (op0);
770 scalar_mode innermode = GET_MODE_INNER (outermode);
771 poly_uint64 pos;
772 if (VECTOR_MODE_P (outermode)
773 && !MEM_P (op0)
774 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
775 && fieldmode == innermode
776 && known_eq (bitsize, GET_MODE_PRECISION (innermode))
777 && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
779 class expand_operand ops[3];
780 enum insn_code icode = optab_handler (vec_set_optab, outermode);
782 create_fixed_operand (&ops[0], op0);
783 create_input_operand (&ops[1], value, innermode);
784 create_integer_operand (&ops[2], pos);
785 if (maybe_expand_insn (icode, 3, ops))
786 return true;
789 /* If the target is a register, then overwriting the entire object or
790 storing a full-word or multi-word field can be done with just a SUBREG. */
791 if (!MEM_P (op0)
792 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
794 /* Use the subreg machinery either to narrow OP0 to the required
795 words or to cope with mode punning between equal-sized modes.
796 In the latter case, use subreg on the rhs side, not lhs. */
797 rtx sub;
798 poly_uint64 bytenum;
799 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
800 if (known_eq (bitnum, 0U)
801 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
803 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
804 if (sub)
806 if (reverse)
807 sub = flip_storage_order (GET_MODE (op0), sub);
808 emit_move_insn (op0, sub);
809 return true;
812 else if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
813 && (undefined_p
814 || (multiple_p (bitnum, regsize * BITS_PER_UNIT)
815 && multiple_p (bitsize, regsize * BITS_PER_UNIT)))
816 && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
818 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), bytenum);
819 if (sub)
821 if (reverse)
822 value = flip_storage_order (fieldmode, value);
823 emit_move_insn (sub, value);
824 return true;
829 /* If the target is memory, storing any naturally aligned field can be
830 done with a simple store. For targets that support fast unaligned
831 memory, any naturally sized, unit aligned field can be done directly. */
832 poly_uint64 bytenum;
833 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
835 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
836 if (reverse)
837 value = flip_storage_order (fieldmode, value);
838 emit_move_insn (op0, value);
839 return true;
842 /* It's possible we'll need to handle other cases here for
843 polynomial bitnum and bitsize. */
845 /* From here on we need to be looking at a fixed-size insertion. */
846 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
847 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
849 /* Make sure we are playing with integral modes. Pun with subregs
850 if we aren't. This must come after the entire register case above,
851 since that case is valid for any mode. The following cases are only
852 valid for integral modes. */
853 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
854 scalar_int_mode imode;
855 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
857 if (MEM_P (op0))
858 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
859 0, MEM_SIZE (op0));
860 else if (!op0_mode.exists ())
862 if (ibitnum == 0
863 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
864 && MEM_P (value)
865 && !reverse)
867 value = adjust_address (value, GET_MODE (op0), 0);
868 emit_move_insn (op0, value);
869 return true;
871 if (!fallback_p)
872 return false;
873 rtx temp = assign_stack_temp (GET_MODE (op0),
874 GET_MODE_SIZE (GET_MODE (op0)));
875 emit_move_insn (temp, op0);
876 store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
877 reverse, fallback_p, undefined_p);
878 emit_move_insn (op0, temp);
879 return true;
881 else
882 op0 = gen_lowpart (op0_mode.require (), op0);
885 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
886 bitregion_start, bitregion_end,
887 fieldmode, value, reverse, fallback_p);
890 /* Subroutine of store_bit_field_1, with the same arguments, except
891 that BITSIZE and BITNUM are constant. Handle cases specific to
892 integral modes. If OP0_MODE is defined, it is the mode of OP0,
893 otherwise OP0 is a BLKmode MEM. */
895 static bool
896 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
897 unsigned HOST_WIDE_INT bitsize,
898 unsigned HOST_WIDE_INT bitnum,
899 poly_uint64 bitregion_start,
900 poly_uint64 bitregion_end,
901 machine_mode fieldmode,
902 rtx value, bool reverse, bool fallback_p)
904 /* Storing an lsb-aligned field in a register
905 can be done with a movstrict instruction. */
907 if (!MEM_P (op0)
908 && !reverse
909 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
910 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
911 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
913 class expand_operand ops[2];
914 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
915 rtx arg0 = op0;
916 unsigned HOST_WIDE_INT subreg_off;
918 if (GET_CODE (arg0) == SUBREG)
920 /* Else we've got some float mode source being extracted into
921 a different float mode destination -- this combination of
922 subregs results in Severe Tire Damage. */
923 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
924 || GET_MODE_CLASS (fieldmode) == MODE_INT
925 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
926 arg0 = SUBREG_REG (arg0);
929 subreg_off = bitnum / BITS_PER_UNIT;
930 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
931 /* STRICT_LOW_PART must have a non-paradoxical subreg as
932 operand. */
933 && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
935 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
937 create_fixed_operand (&ops[0], arg0);
938 /* Shrink the source operand to FIELDMODE. */
939 create_convert_operand_to (&ops[1], value, fieldmode, false);
940 if (maybe_expand_insn (icode, 2, ops))
941 return true;
945 /* Handle fields bigger than a word. */
947 if (bitsize > BITS_PER_WORD)
949 /* Here we transfer the words of the field
950 in the order least significant first.
951 This is because the most significant word is the one which may
952 be less than full.
953 However, only do that if the value is not BLKmode. */
955 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
956 const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
957 rtx_insn *last;
959 /* This is the mode we must force value to, so that there will be enough
960 subwords to extract. Note that fieldmode will often (always?) be
961 VOIDmode, because that is what store_field uses to indicate that this
962 is a bit field, but passing VOIDmode to operand_subword_force
963 is not allowed.
965 The mode must be fixed-size, since insertions into variable-sized
966 objects are meant to be handled before calling this function. */
967 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
968 if (value_mode == VOIDmode)
969 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
971 last = get_last_insn ();
972 for (int i = 0; i < nwords; i++)
974 /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
975 except maybe for the last iteration. */
976 const unsigned HOST_WIDE_INT new_bitsize
977 = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
978 /* Bit offset from the starting bit number in the target. */
979 const unsigned int bit_offset
980 = backwards ^ reverse
981 ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
982 : i * BITS_PER_WORD;
983 /* Starting word number in the value. */
984 const unsigned int wordnum
985 = backwards
986 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
987 : i;
988 /* The chunk of the value in word_mode. We use bit-field extraction
989 in BLKmode to handle unaligned memory references and to shift the
990 last chunk right on big-endian machines if need be. */
991 rtx value_word
992 = fieldmode == BLKmode
993 ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
994 1, NULL_RTX, word_mode, word_mode, false,
995 NULL)
996 : operand_subword_force (value, wordnum, value_mode);
998 if (!store_bit_field_1 (op0, new_bitsize,
999 bitnum + bit_offset,
1000 bitregion_start, bitregion_end,
1001 word_mode,
1002 value_word, reverse, fallback_p, false))
1004 delete_insns_since (last);
1005 return false;
1008 return true;
1011 /* If VALUE has a floating-point or complex mode, access it as an
1012 integer of the corresponding size. This can occur on a machine
1013 with 64 bit registers that uses SFmode for float. It can also
1014 occur for unaligned float or complex fields. */
1015 rtx orig_value = value;
1016 scalar_int_mode value_mode;
1017 if (GET_MODE (value) == VOIDmode)
1018 /* By this point we've dealt with values that are bigger than a word,
1019 so word_mode is a conservatively correct choice. */
1020 value_mode = word_mode;
1021 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1023 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1024 value = gen_reg_rtx (value_mode);
1025 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1028 /* If OP0 is a multi-word register, narrow it to the affected word.
1029 If the region spans two words, defer to store_split_bit_field.
1030 Don't do this if op0 is a single hard register wider than word
1031 such as a float or vector register. */
1032 if (!MEM_P (op0)
1033 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1034 && (!REG_P (op0)
1035 || !HARD_REGISTER_P (op0)
1036 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1038 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1040 if (!fallback_p)
1041 return false;
1043 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1044 bitregion_start, bitregion_end,
1045 value, value_mode, reverse);
1046 return true;
1048 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1049 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1050 gcc_assert (op0);
1051 op0_mode = word_mode;
1052 bitnum %= BITS_PER_WORD;
1055 /* From here on we can assume that the field to be stored in fits
1056 within a word. If the destination is a register, it too fits
1057 in a word. */
1059 extraction_insn insv;
1060 if (!MEM_P (op0)
1061 && !reverse
1062 && get_best_reg_extraction_insn (&insv, EP_insv,
1063 GET_MODE_BITSIZE (op0_mode.require ()),
1064 fieldmode)
1065 && store_bit_field_using_insv (&insv, op0, op0_mode,
1066 bitsize, bitnum, value, value_mode))
1067 return true;
1069 /* If OP0 is a memory, try copying it to a register and seeing if a
1070 cheap register alternative is available. */
1071 if (MEM_P (op0) && !reverse)
1073 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1074 fieldmode)
1075 && store_bit_field_using_insv (&insv, op0, op0_mode,
1076 bitsize, bitnum, value, value_mode))
1077 return true;
1079 rtx_insn *last = get_last_insn ();
1081 /* Try loading part of OP0 into a register, inserting the bitfield
1082 into that, and then copying the result back to OP0. */
1083 unsigned HOST_WIDE_INT bitpos;
1084 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1085 bitregion_start, bitregion_end,
1086 fieldmode, &bitpos);
1087 if (xop0)
1089 rtx tempreg = copy_to_reg (xop0);
1090 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1091 bitregion_start, bitregion_end,
1092 fieldmode, orig_value, reverse, false, false))
1094 emit_move_insn (xop0, tempreg);
1095 return true;
1097 delete_insns_since (last);
1101 if (!fallback_p)
1102 return false;
1104 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1105 bitregion_end, value, value_mode, reverse);
1106 return true;
1109 /* Generate code to store value from rtx VALUE
1110 into a bit-field within structure STR_RTX
1111 containing BITSIZE bits starting at bit BITNUM.
1113 BITREGION_START is bitpos of the first bitfield in this region.
1114 BITREGION_END is the bitpos of the ending bitfield in this region.
1115 These two fields are 0, if the C++ memory model does not apply,
1116 or we are not interested in keeping track of bitfield regions.
1118 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1120 If REVERSE is true, the store is to be done in reverse order.
1122 If UNDEFINED_P is true then STR_RTX is currently undefined. */
1124 void
1125 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1126 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1127 machine_mode fieldmode,
1128 rtx value, bool reverse, bool undefined_p)
1130 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1131 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1132 scalar_int_mode int_mode;
1133 if (bitsize.is_constant (&ibitsize)
1134 && bitnum.is_constant (&ibitnum)
1135 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1136 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1137 bitregion_start, bitregion_end))
1139 /* Storing of a full word can be done with a simple store.
1140 We know here that the field can be accessed with one single
1141 instruction. For targets that support unaligned memory,
1142 an unaligned access may be necessary. */
1143 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1145 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1146 ibitnum / BITS_PER_UNIT);
1147 if (reverse)
1148 value = flip_storage_order (int_mode, value);
1149 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1150 emit_move_insn (str_rtx, value);
1152 else
1154 rtx temp;
1156 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1157 ibitnum, &ibitnum);
1158 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1159 temp = copy_to_reg (str_rtx);
1160 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1161 int_mode, value, reverse, true, undefined_p))
1162 gcc_unreachable ();
1164 emit_move_insn (str_rtx, temp);
1167 return;
1170 /* Under the C++0x memory model, we must not touch bits outside the
1171 bit region. Adjust the address to start at the beginning of the
1172 bit region. */
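/* For example, if BITREGION_START is 32, the address below is advanced
   by 4 bytes and BITNUM is reduced by 32, so that the store can never
   touch bytes that precede the bit region (assuming 8-bit units).  */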
1173 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1175 scalar_int_mode best_mode;
1176 machine_mode addr_mode = VOIDmode;
1178 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1179 bitnum -= bitregion_start;
1180 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1181 bitregion_end -= bitregion_start;
1182 bitregion_start = 0;
1183 if (bitsize.is_constant (&ibitsize)
1184 && bitnum.is_constant (&ibitnum)
1185 && get_best_mode (ibitsize, ibitnum,
1186 bitregion_start, bitregion_end,
1187 MEM_ALIGN (str_rtx), INT_MAX,
1188 MEM_VOLATILE_P (str_rtx), &best_mode))
1189 addr_mode = best_mode;
1190 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1191 offset, size);
1194 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1195 bitregion_start, bitregion_end,
1196 fieldmode, value, reverse, true, undefined_p))
1197 gcc_unreachable ();
1200 /* Use shifts and boolean operations to store VALUE into a bit field of
1201 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1202 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1203 the mode of VALUE.
1205 If REVERSE is true, the store is to be done in reverse order. */
1207 static void
1208 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1209 unsigned HOST_WIDE_INT bitsize,
1210 unsigned HOST_WIDE_INT bitnum,
1211 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1212 rtx value, scalar_int_mode value_mode, bool reverse)
1214 /* There is a case not handled here:
1215 a structure with a known alignment of just a halfword
1216 and a field split across two aligned halfwords within the structure.
1217 Or likewise a structure with a known alignment of just a byte
1218 and a field split across two bytes.
1219 Such cases are not supposed to be able to occur. */
1221 scalar_int_mode best_mode;
1222 if (MEM_P (op0))
1224 unsigned int max_bitsize = BITS_PER_WORD;
1225 scalar_int_mode imode;
1226 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1227 max_bitsize = GET_MODE_BITSIZE (imode);
1229 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1230 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1231 &best_mode))
1233 /* The only way this should occur is if the field spans word
1234 boundaries. */
1235 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1236 bitregion_start, bitregion_end,
1237 value, value_mode, reverse);
1238 return;
1241 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1243 else
1244 best_mode = op0_mode.require ();
1246 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1247 value, value_mode, reverse);
1250 /* Helper function for store_fixed_bit_field, stores
1251 the bit field always using MODE, which is the mode of OP0. The other
1252 arguments are as for store_fixed_bit_field. */
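/* As an illustration, storing the 3-bit value 0b101 at lsb-relative bit 4
   of a 32-bit word amounts to
       temp = (op0 & ~0x70) | (0b101 << 4);
       op0 = temp;
   where the AND is skipped when the value is all-ones and the IOR is
   skipped when it is zero.  */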
1254 static void
1255 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1256 unsigned HOST_WIDE_INT bitsize,
1257 unsigned HOST_WIDE_INT bitnum,
1258 rtx value, scalar_int_mode value_mode, bool reverse)
1260 rtx temp;
1261 int all_zero = 0;
1262 int all_one = 0;
1264 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1265 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1267 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1268 /* BITNUM is the distance between our msb
1269 and that of the containing datum.
1270 Convert it to the distance from the lsb. */
1271 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1273 /* Now BITNUM is always the distance between our lsb
1274 and that of OP0. */
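/* For example, in 32-bit SImode a 5-bit field whose msb-relative BITNUM
   is 3 ends up with an lsb-relative BITNUM of 32 - 5 - 3 = 24.  */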
1276 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1277 we must first convert its mode to MODE. */
1279 if (CONST_INT_P (value))
1281 unsigned HOST_WIDE_INT v = UINTVAL (value);
1283 if (bitsize < HOST_BITS_PER_WIDE_INT)
1284 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1286 if (v == 0)
1287 all_zero = 1;
1288 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1289 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1290 || (bitsize == HOST_BITS_PER_WIDE_INT
1291 && v == HOST_WIDE_INT_M1U))
1292 all_one = 1;
1294 value = lshift_value (mode, v, bitnum);
1296 else
1298 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1299 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1301 if (value_mode != mode)
1302 value = convert_to_mode (mode, value, 1);
1304 if (must_and)
1305 value = expand_binop (mode, and_optab, value,
1306 mask_rtx (mode, 0, bitsize, 0),
1307 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1308 if (bitnum > 0)
1309 value = expand_shift (LSHIFT_EXPR, mode, value,
1310 bitnum, NULL_RTX, 1);
1313 if (reverse)
1314 value = flip_storage_order (mode, value);
1316 /* Now clear the chosen bits in OP0,
1317 except that if VALUE is -1 we need not bother. */
1318 /* We keep the intermediates in registers to allow CSE to combine
1319 consecutive bitfield assignments. */
1321 temp = force_reg (mode, op0);
1323 if (! all_one)
1325 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1326 if (reverse)
1327 mask = flip_storage_order (mode, mask);
1328 temp = expand_binop (mode, and_optab, temp, mask,
1329 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1330 temp = force_reg (mode, temp);
1333 /* Now logical-or VALUE into OP0, unless it is zero. */
1335 if (! all_zero)
1337 temp = expand_binop (mode, ior_optab, temp, value,
1338 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1339 temp = force_reg (mode, temp);
1342 if (op0 != temp)
1344 op0 = copy_rtx (op0);
1345 emit_move_insn (op0, temp);
1349 /* Store a bit field that is split across multiple accessible memory objects.
1351 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1352 BITSIZE is the field width; BITPOS the position of its first bit
1353 (within the word).
1354 VALUE is the value to store, which has mode VALUE_MODE.
1355 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1356 a BLKmode MEM.
1358 If REVERSE is true, the store is to be done in reverse order.
1360 This does not yet handle fields wider than BITS_PER_WORD. */
1362 static void
1363 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1364 unsigned HOST_WIDE_INT bitsize,
1365 unsigned HOST_WIDE_INT bitpos,
1366 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1367 rtx value, scalar_int_mode value_mode, bool reverse)
1369 unsigned int unit, total_bits, bitsdone = 0;
1371 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1372 much at a time. */
1373 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1374 unit = BITS_PER_WORD;
1375 else
1376 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1378 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1379 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1380 again, and we will mutually recurse forever. */
1381 if (MEM_P (op0) && op0_mode.exists ())
1382 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1384 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1385 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1386 that VALUE might be a floating-point constant. */
1387 if (CONSTANT_P (value) && !CONST_INT_P (value))
1389 rtx word = gen_lowpart_common (word_mode, value);
1391 if (word && (value != word))
1392 value = word;
1393 else
1394 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1395 value_mode = word_mode;
1398 total_bits = GET_MODE_BITSIZE (value_mode);
1400 while (bitsdone < bitsize)
1402 unsigned HOST_WIDE_INT thissize;
1403 unsigned HOST_WIDE_INT thispos;
1404 unsigned HOST_WIDE_INT offset;
1405 rtx part;
1407 offset = (bitpos + bitsdone) / unit;
1408 thispos = (bitpos + bitsdone) % unit;
1410 /* When region of bytes we can touch is restricted, decrease
1411 UNIT close to the end of the region as needed. If op0 is a REG
1412 or SUBREG of REG, don't do this, as there can't be data races
1413 on a register and we can expand shorter code in some cases. */
1414 if (maybe_ne (bitregion_end, 0U)
1415 && unit > BITS_PER_UNIT
1416 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1417 && !REG_P (op0)
1418 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1420 unit = unit / 2;
1421 continue;
1424 /* THISSIZE must not overrun a word boundary. Otherwise,
1425 store_fixed_bit_field will call us again, and we will mutually
1426 recurse forever. */
1427 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1428 thissize = MIN (thissize, unit - thispos);
1430 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1432 /* Fetch successively less significant portions. */
1433 if (CONST_INT_P (value))
1434 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1435 >> (bitsize - bitsdone - thissize))
1436 & ((HOST_WIDE_INT_1 << thissize) - 1));
1437 /* Likewise, but the source is little-endian. */
1438 else if (reverse)
1439 part = extract_fixed_bit_field (word_mode, value, value_mode,
1440 thissize,
1441 bitsize - bitsdone - thissize,
1442 NULL_RTX, 1, false);
1443 else
1444 /* The args are chosen so that the last part includes the
1445 lsb. Give extract_bit_field the value it needs (with
1446 endianness compensation) to fetch the piece we want. */
1447 part = extract_fixed_bit_field (word_mode, value, value_mode,
1448 thissize,
1449 total_bits - bitsize + bitsdone,
1450 NULL_RTX, 1, false);
1452 else
1454 /* Fetch successively more significant portions. */
1455 if (CONST_INT_P (value))
1456 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1457 >> bitsdone)
1458 & ((HOST_WIDE_INT_1 << thissize) - 1));
1459 /* Likewise, but the source is big-endian. */
1460 else if (reverse)
1461 part = extract_fixed_bit_field (word_mode, value, value_mode,
1462 thissize,
1463 total_bits - bitsdone - thissize,
1464 NULL_RTX, 1, false);
1465 else
1466 part = extract_fixed_bit_field (word_mode, value, value_mode,
1467 thissize, bitsdone, NULL_RTX,
1468 1, false);
1471 /* If OP0 is a register, then handle OFFSET here. */
1472 rtx op0_piece = op0;
1473 opt_scalar_int_mode op0_piece_mode = op0_mode;
1474 if (SUBREG_P (op0) || REG_P (op0))
1476 scalar_int_mode imode;
1477 if (op0_mode.exists (&imode)
1478 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1480 if (offset)
1481 op0_piece = const0_rtx;
1483 else
1485 op0_piece = operand_subword_force (op0,
1486 offset * unit / BITS_PER_WORD,
1487 GET_MODE (op0));
1488 op0_piece_mode = word_mode;
1490 offset &= BITS_PER_WORD / unit - 1;
1493 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1494 it is just an out-of-bounds access. Ignore it. */
1495 if (op0_piece != const0_rtx)
1496 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1497 offset * unit + thispos, bitregion_start,
1498 bitregion_end, part, word_mode, reverse);
1499 bitsdone += thissize;
1503 /* A subroutine of extract_bit_field_1 that converts return value X
1504 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1505 to extract_bit_field. */
1507 static rtx
1508 convert_extracted_bit_field (rtx x, machine_mode mode,
1509 machine_mode tmode, bool unsignedp)
1511 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1512 return x;
1514 /* If the x mode is not a scalar integral, first convert to the
1515 integer mode of that size and then access it as a floating-point
1516 value via a SUBREG. */
1517 if (!SCALAR_INT_MODE_P (tmode))
1519 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1520 x = convert_to_mode (int_mode, x, unsignedp);
1521 x = force_reg (int_mode, x);
1522 return gen_lowpart (tmode, x);
1525 return convert_to_mode (tmode, x, unsignedp);
1528 /* Try to use an ext(z)v pattern to extract a field from OP0.
1529 Return the extracted value on success, otherwise return null.
1530 EXTV describes the extraction instruction to use. If OP0_MODE
1531 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1532 The other arguments are as for extract_bit_field. */
1534 static rtx
1535 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1536 opt_scalar_int_mode op0_mode,
1537 unsigned HOST_WIDE_INT bitsize,
1538 unsigned HOST_WIDE_INT bitnum,
1539 int unsignedp, rtx target,
1540 machine_mode mode, machine_mode tmode)
1542 class expand_operand ops[4];
1543 rtx spec_target = target;
1544 rtx spec_target_subreg = 0;
1545 scalar_int_mode ext_mode = extv->field_mode;
1546 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1548 if (bitsize == 0 || unit < bitsize)
1549 return NULL_RTX;
1551 if (MEM_P (op0))
1552 /* Get a reference to the first byte of the field. */
1553 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1554 &bitnum);
1555 else
1557 /* Convert from counting within OP0 to counting in EXT_MODE. */
1558 if (BYTES_BIG_ENDIAN)
1559 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1561 /* If op0 is a register, we need it in EXT_MODE to make it
1562 acceptable to the format of ext(z)v. */
1563 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1564 return NULL_RTX;
1565 if (REG_P (op0) && op0_mode.require () != ext_mode)
1566 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1569 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1570 "backwards" from the size of the unit we are extracting from.
1571 Otherwise, we count bits from the most significant on a
1572 BYTES/BITS_BIG_ENDIAN machine. */
1574 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1575 bitnum = unit - bitsize - bitnum;
1577 if (target == 0)
1578 target = spec_target = gen_reg_rtx (tmode);
1580 if (GET_MODE (target) != ext_mode)
1582 rtx temp;
1583 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1584 between the mode of the extraction (EXT_MODE) and the target
1585 mode. Instead, create a temporary and use convert_move to set
1586 the target. */
1587 if (REG_P (target)
1588 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1589 && (temp = gen_lowpart_if_possible (ext_mode, target)))
1591 target = temp;
1592 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1593 spec_target_subreg = target;
1595 else
1596 target = gen_reg_rtx (ext_mode);
1599 create_output_operand (&ops[0], target, ext_mode);
1600 create_fixed_operand (&ops[1], op0);
1601 create_integer_operand (&ops[2], bitsize);
1602 create_integer_operand (&ops[3], bitnum);
1603 if (maybe_expand_insn (extv->icode, 4, ops))
1605 target = ops[0].value;
1606 if (target == spec_target)
1607 return target;
1608 if (target == spec_target_subreg)
1609 return spec_target;
1610 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1612 return NULL_RTX;
1615 /* See whether it would be valid to extract the part of OP0 with
1616 mode OP0_MODE described by BITNUM and BITSIZE into a value of
1617 mode MODE using a subreg operation.
1618 Return the subreg if so, otherwise return null. */
1620 static rtx
1621 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1622 machine_mode op0_mode,
1623 poly_uint64 bitsize, poly_uint64 bitnum)
1625 poly_uint64 bytenum;
1626 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1627 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1628 && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1629 && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1630 return simplify_gen_subreg (mode, op0, op0_mode, bytenum);
1631 return NULL_RTX;
1634 /* A subroutine of extract_bit_field, with the same arguments.
1635 If UNSIGNEDP is -1, the result need not be sign or zero extended.
1636 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1637 if we can find no other means of implementing the operation.
1638 If FALLBACK_P is false, return NULL instead. */
1640 static rtx
1641 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1642 int unsignedp, rtx target, machine_mode mode,
1643 machine_mode tmode, bool reverse, bool fallback_p,
1644 rtx *alt_rtl)
1646 rtx op0 = str_rtx;
1647 machine_mode mode1;
1649 if (tmode == VOIDmode)
1650 tmode = mode;
1652 while (GET_CODE (op0) == SUBREG)
1654 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1655 op0 = SUBREG_REG (op0);
1658 /* If we have an out-of-bounds access to a register, just return an
1659 uninitialized register of the required mode. This can occur if the
1660 source code contains an out-of-bounds access to a small array. */
1661 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1662 return gen_reg_rtx (tmode);
1664 if (REG_P (op0)
1665 && mode == GET_MODE (op0)
1666 && known_eq (bitnum, 0U)
1667 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1669 if (reverse)
1670 op0 = flip_storage_order (mode, op0);
1671 /* We're trying to extract a full register from itself. */
1672 return op0;
1675 /* First try to check for vector from vector extractions. */
1676 if (VECTOR_MODE_P (GET_MODE (op0))
1677 && !MEM_P (op0)
1678 && VECTOR_MODE_P (tmode)
1679 && known_eq (bitsize, GET_MODE_PRECISION (tmode))
1680 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1682 machine_mode new_mode = GET_MODE (op0);
1683 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1685 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1686 poly_uint64 nunits;
1687 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1688 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1689 || !related_vector_mode (tmode, inner_mode,
1690 nunits).exists (&new_mode)
1691 || maybe_ne (GET_MODE_SIZE (new_mode),
1692 GET_MODE_SIZE (GET_MODE (op0))))
1693 new_mode = VOIDmode;
1695 poly_uint64 pos;
1696 if (new_mode != VOIDmode
1697 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1698 != CODE_FOR_nothing)
1699 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1701 class expand_operand ops[3];
1702 machine_mode outermode = new_mode;
1703 machine_mode innermode = tmode;
1704 enum insn_code icode
1705 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1707 if (new_mode != GET_MODE (op0))
1708 op0 = gen_lowpart (new_mode, op0);
1709 create_output_operand (&ops[0], target, innermode);
1710 ops[0].target = 1;
1711 create_input_operand (&ops[1], op0, outermode);
1712 create_integer_operand (&ops[2], pos);
1713 if (maybe_expand_insn (icode, 3, ops))
1715 if (alt_rtl && ops[0].target)
1716 *alt_rtl = target;
1717 target = ops[0].value;
1718 if (GET_MODE (target) != mode)
1719 return gen_lowpart (tmode, target);
1720 return target;
1725 /* See if we can get a better vector mode before extracting. */
1726 if (VECTOR_MODE_P (GET_MODE (op0))
1727 && !MEM_P (op0)
1728 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1730 machine_mode new_mode;
1732 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1733 new_mode = MIN_MODE_VECTOR_FLOAT;
1734 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1735 new_mode = MIN_MODE_VECTOR_FRACT;
1736 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1737 new_mode = MIN_MODE_VECTOR_UFRACT;
1738 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1739 new_mode = MIN_MODE_VECTOR_ACCUM;
1740 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1741 new_mode = MIN_MODE_VECTOR_UACCUM;
1742 else
1743 new_mode = MIN_MODE_VECTOR_INT;
1745 FOR_EACH_MODE_FROM (new_mode, new_mode)
1746 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1747 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1748 && known_eq (bitsize, GET_MODE_UNIT_PRECISION (new_mode))
1749 && multiple_p (bitnum, GET_MODE_UNIT_PRECISION (new_mode))
1750 && targetm.vector_mode_supported_p (new_mode)
1751 && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1752 break;
1753 if (new_mode != VOIDmode)
1754 op0 = gen_lowpart (new_mode, op0);
1757 /* Use vec_extract patterns for extracting parts of vectors whenever
1758 available. If that fails, see whether the current modes and bitregion
1759 give a natural subreg. */
1760 machine_mode outermode = GET_MODE (op0);
1761 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1763 scalar_mode innermode = GET_MODE_INNER (outermode);
1765 enum insn_code icode
1766 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1768 poly_uint64 pos;
1769 if (icode != CODE_FOR_nothing
1770 && known_eq (bitsize, GET_MODE_PRECISION (innermode))
1771 && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
1773 class expand_operand ops[3];
1775 create_output_operand (&ops[0], target,
1776 insn_data[icode].operand[0].mode);
1777 ops[0].target = 1;
1778 create_input_operand (&ops[1], op0, outermode);
1779 create_integer_operand (&ops[2], pos);
1780 if (maybe_expand_insn (icode, 3, ops))
1782 if (alt_rtl && ops[0].target)
1783 *alt_rtl = target;
1784 target = ops[0].value;
1785 if (GET_MODE (target) != mode)
1786 return gen_lowpart (tmode, target);
1787 return target;
1790 /* Using subregs is useful if we're extracting one register vector
1791 from a multi-register vector. extract_bit_field_as_subreg checks
1792 for valid bitsize and bitnum, so we don't need to do that here. */
1793 if (VECTOR_MODE_P (mode))
1795 rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
1796 bitsize, bitnum);
1797 if (sub)
1798 return sub;
1802 /* Make sure we are playing with integral modes. Pun with subregs
1803 if we aren't. */
1804 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1805 scalar_int_mode imode;
1806 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1808 if (MEM_P (op0))
1809 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1810 0, MEM_SIZE (op0));
1811 else if (op0_mode.exists (&imode))
1813 op0 = gen_lowpart (imode, op0);
1815 /* If we got a SUBREG, force it into a register since we
1816 aren't going to be able to do another SUBREG on it. */
1817 if (GET_CODE (op0) == SUBREG)
1818 op0 = force_reg (imode, op0);
1820 else
1822 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1823 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1824 emit_move_insn (mem, op0);
1825 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1829 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1830 If that's wrong, the solution is to test for it and set TARGET to 0
1831 if needed. */
1833 /* Get the mode of the field to use for atomic access or subreg
1834 conversion. */
1835 if (!SCALAR_INT_MODE_P (tmode)
1836 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1837 mode1 = mode;
1838 gcc_assert (mode1 != BLKmode);
1840 /* Extraction of a full MODE1 value can be done with a subreg as long
1841 as the least significant bit of the value is the least significant
1842 bit of either OP0 or a word of OP0. */
1843 if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
1845 rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
1846 bitsize, bitnum);
1847 if (sub)
1848 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1851 /* Extraction of a full MODE1 value can be done with a load as long as
1852 the field is on a byte boundary and is sufficiently aligned. */
1853 poly_uint64 bytenum;
1854 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1856 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1857 if (reverse)
1858 op0 = flip_storage_order (mode1, op0);
1859 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1862 /* If we have a memory source and a non-constant bit offset, restrict
1863 the memory to the referenced bytes. This is a worst-case fallback
1864 but is useful for things like vector booleans. */
1865 if (MEM_P (op0) && !bitnum.is_constant ())
1867 bytenum = bits_to_bytes_round_down (bitnum);
1868 bitnum = num_trailing_bits (bitnum);
1869 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1870 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1871 op0_mode = opt_scalar_int_mode ();
1874 /* It's possible we'll need to handle other cases here for
1875 polynomial bitnum and bitsize. */
1877 /* From here on we need to be looking at a fixed-size extraction. */
1878 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1879 bitnum.to_constant (), unsignedp,
1880 target, mode, tmode, reverse, fallback_p);
1883 /* Subroutine of extract_bit_field_1, with the same arguments, except
1884 that BITSIZE and BITNUM are constant. Handle cases specific to
1885 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1886 otherwise OP0 is a BLKmode MEM. */
1888 static rtx
1889 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1890 unsigned HOST_WIDE_INT bitsize,
1891 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1892 rtx target, machine_mode mode, machine_mode tmode,
1893 bool reverse, bool fallback_p)
1895 /* Handle fields bigger than a word. */
1897 if (bitsize > BITS_PER_WORD)
1899 /* Here we transfer the words of the field
1900 in the order least significant first.
1901 This is because the most significant word is the one which may
1902 be less than full. */
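/* For example, a 40-bit field on a 32-bit-word target needs nwords == 2:
   one word supplies the low 32 bits of the field and the most significant
   word supplies only the remaining 8 bits.  */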
1904 const bool backwards = WORDS_BIG_ENDIAN;
1905 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1906 unsigned int i;
1907 rtx_insn *last;
1909 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1910 target = gen_reg_rtx (mode);
1912 /* In case we're about to clobber a base register or something
1913 (see gcc.c-torture/execute/20040625-1.c). */
1914 if (reg_mentioned_p (target, op0))
1915 target = gen_reg_rtx (mode);
1917 /* Indicate for flow that the entire target reg is being set. */
1918 emit_clobber (target);
1920 /* The mode must be fixed-size, since extract_bit_field_1 handles
1921 extractions from variable-sized objects before calling this
1922 function. */
1923 unsigned int target_size
1924 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1925 last = get_last_insn ();
1926 for (i = 0; i < nwords; i++)
1928 /* If I is 0, use the low-order word in both field and target;
1929 if I is 1, use the next to lowest word; and so on. */
1930 /* Word number in TARGET to use. */
1931 unsigned int wordnum
1932 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1933 /* Offset from start of field in OP0. */
1934 unsigned int bit_offset = (backwards ^ reverse
1935 ? MAX ((int) bitsize - ((int) i + 1)
1936 * BITS_PER_WORD,
1937 0)
1938 : (int) i * BITS_PER_WORD);
1939 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1940 rtx result_part
1941 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1942 bitsize - i * BITS_PER_WORD),
1943 bitnum + bit_offset,
1944 (unsignedp ? 1 : -1), target_part,
1945 mode, word_mode, reverse, fallback_p, NULL);
1947 gcc_assert (target_part);
1948 if (!result_part)
1950 delete_insns_since (last);
1951 return NULL;
1954 if (result_part != target_part)
1955 emit_move_insn (target_part, result_part);
1958 if (unsignedp)
1960 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1961 need to be zero'd out. */
1962 if (target_size > nwords * UNITS_PER_WORD)
1964 unsigned int i, total_words;
1966 total_words = target_size / UNITS_PER_WORD;
1967 for (i = nwords; i < total_words; i++)
1968 emit_move_insn
1969 (operand_subword (target,
1970 backwards ? total_words - i - 1 : i,
1971 1, VOIDmode),
1972 const0_rtx);
1974 return target;
1977 /* Signed bit field: sign-extend with two arithmetic shifts. */
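/* E.g. a 40-bit signed field extracted into a 64-bit MODE is shifted
   left by 64 - 40 = 24 so that its sign bit becomes the msb, then
   shifted arithmetically right by 24 to propagate the sign.  */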
1978 target = expand_shift (LSHIFT_EXPR, mode, target,
1979 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1980 return expand_shift (RSHIFT_EXPR, mode, target,
1981 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1984 /* If OP0 is a multi-word register, narrow it to the affected word.
1985 If the region spans two words, defer to extract_split_bit_field. */
1986 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1988 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1990 if (!fallback_p)
1991 return NULL_RTX;
1992 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1993 unsignedp, reverse);
1994 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1996 /* If OP0 is a hard register, copy it to a pseudo before calling
1997 simplify_gen_subreg. */
1998 if (REG_P (op0) && HARD_REGISTER_P (op0))
1999 op0 = copy_to_reg (op0);
2000 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
2001 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
2002 op0_mode = word_mode;
2003 bitnum %= BITS_PER_WORD;
2006 /* From here on we know the desired field is smaller than a word.
2007 If OP0 is a register, it too fits within a word. */
2008 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
2009 extraction_insn extv;
2010 if (!MEM_P (op0)
2011 && !reverse
2012 /* ??? We could limit the structure size to the part of OP0 that
2013 contains the field, with appropriate checks for endianness
2014 and TARGET_TRULY_NOOP_TRUNCATION. */
2015 && get_best_reg_extraction_insn (&extv, pattern,
2016 GET_MODE_BITSIZE (op0_mode.require ()),
2017 tmode))
2019 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2020 bitsize, bitnum,
2021 unsignedp, target, mode,
2022 tmode);
2023 if (result)
2024 return result;
2027 /* If OP0 is a memory, try copying it to a register and seeing if a
2028 cheap register alternative is available. */
2029 if (MEM_P (op0) && !reverse)
2031 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2032 tmode))
2034 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2035 bitsize, bitnum,
2036 unsignedp, target, mode,
2037 tmode);
2038 if (result)
2039 return result;
2042 rtx_insn *last = get_last_insn ();
2044 /* Try loading part of OP0 into a register and extracting the
2045 bitfield from that. */
2046 unsigned HOST_WIDE_INT bitpos;
2047 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2048 0, 0, tmode, &bitpos);
2049 if (xop0)
2051 xop0 = copy_to_reg (xop0);
2052 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2053 unsignedp, target,
2054 mode, tmode, reverse, false, NULL);
2055 if (result)
2056 return result;
2057 delete_insns_since (last);
2061 if (!fallback_p)
2062 return NULL;
2064 /* Find a correspondingly-sized integer field, so we can apply
2065 shifts and masks to it. */
2066 scalar_int_mode int_mode;
2067 if (!int_mode_for_mode (tmode).exists (&int_mode))
2068 /* If this fails, we should probably push op0 out to memory and then
2069 do a load. */
2070 int_mode = int_mode_for_mode (mode).require ();
2072 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2073 bitnum, target, unsignedp, reverse);
2075 /* Complex values must be reversed piecewise, so we need to undo the global
2076 reversal, convert to the complex mode and reverse again. */
2077 if (reverse && COMPLEX_MODE_P (tmode))
2079 target = flip_storage_order (int_mode, target);
2080 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2081 target = flip_storage_order (tmode, target);
2083 else
2084 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2086 return target;
2089 /* Generate code to extract a byte-field from STR_RTX
2090 containing BITSIZE bits, starting at BITNUM,
2091 and put it in TARGET if possible (if TARGET is nonzero).
2092 Regardless of TARGET, we return the rtx for where the value is placed.
2094 STR_RTX is the structure containing the byte (a REG or MEM).
2095 UNSIGNEDP is nonzero if this is an unsigned bit field.
2096 MODE is the natural mode of the field value once extracted.
2097 TMODE is the mode the caller would like the value to have;
2098 but the value may be returned with type MODE instead.
2100 If REVERSE is true, the extraction is to be done in reverse order.
2102 If a TARGET is specified and we can store in it at no extra cost,
2103 we do so, and return TARGET.
2104 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2105 if they are equally easy.
2107 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2108 then *ALT_RTL is set to TARGET (before legitimization). */
2110 rtx
2111 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2112 int unsignedp, rtx target, machine_mode mode,
2113 machine_mode tmode, bool reverse, rtx *alt_rtl)
2115 machine_mode mode1;
2117 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2118 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2119 mode1 = GET_MODE (str_rtx);
2120 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2121 mode1 = GET_MODE (target);
2122 else
2123 mode1 = tmode;
2125 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2126 scalar_int_mode int_mode;
2127 if (bitsize.is_constant (&ibitsize)
2128 && bitnum.is_constant (&ibitnum)
2129 && is_a <scalar_int_mode> (mode1, &int_mode)
2130 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2131 int_mode, 0, 0))
2133 /* Extraction of a full INT_MODE value can be done with a simple load.
2134 We know here that the field can be accessed with one single
2135 instruction. For targets that support unaligned memory,
2136 an unaligned access may be necessary. */
2137 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2139 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2140 ibitnum / BITS_PER_UNIT);
2141 if (reverse)
2142 result = flip_storage_order (int_mode, result);
2143 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2144 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2147 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2148 &ibitnum);
2149 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2150 str_rtx = copy_to_reg (str_rtx);
2151 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2152 target, mode, tmode, reverse, true, alt_rtl);
2155 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2156 target, mode, tmode, reverse, true, alt_rtl);
2159 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2160 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2161 otherwise OP0 is a BLKmode MEM.
2163 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2164 If REVERSE is true, the extraction is to be done in reverse order.
2166 If TARGET is nonzero, attempts to store the value there
2167 and return TARGET, but this is not guaranteed.
2168 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2170 static rtx
2171 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2172 opt_scalar_int_mode op0_mode,
2173 unsigned HOST_WIDE_INT bitsize,
2174 unsigned HOST_WIDE_INT bitnum, rtx target,
2175 int unsignedp, bool reverse)
2177 scalar_int_mode mode;
2178 if (MEM_P (op0))
2180 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2181 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2182 /* The only way this should occur is if the field spans word
2183 boundaries. */
2184 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2185 unsignedp, reverse);
2187 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2189 else
2190 mode = op0_mode.require ();
2192 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2193 target, unsignedp, reverse);
2196 /* Helper function for extract_fixed_bit_field, extracts
2197 the bit field always using MODE, which is the mode of OP0.
2198 If UNSIGNEDP is -1, the result need not be sign or zero extended.
2199 The other arguments are as for extract_fixed_bit_field. */
2201 static rtx
2202 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2203 unsigned HOST_WIDE_INT bitsize,
2204 unsigned HOST_WIDE_INT bitnum, rtx target,
2205 int unsignedp, bool reverse)
2207 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2208 for invalid input, such as extract equivalent of f5 from
2209 gcc.dg/pr48335-2.c. */
2211 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2212 /* BITNUM is the distance between our msb and that of OP0.
2213 Convert it to the distance from the lsb. */
2214 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2216 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2217 We have reduced the big-endian case to the little-endian case. */
2218 if (reverse)
2219 op0 = flip_storage_order (mode, op0);
2221 if (unsignedp)
2223 if (bitnum)
2225 /* If the field does not already start at the lsb,
2226 shift it so it does. */
2227 /* Maybe propagate the target for the shift. */
2228 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2229 if (tmode != mode)
2230 subtarget = 0;
2231 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2233 /* Convert the value to the desired mode. TMODE must also be a
2234 scalar integer for this conversion to make sense, since we
2235 shouldn't reinterpret the bits. */
2236 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2237 if (mode != new_mode)
2238 op0 = convert_to_mode (new_mode, op0, 1);
2240 /* Unless the msb of the field used to be the msb when we shifted,
2241 mask out the upper bits. */
2243 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize
2244 && unsignedp != -1)
2245 return expand_binop (new_mode, and_optab, op0,
2246 mask_rtx (new_mode, 0, bitsize, 0),
2247 target, 1, OPTAB_LIB_WIDEN);
2248 return op0;
2251 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2252 then arithmetic-shift its lsb to the lsb of the word. */
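/* Worked example: for a 5-bit field at bit 1, the narrowest mode holding
   bitsize + bitnum = 6 bits is QImode; the value is shifted left by
   8 - 6 = 2 so the field's msb becomes bit 7, then shifted arithmetically
   right by 8 - 5 = 3, leaving the sign-extended field in the low bits.  */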
2253 op0 = force_reg (mode, op0);
2255 /* Find the narrowest integer mode that contains the field. */
2257 opt_scalar_int_mode mode_iter;
2258 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2259 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2260 break;
2262 mode = mode_iter.require ();
2263 op0 = convert_to_mode (mode, op0, 0);
2265 if (mode != tmode)
2266 target = 0;
2268 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2270 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2271 /* Maybe propagate the target for the shift. */
2272 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2273 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2276 return expand_shift (RSHIFT_EXPR, mode, op0,
2277 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2280 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2281 VALUE << BITPOS. */
2283 static rtx
2284 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2285 int bitpos)
2287 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2290 /* Extract a bit field that is split across two words
2291 and return an RTX for the result.
2293 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2294 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2295 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2296 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2297 a BLKmode MEM.
2299 If REVERSE is true, the extraction is to be done in reverse order. */
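/* For example, with 32-bit words, a 16-bit field starting at bit 24
   spans two words: the loop below pulls 8 bits from the first word and
   8 bits from the second, each as an unsigned piece, then shifts the
   pieces into position and IORs them together.  */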
2301 static rtx
2302 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2303 unsigned HOST_WIDE_INT bitsize,
2304 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2305 bool reverse)
2307 unsigned int unit;
2308 unsigned int bitsdone = 0;
2309 rtx result = NULL_RTX;
2310 int first = 1;
2312 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2313 much at a time. */
2314 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2315 unit = BITS_PER_WORD;
2316 else
2317 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2319 while (bitsdone < bitsize)
2321 unsigned HOST_WIDE_INT thissize;
2322 rtx part;
2323 unsigned HOST_WIDE_INT thispos;
2324 unsigned HOST_WIDE_INT offset;
2326 offset = (bitpos + bitsdone) / unit;
2327 thispos = (bitpos + bitsdone) % unit;
2329 /* THISSIZE must not overrun a word boundary. Otherwise,
2330 extract_fixed_bit_field will call us again, and we will mutually
2331 recurse forever. */
2332 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2333 thissize = MIN (thissize, unit - thispos);
2335 /* If OP0 is a register, then handle OFFSET here. */
2336 rtx op0_piece = op0;
2337 opt_scalar_int_mode op0_piece_mode = op0_mode;
2338 if (SUBREG_P (op0) || REG_P (op0))
2340 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2341 op0_piece_mode = word_mode;
2342 offset = 0;
2345 /* Extract the parts in bit-counting order,
2346 whose meaning is determined by BYTES_PER_UNIT.
2347 OFFSET is in UNITs, and UNIT is in bits. */
2348 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2349 thissize, offset * unit + thispos,
2350 0, 1, reverse);
2351 bitsdone += thissize;
2353 /* Shift this part into place for the result. */
2354 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2356 if (bitsize != bitsdone)
2357 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2358 bitsize - bitsdone, 0, 1);
2360 else
2362 if (bitsdone != thissize)
2363 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2364 bitsdone - thissize, 0, 1);
2367 if (first)
2368 result = part;
2369 else
2370 /* Combine the parts with bitwise or. This works
2371 because we extracted each part as an unsigned bit field. */
2372 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2373 OPTAB_LIB_WIDEN);
2375 first = 0;
2378 /* Unsigned bit field: we are done. */
2379 if (unsignedp)
2380 return result;
2381 /* Signed bit field: sign-extend with two arithmetic shifts. */
2382 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2383 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2384 return expand_shift (RSHIFT_EXPR, word_mode, result,
2385 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2388 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2389 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2390 MODE, fill the upper bits with zeros. Fail if the layout of either
2391 mode is unknown (as for CC modes) or if the extraction would involve
2392 unprofitable mode punning. Return the value on success, otherwise
2393 return null.
2395 This is different from gen_lowpart* in these respects:
2397 - the returned value must always be considered an rvalue
2399 - when MODE is wider than SRC_MODE, the extraction involves
2400 a zero extension
2402 - when MODE is smaller than SRC_MODE, the extraction involves
2403 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2405 In other words, this routine performs a computation, whereas the
2406 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2407 operations. */
2409 rtx
2410 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2412 scalar_int_mode int_mode, src_int_mode;
2414 if (mode == src_mode)
2415 return src;
2417 if (CONSTANT_P (src))
2419 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2420 fails, it will happily create (subreg (symbol_ref)) or similar
2421 invalid SUBREGs. */
2422 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2423 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2424 if (ret)
2425 return ret;
2427 if (GET_MODE (src) == VOIDmode
2428 || !validate_subreg (mode, src_mode, src, byte))
2429 return NULL_RTX;
2431 src = force_reg (GET_MODE (src), src);
2432 return gen_rtx_SUBREG (mode, src, byte);
2435 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2436 return NULL_RTX;
2438 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2439 && targetm.modes_tieable_p (mode, src_mode))
2441 rtx x = gen_lowpart_common (mode, src);
2442 if (x)
2443 return x;
2446 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2447 || !int_mode_for_mode (mode).exists (&int_mode))
2448 return NULL_RTX;
2450 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2451 return NULL_RTX;
2452 if (!targetm.modes_tieable_p (int_mode, mode))
2453 return NULL_RTX;
2455 src = gen_lowpart (src_int_mode, src);
2456 if (!validate_subreg (int_mode, src_int_mode, src,
2457 subreg_lowpart_offset (int_mode, src_int_mode)))
2458 return NULL_RTX;
2460 src = convert_modes (int_mode, src_int_mode, src, true);
2461 src = gen_lowpart (mode, src);
2462 return src;
2465 /* Add INC into TARGET. */
2467 void
2468 expand_inc (rtx target, rtx inc)
2470 rtx value = expand_binop (GET_MODE (target), add_optab,
2471 target, inc,
2472 target, 0, OPTAB_LIB_WIDEN);
2473 if (value != target)
2474 emit_move_insn (target, value);
2477 /* Subtract DEC from TARGET. */
2479 void
2480 expand_dec (rtx target, rtx dec)
2482 rtx value = expand_binop (GET_MODE (target), sub_optab,
2483 target, dec,
2484 target, 0, OPTAB_LIB_WIDEN);
2485 if (value != target)
2486 emit_move_insn (target, value);
2489 /* Output a shift instruction for expression code CODE,
2490 with SHIFTED being the rtx for the value to shift,
2491 and AMOUNT the rtx for the amount to shift by.
2492 Store the result in the rtx TARGET, if that is convenient.
2493 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2494 Return the rtx for where the value is.
2495 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2496 in which case 0 is returned. */
2498 static rtx
2499 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2500 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2502 rtx op1, temp = 0;
2503 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2504 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2505 optab lshift_optab = ashl_optab;
2506 optab rshift_arith_optab = ashr_optab;
2507 optab rshift_uns_optab = lshr_optab;
2508 optab lrotate_optab = rotl_optab;
2509 optab rrotate_optab = rotr_optab;
2510 machine_mode op1_mode;
2511 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2512 int attempt;
2513 bool speed = optimize_insn_for_speed_p ();
2515 op1 = amount;
2516 op1_mode = GET_MODE (op1);
2518 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2519 shift amount is a vector, use the vector/vector shift patterns. */
2520 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2522 lshift_optab = vashl_optab;
2523 rshift_arith_optab = vashr_optab;
2524 rshift_uns_optab = vlshr_optab;
2525 lrotate_optab = vrotl_optab;
2526 rrotate_optab = vrotr_optab;
2529 /* Previously detected shift-counts computed by NEGATE_EXPR
2530 and shifted in the other direction; but that does not work
2531 on all machines. */
2533 if (SHIFT_COUNT_TRUNCATED)
2535 if (CONST_INT_P (op1)
2536 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2537 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2538 op1 = gen_int_shift_amount (mode,
2539 (unsigned HOST_WIDE_INT) INTVAL (op1)
2540 % GET_MODE_BITSIZE (scalar_mode));
2541 else if (GET_CODE (op1) == SUBREG
2542 && subreg_lowpart_p (op1)
2543 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2544 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2545 op1 = SUBREG_REG (op1);
2548 /* Canonicalize rotates by constant amount. We may canonicalize
2549 to reduce the immediate or if the ISA can rotate by constants
2550 in only one direction. */
2551 if (rotate && reverse_rotate_by_imm_p (scalar_mode, left, op1))
2553 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2554 - INTVAL (op1)));
2555 left = !left;
2556 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2559 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2560 Note that this is not the case for bigger values. For instance a rotation
2561 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2562 0x04030201 (bswapsi). */
2563 if (rotate
2564 && CONST_INT_P (op1)
2565 && INTVAL (op1) == BITS_PER_UNIT
2566 && GET_MODE_SIZE (scalar_mode) == 2
2567 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2568 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2570 if (op1 == const0_rtx)
2571 return shifted;
2573 /* Check whether it's cheaper to implement a left shift by a constant
2574 bit count by a sequence of additions. */
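/* E.g. a left shift by 3 can be replaced by three successive doublings
   (x += x three times), which the loop below emits whenever
   3 * add_cost is cheaper than the corresponding shift_cost.  */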
2575 if (code == LSHIFT_EXPR
2576 && CONST_INT_P (op1)
2577 && INTVAL (op1) > 0
2578 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2579 && INTVAL (op1) < MAX_BITS_PER_WORD
2580 && (shift_cost (speed, mode, INTVAL (op1))
2581 > INTVAL (op1) * add_cost (speed, mode))
2582 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2584 int i;
2585 for (i = 0; i < INTVAL (op1); i++)
2587 temp = force_reg (mode, shifted);
2588 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2589 unsignedp, OPTAB_LIB_WIDEN);
2591 return shifted;
2594 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2596 enum optab_methods methods;
2598 if (attempt == 0)
2599 methods = OPTAB_DIRECT;
2600 else if (attempt == 1)
2601 methods = OPTAB_WIDEN;
2602 else
2603 methods = OPTAB_LIB_WIDEN;
2605 if (rotate)
2607 /* Widening does not work for rotation. */
2608 if (methods == OPTAB_WIDEN)
2609 continue;
2610 else if (methods == OPTAB_LIB_WIDEN)
2612 /* If we have been unable to open-code this by a rotation,
2613 do it as the IOR of two shifts. I.e., to rotate A
2614 by N bits, compute
2615 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2616 where C is the bitsize of A.
2618 It is theoretically possible that the target machine might
2619 not be able to perform either shift and hence we would
2620 be making two libcalls rather than just the one for the
2621 shift (similarly if IOR could not be done). We will allow
2622 this extremely unlikely lossage to avoid complicating the
2623 code below. */
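/* For instance, rotating a 32-bit value A left by 8 becomes
   (A << 8) | ((unsigned) A >> 24), since (-8) & (32 - 1) == 24.  */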
2625 rtx subtarget = target == shifted ? 0 : target;
2626 rtx new_amount, other_amount;
2627 rtx temp1;
2629 new_amount = op1;
2630 if (op1 == const0_rtx)
2631 return shifted;
2632 else if (CONST_INT_P (op1))
2633 other_amount = gen_int_shift_amount
2634 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2635 else
2637 other_amount
2638 = simplify_gen_unary (NEG, GET_MODE (op1),
2639 op1, GET_MODE (op1));
2640 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2641 other_amount
2642 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2643 gen_int_mode (mask, GET_MODE (op1)));
2646 shifted = force_reg (mode, shifted);
2648 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2649 mode, shifted, new_amount, 0, 1);
2650 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2651 mode, shifted, other_amount,
2652 subtarget, 1);
2653 return expand_binop (mode, ior_optab, temp, temp1, target,
2654 unsignedp, methods);
2657 temp = expand_binop (mode,
2658 left ? lrotate_optab : rrotate_optab,
2659 shifted, op1, target, unsignedp, methods);
2661 else if (unsignedp)
2662 temp = expand_binop (mode,
2663 left ? lshift_optab : rshift_uns_optab,
2664 shifted, op1, target, unsignedp, methods);
2666 /* Do arithmetic shifts.
2667 Also, if we are going to widen the operand, we can just as well
2668 use an arithmetic right-shift instead of a logical one. */
2669 if (temp == 0 && ! rotate
2670 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2672 enum optab_methods methods1 = methods;
2674 /* If trying to widen a log shift to an arithmetic shift,
2675 don't accept an arithmetic shift of the same size. */
2676 if (unsignedp)
2677 methods1 = OPTAB_MUST_WIDEN;
2679 /* Arithmetic shift */
2681 temp = expand_binop (mode,
2682 left ? lshift_optab : rshift_arith_optab,
2683 shifted, op1, target, unsignedp, methods1);
2686 /* We used to try extzv here for logical right shifts, but that was
2687 only useful for one machine, the VAX, and caused poor code
2688 generation there for lshrdi3, so the code was deleted and a
2689 define_expand for lshrsi3 was added to vax.md. */
2692 gcc_assert (temp != NULL_RTX || may_fail);
2693 return temp;
2696 /* Output a shift instruction for expression code CODE,
2697 with SHIFTED being the rtx for the value to shift,
2698 and AMOUNT the amount to shift by.
2699 Store the result in the rtx TARGET, if that is convenient.
2700 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2701 Return the rtx for where the value is. */
2703 rtx
2704 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2705 poly_int64 amount, rtx target, int unsignedp)
2707 return expand_shift_1 (code, mode, shifted,
2708 gen_int_shift_amount (mode, amount),
2709 target, unsignedp);
2712 /* Likewise, but return 0 if that cannot be done. */
2714 rtx
2715 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2716 int amount, rtx target, int unsignedp)
2718 return expand_shift_1 (code, mode,
2719 shifted, GEN_INT (amount), target, unsignedp, true);
2722 /* Output a shift instruction for expression code CODE,
2723 with SHIFTED being the rtx for the value to shift,
2724 and AMOUNT the tree for the amount to shift by.
2725 Store the result in the rtx TARGET, if that is convenient.
2726 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2727 Return the rtx for where the value is. */
2729 rtx
2730 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2731 tree amount, rtx target, int unsignedp)
2733 return expand_shift_1 (code, mode,
2734 shifted, expand_normal (amount), target, unsignedp);
2738 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2739 const struct mult_cost *, machine_mode mode);
2740 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2741 const struct algorithm *, enum mult_variant);
2742 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2743 static rtx extract_high_half (scalar_int_mode, rtx);
2744 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2745 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2746 int, int);
2747 /* Compute and return the best algorithm for multiplying by T.
2748 The algorithm must cost less than COST_LIMIT.
2749 If retval.cost >= COST_LIMIT, no algorithm was found and all
2750 other fields of the returned struct are undefined.
2751 MODE is the machine mode of the multiplication. */
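/* For example, a multiplication by 10 can be synthesized as
   ((x << 2) + x) << 1, i.e. one shift-add for x * 5 followed by a final
   shift; whether such a sequence is chosen over the target's multiply
   instruction depends on the add/shift cost tables consulted below.  */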
2753 static void
2754 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2755 const struct mult_cost *cost_limit, machine_mode mode)
2757 int m;
2758 struct algorithm *alg_in, *best_alg;
2759 struct mult_cost best_cost;
2760 struct mult_cost new_limit;
2761 int op_cost, op_latency;
2762 unsigned HOST_WIDE_INT orig_t = t;
2763 unsigned HOST_WIDE_INT q;
2764 int maxm, hash_index;
2765 bool cache_hit = false;
2766 enum alg_code cache_alg = alg_zero;
2767 bool speed = optimize_insn_for_speed_p ();
2768 scalar_int_mode imode;
2769 struct alg_hash_entry *entry_ptr;
2771 /* Indicate that no algorithm is yet found. If no algorithm
2772 is found, this value will be returned and indicate failure. */
2773 alg_out->cost.cost = cost_limit->cost + 1;
2774 alg_out->cost.latency = cost_limit->latency + 1;
2776 if (cost_limit->cost < 0
2777 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2778 return;
2780 /* Be prepared for vector modes. */
2781 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2783 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2785 /* Restrict the bits of "t" to the multiplication's mode. */
2786 t &= GET_MODE_MASK (imode);
2788 /* t == 1 can be done in zero cost. */
2789 if (t == 1)
2791 alg_out->ops = 1;
2792 alg_out->cost.cost = 0;
2793 alg_out->cost.latency = 0;
2794 alg_out->op[0] = alg_m;
2795 return;
2798 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2799 fail now. */
2800 if (t == 0)
2802 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2803 return;
2804 else
2806 alg_out->ops = 1;
2807 alg_out->cost.cost = zero_cost (speed);
2808 alg_out->cost.latency = zero_cost (speed);
2809 alg_out->op[0] = alg_zero;
2810 return;
2814 /* We'll be needing a couple extra algorithm structures now. */
2816 alg_in = XALLOCA (struct algorithm);
2817 best_alg = XALLOCA (struct algorithm);
2818 best_cost = *cost_limit;
2820 /* Compute the hash index. */
2821 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2823 /* See if we already know what to do for T. */
2824 entry_ptr = alg_hash_entry_ptr (hash_index);
2825 if (entry_ptr->t == t
2826 && entry_ptr->mode == mode
2827 && entry_ptr->speed == speed
2828 && entry_ptr->alg != alg_unknown)
2830 cache_alg = entry_ptr->alg;
2832 if (cache_alg == alg_impossible)
2834 /* The cache tells us that it's impossible to synthesize
2835 multiplication by T within entry_ptr->cost. */
2836 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2837 /* COST_LIMIT is at least as restrictive as the one
2838 recorded in the hash table, in which case we have no
2839 hope of synthesizing a multiplication. Just
2840 return. */
2841 return;
2843 /* If we get here, COST_LIMIT is less restrictive than the
2844 one recorded in the hash table, so we may be able to
2845 synthesize a multiplication. Proceed as if we didn't
2846 have the cache entry. */
2848 else
2850 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2851 /* The cached algorithm shows that this multiplication
2852 requires more cost than COST_LIMIT. Just return. This
2853 way, we don't clobber this cache entry with
2854 alg_impossible but retain useful information. */
2855 return;
2857 cache_hit = true;
2859 switch (cache_alg)
2861 case alg_shift:
2862 goto do_alg_shift;
2864 case alg_add_t_m2:
2865 case alg_sub_t_m2:
2866 goto do_alg_addsub_t_m2;
2868 case alg_add_factor:
2869 case alg_sub_factor:
2870 goto do_alg_addsub_factor;
2872 case alg_add_t2_m:
2873 goto do_alg_add_t2_m;
2875 case alg_sub_t2_m:
2876 goto do_alg_sub_t2_m;
2878 default:
2879 gcc_unreachable ();
2884 /* If we have a group of zero bits at the low-order part of T, try
2885 multiplying by the remaining bits and then doing a shift. */
2887 if ((t & 1) == 0)
2889 do_alg_shift:
2890 m = ctz_or_zero (t); /* m = number of low zero bits */
2891 if (m < maxm)
2893 q = t >> m;
2894 /* The function expand_shift will choose between a shift and
2895 a sequence of additions, so the observed cost is given as
2896 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2897 op_cost = m * add_cost (speed, mode);
2898 if (shift_cost (speed, mode, m) < op_cost)
2899 op_cost = shift_cost (speed, mode, m);
2900 new_limit.cost = best_cost.cost - op_cost;
2901 new_limit.latency = best_cost.latency - op_cost;
2902 synth_mult (alg_in, q, &new_limit, mode);
2904 alg_in->cost.cost += op_cost;
2905 alg_in->cost.latency += op_cost;
2906 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2908 best_cost = alg_in->cost;
2909 std::swap (alg_in, best_alg);
2910 best_alg->log[best_alg->ops] = m;
2911 best_alg->op[best_alg->ops] = alg_shift;
2914 /* See if treating ORIG_T as a signed number yields a better
2915 sequence. Try this sequence only for a negative ORIG_T
2916 as it would be useless for a non-negative ORIG_T. */
2917 if ((HOST_WIDE_INT) orig_t < 0)
2919 /* Shift ORIG_T as follows because a right shift of a
2920 negative-valued signed type is implementation
2921 defined. */
2922 q = ~(~orig_t >> m);
2923 /* The function expand_shift will choose between a shift
2924 and a sequence of additions, so the observed cost is
2925 given as MIN (m * add_cost(speed, mode),
2926 shift_cost(speed, mode, m)). */
2927 op_cost = m * add_cost (speed, mode);
2928 if (shift_cost (speed, mode, m) < op_cost)
2929 op_cost = shift_cost (speed, mode, m);
2930 new_limit.cost = best_cost.cost - op_cost;
2931 new_limit.latency = best_cost.latency - op_cost;
2932 synth_mult (alg_in, q, &new_limit, mode);
2934 alg_in->cost.cost += op_cost;
2935 alg_in->cost.latency += op_cost;
2936 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2938 best_cost = alg_in->cost;
2939 std::swap (alg_in, best_alg);
2940 best_alg->log[best_alg->ops] = m;
2941 best_alg->op[best_alg->ops] = alg_shift;
2945 if (cache_hit)
2946 goto done;
2949 /* If we have an odd number, add or subtract one. */
2950 if ((t & 1) != 0)
2952 unsigned HOST_WIDE_INT w;
2954 do_alg_addsub_t_m2:
2955 for (w = 1; (w & t) != 0; w <<= 1)
2957 /* If T was -1, then W will be zero after the loop. This is another
2958 case where T ends with ...111. Handling this with (T + 1) and
2959 subtract 1 produces slightly better code and results in algorithm
2960 selection much faster than treating it like the ...0111 case
2961 below. */
2962 if (w == 0
2963 || (w > 2
2964 /* Reject the case where t is 3.
2965 Thus we prefer addition in that case. */
2966 && t != 3))
2968 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2970 op_cost = add_cost (speed, mode);
2971 new_limit.cost = best_cost.cost - op_cost;
2972 new_limit.latency = best_cost.latency - op_cost;
2973 synth_mult (alg_in, t + 1, &new_limit, mode);
2975 alg_in->cost.cost += op_cost;
2976 alg_in->cost.latency += op_cost;
2977 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2979 best_cost = alg_in->cost;
2980 std::swap (alg_in, best_alg);
2981 best_alg->log[best_alg->ops] = 0;
2982 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2985 else
2987 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2989 op_cost = add_cost (speed, mode);
2990 new_limit.cost = best_cost.cost - op_cost;
2991 new_limit.latency = best_cost.latency - op_cost;
2992 synth_mult (alg_in, t - 1, &new_limit, mode);
2994 alg_in->cost.cost += op_cost;
2995 alg_in->cost.latency += op_cost;
2996 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2998 best_cost = alg_in->cost;
2999 std::swap (alg_in, best_alg);
3000 best_alg->log[best_alg->ops] = 0;
3001 best_alg->op[best_alg->ops] = alg_add_t_m2;
3005 /* We may be able to calculate a * -7, a * -15, a * -31, etc
3006 quickly with a - a * n for some appropriate constant n. */
3007 m = exact_log2 (-orig_t + 1);
3008 if (m >= 0 && m < maxm)
3010 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3011 /* If the target has a cheap shift-and-subtract insn use
3012 that in preference to a shift insn followed by a sub insn.
3013 Assume that the shift-and-sub is "atomic" with a latency
3014 equal to its cost, otherwise assume that on superscalar
3015 hardware the shift may be executed concurrently with the
3016 earlier steps in the algorithm. */
3017 if (shiftsub1_cost (speed, mode, m) <= op_cost)
3019 op_cost = shiftsub1_cost (speed, mode, m);
3020 op_latency = op_cost;
3022 else
3023 op_latency = add_cost (speed, mode);
3025 new_limit.cost = best_cost.cost - op_cost;
3026 new_limit.latency = best_cost.latency - op_latency;
3027 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3028 &new_limit, mode);
3030 alg_in->cost.cost += op_cost;
3031 alg_in->cost.latency += op_latency;
3032 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3034 best_cost = alg_in->cost;
3035 std::swap (alg_in, best_alg);
3036 best_alg->log[best_alg->ops] = m;
3037 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3041 if (cache_hit)
3042 goto done;
3045 /* Look for factors of t of the form
3046 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3047 If we find such a factor, we can multiply by t using an algorithm that
3048 multiplies by q, shifts the result by m, and adds or subtracts the unshifted result.
3050 We search for large factors first and loop down, even if large factors
3051 are less probable than small; if we find a large factor we will find a
3052 good sequence quickly, and therefore be able to prune (by decreasing
3053 COST_LIMIT) the search. */
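/* For example, t = 45 is divisible by the factor 2**4 - 1 == 15, so the
   loop below can synthesize x * 3 first and then multiply by 15 with a
   single shift-and-subtract, (acc << 4) - acc.  */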
3055 do_alg_addsub_factor:
3056 for (m = floor_log2 (t - 1); m >= 2; m--)
3058 unsigned HOST_WIDE_INT d;
3060 d = (HOST_WIDE_INT_1U << m) + 1;
3061 if (t % d == 0 && t > d && m < maxm
3062 && (!cache_hit || cache_alg == alg_add_factor))
3064 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3065 if (shiftadd_cost (speed, mode, m) <= op_cost)
3066 op_cost = shiftadd_cost (speed, mode, m);
3068 op_latency = op_cost;
3071 new_limit.cost = best_cost.cost - op_cost;
3072 new_limit.latency = best_cost.latency - op_latency;
3073 synth_mult (alg_in, t / d, &new_limit, mode);
3075 alg_in->cost.cost += op_cost;
3076 alg_in->cost.latency += op_latency;
3077 if (alg_in->cost.latency < op_cost)
3078 alg_in->cost.latency = op_cost;
3079 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3081 best_cost = alg_in->cost;
3082 std::swap (alg_in, best_alg);
3083 best_alg->log[best_alg->ops] = m;
3084 best_alg->op[best_alg->ops] = alg_add_factor;
3086 /* Other factors will have been taken care of in the recursion. */
3087 break;
3090 d = (HOST_WIDE_INT_1U << m) - 1;
3091 if (t % d == 0 && t > d && m < maxm
3092 && (!cache_hit || cache_alg == alg_sub_factor))
3094 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3095 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3096 op_cost = shiftsub0_cost (speed, mode, m);
3098 op_latency = op_cost;
3100 new_limit.cost = best_cost.cost - op_cost;
3101 new_limit.latency = best_cost.latency - op_latency;
3102 synth_mult (alg_in, t / d, &new_limit, mode);
3104 alg_in->cost.cost += op_cost;
3105 alg_in->cost.latency += op_latency;
3106 if (alg_in->cost.latency < op_cost)
3107 alg_in->cost.latency = op_cost;
3108 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3110 best_cost = alg_in->cost;
3111 std::swap (alg_in, best_alg);
3112 best_alg->log[best_alg->ops] = m;
3113 best_alg->op[best_alg->ops] = alg_sub_factor;
3115 break;
3118 if (cache_hit)
3119 goto done;
3121 /* Try shift-and-add (load effective address) instructions,
3122 i.e. do a*3, a*5, a*9. */
3123 if ((t & 1) != 0)
3125 do_alg_add_t2_m:
3126 q = t - 1;
3127 m = ctz_hwi (q);
3128 if (q && m < maxm)
3130 op_cost = shiftadd_cost (speed, mode, m);
3131 new_limit.cost = best_cost.cost - op_cost;
3132 new_limit.latency = best_cost.latency - op_cost;
3133 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3135 alg_in->cost.cost += op_cost;
3136 alg_in->cost.latency += op_cost;
3137 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3139 best_cost = alg_in->cost;
3140 std::swap (alg_in, best_alg);
3141 best_alg->log[best_alg->ops] = m;
3142 best_alg->op[best_alg->ops] = alg_add_t2_m;
3145 if (cache_hit)
3146 goto done;
3148 do_alg_sub_t2_m:
3149 q = t + 1;
3150 m = ctz_hwi (q);
3151 if (q && m < maxm)
3153 op_cost = shiftsub0_cost (speed, mode, m);
3154 new_limit.cost = best_cost.cost - op_cost;
3155 new_limit.latency = best_cost.latency - op_cost;
3156 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3158 alg_in->cost.cost += op_cost;
3159 alg_in->cost.latency += op_cost;
3160 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3162 best_cost = alg_in->cost;
3163 std::swap (alg_in, best_alg);
3164 best_alg->log[best_alg->ops] = m;
3165 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3168 if (cache_hit)
3169 goto done;
3172 done:
3173 /* If best_cost has not decreased, we have not found any algorithm. */
3174 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3176 /* We failed to find an algorithm. Record alg_impossible for
3177 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3178 we are asked to find an algorithm for T within the same or
3179 lower COST_LIMIT, we can immediately return to the
3180 caller. */
3181 entry_ptr->t = t;
3182 entry_ptr->mode = mode;
3183 entry_ptr->speed = speed;
3184 entry_ptr->alg = alg_impossible;
3185 entry_ptr->cost = *cost_limit;
3186 return;
3189 /* Cache the result. */
3190 if (!cache_hit)
3192 entry_ptr->t = t;
3193 entry_ptr->mode = mode;
3194 entry_ptr->speed = speed;
3195 entry_ptr->alg = best_alg->op[best_alg->ops];
3196 entry_ptr->cost.cost = best_cost.cost;
3197 entry_ptr->cost.latency = best_cost.latency;
3200 /* If we are getting too long a sequence for `struct algorithm'
3201 to record, make this search fail. */
3202 if (best_alg->ops == MAX_BITS_PER_WORD)
3203 return;
3205 /* Copy the algorithm from temporary space to the space at alg_out.
3206 We avoid using structure assignment because the majority of
3207 best_alg is normally undefined, and this is a critical function. */
3208 alg_out->ops = best_alg->ops + 1;
3209 alg_out->cost = best_cost;
3210 memcpy (alg_out->op, best_alg->op,
3211 alg_out->ops * sizeof *alg_out->op);
3212 memcpy (alg_out->log, best_alg->log,
3213 alg_out->ops * sizeof *alg_out->log);
3216 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3217 Try three variations:
3219 - a shift/add sequence based on VAL itself
3220 - a shift/add sequence based on -VAL, followed by a negation
3221 - a shift/add sequence based on VAL - 1, followed by an addition.
3223 Return true if the cheapest of these cost less than MULT_COST,
3224 describing the algorithm in *ALG and final fixup in *VARIANT. */
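/* For instance, for VAL == -3 it is usually cheaper to synthesize
   (x << 1) + x and negate the result (negate_variant) than to
   synthesize the full bit pattern of -3 directly.  */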
3226 bool
3227 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3228 struct algorithm *alg, enum mult_variant *variant,
3229 int mult_cost)
3231 struct algorithm alg2;
3232 struct mult_cost limit;
3233 int op_cost;
3234 bool speed = optimize_insn_for_speed_p ();
3236 /* Fail quickly for impossible bounds. */
3237 if (mult_cost < 0)
3238 return false;
3240 /* Ensure that mult_cost provides a reasonable upper bound.
3241 Any constant multiplication can be performed with less
3242 than 2 * bits additions. */
3243 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3244 if (mult_cost > op_cost)
3245 mult_cost = op_cost;
3247 *variant = basic_variant;
3248 limit.cost = mult_cost;
3249 limit.latency = mult_cost;
3250 synth_mult (alg, val, &limit, mode);
3252 /* This works only if the inverted value actually fits in an
3253 `unsigned int' */
3254 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3256 op_cost = neg_cost (speed, mode);
3257 if (MULT_COST_LESS (&alg->cost, mult_cost))
3259 limit.cost = alg->cost.cost - op_cost;
3260 limit.latency = alg->cost.latency - op_cost;
3262 else
3264 limit.cost = mult_cost - op_cost;
3265 limit.latency = mult_cost - op_cost;
3268 synth_mult (&alg2, -val, &limit, mode);
3269 alg2.cost.cost += op_cost;
3270 alg2.cost.latency += op_cost;
3271 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3272 *alg = alg2, *variant = negate_variant;
3275 /* This proves very useful for division-by-constant. */
3276 op_cost = add_cost (speed, mode);
3277 if (MULT_COST_LESS (&alg->cost, mult_cost))
3279 limit.cost = alg->cost.cost - op_cost;
3280 limit.latency = alg->cost.latency - op_cost;
3282 else
3284 limit.cost = mult_cost - op_cost;
3285 limit.latency = mult_cost - op_cost;
3288 synth_mult (&alg2, val - 1, &limit, mode);
3289 alg2.cost.cost += op_cost;
3290 alg2.cost.latency += op_cost;
3291 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3292 *alg = alg2, *variant = add_variant;
3294 return MULT_COST_LESS (&alg->cost, mult_cost);
3297 /* A subroutine of expand_mult, used for constant multiplications.
3298 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3299 convenient. Use the shift/add sequence described by ALG and apply
3300 the final fixup specified by VARIANT. */
3302 static rtx
3303 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3304 rtx target, const struct algorithm *alg,
3305 enum mult_variant variant)
3307 unsigned HOST_WIDE_INT val_so_far;
3308 rtx_insn *insn;
3309 rtx accum, tem;
3310 int opno;
3311 machine_mode nmode;
3313 /* Avoid referencing memory over and over and invalid sharing
3314 on SUBREGs. */
3315 op0 = force_reg (mode, op0);
3317 /* ACCUM starts out either as OP0 or as a zero, depending on
3318 the first operation. */
3320 if (alg->op[0] == alg_zero)
3322 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3323 val_so_far = 0;
3325 else if (alg->op[0] == alg_m)
3327 accum = copy_to_mode_reg (mode, op0);
3328 val_so_far = 1;
3330 else
3331 gcc_unreachable ();
3333 for (opno = 1; opno < alg->ops; opno++)
3335 int log = alg->log[opno];
3336 rtx shift_subtarget = optimize ? 0 : accum;
3337 rtx add_target
3338 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3339 && !optimize)
3340 ? target : 0;
3341 rtx accum_target = optimize ? 0 : accum;
3342 rtx accum_inner;
3344 switch (alg->op[opno])
3346 case alg_shift:
3347 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3348 /* REG_EQUAL note will be attached to the following insn. */
3349 emit_move_insn (accum, tem);
3350 val_so_far <<= log;
3351 break;
3353 case alg_add_t_m2:
3354 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3355 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3356 add_target ? add_target : accum_target);
3357 val_so_far += HOST_WIDE_INT_1U << log;
3358 break;
3360 case alg_sub_t_m2:
3361 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3362 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3363 add_target ? add_target : accum_target);
3364 val_so_far -= HOST_WIDE_INT_1U << log;
3365 break;
3367 case alg_add_t2_m:
3368 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3369 log, shift_subtarget, 0);
3370 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3371 add_target ? add_target : accum_target);
3372 val_so_far = (val_so_far << log) + 1;
3373 break;
3375 case alg_sub_t2_m:
3376 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3377 log, shift_subtarget, 0);
3378 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3379 add_target ? add_target : accum_target);
3380 val_so_far = (val_so_far << log) - 1;
3381 break;
3383 case alg_add_factor:
3384 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3385 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3386 add_target ? add_target : accum_target);
3387 val_so_far += val_so_far << log;
3388 break;
3390 case alg_sub_factor:
3391 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3392 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3393 (add_target
3394 ? add_target : (optimize ? 0 : tem)));
3395 val_so_far = (val_so_far << log) - val_so_far;
3396 break;
3398 default:
3399 gcc_unreachable ();
3402 if (SCALAR_INT_MODE_P (mode))
3404 /* Write a REG_EQUAL note on the last insn so that we can cse
3405 multiplication sequences. Note that if ACCUM is a SUBREG,
3406 we've set the inner register and must properly indicate that. */
3407 tem = op0, nmode = mode;
3408 accum_inner = accum;
3409 if (GET_CODE (accum) == SUBREG)
3411 accum_inner = SUBREG_REG (accum);
3412 nmode = GET_MODE (accum_inner);
3413 tem = gen_lowpart (nmode, op0);
3416 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3417 In that case, only the low bits of accum would be guaranteed to
3418 be equal to the content of the REG_EQUAL note; the upper bits
3419 can be anything. */
3420 if (!paradoxical_subreg_p (tem))
3422 insn = get_last_insn ();
3423 wide_int wval_so_far
3424 = wi::uhwi (val_so_far,
3425 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3426 rtx c = immed_wide_int_const (wval_so_far, nmode);
3427 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3428 accum_inner);
3433 if (variant == negate_variant)
3435 val_so_far = -val_so_far;
3436 accum = expand_unop (mode, neg_optab, accum, target, 0);
3438 else if (variant == add_variant)
3440 val_so_far = val_so_far + 1;
3441 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3444 /* Compare only the bits of val and val_so_far that are significant
3445 in the result mode, to avoid sign-/zero-extension confusion. */
3446 nmode = GET_MODE_INNER (mode);
3447 val &= GET_MODE_MASK (nmode);
3448 val_so_far &= GET_MODE_MASK (nmode);
3449 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3451 return accum;
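/* Illustrative sketch, not part of the compiled code: one algorithm
   synth_mult might pick for a multiplication by 10 is alg_m, then
   alg_add_factor with log == 2, then alg_shift with log == 1.  The
   loop above then traces val_so_far as 1 -> 5 -> 10 while emitting
   the equivalent of

       accum = op0;
       accum = accum + (accum << 2);    [accum == 5 * op0]
       accum = accum << 1;              [accum == 10 * op0]

   Which sequence is actually chosen depends on the target's rtx
   costs, so this trace is only an example.  */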
3454 /* Perform a multiplication and return an rtx for the result.
3455 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3456 TARGET is a suggestion for where to store the result (an rtx).
3458 We check specially for a constant integer as OP1.
3459 If you want this check for OP0 as well, then before calling
3460 you should swap the two operands if OP0 would be constant. */
3463 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3464 int unsignedp, bool no_libcall)
3466 enum mult_variant variant;
3467 struct algorithm algorithm;
3468 rtx scalar_op1;
3469 int max_cost;
3470 bool speed = optimize_insn_for_speed_p ();
3471 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3473 if (CONSTANT_P (op0))
3474 std::swap (op0, op1);
3476 /* For vectors, there are several simplifications that can be made if
3477 all elements of the vector constant are identical. */
3478 scalar_op1 = unwrap_const_vec_duplicate (op1);
3480 if (INTEGRAL_MODE_P (mode))
3482 rtx fake_reg;
3483 HOST_WIDE_INT coeff;
3484 bool is_neg;
3485 int mode_bitsize;
3487 if (op1 == CONST0_RTX (mode))
3488 return op1;
3489 if (op1 == CONST1_RTX (mode))
3490 return op0;
3491 if (op1 == CONSTM1_RTX (mode))
3492 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3493 op0, target, 0);
3495 if (do_trapv)
3496 goto skip_synth;
3498 /* If mode is integer vector mode, check if the backend supports
3499 vector lshift (by scalar or vector) at all. If not, we can't use
3500 a synthesized multiply. */
3501 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3502 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3503 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3504 goto skip_synth;
3506 /* These are the operations that are potentially turned into
3507 a sequence of shifts and additions. */
3508 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3510 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3511 less than or equal in size to `unsigned int' this doesn't matter.
3512 If the mode is larger than `unsigned int', then synth_mult works
3513 only if the constant value exactly fits in an `unsigned int' without
3514 any truncation. This means that multiplying by negative values does
3515 not work; results are off by 2^32 on a 32 bit machine. */
3516 if (CONST_INT_P (scalar_op1))
3518 coeff = INTVAL (scalar_op1);
3519 is_neg = coeff < 0;
3521 #if TARGET_SUPPORTS_WIDE_INT
3522 else if (CONST_WIDE_INT_P (scalar_op1))
3523 #else
3524 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3525 #endif
3527 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3528 /* Perfect power of 2 (other than 1, which is handled above). */
3529 if (shift > 0)
3530 return expand_shift (LSHIFT_EXPR, mode, op0,
3531 shift, target, unsignedp);
3532 else
3533 goto skip_synth;
3535 else
3536 goto skip_synth;
3538 /* We used to test optimize here, on the grounds that it's better to
3539 produce a smaller program when -O is not used. But this causes
3540 such a terrible slowdown sometimes that it seems better to always
3541 use synth_mult. */
3543 /* Special case powers of two. */
3544 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3545 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3546 return expand_shift (LSHIFT_EXPR, mode, op0,
3547 floor_log2 (coeff), target, unsignedp);
3549 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3551 /* Attempt to handle multiplication of DImode values by negative
3552 coefficients, by performing the multiplication by a positive
3553 multiplier and then negating the result. */
3554 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3556 /* It's safe to use -coeff even for INT_MIN, as the
3557 result is interpreted as an unsigned coefficient.
3558 Exclude cost of op0 from max_cost to match the cost
3559 calculation of the synth_mult. */
3560 coeff = -(unsigned HOST_WIDE_INT) coeff;
3561 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3562 mode, speed)
3563 - neg_cost (speed, mode));
3564 if (max_cost <= 0)
3565 goto skip_synth;
3567 /* Special case powers of two. */
3568 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3570 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3571 floor_log2 (coeff), target, unsignedp);
3572 return expand_unop (mode, neg_optab, temp, target, 0);
3575 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3576 max_cost))
3578 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3579 &algorithm, variant);
3580 return expand_unop (mode, neg_optab, temp, target, 0);
3582 goto skip_synth;
3585 /* Exclude cost of op0 from max_cost to match the cost
3586 calculation of the synth_mult. */
3587 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3588 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3589 return expand_mult_const (mode, op0, coeff, target,
3590 &algorithm, variant);
3592 skip_synth:
3594 /* Expand x*2.0 as x+x. */
3595 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3596 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3598 op0 = force_reg (GET_MODE (op0), op0);
3599 return expand_binop (mode, add_optab, op0, op0,
3600 target, unsignedp,
3601 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3604 /* This used to use umul_optab if unsigned, but for non-widening multiply
3605 there is no difference between signed and unsigned. */
3606 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3607 op0, op1, target, unsignedp,
3608 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3609 gcc_assert (op0 || no_libcall);
3610 return op0;
3613 /* Return a cost estimate for multiplying a register by the given
3614 COEFFicient in the given MODE and SPEED. */
3617 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3619 int max_cost;
3620 struct algorithm algorithm;
3621 enum mult_variant variant;
3623 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3624 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3625 mode, speed);
3626 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3627 return algorithm.cost.cost;
3628 else
3629 return max_cost;
3632 /* Perform a widening multiplication and return an rtx for the result.
3633 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3634 TARGET is a suggestion for where to store the result (an rtx).
3635 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3636 or smul_widen_optab.
3638 We check specially for a constant integer as OP1, comparing the
3639 cost of a widening multiply against the cost of a sequence of shifts
3640 and adds. */
3643 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3644 int unsignedp, optab this_optab)
3646 bool speed = optimize_insn_for_speed_p ();
3647 rtx cop1;
3649 if (CONST_INT_P (op1)
3650 && GET_MODE (op0) != VOIDmode
3651 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3652 this_optab == umul_widen_optab))
3653 && CONST_INT_P (cop1)
3654 && (INTVAL (cop1) >= 0
3655 || HWI_COMPUTABLE_MODE_P (mode)))
3657 HOST_WIDE_INT coeff = INTVAL (cop1);
3658 int max_cost;
3659 enum mult_variant variant;
3660 struct algorithm algorithm;
3662 if (coeff == 0)
3663 return CONST0_RTX (mode);
3665 /* Special case powers of two. */
3666 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3668 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3669 return expand_shift (LSHIFT_EXPR, mode, op0,
3670 floor_log2 (coeff), target, unsignedp);
3673 /* Exclude cost of op0 from max_cost to match the cost
3674 calculation of the synth_mult. */
3675 max_cost = mul_widen_cost (speed, mode);
3676 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3677 max_cost))
3679 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3680 return expand_mult_const (mode, op0, coeff, target,
3681 &algorithm, variant);
3684 return expand_binop (mode, this_optab, op0, op1, target,
3685 unsignedp, OPTAB_LIB_WIDEN);
3688 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3689 replace division by D, and put the least significant N bits of the result
3690 in *MULTIPLIER_PTR and return the most significant bit.
3692 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3693 needed precision is in PRECISION (should be <= N).
3695 PRECISION should be as small as possible so this function can choose
3696 the multiplier more freely.
3698 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3699 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3701 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3702 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3704 unsigned HOST_WIDE_INT
3705 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3706 unsigned HOST_WIDE_INT *multiplier_ptr,
3707 int *post_shift_ptr, int *lgup_ptr)
3709 int lgup, post_shift;
3710 int pow, pow2;
3712 /* lgup = ceil(log2(divisor)); */
3713 lgup = ceil_log2 (d);
3715 gcc_assert (lgup <= n);
3717 pow = n + lgup;
3718 pow2 = n + lgup - precision;
3720 /* mlow = 2^(N + lgup)/d */
3721 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3722 wide_int mlow = wi::udiv_trunc (val, d);
3724 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3725 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3726 wide_int mhigh = wi::udiv_trunc (val, d);
3728 /* If precision == N, then mlow, mhigh exceed 2^N
3729 (but they do not exceed 2^(N+1)). */
3731 /* Reduce to lowest terms. */
3732 for (post_shift = lgup; post_shift > 0; post_shift--)
3734 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3735 HOST_BITS_PER_WIDE_INT);
3736 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3737 HOST_BITS_PER_WIDE_INT);
3738 if (ml_lo >= mh_lo)
3739 break;
3741 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3742 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3745 *post_shift_ptr = post_shift;
3746 *lgup_ptr = lgup;
3747 if (n < HOST_BITS_PER_WIDE_INT)
3749 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3750 *multiplier_ptr = mhigh.to_uhwi () & mask;
3751 return mhigh.to_uhwi () > mask;
3753 else
3755 *multiplier_ptr = mhigh.to_uhwi ();
3756 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
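/* Illustrative sketch, not part of the compiled code: for unsigned
   32-bit division by 3, choose_multiplier (3, 32, 32, &m, &p, &l)
   should reduce to m == 0xAAAAAAAB (that is, ceil (2^33 / 3)),
   p == 1, l == 2, and return 0 because the whole multiplier fits in
   32 bits.  Assuming a 64-bit intermediate product is available, the
   division then becomes:  */
#if 0
static unsigned int
udiv_by_3_example (unsigned int x)
{
  unsigned long long m = 0xAAAAAAABULL;		/* ceil (2^33 / 3) */
  return (unsigned int) ((x * m) >> 32) >> 1;	/* high part, then >> p */
}
#endif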
3760 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3761 congruent to 1 (mod 2**N). */
3763 static unsigned HOST_WIDE_INT
3764 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3766 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3768 /* The algorithm notes that the choice y = x satisfies
3769 x*y == 1 mod 2^3, since x is assumed odd.
3770 Each iteration doubles the number of bits of significance in y. */
3772 unsigned HOST_WIDE_INT mask;
3773 unsigned HOST_WIDE_INT y = x;
3774 int nbit = 3;
3776 mask = (n == HOST_BITS_PER_WIDE_INT
3777 ? HOST_WIDE_INT_M1U
3778 : (HOST_WIDE_INT_1U << n) - 1);
3780 while (nbit < n)
3782 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3783 nbit *= 2;
3785 return y;
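/* Illustrative sketch, not part of the compiled code: for
   invert_mod2n (3, 8) the iteration runs

       y = 3                                  [3 * 3 == 9, correct mod 2^3]
       y = 3 * (2 - 3 * 3) & 0xff == 235      [3 * 235 == 705, correct mod 2^6]
       y = 235 * (2 - 3 * 235) & 0xff == 171  [3 * 171 == 513, correct mod 2^8]

   and indeed 3 * 171 == 513 == 2 * 256 + 1.  The caller must pass an
   odd X; even values have no inverse modulo a power of two.  */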
3788 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3789 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3790 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3791 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3792 become signed.
3794 The result is put in TARGET if that is convenient.
3796 MODE is the mode of operation. */
3799 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3800 rtx op1, rtx target, int unsignedp)
3802 rtx tem;
3803 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3805 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3806 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3807 tem = expand_and (mode, tem, op1, NULL_RTX);
3808 adj_operand
3809 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3810 adj_operand);
3812 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3813 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3814 tem = expand_and (mode, tem, op0, NULL_RTX);
3815 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3816 target);
3818 return target;
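/* Illustrative sketch, not part of the compiled code: in an 8-bit
   mode with OP0 == 0xff and OP1 == 0x02, the signed high part of the
   product is 0xff (the high byte of -1 * 2 == 0xfffe) while the
   unsigned high part is 0x01 (the high byte of 255 * 2 == 0x1fe).
   With UNSIGNEDP nonzero the code above adds OP1 whenever OP0's sign
   bit is set and OP0 whenever OP1's sign bit is set; here only OP0
   looks negative, so 0xff + 0x02 == 0x101 truncates to the unsigned
   high part 0x01.  The MINUS flavor subtracts the same terms to turn
   an unsigned high part back into a signed one.  */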
3821 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3823 static rtx
3824 extract_high_half (scalar_int_mode mode, rtx op)
3826 if (mode == word_mode)
3827 return gen_highpart (mode, op);
3829 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3831 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3832 GET_MODE_BITSIZE (mode), 0, 1);
3833 return convert_modes (mode, wider_mode, op, 0);
3836 /* Like expmed_mult_highpart, but only consider using a multiplication
3837 optab. OP1 is an rtx for the constant operand. */
3839 static rtx
3840 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3841 rtx target, int unsignedp, int max_cost)
3843 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3844 optab moptab;
3845 rtx tem;
3846 int size;
3847 bool speed = optimize_insn_for_speed_p ();
3849 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3851 size = GET_MODE_BITSIZE (mode);
3853 /* Firstly, try using a multiplication insn that only generates the needed
3854 high part of the product, and in the sign flavor of unsignedp. */
3855 if (mul_highpart_cost (speed, mode) < max_cost)
3857 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3858 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3859 unsignedp, OPTAB_DIRECT);
3860 if (tem)
3861 return tem;
3864 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3865 Need to adjust the result after the multiplication. */
3866 if (size - 1 < BITS_PER_WORD
3867 && (mul_highpart_cost (speed, mode)
3868 + 2 * shift_cost (speed, mode, size-1)
3869 + 4 * add_cost (speed, mode) < max_cost))
3871 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3872 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3873 unsignedp, OPTAB_DIRECT);
3874 if (tem)
3875 /* We used the wrong signedness. Adjust the result. */
3876 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3877 tem, unsignedp);
3880 /* Try widening multiplication. */
3881 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3882 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3883 && mul_widen_cost (speed, wider_mode) < max_cost)
3885 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3886 unsignedp, OPTAB_WIDEN);
3887 if (tem)
3888 return extract_high_half (mode, tem);
3891 /* Try widening the mode and perform a non-widening multiplication. */
3892 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3893 && size - 1 < BITS_PER_WORD
3894 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3895 < max_cost))
3897 rtx_insn *insns;
3898 rtx wop0, wop1;
3900 /* We need to widen the operands, for example to ensure the
3901 constant multiplier is correctly sign or zero extended.
3902 Use a sequence to clean up any instructions emitted by
3903 the conversions if things don't work out. */
3904 start_sequence ();
3905 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3906 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3907 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3908 unsignedp, OPTAB_WIDEN);
3909 insns = get_insns ();
3910 end_sequence ();
3912 if (tem)
3914 emit_insn (insns);
3915 return extract_high_half (mode, tem);
3919 /* Try widening multiplication of opposite signedness, and adjust. */
3920 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3921 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3922 && size - 1 < BITS_PER_WORD
3923 && (mul_widen_cost (speed, wider_mode)
3924 + 2 * shift_cost (speed, mode, size-1)
3925 + 4 * add_cost (speed, mode) < max_cost))
3927 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3928 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3929 if (tem != 0)
3931 tem = extract_high_half (mode, tem);
3932 /* We used the wrong signedness. Adjust the result. */
3933 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3934 target, unsignedp);
3938 return 0;
3941 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3942 putting the high half of the result in TARGET if that is convenient,
3943 and return where the result is. If the operation cannot be performed,
3944 0 is returned.
3946 MODE is the mode of operation and result.
3948 UNSIGNEDP nonzero means unsigned multiply.
3950 MAX_COST is the total allowed cost for the expanded RTL. */
3952 static rtx
3953 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3954 rtx target, int unsignedp, int max_cost)
3956 unsigned HOST_WIDE_INT cnst1;
3957 int extra_cost;
3958 bool sign_adjust = false;
3959 enum mult_variant variant;
3960 struct algorithm alg;
3961 rtx tem;
3962 bool speed = optimize_insn_for_speed_p ();
3964 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3965 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3967 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3969 /* We can't optimize modes wider than BITS_PER_WORD.
3970 ??? We might be able to perform double-word arithmetic if
3971 mode == word_mode, however all the cost calculations in
3972 synth_mult etc. assume single-word operations. */
3973 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3974 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3975 return expmed_mult_highpart_optab (mode, op0, op1, target,
3976 unsignedp, max_cost);
3978 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3980 /* Check whether we try to multiply by a negative constant. */
3981 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3983 sign_adjust = true;
3984 extra_cost += add_cost (speed, mode);
3987 /* See whether shift/add multiplication is cheap enough. */
3988 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3989 max_cost - extra_cost))
3991 /* See whether the specialized multiplication optabs are
3992 cheaper than the shift/add version. */
3993 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3994 alg.cost.cost + extra_cost);
3995 if (tem)
3996 return tem;
3998 tem = convert_to_mode (wider_mode, op0, unsignedp);
3999 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
4000 tem = extract_high_half (mode, tem);
4002 /* Adjust result for signedness. */
4003 if (sign_adjust)
4004 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
4006 return tem;
4008 return expmed_mult_highpart_optab (mode, op0, op1, target,
4009 unsignedp, max_cost);
4013 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
4015 static rtx
4016 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4018 rtx result, temp, shift;
4019 rtx_code_label *label;
4020 int logd;
4021 int prec = GET_MODE_PRECISION (mode);
4023 logd = floor_log2 (d);
4024 result = gen_reg_rtx (mode);
4026 /* Avoid conditional branches when they're expensive. */
4027 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4028 && optimize_insn_for_speed_p ())
4030 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4031 mode, 0, -1);
4032 if (signmask)
4034 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4035 signmask = force_reg (mode, signmask);
4036 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4038 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4039 which instruction sequence to use. If logical right shifts
4040 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4041 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
4043 temp = gen_rtx_LSHIFTRT (mode, result, shift);
4044 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4045 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4046 > COSTS_N_INSNS (2)))
4048 temp = expand_binop (mode, xor_optab, op0, signmask,
4049 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4050 temp = expand_binop (mode, sub_optab, temp, signmask,
4051 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4052 temp = expand_binop (mode, and_optab, temp,
4053 gen_int_mode (masklow, mode),
4054 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4055 temp = expand_binop (mode, xor_optab, temp, signmask,
4056 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4057 temp = expand_binop (mode, sub_optab, temp, signmask,
4058 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4060 else
4062 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4063 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4064 signmask = force_reg (mode, signmask);
4066 temp = expand_binop (mode, add_optab, op0, signmask,
4067 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4068 temp = expand_binop (mode, and_optab, temp,
4069 gen_int_mode (masklow, mode),
4070 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4071 temp = expand_binop (mode, sub_optab, temp, signmask,
4072 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4074 return temp;
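/* Illustrative sketch, not part of the compiled code: in an 8-bit
   mode with OP0 == -7 and D == 4 (LOGD == 2), SIGNMASK is all ones,
   and the LSHIFTRT variant above reduces it to 0xff >> 6 == 3, then
   computes ((-7 + 3) & 3) - 3 == -3, the truncating remainder of
   -7 by 4.  The XOR/SUB variant produces the same value, and for
   non-negative OP0 both collapse to a plain AND with D - 1.  */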
4078 /* Mask contains the mode's signbit and the significant bits of the
4079 modulus. By including the signbit in the operation, many targets
4080 can avoid an explicit compare operation in the following comparison
4081 against zero. */
4082 wide_int mask = wi::mask (logd, false, prec);
4083 mask = wi::set_bit (mask, prec - 1);
4085 temp = expand_binop (mode, and_optab, op0,
4086 immed_wide_int_const (mask, mode),
4087 result, 1, OPTAB_LIB_WIDEN);
4088 if (temp != result)
4089 emit_move_insn (result, temp);
4091 label = gen_label_rtx ();
4092 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4094 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4095 0, OPTAB_LIB_WIDEN);
4097 mask = wi::mask (logd, true, prec);
4098 temp = expand_binop (mode, ior_optab, temp,
4099 immed_wide_int_const (mask, mode),
4100 result, 1, OPTAB_LIB_WIDEN);
4101 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4102 0, OPTAB_LIB_WIDEN);
4103 if (temp != result)
4104 emit_move_insn (result, temp);
4105 emit_label (label);
4106 return result;
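/* Illustrative sketch, not part of the compiled code: the branching
   path above, in an 8-bit mode with D == 4, masks OP0 with 0x83 (the
   sign bit plus the two low bits).  For OP0 == -7 that gives 0x81,
   which is negative, so the fixup runs: ((0x81 - 1) | 0xfc) + 1 ==
   0xfd == -3, again the truncating remainder.  For OP0 == 7 the AND
   gives 3 directly and the branch skips the fixup.  */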
4109 /* Expand signed division of OP0 by a power of two D in mode MODE.
4110 This routine is only called for positive values of D. */
4112 static rtx
4113 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4115 rtx temp;
4116 rtx_code_label *label;
4117 int logd;
4119 logd = floor_log2 (d);
4121 if (d == 2
4122 && BRANCH_COST (optimize_insn_for_speed_p (),
4123 false) >= 1)
4125 temp = gen_reg_rtx (mode);
4126 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4127 if (temp != NULL_RTX)
4129 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4130 0, OPTAB_LIB_WIDEN);
4131 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4135 if (HAVE_conditional_move
4136 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4138 rtx temp2;
4140 start_sequence ();
4141 temp2 = copy_to_mode_reg (mode, op0);
4142 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4143 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4144 temp = force_reg (mode, temp);
4146 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4147 temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
4148 temp, temp2, mode, 0);
4149 if (temp2)
4151 rtx_insn *seq = get_insns ();
4152 end_sequence ();
4153 emit_insn (seq);
4154 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4156 end_sequence ();
4159 if (BRANCH_COST (optimize_insn_for_speed_p (),
4160 false) >= 2)
4162 int ushift = GET_MODE_BITSIZE (mode) - logd;
4164 temp = gen_reg_rtx (mode);
4165 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4166 if (temp != NULL_RTX)
4168 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4169 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4170 > COSTS_N_INSNS (1))
4171 temp = expand_binop (mode, and_optab, temp,
4172 gen_int_mode (d - 1, mode),
4173 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4174 else
4175 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4176 ushift, NULL_RTX, 1);
4177 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4178 0, OPTAB_LIB_WIDEN);
4179 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4183 label = gen_label_rtx ();
4184 temp = copy_to_mode_reg (mode, op0);
4185 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4186 expand_inc (temp, gen_int_mode (d - 1, mode));
4187 emit_label (label);
4188 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
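/* Illustrative sketch, not part of the compiled code: every variant
   above implements truncating signed division by 2^LOGD by adding
   2^LOGD - 1 to negative dividends before the arithmetic right
   shift, e.g. (-7 + 3) >> 2 == -1 == trunc (-7 / 4).  Assuming a
   two's-complement int whose right shift of negative values is
   arithmetic, the transformation corresponds to:  */
#if 0
static int
sdiv_pow2_example (int x, int logd)
{
  if (x < 0)
    x += (1 << logd) - 1;	/* bias negative dividends upwards */
  return x >> logd;		/* arithmetic shift */
}
#endif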
4191 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4192 if that is convenient, and returning where the result is.
4193 You may request either the quotient or the remainder as the result;
4194 specify REM_FLAG nonzero to get the remainder.
4196 CODE is the expression code for which kind of division this is;
4197 it controls how rounding is done. MODE is the machine mode to use.
4198 UNSIGNEDP nonzero means do unsigned division. */
4200 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4201 and then correct it by or'ing in missing high bits
4202 if result of ANDI is nonzero.
4203 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4204 This could optimize to a bfexts instruction.
4205 But C doesn't use these operations, so their optimizations are
4206 left for later. */
4207 /* ??? For modulo, we don't actually need the highpart of the first product,
4208 the low part will do nicely. And for small divisors, the second multiply
4209 can also be a low-part only multiply or even be completely left out.
4210 E.g. to calculate the remainder of a division by 3 with a 32 bit
4211 multiply, multiply with 0x55555556 and extract the upper two bits;
4212 the result is exact for inputs up to 0x1fffffff.
4213 The input range can be reduced by using cross-sum rules.
4214 For odd divisors >= 3, the following table gives right shift counts
4215 so that if a number is shifted by an integer multiple of the given
4216 amount, the remainder stays the same:
4217 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4218 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4219 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4220 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4221 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4223 Cross-sum rules for even numbers can be derived by leaving as many bits
4224 to the right alone as the divisor has zeros to the right.
4225 E.g. if x is an unsigned 32 bit number:
4226 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4230 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4231 rtx op0, rtx op1, rtx target, int unsignedp,
4232 enum optab_methods methods)
4234 machine_mode compute_mode;
4235 rtx tquotient;
4236 rtx quotient = 0, remainder = 0;
4237 rtx_insn *last;
4238 rtx_insn *insn;
4239 optab optab1, optab2;
4240 int op1_is_constant, op1_is_pow2 = 0;
4241 int max_cost, extra_cost;
4242 static HOST_WIDE_INT last_div_const = 0;
4243 bool speed = optimize_insn_for_speed_p ();
4245 op1_is_constant = CONST_INT_P (op1);
4246 if (op1_is_constant)
4248 wide_int ext_op1 = rtx_mode_t (op1, mode);
4249 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4250 || (! unsignedp
4251 && wi::popcount (wi::neg (ext_op1)) == 1));
4255 This is the structure of expand_divmod:
4257 First comes code to fix up the operands so we can perform the operations
4258 correctly and efficiently.
4260 Second comes a switch statement with code specific for each rounding mode.
4261 For some special operands this code emits all RTL for the desired
4262 operation, for other cases, it generates only a quotient and stores it in
4263 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4264 to indicate that it has not done anything.
4266 Last comes code that finishes the operation. If QUOTIENT is set and
4267 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4268 QUOTIENT is not set, it is computed using trunc rounding.
4270 We try to generate special code for division and remainder when OP1 is a
4271 constant. If |OP1| = 2**n we can use shifts and some other fast
4272 operations. For other values of OP1, we compute a carefully selected
4273 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4274 by m.
4276 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4277 half of the product. Different strategies for generating the product are
4278 implemented in expmed_mult_highpart.
4280 If what we actually want is the remainder, we generate that by another
4281 by-constant multiplication and a subtraction. */
4283 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4284 code below will malfunction if we are, so check here and handle
4285 the special case if so. */
4286 if (op1 == const1_rtx)
4287 return rem_flag ? const0_rtx : op0;
4289 /* When dividing by -1, we could get an overflow.
4290 negv_optab can handle overflows. */
4291 if (! unsignedp && op1 == constm1_rtx)
4293 if (rem_flag)
4294 return const0_rtx;
4295 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4296 ? negv_optab : neg_optab, op0, target, 0);
4299 if (target
4300 /* Don't use the function value register as a target
4301 since we have to read it as well as write it,
4302 and function-inlining gets confused by this. */
4303 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4304 /* Don't clobber an operand while doing a multi-step calculation. */
4305 || ((rem_flag || op1_is_constant)
4306 && (reg_mentioned_p (target, op0)
4307 || (MEM_P (op0) && MEM_P (target))))
4308 || reg_mentioned_p (target, op1)
4309 || (MEM_P (op1) && MEM_P (target))))
4310 target = 0;
4312 /* Get the mode in which to perform this computation. Normally it will
4313 be MODE, but sometimes we can't do the desired operation in MODE.
4314 If so, pick a wider mode in which we can do the operation. Convert
4315 to that mode at the start to avoid repeated conversions.
4317 First see what operations we need. These depend on the expression
4318 we are evaluating. (We assume that divxx3 insns exist under the
4319 same conditions as modxx3 insns, and that these insns don't normally
4320 fail. If these assumptions are not correct, we may generate less
4321 efficient code in some cases.)
4323 Then see if we find a mode in which we can open-code that operation
4324 (either a division, modulus, or shift). Finally, check for the smallest
4325 mode for which we can do the operation with a library call. */
4327 /* We might want to refine this now that we have division-by-constant
4328 optimization. Since expmed_mult_highpart tries so many variants, it is
4329 not straightforward to generalize this. Maybe we should make an array
4330 of possible modes in init_expmed? Save this for GCC 2.7. */
4332 optab1 = (op1_is_pow2
4333 ? (unsignedp ? lshr_optab : ashr_optab)
4334 : (unsignedp ? udiv_optab : sdiv_optab));
4335 optab2 = (op1_is_pow2 ? optab1
4336 : (unsignedp ? udivmod_optab : sdivmod_optab));
4338 if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4340 FOR_EACH_MODE_FROM (compute_mode, mode)
4341 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4342 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4343 break;
4345 if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4346 FOR_EACH_MODE_FROM (compute_mode, mode)
4347 if (optab_libfunc (optab1, compute_mode)
4348 || optab_libfunc (optab2, compute_mode))
4349 break;
4351 else
4352 compute_mode = mode;
4354 /* If we still couldn't find a mode, use MODE, but expand_binop will
4355 probably die. */
4356 if (compute_mode == VOIDmode)
4357 compute_mode = mode;
4359 if (target && GET_MODE (target) == compute_mode)
4360 tquotient = target;
4361 else
4362 tquotient = gen_reg_rtx (compute_mode);
4364 #if 0
4365 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4366 (mode), and thereby get better code when OP1 is a constant. Do that
4367 later. It will require going over all usages of SIZE below. */
4368 size = GET_MODE_BITSIZE (mode);
4369 #endif
4371 /* Only deduct something for a REM if the last divide done was
4372 for a different constant. Then set the constant of the last
4373 divide. */
4374 max_cost = (unsignedp
4375 ? udiv_cost (speed, compute_mode)
4376 : sdiv_cost (speed, compute_mode));
4377 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4378 && INTVAL (op1) == last_div_const))
4379 max_cost -= (mul_cost (speed, compute_mode)
4380 + add_cost (speed, compute_mode));
4382 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4384 /* Now convert to the best mode to use. */
4385 if (compute_mode != mode)
4387 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4388 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4390 /* convert_modes may have placed op1 into a register, so we
4391 must recompute the following. */
4392 op1_is_constant = CONST_INT_P (op1);
4393 if (op1_is_constant)
4395 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4396 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4397 || (! unsignedp
4398 && wi::popcount (wi::neg (ext_op1)) == 1));
4400 else
4401 op1_is_pow2 = 0;
4404 /* If one of the operands is a volatile MEM, copy it into a register. */
4406 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4407 op0 = force_reg (compute_mode, op0);
4408 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4409 op1 = force_reg (compute_mode, op1);
4411 /* If we need the remainder or if OP1 is constant, we need to
4412 put OP0 in a register in case it has any queued subexpressions. */
4413 if (rem_flag || op1_is_constant)
4414 op0 = force_reg (compute_mode, op0);
4416 last = get_last_insn ();
4418 /* Promote floor rounding to trunc rounding for unsigned operations. */
4419 if (unsignedp)
4421 if (code == FLOOR_DIV_EXPR)
4422 code = TRUNC_DIV_EXPR;
4423 if (code == FLOOR_MOD_EXPR)
4424 code = TRUNC_MOD_EXPR;
4425 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4426 code = TRUNC_DIV_EXPR;
4429 if (op1 != const0_rtx)
4430 switch (code)
4432 case TRUNC_MOD_EXPR:
4433 case TRUNC_DIV_EXPR:
4434 if (op1_is_constant)
4436 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4437 int size = GET_MODE_BITSIZE (int_mode);
4438 if (unsignedp)
4440 unsigned HOST_WIDE_INT mh, ml;
4441 int pre_shift, post_shift;
4442 int dummy;
4443 wide_int wd = rtx_mode_t (op1, int_mode);
4444 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4446 if (wi::popcount (wd) == 1)
4448 pre_shift = floor_log2 (d);
4449 if (rem_flag)
4451 unsigned HOST_WIDE_INT mask
4452 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4453 remainder
4454 = expand_binop (int_mode, and_optab, op0,
4455 gen_int_mode (mask, int_mode),
4456 remainder, 1, methods);
4457 if (remainder)
4458 return gen_lowpart (mode, remainder);
4460 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4461 pre_shift, tquotient, 1);
4463 else if (size <= HOST_BITS_PER_WIDE_INT)
4465 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4467 /* Most significant bit of divisor is set; emit an scc
4468 insn. */
4469 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4470 int_mode, 1, 1);
4472 else
4474 /* Find a suitable multiplier and right shift count
4475 instead of multiplying with D. */
4477 mh = choose_multiplier (d, size, size,
4478 &ml, &post_shift, &dummy);
4480 /* If the suggested multiplier is more than SIZE bits,
4481 we can do better for even divisors, using an
4482 initial right shift. */
4483 if (mh != 0 && (d & 1) == 0)
4485 pre_shift = ctz_or_zero (d);
4486 mh = choose_multiplier (d >> pre_shift, size,
4487 size - pre_shift,
4488 &ml, &post_shift, &dummy);
4489 gcc_assert (!mh);
4491 else
4492 pre_shift = 0;
4494 if (mh != 0)
4496 rtx t1, t2, t3, t4;
4498 if (post_shift - 1 >= BITS_PER_WORD)
4499 goto fail1;
4501 extra_cost
4502 = (shift_cost (speed, int_mode, post_shift - 1)
4503 + shift_cost (speed, int_mode, 1)
4504 + 2 * add_cost (speed, int_mode));
4505 t1 = expmed_mult_highpart
4506 (int_mode, op0, gen_int_mode (ml, int_mode),
4507 NULL_RTX, 1, max_cost - extra_cost);
4508 if (t1 == 0)
4509 goto fail1;
4510 t2 = force_operand (gen_rtx_MINUS (int_mode,
4511 op0, t1),
4512 NULL_RTX);
4513 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4514 t2, 1, NULL_RTX, 1);
4515 t4 = force_operand (gen_rtx_PLUS (int_mode,
4516 t1, t3),
4517 NULL_RTX);
4518 quotient = expand_shift
4519 (RSHIFT_EXPR, int_mode, t4,
4520 post_shift - 1, tquotient, 1);
4522 else
4524 rtx t1, t2;
4526 if (pre_shift >= BITS_PER_WORD
4527 || post_shift >= BITS_PER_WORD)
4528 goto fail1;
4530 t1 = expand_shift
4531 (RSHIFT_EXPR, int_mode, op0,
4532 pre_shift, NULL_RTX, 1);
4533 extra_cost
4534 = (shift_cost (speed, int_mode, pre_shift)
4535 + shift_cost (speed, int_mode, post_shift));
4536 t2 = expmed_mult_highpart
4537 (int_mode, t1,
4538 gen_int_mode (ml, int_mode),
4539 NULL_RTX, 1, max_cost - extra_cost);
4540 if (t2 == 0)
4541 goto fail1;
4542 quotient = expand_shift
4543 (RSHIFT_EXPR, int_mode, t2,
4544 post_shift, tquotient, 1);
4548 else /* Mode too wide to use tricky code */
4549 break;
4551 insn = get_last_insn ();
4552 if (insn != last)
4553 set_dst_reg_note (insn, REG_EQUAL,
4554 gen_rtx_UDIV (int_mode, op0, op1),
4555 quotient);
4557 else /* TRUNC_DIV, signed */
4559 unsigned HOST_WIDE_INT ml;
4560 int lgup, post_shift;
4561 rtx mlr;
4562 HOST_WIDE_INT d = INTVAL (op1);
4563 unsigned HOST_WIDE_INT abs_d;
4565 /* Not prepared to handle division/remainder by
4566 0xffffffffffffffff8000000000000000 etc. */
4567 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4568 break;
4570 /* Since d might be INT_MIN, we have to cast to
4571 unsigned HOST_WIDE_INT before negating to avoid
4572 undefined signed overflow. */
4573 abs_d = (d >= 0
4574 ? (unsigned HOST_WIDE_INT) d
4575 : - (unsigned HOST_WIDE_INT) d);
4577 /* n rem d = n rem -d */
4578 if (rem_flag && d < 0)
4580 d = abs_d;
4581 op1 = gen_int_mode (abs_d, int_mode);
4584 if (d == 1)
4585 quotient = op0;
4586 else if (d == -1)
4587 quotient = expand_unop (int_mode, neg_optab, op0,
4588 tquotient, 0);
4589 else if (size <= HOST_BITS_PER_WIDE_INT
4590 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4592 /* This case is not handled correctly below. */
4593 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4594 int_mode, 1, 1);
4595 if (quotient == 0)
4596 goto fail1;
4598 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4599 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4600 && (rem_flag
4601 ? smod_pow2_cheap (speed, int_mode)
4602 : sdiv_pow2_cheap (speed, int_mode))
4603 /* We assume that the cheap metric is true if the
4604 optab has an expander for this mode. */
4605 && ((optab_handler ((rem_flag ? smod_optab
4606 : sdiv_optab),
4607 int_mode)
4608 != CODE_FOR_nothing)
4609 || (optab_handler (sdivmod_optab, int_mode)
4610 != CODE_FOR_nothing)))
4612 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4614 if (rem_flag)
4616 remainder = expand_smod_pow2 (int_mode, op0, d);
4617 if (remainder)
4618 return gen_lowpart (mode, remainder);
4621 if (sdiv_pow2_cheap (speed, int_mode)
4622 && ((optab_handler (sdiv_optab, int_mode)
4623 != CODE_FOR_nothing)
4624 || (optab_handler (sdivmod_optab, int_mode)
4625 != CODE_FOR_nothing)))
4626 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4627 int_mode, op0,
4628 gen_int_mode (abs_d,
4629 int_mode),
4630 NULL_RTX, 0);
4631 else
4632 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4634 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4635 negate the quotient. */
4636 if (d < 0)
4638 insn = get_last_insn ();
4639 if (insn != last
4640 && abs_d < (HOST_WIDE_INT_1U
4641 << (HOST_BITS_PER_WIDE_INT - 1)))
4642 set_dst_reg_note (insn, REG_EQUAL,
4643 gen_rtx_DIV (int_mode, op0,
4644 gen_int_mode
4645 (abs_d,
4646 int_mode)),
4647 quotient);
4649 quotient = expand_unop (int_mode, neg_optab,
4650 quotient, quotient, 0);
4653 else if (size <= HOST_BITS_PER_WIDE_INT)
4655 choose_multiplier (abs_d, size, size - 1,
4656 &ml, &post_shift, &lgup);
4657 if (ml < HOST_WIDE_INT_1U << (size - 1))
4659 rtx t1, t2, t3;
4661 if (post_shift >= BITS_PER_WORD
4662 || size - 1 >= BITS_PER_WORD)
4663 goto fail1;
4665 extra_cost = (shift_cost (speed, int_mode, post_shift)
4666 + shift_cost (speed, int_mode, size - 1)
4667 + add_cost (speed, int_mode));
4668 t1 = expmed_mult_highpart
4669 (int_mode, op0, gen_int_mode (ml, int_mode),
4670 NULL_RTX, 0, max_cost - extra_cost);
4671 if (t1 == 0)
4672 goto fail1;
4673 t2 = expand_shift
4674 (RSHIFT_EXPR, int_mode, t1,
4675 post_shift, NULL_RTX, 0);
4676 t3 = expand_shift
4677 (RSHIFT_EXPR, int_mode, op0,
4678 size - 1, NULL_RTX, 0);
4679 if (d < 0)
4680 quotient
4681 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4682 tquotient);
4683 else
4684 quotient
4685 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4686 tquotient);
4688 else
4690 rtx t1, t2, t3, t4;
4692 if (post_shift >= BITS_PER_WORD
4693 || size - 1 >= BITS_PER_WORD)
4694 goto fail1;
4696 ml |= HOST_WIDE_INT_M1U << (size - 1);
4697 mlr = gen_int_mode (ml, int_mode);
4698 extra_cost = (shift_cost (speed, int_mode, post_shift)
4699 + shift_cost (speed, int_mode, size - 1)
4700 + 2 * add_cost (speed, int_mode));
4701 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4702 NULL_RTX, 0,
4703 max_cost - extra_cost);
4704 if (t1 == 0)
4705 goto fail1;
4706 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4707 NULL_RTX);
4708 t3 = expand_shift
4709 (RSHIFT_EXPR, int_mode, t2,
4710 post_shift, NULL_RTX, 0);
4711 t4 = expand_shift
4712 (RSHIFT_EXPR, int_mode, op0,
4713 size - 1, NULL_RTX, 0);
4714 if (d < 0)
4715 quotient
4716 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4717 tquotient);
4718 else
4719 quotient
4720 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4721 tquotient);
4724 else /* Mode too wide to use tricky code */
4725 break;
4727 insn = get_last_insn ();
4728 if (insn != last)
4729 set_dst_reg_note (insn, REG_EQUAL,
4730 gen_rtx_DIV (int_mode, op0, op1),
4731 quotient);
4733 break;
4735 fail1:
4736 delete_insns_since (last);
4737 break;
4739 case FLOOR_DIV_EXPR:
4740 case FLOOR_MOD_EXPR:
4741 /* We will come here only for signed operations. */
4742 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4744 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4745 int size = GET_MODE_BITSIZE (int_mode);
4746 unsigned HOST_WIDE_INT mh, ml;
4747 int pre_shift, lgup, post_shift;
4748 HOST_WIDE_INT d = INTVAL (op1);
4750 if (d > 0)
4752 /* We could just as easily deal with negative constants here,
4753 but it does not seem worth the trouble for GCC 2.6. */
4754 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4756 pre_shift = floor_log2 (d);
4757 if (rem_flag)
4759 unsigned HOST_WIDE_INT mask
4760 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4761 remainder = expand_binop
4762 (int_mode, and_optab, op0,
4763 gen_int_mode (mask, int_mode),
4764 remainder, 0, methods);
4765 if (remainder)
4766 return gen_lowpart (mode, remainder);
4768 quotient = expand_shift
4769 (RSHIFT_EXPR, int_mode, op0,
4770 pre_shift, tquotient, 0);
4772 else
4774 rtx t1, t2, t3, t4;
4776 mh = choose_multiplier (d, size, size - 1,
4777 &ml, &post_shift, &lgup);
4778 gcc_assert (!mh);
4780 if (post_shift < BITS_PER_WORD
4781 && size - 1 < BITS_PER_WORD)
4783 t1 = expand_shift
4784 (RSHIFT_EXPR, int_mode, op0,
4785 size - 1, NULL_RTX, 0);
4786 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4787 NULL_RTX, 0, OPTAB_WIDEN);
4788 extra_cost = (shift_cost (speed, int_mode, post_shift)
4789 + shift_cost (speed, int_mode, size - 1)
4790 + 2 * add_cost (speed, int_mode));
4791 t3 = expmed_mult_highpart
4792 (int_mode, t2, gen_int_mode (ml, int_mode),
4793 NULL_RTX, 1, max_cost - extra_cost);
4794 if (t3 != 0)
4796 t4 = expand_shift
4797 (RSHIFT_EXPR, int_mode, t3,
4798 post_shift, NULL_RTX, 1);
4799 quotient = expand_binop (int_mode, xor_optab,
4800 t4, t1, tquotient, 0,
4801 OPTAB_WIDEN);
4806 else
4808 rtx nsign, t1, t2, t3, t4;
4809 t1 = force_operand (gen_rtx_PLUS (int_mode,
4810 op0, constm1_rtx), NULL_RTX);
4811 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4812 0, OPTAB_WIDEN);
4813 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4814 size - 1, NULL_RTX, 0);
4815 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4816 NULL_RTX);
4817 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4818 NULL_RTX, 0);
4819 if (t4)
4821 rtx t5;
4822 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4823 NULL_RTX, 0);
4824 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4825 tquotient);
4830 if (quotient != 0)
4831 break;
4832 delete_insns_since (last);
4834 /* Try using an instruction that produces both the quotient and
4835 remainder, using truncation. We can easily compensate the quotient
4836 or remainder to get floor rounding, once we have the remainder.
4837 Notice that we compute also the final remainder value here,
4838 and return the result right away. */
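/* Illustrative sketch, not part of the compiled code: truncation
   gives -7 / 2 == -3 with remainder -1; because the remainder is
   nonzero and the operands have opposite signs, the fixup below
   turns that into the floor result -4 with remainder +1.  */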
4839 if (target == 0 || GET_MODE (target) != compute_mode)
4840 target = gen_reg_rtx (compute_mode);
4842 if (rem_flag)
4844 remainder
4845 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4846 quotient = gen_reg_rtx (compute_mode);
4848 else
4850 quotient
4851 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4852 remainder = gen_reg_rtx (compute_mode);
4855 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4856 quotient, remainder, 0))
4858 /* This could be computed with a branch-less sequence.
4859 Save that for later. */
4860 rtx tem;
4861 rtx_code_label *label = gen_label_rtx ();
4862 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4863 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4864 NULL_RTX, 0, OPTAB_WIDEN);
4865 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4866 expand_dec (quotient, const1_rtx);
4867 expand_inc (remainder, op1);
4868 emit_label (label);
4869 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4872 /* No luck with division elimination or divmod. Have to do it
4873 by conditionally adjusting op0 *and* the result. */
4875 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4876 rtx adjusted_op0;
4877 rtx tem;
4879 quotient = gen_reg_rtx (compute_mode);
4880 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4881 label1 = gen_label_rtx ();
4882 label2 = gen_label_rtx ();
4883 label3 = gen_label_rtx ();
4884 label4 = gen_label_rtx ();
4885 label5 = gen_label_rtx ();
4886 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4887 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4888 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4889 quotient, 0, methods);
4890 if (tem != quotient)
4891 emit_move_insn (quotient, tem);
4892 emit_jump_insn (targetm.gen_jump (label5));
4893 emit_barrier ();
4894 emit_label (label1);
4895 expand_inc (adjusted_op0, const1_rtx);
4896 emit_jump_insn (targetm.gen_jump (label4));
4897 emit_barrier ();
4898 emit_label (label2);
4899 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4900 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4901 quotient, 0, methods);
4902 if (tem != quotient)
4903 emit_move_insn (quotient, tem);
4904 emit_jump_insn (targetm.gen_jump (label5));
4905 emit_barrier ();
4906 emit_label (label3);
4907 expand_dec (adjusted_op0, const1_rtx);
4908 emit_label (label4);
4909 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4910 quotient, 0, methods);
4911 if (tem != quotient)
4912 emit_move_insn (quotient, tem);
4913 expand_dec (quotient, const1_rtx);
4914 emit_label (label5);
4916 break;
4918 case CEIL_DIV_EXPR:
4919 case CEIL_MOD_EXPR:
4920 if (unsignedp)
4922 if (op1_is_constant
4923 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4924 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4925 || INTVAL (op1) >= 0))
4927 scalar_int_mode int_mode
4928 = as_a <scalar_int_mode> (compute_mode);
4929 rtx t1, t2, t3;
4930 unsigned HOST_WIDE_INT d = INTVAL (op1);
4931 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4932 floor_log2 (d), tquotient, 1);
4933 t2 = expand_binop (int_mode, and_optab, op0,
4934 gen_int_mode (d - 1, int_mode),
4935 NULL_RTX, 1, methods);
4936 t3 = gen_reg_rtx (int_mode);
4937 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4938 if (t3 == 0)
4940 rtx_code_label *lab;
4941 lab = gen_label_rtx ();
4942 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4943 expand_inc (t1, const1_rtx);
4944 emit_label (lab);
4945 quotient = t1;
4947 else
4948 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4949 tquotient);
4950 break;
4953 /* Try using an instruction that produces both the quotient and
4954 remainder, using truncation. We can easily compensate the
4955 quotient or remainder to get ceiling rounding, once we have the
4956 remainder. Notice that we compute also the final remainder
4957 value here, and return the result right away. */
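/* Illustrative sketch, not part of the compiled code: truncation
   gives 7 / 2 == 3 with remainder 1; because the remainder is
   nonzero, the fixup below bumps the quotient to the ceiling value
   4 and adjusts the remainder to 7 - 4*2 == -1 (wrapped in the
   unsigned mode).  */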
4958 if (target == 0 || GET_MODE (target) != compute_mode)
4959 target = gen_reg_rtx (compute_mode);
4961 if (rem_flag)
4963 remainder = (REG_P (target)
4964 ? target : gen_reg_rtx (compute_mode));
4965 quotient = gen_reg_rtx (compute_mode);
4967 else
4969 quotient = (REG_P (target)
4970 ? target : gen_reg_rtx (compute_mode));
4971 remainder = gen_reg_rtx (compute_mode);
4974 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4975 remainder, 1))
4977 /* This could be computed with a branch-less sequence.
4978 Save that for later. */
4979 rtx_code_label *label = gen_label_rtx ();
4980 do_cmp_and_jump (remainder, const0_rtx, EQ,
4981 compute_mode, label);
4982 expand_inc (quotient, const1_rtx);
4983 expand_dec (remainder, op1);
4984 emit_label (label);
4985 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4988 /* No luck with division elimination or divmod. Have to do it
4989 by conditionally adjusting op0 *and* the result. */
4991 rtx_code_label *label1, *label2;
4992 rtx adjusted_op0, tem;
4994 quotient = gen_reg_rtx (compute_mode);
4995 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4996 label1 = gen_label_rtx ();
4997 label2 = gen_label_rtx ();
4998 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4999 compute_mode, label1);
5000 emit_move_insn (quotient, const0_rtx);
5001 emit_jump_insn (targetm.gen_jump (label2));
5002 emit_barrier ();
5003 emit_label (label1);
5004 expand_dec (adjusted_op0, const1_rtx);
5005 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
5006 quotient, 1, methods);
5007 if (tem != quotient)
5008 emit_move_insn (quotient, tem);
5009 expand_inc (quotient, const1_rtx);
5010 emit_label (label2);
5013 else /* signed */
5015 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
5016 && INTVAL (op1) >= 0)
5018 /* This is extremely similar to the code for the unsigned case
5019 above. For 2.7 we should merge these variants, but for
5020 2.6.1 I don't want to touch the code for unsigned since that
5021 gets used in C. The signed case will only be used by other
5022 languages (Ada). */
5024 rtx t1, t2, t3;
5025 unsigned HOST_WIDE_INT d = INTVAL (op1);
5026 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5027 floor_log2 (d), tquotient, 0);
5028 t2 = expand_binop (compute_mode, and_optab, op0,
5029 gen_int_mode (d - 1, compute_mode),
5030 NULL_RTX, 1, methods);
5031 t3 = gen_reg_rtx (compute_mode);
5032 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5033 compute_mode, 1, 1);
5034 if (t3 == 0)
5036 rtx_code_label *lab;
5037 lab = gen_label_rtx ();
5038 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5039 expand_inc (t1, const1_rtx);
5040 emit_label (lab);
5041 quotient = t1;
5043 else
5044 quotient = force_operand (gen_rtx_PLUS (compute_mode,
5045 t1, t3),
5046 tquotient);
5047 break;
5050 /* Try using an instruction that produces both the quotient and
5051 remainder, using truncation. We can easily compensate the
5052 quotient or remainder to get ceiling rounding, once we have the
5053 remainder. Notice that we compute also the final remainder
5054 value here, and return the result right away. */
5055 if (target == 0 || GET_MODE (target) != compute_mode)
5056 target = gen_reg_rtx (compute_mode);
5057 if (rem_flag)
5059 remainder= (REG_P (target)
5060 ? target : gen_reg_rtx (compute_mode));
5061 quotient = gen_reg_rtx (compute_mode);
5063 else
5065 quotient = (REG_P (target)
5066 ? target : gen_reg_rtx (compute_mode));
5067 remainder = gen_reg_rtx (compute_mode);
5070 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5071 remainder, 0))
5073 /* This could be computed with a branch-less sequence.
5074 Save that for later. */
5075 rtx tem;
5076 rtx_code_label *label = gen_label_rtx ();
5077 do_cmp_and_jump (remainder, const0_rtx, EQ,
5078 compute_mode, label);
5079 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5080 NULL_RTX, 0, OPTAB_WIDEN);
5081 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5082 expand_inc (quotient, const1_rtx);
5083 expand_dec (remainder, op1);
5084 emit_label (label);
5085 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5088 /* No luck with division elimination or divmod. Have to do it
5089 by conditionally adjusting op0 *and* the result. */
5091 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5092 rtx adjusted_op0;
5093 rtx tem;
5095 quotient = gen_reg_rtx (compute_mode);
5096 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5097 label1 = gen_label_rtx ();
5098 label2 = gen_label_rtx ();
5099 label3 = gen_label_rtx ();
5100 label4 = gen_label_rtx ();
5101 label5 = gen_label_rtx ();
5102 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5103 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5104 compute_mode, label1);
5105 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5106 quotient, 0, methods);
5107 if (tem != quotient)
5108 emit_move_insn (quotient, tem);
5109 emit_jump_insn (targetm.gen_jump (label5));
5110 emit_barrier ();
5111 emit_label (label1);
5112 expand_dec (adjusted_op0, const1_rtx);
5113 emit_jump_insn (targetm.gen_jump (label4));
5114 emit_barrier ();
5115 emit_label (label2);
5116 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5117 compute_mode, label3);
5118 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5119 quotient, 0, methods);
5120 if (tem != quotient)
5121 emit_move_insn (quotient, tem);
5122 emit_jump_insn (targetm.gen_jump (label5));
5123 emit_barrier ();
5124 emit_label (label3);
5125 expand_inc (adjusted_op0, const1_rtx);
5126 emit_label (label4);
5127 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5128 quotient, 0, methods);
5129 if (tem != quotient)
5130 emit_move_insn (quotient, tem);
5131 expand_inc (quotient, const1_rtx);
5132 emit_label (label5);
5135 break;
5137 case EXACT_DIV_EXPR:
5138 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5140 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5141 int size = GET_MODE_BITSIZE (int_mode);
5142 HOST_WIDE_INT d = INTVAL (op1);
5143 unsigned HOST_WIDE_INT ml;
5144 int pre_shift;
5145 rtx t1;
5147 pre_shift = ctz_or_zero (d);
5148 ml = invert_mod2n (d >> pre_shift, size);
5149 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5150 pre_shift, NULL_RTX, unsignedp);
5151 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5152 NULL_RTX, 1);
5154 insn = get_last_insn ();
5155 set_dst_reg_note (insn, REG_EQUAL,
5156 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5157 int_mode, op0, op1),
5158 quotient);
5160 break;
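/* Illustrative sketch, not part of the compiled code: for an exact
   8-bit division by 3, invert_mod2n (3, 8) == 171, and a dividend
   known to be a multiple of 3 such as 15 satisfies
   15 * 171 == 2565 == 10 * 256 + 5, so the mode-truncated product is
   5 == 15 / 3.  The pre-shift strips the factors of two from even
   divisors before the multiplication.  */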
5162 case ROUND_DIV_EXPR:
5163 case ROUND_MOD_EXPR:
5164 if (unsignedp)
5166 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5167 rtx tem;
5168 rtx_code_label *label;
5169 label = gen_label_rtx ();
5170 quotient = gen_reg_rtx (int_mode);
5171 remainder = gen_reg_rtx (int_mode);
5172 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5174 rtx tem;
5175 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5176 quotient, 1, methods);
5177 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5178 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5179 remainder, 1, methods);
5181 tem = plus_constant (int_mode, op1, -1);
5182 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5183 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5184 expand_inc (quotient, const1_rtx);
5185 expand_dec (remainder, op1);
5186 emit_label (label);
5188 else
5190 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5191 int size = GET_MODE_BITSIZE (int_mode);
5192 rtx abs_rem, abs_op1, tem, mask;
5193 rtx_code_label *label;
5194 label = gen_label_rtx ();
5195 quotient = gen_reg_rtx (int_mode);
5196 remainder = gen_reg_rtx (int_mode);
5197 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5199 rtx tem;
5200 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5201 quotient, 0, methods);
5202 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5203 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5204 remainder, 0, methods);
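/* Round to nearest: if 2 * abs (REMAINDER) >= abs (OP1), move the
   quotient one step away from zero, i.e. towards the sign of OP0 ^ OP1,
   and adjust the remainder by OP1 in the opposite direction.  MASK below
   is -1 when OP0 and OP1 have different signs and 0 otherwise, so
   (X ^ MASK) - MASK conditionally negates X.  */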
5206 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5207 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5208 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5209 1, NULL_RTX, 1);
5210 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5211 tem = expand_binop (int_mode, xor_optab, op0, op1,
5212 NULL_RTX, 0, OPTAB_WIDEN);
5213 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5214 size - 1, NULL_RTX, 0);
5215 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5216 NULL_RTX, 0, OPTAB_WIDEN);
5217 tem = expand_binop (int_mode, sub_optab, tem, mask,
5218 NULL_RTX, 0, OPTAB_WIDEN);
5219 expand_inc (quotient, tem);
5220 tem = expand_binop (int_mode, xor_optab, mask, op1,
5221 NULL_RTX, 0, OPTAB_WIDEN);
5222 tem = expand_binop (int_mode, sub_optab, tem, mask,
5223 NULL_RTX, 0, OPTAB_WIDEN);
5224 expand_dec (remainder, tem);
5225 emit_label (label);
5227 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5229 default:
5230 gcc_unreachable ();
5233 if (quotient == 0)
5235 if (target && GET_MODE (target) != compute_mode)
5236 target = 0;
5238 if (rem_flag)
5240 /* Try to produce the remainder without producing the quotient.
5241 If we seem to have a divmod pattern that does not require widening,
5242 don't try widening here. We should really have a WIDEN argument
5243 to expand_twoval_binop, since what we'd really like to do here is
5244 1) try a mod insn in compute_mode
5245 2) try a divmod insn in compute_mode
5246 3) try a div insn in compute_mode and multiply-subtract to get
5247 remainder
5248 4) try the same things with widening allowed. */
5249 remainder
5250 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5251 op0, op1, target,
5252 unsignedp,
5253 ((optab_handler (optab2, compute_mode)
5254 != CODE_FOR_nothing)
5255 ? OPTAB_DIRECT : OPTAB_WIDEN));
5256 if (remainder == 0)
5258 /* No luck there. Can we do remainder and divide at once
5259 without a library call? */
5260 remainder = gen_reg_rtx (compute_mode);
5261 if (! expand_twoval_binop ((unsignedp
5262 ? udivmod_optab
5263 : sdivmod_optab),
5264 op0, op1,
5265 NULL_RTX, remainder, unsignedp))
5266 remainder = 0;
5269 if (remainder)
5270 return gen_lowpart (mode, remainder);
5273 /* Produce the quotient. Try a quotient insn, but not a library call.
5274 If we have a divmod in this mode, use it in preference to widening
5275 the div (for this test we assume it will not fail). Note that optab2
5276 is set to whichever of the two optabs the call below will use. */
5277 quotient
5278 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5279 op0, op1, rem_flag ? NULL_RTX : target,
5280 unsignedp,
5281 ((optab_handler (optab2, compute_mode)
5282 != CODE_FOR_nothing)
5283 ? OPTAB_DIRECT : OPTAB_WIDEN));
5285 if (quotient == 0)
5287 /* No luck there. Try a quotient-and-remainder insn,
5288 keeping the quotient alone. */
5289 quotient = gen_reg_rtx (compute_mode);
5290 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5291 op0, op1,
5292 quotient, NULL_RTX, unsignedp))
5294 quotient = 0;
5295 if (! rem_flag)
5296 /* Still no luck. If we are not computing the remainder,
5297 use a library call for the quotient. */
5298 quotient = sign_expand_binop (compute_mode,
5299 udiv_optab, sdiv_optab,
5300 op0, op1, target,
5301 unsignedp, methods);
5306 if (rem_flag)
5308 if (target && GET_MODE (target) != compute_mode)
5309 target = 0;
5311 if (quotient == 0)
5313 /* No divide instruction either. Use library for remainder. */
5314 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5315 op0, op1, target,
5316 unsignedp, methods);
5317 /* No remainder function. Try a quotient-and-remainder
5318 function, keeping the remainder. */
5319 if (!remainder
5320 && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5322 remainder = gen_reg_rtx (compute_mode);
5323 if (!expand_twoval_binop_libfunc
5324 (unsignedp ? udivmod_optab : sdivmod_optab,
5325 op0, op1,
5326 NULL_RTX, remainder,
5327 unsignedp ? UMOD : MOD))
5328 remainder = NULL_RTX;
5331 else
5333 /* We divided. Now finish doing X - Y * (X / Y). */
5334 remainder = expand_mult (compute_mode, quotient, op1,
5335 NULL_RTX, unsignedp);
5336 remainder = expand_binop (compute_mode, sub_optab, op0,
5337 remainder, target, unsignedp,
5338 methods);
5342 if (methods != OPTAB_LIB_WIDEN
5343 && (rem_flag ? remainder : quotient) == NULL_RTX)
5344 return NULL_RTX;
5346 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5349 /* Return a tree node with data type TYPE, describing the value of X.
5350 Usually this is a VAR_DECL, if there is no obvious better choice.
5351 X may be an expression; however, we only support those expressions
5352 generated by loop.c. */
5354 tree
5355 make_tree (tree type, rtx x)
5357 tree t;
5359 switch (GET_CODE (x))
5361 case CONST_INT:
5362 case CONST_WIDE_INT:
5363 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5364 return t;
5366 case CONST_DOUBLE:
5367 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5368 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5369 t = wide_int_to_tree (type,
5370 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5371 HOST_BITS_PER_WIDE_INT * 2));
5372 else
5373 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5375 return t;
5377 case CONST_VECTOR:
5379 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5380 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5381 tree itype = TREE_TYPE (type);
5383 /* Build a tree with vector elements. */
5384 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5385 unsigned int count = elts.encoded_nelts ();
5386 for (unsigned int i = 0; i < count; ++i)
5388 rtx elt = CONST_VECTOR_ELT (x, i);
5389 elts.quick_push (make_tree (itype, elt));
5392 return elts.build ();
5395 case PLUS:
5396 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5397 make_tree (type, XEXP (x, 1)));
5399 case MINUS:
5400 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5401 make_tree (type, XEXP (x, 1)));
5403 case NEG:
5404 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5406 case MULT:
5407 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5408 make_tree (type, XEXP (x, 1)));
5410 case ASHIFT:
5411 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5412 make_tree (type, XEXP (x, 1)));
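/* For the right shifts, RSHIFT_EXPR is logical or arithmetic depending
   on the signedness of its type, so force the matching signedness and
   convert the result back to TYPE.  */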
5414 case LSHIFTRT:
5415 t = unsigned_type_for (type);
5416 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5417 make_tree (t, XEXP (x, 0)),
5418 make_tree (type, XEXP (x, 1))));
5420 case ASHIFTRT:
5421 t = signed_type_for (type);
5422 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5423 make_tree (t, XEXP (x, 0)),
5424 make_tree (type, XEXP (x, 1))));
5426 case DIV:
5427 if (TREE_CODE (type) != REAL_TYPE)
5428 t = signed_type_for (type);
5429 else
5430 t = type;
5432 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5433 make_tree (t, XEXP (x, 0)),
5434 make_tree (t, XEXP (x, 1))));
5435 case UDIV:
5436 t = unsigned_type_for (type);
5437 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5438 make_tree (t, XEXP (x, 0)),
5439 make_tree (t, XEXP (x, 1))));
5441 case SIGN_EXTEND:
5442 case ZERO_EXTEND:
5443 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5444 GET_CODE (x) == ZERO_EXTEND);
5445 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5447 case CONST:
5448 return make_tree (type, XEXP (x, 0));
5450 case SYMBOL_REF:
5451 t = SYMBOL_REF_DECL (x);
5452 if (t)
5453 return fold_convert (type, build_fold_addr_expr (t));
5454 /* fall through. */
5456 default:
5457 if (CONST_POLY_INT_P (x))
5458 return wide_int_to_tree (t, const_poly_int_value (x));
5460 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5462 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5463 address mode to pointer mode. */
5464 if (POINTER_TYPE_P (type))
5465 x = convert_memory_address_addr_space
5466 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5468 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5469 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5470 t->decl_with_rtl.rtl = x;
5472 return t;
5476 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5477 and returning TARGET.
5479 If TARGET is 0, a pseudo-register or constant is returned. */
5482 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5484 rtx tem = 0;
5486 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5487 tem = simplify_binary_operation (AND, mode, op0, op1);
5488 if (tem == 0)
5489 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5491 if (target == 0)
5492 target = tem;
5493 else if (tem != target)
5494 emit_move_insn (target, tem);
5495 return target;
5498 /* Helper function for emit_store_flag. */
5500 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5501 machine_mode mode, machine_mode compare_mode,
5502 int unsignedp, rtx x, rtx y, int normalizep,
5503 machine_mode target_mode)
5505 class expand_operand ops[4];
5506 rtx op0, comparison, subtarget;
5507 rtx_insn *last;
5508 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5509 scalar_int_mode int_target_mode;
5511 last = get_last_insn ();
5512 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5513 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5514 if (!x || !y)
5516 delete_insns_since (last);
5517 return NULL_RTX;
5520 if (target_mode == VOIDmode)
5521 int_target_mode = result_mode;
5522 else
5523 int_target_mode = as_a <scalar_int_mode> (target_mode);
5524 if (!target)
5525 target = gen_reg_rtx (int_target_mode);
5527 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5529 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5530 create_fixed_operand (&ops[1], comparison);
5531 create_fixed_operand (&ops[2], x);
5532 create_fixed_operand (&ops[3], y);
5533 if (!maybe_expand_insn (icode, 4, ops))
5535 delete_insns_since (last);
5536 return NULL_RTX;
5538 subtarget = ops[0].value;
5540 /* If we are converting to a wider mode, first convert to
5541 INT_TARGET_MODE, then normalize. This produces better combining
5542 opportunities on machines that have a SIGN_EXTRACT when we are
5543 testing a single bit. This mostly benefits the 68k.
5545 If STORE_FLAG_VALUE does not have the sign bit set when
5546 interpreted in MODE, we can do this conversion as unsigned, which
5547 is usually more efficient. */
5548 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5550 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5551 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5553 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5554 convert_move (target, subtarget, unsignedp);
5556 op0 = target;
5557 result_mode = int_target_mode;
5559 else
5560 op0 = subtarget;
5562 /* If we want to keep subexpressions around, don't reuse our last
5563 target. */
5564 if (optimize)
5565 subtarget = 0;
5567 /* Now normalize to the proper value in MODE. Sometimes we don't
5568 have to do anything. */
5569 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5571 /* STORE_FLAG_VALUE might be the most negative number, so write
5572 the comparison this way to avoid a compile-time warning. */
5573 else if (- normalizep == STORE_FLAG_VALUE)
5574 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5576 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5577 it hard to use a value of just the sign bit due to ANSI integer
5578 constant typing rules. */
5579 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5580 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5581 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5582 normalizep == 1);
5583 else
5585 gcc_assert (STORE_FLAG_VALUE & 1);
5587 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5588 if (normalizep == -1)
5589 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5592 /* If we were converting to a smaller mode, do the conversion now. */
5593 if (int_target_mode != result_mode)
5595 convert_move (target, op0, 0);
5596 return target;
5598 else
5599 return op0;
5603 /* A subroutine of emit_store_flag only including "tricks" that do not
5604 need a recursive call. These are kept separate to avoid infinite
5605 loops. */
5607 static rtx
5608 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5609 machine_mode mode, int unsignedp, int normalizep,
5610 machine_mode target_mode)
5612 rtx subtarget;
5613 enum insn_code icode;
5614 machine_mode compare_mode;
5615 enum mode_class mclass;
5616 enum rtx_code scode;
5618 if (unsignedp)
5619 code = unsigned_condition (code);
5620 scode = swap_condition (code);
5622 /* If one operand is constant, make it the second one. Only do this
5623 if the other operand is not constant as well. */
5625 if (swap_commutative_operands_p (op0, op1))
5627 std::swap (op0, op1);
5628 code = swap_condition (code);
5631 if (mode == VOIDmode)
5632 mode = GET_MODE (op0);
5634 if (CONST_SCALAR_INT_P (op1))
5635 canonicalize_comparison (mode, &code, &op1);
5637 /* For some comparisons with 1 and -1, we can convert this to
5638 comparisons with zero. This will often produce more opportunities for
5639 store-flag insns. */
5641 switch (code)
5643 case LT:
5644 if (op1 == const1_rtx)
5645 op1 = const0_rtx, code = LE;
5646 break;
5647 case LE:
5648 if (op1 == constm1_rtx)
5649 op1 = const0_rtx, code = LT;
5650 break;
5651 case GE:
5652 if (op1 == const1_rtx)
5653 op1 = const0_rtx, code = GT;
5654 break;
5655 case GT:
5656 if (op1 == constm1_rtx)
5657 op1 = const0_rtx, code = GE;
5658 break;
5659 case GEU:
5660 if (op1 == const1_rtx)
5661 op1 = const0_rtx, code = NE;
5662 break;
5663 case LTU:
5664 if (op1 == const1_rtx)
5665 op1 = const0_rtx, code = EQ;
5666 break;
5667 default:
5668 break;
5671 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5672 complement of A (for GE) and shifting the sign bit to the low bit. */
5673 scalar_int_mode int_mode;
5674 if (op1 == const0_rtx && (code == LT || code == GE)
5675 && is_int_mode (mode, &int_mode)
5676 && (normalizep || STORE_FLAG_VALUE == 1
5677 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5679 scalar_int_mode int_target_mode;
5680 subtarget = target;
5682 if (!target)
5683 int_target_mode = int_mode;
5684 else
5686 /* If the result is to be wider than OP0, it is best to convert it
5687 first. If it is to be narrower, it is *incorrect* to convert it
5688 first. */
5689 int_target_mode = as_a <scalar_int_mode> (target_mode);
5690 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5692 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5693 int_mode = int_target_mode;
5697 if (int_target_mode != int_mode)
5698 subtarget = 0;
5700 if (code == GE)
5701 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5702 ((STORE_FLAG_VALUE == 1 || normalizep)
5703 ? 0 : subtarget), 0);
5705 if (STORE_FLAG_VALUE == 1 || normalizep)
5706 /* If we are supposed to produce a 0/1 value, we want to do
5707 a logical shift from the sign bit to the low-order bit; for
5708 a -1/0 value, we do an arithmetic shift. */
5709 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5710 GET_MODE_BITSIZE (int_mode) - 1,
5711 subtarget, normalizep != -1);
5713 if (int_mode != int_target_mode)
5714 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5716 return op0;
5719 /* Next try expanding this via the backend's cstore<mode>4. */
5720 mclass = GET_MODE_CLASS (mode);
5721 FOR_EACH_WIDER_MODE_FROM (compare_mode, mode)
5723 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5724 icode = optab_handler (cstore_optab, optab_mode);
5725 if (icode != CODE_FOR_nothing)
5727 do_pending_stack_adjust ();
5728 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5729 unsignedp, op0, op1, normalizep, target_mode);
5730 if (tem)
5731 return tem;
5733 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5735 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5736 unsignedp, op1, op0, normalizep, target_mode);
5737 if (tem)
5738 return tem;
5740 break;
5744 /* If we are comparing a double-word integer with zero or -1, we can
5745 convert the comparison into one involving a single word. */
5746 if (is_int_mode (mode, &int_mode)
5747 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5748 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5750 rtx tem;
5751 if ((code == EQ || code == NE)
5752 && (op1 == const0_rtx || op1 == constm1_rtx))
5754 rtx op00, op01;
5756 /* Do a logical OR or AND of the two words and compare the
5757 result. */
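/* (A double-word value is 0 exactly when the inclusive OR of its two
   words is 0, and is -1 exactly when the AND of its two words is -1,
   so a single word_mode comparison against OP1 suffices.)  */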
5758 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5759 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5760 tem = expand_binop (word_mode,
5761 op1 == const0_rtx ? ior_optab : and_optab,
5762 op00, op01, NULL_RTX, unsignedp,
5763 OPTAB_DIRECT);
5765 if (tem != 0)
5766 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5767 unsignedp, normalizep);
5769 else if ((code == LT || code == GE) && op1 == const0_rtx)
5771 rtx op0h;
5773 /* If testing the sign bit, can just test on high word. */
5774 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5775 subreg_highpart_offset (word_mode,
5776 int_mode));
5777 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5778 unsignedp, normalizep);
5780 else
5781 tem = NULL_RTX;
5783 if (tem)
5785 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5786 return tem;
5787 if (!target)
5788 target = gen_reg_rtx (target_mode);
5790 convert_move (target, tem,
5791 !val_signbit_known_set_p (word_mode,
5792 (normalizep ? normalizep
5793 : STORE_FLAG_VALUE)));
5794 return target;
5798 return 0;
5801 /* Subroutine of emit_store_flag that handles cases in which the operands
5802 are scalar integers. SUBTARGET is the target to use for temporary
5803 operations and TRUEVAL is the value to store when the condition is
5804 true. All other arguments are as for emit_store_flag. */
5807 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5808 rtx op1, scalar_int_mode mode, int unsignedp,
5809 int normalizep, rtx trueval)
5811 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5812 rtx_insn *last = get_last_insn ();
5814 /* If this is an equality comparison of integers, we can try to exclusive-or
5815 (or subtract) the two operands and use a recursive call to try the
5816 comparison with zero. Don't do any of these cases if branches are
5817 very cheap. */
5819 if ((code == EQ || code == NE) && op1 != const0_rtx)
5821 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5822 OPTAB_WIDEN);
5824 if (tem == 0)
5825 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5826 OPTAB_WIDEN);
5827 if (tem != 0)
5828 tem = emit_store_flag (target, code, tem, const0_rtx,
5829 mode, unsignedp, normalizep);
5830 if (tem != 0)
5831 return tem;
5833 delete_insns_since (last);
5836 /* For integer comparisons, try the reverse comparison. However, for
5837 a small X, if we would have to extend it anyway, implementing "X != 0"
5838 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5839 rtx_code rcode = reverse_condition (code);
5840 if (can_compare_p (rcode, mode, ccp_store_flag)
5841 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5842 && code == NE
5843 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5844 && op1 == const0_rtx))
5846 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5847 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5849 /* Again, for the reverse comparison, use either an addition or a XOR. */
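/* (The reversed comparison, normalized as requested below, yields a
   nonzero value when CODE is false and 0 when it is true; adding
   NORMALIZEP, or XOR-ing with TRUEVAL, then flips it into the result
   we actually want, provided rtx_cost says the extra constant is
   free.)  */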
5850 if (want_add
5851 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5852 optimize_insn_for_speed_p ()) == 0)
5854 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5855 STORE_FLAG_VALUE, target_mode);
5856 if (tem != 0)
5857 tem = expand_binop (target_mode, add_optab, tem,
5858 gen_int_mode (normalizep, target_mode),
5859 target, 0, OPTAB_WIDEN);
5860 if (tem != 0)
5861 return tem;
5863 else if (!want_add
5864 && rtx_cost (trueval, mode, XOR, 1,
5865 optimize_insn_for_speed_p ()) == 0)
5867 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5868 normalizep, target_mode);
5869 if (tem != 0)
5870 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5871 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5872 if (tem != 0)
5873 return tem;
5876 delete_insns_since (last);
5879 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5880 the constant zero. Reject all other comparisons at this point. Only
5881 do LE and GT if branches are expensive since they are expensive on
5882 2-operand machines. */
5884 if (op1 != const0_rtx
5885 || (code != EQ && code != NE
5886 && (BRANCH_COST (optimize_insn_for_speed_p (),
5887 false) <= 1 || (code != LE && code != GT))))
5888 return 0;
5890 /* Try to put the result of the comparison in the sign bit. Assume we can't
5891 do the necessary operation below. */
5893 rtx tem = 0;
5895 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5896 the sign bit set. */
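/* (If A is positive, both A and A - 1 are nonnegative, so the OR has a
   clear sign bit; if A is zero, A - 1 is -1; if A is negative, A itself
   supplies the sign bit.)  */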
5898 if (code == LE)
5900 /* This is destructive, so SUBTARGET can't be OP0. */
5901 if (rtx_equal_p (subtarget, op0))
5902 subtarget = 0;
5904 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5905 OPTAB_WIDEN);
5906 if (tem)
5907 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5908 OPTAB_WIDEN);
5911 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5912 number of bits in the mode of OP0, minus one. */
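/* (The arithmetic shift yields 0 when A >= 0 and -1 when A < 0, so the
   subtraction sets the sign bit exactly when A > 0.)  */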
5914 if (code == GT)
5916 if (rtx_equal_p (subtarget, op0))
5917 subtarget = 0;
5919 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5920 GET_MODE_BITSIZE (mode) - 1,
5921 subtarget, 0);
5922 if (tem)
5923 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5924 OPTAB_WIDEN);
5927 if (code == EQ || code == NE)
5929 /* For EQ or NE, one way to do the comparison is to apply an operation
5930 that converts the operand into a positive number if it is nonzero
5931 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5932 for NE we negate. This puts the result in the sign bit. Then we
5933 normalize with a shift, if needed.
5935 Two operations that can do the above actions are ABS and FFS, so try
5936 them. If that doesn't work, and MODE is smaller than a full word,
5937 we can use zero-extension to the wider mode (an unsigned conversion)
5938 as the operation. */
5940 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5941 that is compensated by the subsequent overflow when subtracting
5942 one / negating. */
5944 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5945 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5946 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5947 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5948 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5950 tem = convert_modes (word_mode, mode, op0, 1);
5951 mode = word_mode;
5954 if (tem != 0)
5956 if (code == EQ)
5957 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5958 0, OPTAB_WIDEN);
5959 else
5960 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5963 /* If we couldn't do it that way, for NE we can "or" the two's complement
5964 of the value with itself. For EQ, we take the one's complement of
5965 that "or", which is an extra insn, so we only handle EQ if branches
5966 are expensive. */
5968 if (tem == 0
5969 && (code == NE
5970 || BRANCH_COST (optimize_insn_for_speed_p (),
5971 false) > 1))
5973 if (rtx_equal_p (subtarget, op0))
5974 subtarget = 0;
5976 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5977 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5978 OPTAB_WIDEN);
5980 if (tem && code == EQ)
5981 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5985 if (tem && normalizep)
5986 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5987 GET_MODE_BITSIZE (mode) - 1,
5988 subtarget, normalizep == 1);
5990 if (tem)
5992 if (!target)
5994 else if (GET_MODE (tem) != target_mode)
5996 convert_move (target, tem, 0);
5997 tem = target;
5999 else if (!subtarget)
6001 emit_move_insn (target, tem);
6002 tem = target;
6005 else
6006 delete_insns_since (last);
6008 return tem;
6011 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
6012 and storing in TARGET. Normally return TARGET.
6013 Return 0 if that cannot be done.
6015 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
6016 it is VOIDmode, they cannot both be CONST_INT.
6018 UNSIGNEDP is for the case where we have to widen the operands
6019 to perform the operation. It says to use zero-extension.
6021 NORMALIZEP is 1 if we should convert the result to be either zero
6022 or one. NORMALIZEP is -1 if we should convert the result to be
6023 either zero or -1. If NORMALIZEP is zero, the result will be left
6024 "raw" out of the scc insn. */
6027 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6028 machine_mode mode, int unsignedp, int normalizep)
6030 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6031 enum rtx_code rcode;
6032 rtx subtarget;
6033 rtx tem, trueval;
6034 rtx_insn *last;
6036 /* If we compare constants, we shouldn't use a store-flag operation,
6037 but a constant load. We can get there via the vanilla route that
6038 usually generates a compare-branch sequence, but will in this case
6039 fold the comparison to a constant, and thus elide the branch. */
6040 if (CONSTANT_P (op0) && CONSTANT_P (op1))
6041 return NULL_RTX;
6043 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6044 target_mode);
6045 if (tem)
6046 return tem;
6048 /* If we reached here, we can't do this with a scc insn; however, there
6049 are some comparisons that can be done in other ways. Don't do any
6050 of these cases if branches are very cheap. */
6051 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6052 return 0;
6054 /* See what we need to return. We can only return a 1, -1, or the
6055 sign bit. */
6057 if (normalizep == 0)
6059 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6060 normalizep = STORE_FLAG_VALUE;
6062 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6064 else
6065 return 0;
6068 last = get_last_insn ();
6070 /* If optimizing, use different pseudo registers for each insn, instead
6071 of reusing the same pseudo. This leads to better CSE, but slows
6072 down the compiler, since there are more pseudos. */
6073 subtarget = (!optimize
6074 && (target_mode == mode)) ? target : NULL_RTX;
6075 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6077 /* For floating-point comparisons, try the reverse comparison or try
6078 changing the "orderedness" of the comparison. */
6079 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6081 enum rtx_code first_code;
6082 bool and_them;
6084 rcode = reverse_condition_maybe_unordered (code);
6085 if (can_compare_p (rcode, mode, ccp_store_flag)
6086 && (code == ORDERED || code == UNORDERED
6087 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6088 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6090 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6091 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6093 /* For the reverse comparison, use either an addition or a XOR. */
6094 if (want_add
6095 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6096 optimize_insn_for_speed_p ()) == 0)
6098 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6099 STORE_FLAG_VALUE, target_mode);
6100 if (tem)
6101 return expand_binop (target_mode, add_optab, tem,
6102 gen_int_mode (normalizep, target_mode),
6103 target, 0, OPTAB_WIDEN);
6105 else if (!want_add
6106 && rtx_cost (trueval, mode, XOR, 1,
6107 optimize_insn_for_speed_p ()) == 0)
6109 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6110 normalizep, target_mode);
6111 if (tem)
6112 return expand_binop (target_mode, xor_optab, tem, trueval,
6113 target, INTVAL (trueval) >= 0,
6114 OPTAB_WIDEN);
6118 delete_insns_since (last);
6120 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6121 if (code == ORDERED || code == UNORDERED)
6122 return 0;
6124 and_them = split_comparison (code, mode, &first_code, &code);
6126 /* If there are no NaNs, the first comparison should always fall through.
6127 Effectively change the comparison to the other one. */
6128 if (!HONOR_NANS (mode))
6130 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6131 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6132 target_mode);
6135 if (!HAVE_conditional_move)
6136 return 0;
6138 /* Do not turn a trapping comparison into a non-trapping one. */
6139 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6140 && flag_trapping_math)
6141 return 0;
6143 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6144 conditional move. */
6145 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6146 normalizep, target_mode);
6147 if (tem == 0)
6148 return 0;
6150 if (and_them)
6151 tem = emit_conditional_move (target, { code, op0, op1, mode },
6152 tem, const0_rtx, GET_MODE (tem), 0);
6153 else
6154 tem = emit_conditional_move (target, { code, op0, op1, mode },
6155 trueval, tem, GET_MODE (tem), 0);
6157 if (tem == 0)
6158 delete_insns_since (last);
6159 return tem;
6162 /* The remaining tricks only apply to integer comparisons. */
6164 scalar_int_mode int_mode;
6165 if (is_int_mode (mode, &int_mode))
6166 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6167 unsignedp, normalizep, trueval);
6169 return 0;
6172 /* Like emit_store_flag, but always succeeds. */
6175 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6176 machine_mode mode, int unsignedp, int normalizep)
6178 rtx tem;
6179 rtx_code_label *label;
6180 rtx trueval, falseval;
6182 /* First see if emit_store_flag can do the job. */
6183 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6184 if (tem != 0)
6185 return tem;
6187 /* If one operand is constant, make it the second one. Only do this
6188 if the other operand is not constant as well. */
6189 if (swap_commutative_operands_p (op0, op1))
6191 std::swap (op0, op1);
6192 code = swap_condition (code);
6195 if (mode == VOIDmode)
6196 mode = GET_MODE (op0);
6198 if (!target)
6199 target = gen_reg_rtx (word_mode);
6201 /* If this failed, we have to do this with set/compare/jump/set code.
6202 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6203 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6204 if (code == NE
6205 && GET_MODE_CLASS (mode) == MODE_INT
6206 && REG_P (target)
6207 && op0 == target
6208 && op1 == const0_rtx)
6210 label = gen_label_rtx ();
6211 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6212 NULL_RTX, NULL, label,
6213 profile_probability::uninitialized ());
6214 emit_move_insn (target, trueval);
6215 emit_label (label);
6216 return target;
6219 if (!REG_P (target)
6220 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6221 target = gen_reg_rtx (GET_MODE (target));
6223 /* Jump in the right direction if the target cannot implement CODE
6224 but can jump on its reverse condition. */
6225 falseval = const0_rtx;
6226 if (! can_compare_p (code, mode, ccp_jump)
6227 && (! FLOAT_MODE_P (mode)
6228 || code == ORDERED || code == UNORDERED
6229 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6230 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6232 enum rtx_code rcode;
6233 if (FLOAT_MODE_P (mode))
6234 rcode = reverse_condition_maybe_unordered (code);
6235 else
6236 rcode = reverse_condition (code);
6238 /* Canonicalize to UNORDERED for the libcall. */
6239 if (can_compare_p (rcode, mode, ccp_jump)
6240 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6242 falseval = trueval;
6243 trueval = const0_rtx;
6244 code = rcode;
6248 emit_move_insn (target, trueval);
6249 label = gen_label_rtx ();
6250 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6251 label, profile_probability::uninitialized ());
6253 emit_move_insn (target, falseval);
6254 emit_label (label);
6256 return target;
6259 /* Helper function for canonicalize_comparison. Swap between inclusive
6260 and exclusive ranges in order to create an equivalent comparison. See
6261 canonicalize_comparison for the possible cases. */
6263 static enum rtx_code
6264 equivalent_cmp_code (enum rtx_code code)
6266 switch (code)
6268 case GT:
6269 return GE;
6270 case GE:
6271 return GT;
6272 case LT:
6273 return LE;
6274 case LE:
6275 return LT;
6276 case GTU:
6277 return GEU;
6278 case GEU:
6279 return GTU;
6280 case LTU:
6281 return LEU;
6282 case LEU:
6283 return LTU;
6285 default:
6286 return code;
6290 /* Choose the more appropriate immediate in scalar integer comparisons. The
6291 purpose of this is to end up with an immediate which can be loaded into a
6292 register in fewer moves, if possible.
6294 For each integer comparison there exists an equivalent choice:
6295 i) a > b or a >= b + 1
6296 ii) a <= b or a < b + 1
6297 iii) a >= b or a > b - 1
6298 iv) a < b or a <= b - 1
6300 MODE is the mode of the first operand.
6301 CODE points to the comparison code.
6302 IMM points to the rtx containing the immediate. *IMM must satisfy
6303 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6304 on exit. */
6306 void
6307 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6309 if (!SCALAR_INT_MODE_P (mode))
6310 return;
6312 int to_add = 0;
6313 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6315 /* Extract the immediate value from the rtx. */
6316 wide_int imm_val = rtx_mode_t (*imm, mode);
6318 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6319 to_add = 1;
6320 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6321 to_add = -1;
6322 else
6323 return;
6325 /* Check for overflow/underflow in the case of signed values and
5326 wrapping around in the case of unsigned values. If either occurs,
6327 cancel the optimization. */
6328 wi::overflow_type overflow = wi::OVF_NONE;
6329 wide_int imm_modif;
6331 if (to_add == 1)
6332 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6333 else
6334 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6336 if (overflow)
6337 return;
6339 /* The following creates a pseudo; if we cannot do that, bail out. */
6340 if (!can_create_pseudo_p ())
6341 return;
6343 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6344 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6346 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6347 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
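/* Keep whichever form needs the cheaper constant to load.  As a purely
   illustrative example (not tied to any particular target): if 4095 can
   be encoded as an immediate operand but 4096 cannot, "x < 4096" is
   better emitted as "x <= 4095".  */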
6349 /* Update the immediate and the code. */
6350 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6352 *code = equivalent_cmp_code (*code);
6353 *imm = new_imm;
6359 /* Perform possibly multi-word comparison and conditional jump to LABEL
6360 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6361 now a thin wrapper around do_compare_rtx_and_jump. */
6363 static void
6364 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6365 rtx_code_label *label)
6367 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6368 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6369 NULL, label, profile_probability::uninitialized ());